diff --git a/include/aie/Dialect/AIE/IR/AIEOps.td b/include/aie/Dialect/AIE/IR/AIEOps.td index e691edf9117..bfb4c05fedf 100644 --- a/include/aie/Dialect/AIE/IR/AIEOps.td +++ b/include/aie/Dialect/AIE/IR/AIEOps.td @@ -398,7 +398,12 @@ def AIE_CoreOp: AIE_Op<"core", [ let arguments = ( ins Index:$tile, DefaultValuedAttr:$stack_size, + // Deprecated: attach link_with to func.func declarations instead and run + // aie-assign-core-link-files to populate link_files. OptionalAttr:$link_with, + // Populated by aie-assign-core-link-files; consumed by BCF/ldscript emitters + // and the aiecc driver. Specifying both link_with and link_files is an error. + OptionalAttr:$link_files, OptionalAttr:$elf_file, OptionalAttr:$dynamic_objfifo_lowering ); @@ -423,6 +428,18 @@ def AIE_CoreOp: AIE_Op<"core", [ This op has an optional `dynamic_objfifo_lowering` attribute, to finely control whether the objectfifos in this core should be lowered using the dynamic runtime lowering. + **External object files.** The preferred mechanism is to attach a `link_with` + string attribute to each `func.func` declaration for an externally-defined + function, then run the `aie-assign-core-link-files` pass. That pass traces + direct `func.call` edges from each core and writes the aggregated, de-duplicated + list of object file paths into the `link_files` attribute on this op. The + BCF/ldscript emitters and the aiecc driver consume `link_files`. + + The core-level `link_with` attribute is deprecated and kept only for + backward compatibility. It is migrated by `aie-assign-core-link-files` + (its value is folded into `link_files` and then removed). Specifying both + `link_with` and `link_files` on the same CoreOp is a verifier error. 
+ Examples: ``` %tile = aie.tile(1, 1) diff --git a/include/aie/Dialect/AIE/Transforms/AIEPasses.h b/include/aie/Dialect/AIE/Transforms/AIEPasses.h index fbee2c82429..60e56cbfbff 100644 --- a/include/aie/Dialect/AIE/Transforms/AIEPasses.h +++ b/include/aie/Dialect/AIE/Transforms/AIEPasses.h @@ -29,6 +29,8 @@ createAIEAssignBufferAddressesPass(); std::unique_ptr> createAIEAssignBufferAddressesPass( const AIEAssignBufferAddressesOptions &options); +std::unique_ptr> +createAIEAssignCoreLinkFilesPass(); std::unique_ptr> createAIEAssignLockIDsPass(); std::unique_ptr> createAIECanonicalizeDevicePass(); diff --git a/include/aie/Dialect/AIE/Transforms/AIEPasses.td b/include/aie/Dialect/AIE/Transforms/AIEPasses.td index 1ee8bc3f0dd..29e8a0d3b59 100644 --- a/include/aie/Dialect/AIE/Transforms/AIEPasses.td +++ b/include/aie/Dialect/AIE/Transforms/AIEPasses.td @@ -13,6 +13,35 @@ include "mlir/Pass/PassBase.td" +def AIEAssignCoreLinkFiles : Pass<"aie-assign-core-link-files", "DeviceOp"> { + let summary = + "Infer per-core link_files from func-level link_with attributes"; + let description = [{ + Walks each aie.core and collects the set of external object files it needs + by tracing direct func.call edges to func.func declarations that carry a + "link_with" string attribute. The result is stored in the CoreOp's + "link_files" StrArrayAttr. + + Only direct calls (func.call) are resolved. Indirect calls + (func.call_indirect) inside a core body emit a warning and are not + resolved; add a direct func.call to the required func.func declaration + so the pass can trace the dependency. + + Core-level "link_with" (deprecated) is also migrated: its value is + folded into the set and the attribute is removed from the CoreOp. + + func.func declarations that carry "link_with" but are never called from + any core emit a warning; their object files will not appear in any + core's link_files. 
+ }]; + + let constructor = "xilinx::AIE::createAIEAssignCoreLinkFilesPass()"; + let dependentDialects = [ + "mlir::func::FuncDialect", + "xilinx::AIE::AIEDialect", + ]; +} + def AIEAssignBufferAddresses : Pass<"aie-assign-buffer-addresses", "DeviceOp"> { let summary = "Assign memory locations for buffers in each tile"; let description = [{ diff --git a/lib/Dialect/AIE/IR/AIEDialect.cpp b/lib/Dialect/AIE/IR/AIEDialect.cpp index 52d92666316..da56c9a8277 100644 --- a/lib/Dialect/AIE/IR/AIEDialect.cpp +++ b/lib/Dialect/AIE/IR/AIEDialect.cpp @@ -1741,6 +1741,10 @@ LogicalResult CoreOp::verify() { "(consist of exactly one `aie.end` op)."); } } + if (getLinkWith() && getLinkFiles()) + return emitOpError( + "cannot specify both 'link_with' (deprecated) and 'link_files' " + "on the same core; run aie-assign-core-link-files to migrate"); return success(); } diff --git a/lib/Dialect/AIE/Transforms/AIEAssignCoreLinkFiles.cpp b/lib/Dialect/AIE/Transforms/AIEAssignCoreLinkFiles.cpp new file mode 100644 index 00000000000..a0f4d712b6a --- /dev/null +++ b/lib/Dialect/AIE/Transforms/AIEAssignCoreLinkFiles.cpp @@ -0,0 +1,123 @@ +//===- AIEAssignCoreLinkFiles.cpp -------------------------------*- C++ -*-===// +// +// This file is licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// (c) Copyright 2026 Advanced Micro Devices Inc. +// +//===----------------------------------------------------------------------===// +// +// This pass infers the per-core set of external object files required for +// linking by tracing call edges from each core to func.func declarations that +// carry a "link_with" attribute. +// +// After the pass runs, every CoreOp that needs external files will have a +// "link_files" StrArrayAttr containing the (de-duplicated) list of .o paths. 
+// +// Core-level "link_with" (deprecated) is also migrated: its value is added to +// the set and the attribute is removed from the CoreOp. +// +//===----------------------------------------------------------------------===// + +#include "aie/Dialect/AIE/IR/AIEDialect.h" +#define GEN_PASS_DEF_AIEASSIGNCORELINKFILES +#include "aie/Dialect/AIE/Transforms/AIEPasses.h" + +#include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/IR/Builders.h" +#include "mlir/Pass/Pass.h" + +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/SetVector.h" + +#define DEBUG_TYPE "aie-assign-core-link-files" + +using namespace mlir; +using namespace xilinx; +using namespace xilinx::AIE; + +struct AIEAssignCoreLinkFilesPass + : xilinx::AIE::impl::AIEAssignCoreLinkFilesBase< + AIEAssignCoreLinkFilesPass> { + void runOnOperation() override { + DeviceOp device = getOperation(); + // Builder is used only for attribute construction; no ops are inserted. + Builder builder(device.getContext()); + + // Build a map from func name to the object file(s) it requires, sourced + // from the "link_with" string attribute on func.func declarations. + // StringRefs are views into MLIRContext-owned storage and remain valid + // for the entire pass run. + DenseMap> funcToObjs; + for (auto funcOp : device.getOps()) { + if (auto attr = funcOp->getAttrOfType("link_with")) { + funcToObjs[funcOp.getName()].push_back(attr.getValue()); + } + } + + // Tracks which func.func symbols are directly called from at least one + // core; used to warn about link_with-bearing functions that are never + // called and whose object files would otherwise be silently omitted. + llvm::DenseSet usedFuncs; + + // Only direct func.call edges are traced. func.call_indirect ops and + // calls through intermediate wrapper functions are not followed. To + // handle transitive dependencies, attach link_with directly to every + // func.func declaration that a core calls, even thin wrappers. + // TODO: extend to transitive call resolution. 
+    device.walk([&](CoreOp core) {
+      // Object files this core must link against, de-duplicated while
+      // preserving first-insertion order.
+      llvm::SetVector<StringRef> needed;
+
+      // Seed the set with any link_files already present on the core (for
+      // example from an earlier run of this pass). Without this, a second run
+      // would overwrite link_files with only the call-derived entries and
+      // silently drop a file that had been migrated from the deprecated
+      // core-level link_with. On a first run this loop adds nothing.
+      if (auto existing = core.getLinkFiles())
+        for (auto file : existing->getAsRange<StringAttr>())
+          needed.insert(file.getValue());
+
+      // Migrate deprecated core-level attr: warn, consume it, and add to set.
+      // The StringRef stays valid after removeAttr because the attribute
+      // storage is owned by the MLIRContext, not by the op.
+      if (auto lw = core.getLinkWith()) {
+        core.emitWarning(
+            "link_with on aie.core is deprecated; attach link_with to "
+            "the func.func declaration instead");
+        needed.insert(lw.value());
+        core->removeAttr("link_with");
+      }
+
+      // Single walk over the core body: collect required object files and
+      // record called symbols (for the unused-func warning below).
+      core.walk([&](Operation *op) {
+        if (auto call = dyn_cast<func::CallOp>(op)) {
+          // Record every direct callee, whether or not it carries link_with.
+          usedFuncs.insert(call.getCallee());
+          auto it = funcToObjs.find(call.getCallee());
+          if (it != funcToObjs.end())
+            for (StringRef obj : it->second)
+              needed.insert(obj);
+        } else if (auto indCall = dyn_cast<func::CallIndirectOp>(op)) {
+          // The callee of an indirect call is not known here; warn only.
+          indCall.emitWarning(
+              "indirect call in core body — link_with attributes on "
+              "indirectly-called functions are not automatically resolved; "
+              "add a direct func.call to the required func.func declaration "
+              "so that aie-assign-core-link-files can trace the dependency");
+        }
+      });
+
+      // Cores that need no external object files get no link_files attribute.
+      if (!needed.empty()) {
+        // builder is used only for attribute construction; its insertion
+        // point is irrelevant and no ops are inserted.
+        core.setLinkFilesAttr(builder.getStrArrayAttr(needed.getArrayRef()));
+      }
+    });
+
+    // Warn about funcs with link_with that are never called from any core.
+    // Visit the declarations in IR order rather than iterating the DenseMap,
+    // so the warnings come out in a stable, source-like order and no extra
+    // symbol lookup is needed to get back to the op.
+    for (auto funcOp : device.getOps<func::FuncOp>()) {
+      if (!funcOp->getAttrOfType<StringAttr>("link_with") ||
+          usedFuncs.count(funcOp.getName()))
+        continue;
+      funcOp.emitWarning()
+          << "func '" << funcOp.getName()
+          << "' has link_with but is never called from any core; "
+             "its .o file will not be linked";
+    }
+  }
+};
+
+// Factory referenced by the `constructor` field of the pass definition in
+// AIEPasses.td.
+std::unique_ptr<OperationPass<DeviceOp>>
+AIE::createAIEAssignCoreLinkFilesPass() {
+  return std::make_unique<AIEAssignCoreLinkFilesPass>();
+}
diff --git a/lib/Dialect/AIE/Transforms/CMakeLists.txt b/lib/Dialect/AIE/Transforms/CMakeLists.txt index 89ed2ae12df..cfb2c973d9f 100644 --- a/lib/Dialect/AIE/Transforms/CMakeLists.txt +++ b/lib/Dialect/AIE/Transforms/CMakeLists.txt @@ -7,8 +7,9 @@ add_mlir_dialect_library( AIETransforms - AIEAssignBuffers.cpp AIEAssignBufferDescriptorIDs.cpp + AIEAssignBuffers.cpp + AIEAssignCoreLinkFiles.cpp AIEAssignLockIDs.cpp AIEFindFlows.cpp AIEPathFinder.cpp diff --git a/lib/Targets/AIETargetBCF.cpp b/lib/Targets/AIETargetBCF.cpp index 2e656d0f164..4148841a53d 100644 --- a/lib/Targets/AIETargetBCF.cpp +++ b/lib/Targets/AIETargetBCF.cpp @@ -139,9 +139,18 @@ LogicalResult AIETranslateToBCF(ModuleOp module, raw_ostream &output, << utohexstr(addressSpaceSize - dataMemoryEnd) << " // And everything else the core can't see\n"; - if (tile.getCoreOp() && tile.getCoreOp().getLinkWith()) - output << "_include _file " - << tile.getCoreOp().getLinkWith().value().str() << "\n"; + if (auto coreOp = tile.getCoreOp()) { + if (auto filesAttr = coreOp.getLinkFiles()) { + // Canonical path: link_files populated by aie-assign-core-link-files. + for (auto f : filesAttr->getAsRange()) + output << "_include _file " << f.getValue() << "\n"; + } else if (coreOp.getLinkWith()) { + // Deprecated fallback: core-level link_with was not migrated by + // aie-assign-core-link-files (e.g., the pass was not run). 
+ output << "_include _file " << coreOp.getLinkWith().value().str() + << "\n"; + } + } output << "_resolve _main core_" << tile.getCol() << "_" << tile.getRow() << "\n"; } diff --git a/lib/Targets/AIETargetLdScript.cpp b/lib/Targets/AIETargetLdScript.cpp index d94ae5a0e03..1cf87059c3f 100644 --- a/lib/Targets/AIETargetLdScript.cpp +++ b/lib/Targets/AIETargetLdScript.cpp @@ -175,10 +175,19 @@ SECTIONS targetModel.getMemEastBaseAddress(), std::string("east")); output << " .bss : { *(.bss*) } > data\n"; + // INPUT() directives must follow the closing brace of SECTIONS; placing + // them inside SECTIONS is invalid linker script syntax. output << "}\n"; if (auto coreOp = tile.getCoreOp()) { - if (auto fileAttr = coreOp.getLinkWith()) + if (auto filesAttr = coreOp.getLinkFiles()) { + // Canonical path: link_files populated by aie-assign-core-link-files. + for (auto f : filesAttr->getAsRange()) + output << "INPUT(" << f.getValue() << ")\n"; + } else if (auto fileAttr = coreOp.getLinkWith()) { + // Deprecated fallback: core-level link_with was not migrated by + // aie-assign-core-link-files (e.g., the pass was not run). 
output << "INPUT(" << fileAttr.value().str() << ")\n"; + } output << "PROVIDE(main = core_" << tile.getCol() << "_" << tile.getRow() << ");\n"; diff --git a/mlir_exercises/tutorial-8/aie.mlir b/mlir_exercises/tutorial-8/aie.mlir index 9bd2b304635..be23b4da763 100644 --- a/mlir_exercises/tutorial-8/aie.mlir +++ b/mlir_exercises/tutorial-8/aie.mlir @@ -29,8 +29,8 @@ module @tutorial_8 { // declare 2 kernel functions name "extern_kernel1" and "extern_kernel2" // with one positional function argument, in this case mapped to a memref - func.func private @extern_kernel1() -> () - func.func private @extern_kernel2(%b: memref<256xi32>) -> () + func.func private @extern_kernel1() -> () attributes {link_with = "kernel1.o"} + func.func private @extern_kernel2(%b: memref<256xi32>) -> () attributes {link_with = "kernel2.o"} // Declare shared lock (belonging to tile(2,4), lock ID=1) // %lock13_1 = aie.lock(%tile13, 1) { sym_name = "lock_13_1" } @@ -49,7 +49,7 @@ module @tutorial_8 { // aie.use_lock(%lock13_1, "Release", 1) aie.end - } { link_with="kernel1.o" } + } // Define core algorithm for tile(2,4) which reads value set by tile(1,4) // buf[5] = buf[3] + 100 @@ -74,6 +74,6 @@ module @tutorial_8 { // This release means our 2nd core is done aie.use_lock(%lock13_2, "Release", 1) aie.end - } { link_with="kernel2.o" } + } } diff --git a/mlir_exercises/tutorial-8/answers/aie.mlir b/mlir_exercises/tutorial-8/answers/aie.mlir index 00e484b2182..4ea44a0446c 100755 --- a/mlir_exercises/tutorial-8/answers/aie.mlir +++ b/mlir_exercises/tutorial-8/answers/aie.mlir @@ -30,8 +30,8 @@ module @tutorial_8 { // declare 2 kernel functions name "extern_kernel1" and "extern_kernel2" // with one positional function argument, in this case mapped to a memref - func.func private @extern_kernel1() -> () - func.func private @extern_kernel2(%b: memref<256xi32>) -> () + func.func private @extern_kernel1() -> () attributes {link_with = "kernel1.o"} + func.func private @extern_kernel2(%b: memref<256xi32>) 
-> () attributes {link_with = "kernel2.o"} // Declare shared lock (belonging to tile(2,4), lock ID=1), do not change symbolic name to allow reuse of test.cpp @@ -52,7 +52,7 @@ module @tutorial_8 { // aie.use_lock(%lock23_1, "Release", 1) aie.end - } { link_with="kernel2.o" } + } // Define core algorithm for tile(2,4) which reads value set by tile(1,4) // buf[5] = buf[3] + 100 @@ -73,6 +73,6 @@ module @tutorial_8 { // aie.use_lock(%lock24_1, "Release", 0) aie.end - } { link_with="kernel1.o" } + } } diff --git a/mlir_exercises/tutorial-9/README.md b/mlir_exercises/tutorial-9/README.md index aa63c1da39d..394b76bc9ac 100755 --- a/mlir_exercises/tutorial-9/README.md +++ b/mlir_exercises/tutorial-9/README.md @@ -15,14 +15,14 @@ MLIR gives us the ability to leverage different dialects such as [arith](https:/ Specifically, to support external functions, we use the operators `func.func` and `func.call` as follows: ``` -func.func private @extern_kernel(%b: memref<256xi32>) -> () +func.func private @extern_kernel(%b: memref<256xi32>) -> () attributes {link_with = "kernel.o"} %core14 = AIE.core(%tile14) { func.call @extern_kernel(%buf) : (memref<256xi32>) -> () AIE.end -} { link_with="kernel.o"} +} ``` -In this MLIR code snippet, we see that we first call `func.func` to declare a private function whose function signature matches that of the AIE C/C++ function. The function name after the @ (e.g. `@external_kernel`) should match the C function name and the number of arguments should match the number of C function arguments. C++ name mangling is not supported. Argument types are converted according to the MLIR ['bare pointer' calling convention](https://mlir.llvm.org/docs/TargetLLVMIR/#bare-pointer-calling-convention-for-ranked-memref) (see below). +In this MLIR code snippet, we see that we first call `func.func` to declare a private function whose function signature matches that of the AIE C/C++ function. The function name after the @ (e.g. 
`@extern_kernel`) should match the C function name and the number of arguments should match the number of C function arguments. C++ name mangling is not supported. Argument types are converted according to the MLIR ['bare pointer' calling convention](https://mlir.llvm.org/docs/TargetLLVMIR/#bare-pointer-calling-convention-for-ranked-memref) (see below). | MLIR type | C type | | ----------- | ----------- | @@ -31,9 +31,9 @@ In this MLIR code snippet, we see that we first call `func.func` to declare a pr | Memref | C pointer | | index | int64_t | -Then, within the `AIE.core` operator, we use `func.call` to call the previously defined function from within our core, being sure to pass the appropriate function arguments. In this case, we pass in the the `AIE.buffer` `%buf`. +Then, within the `AIE.core` operator, we use `func.call` to call the previously defined function from within our core, being sure to pass the appropriate function arguments. In this case, we pass in the `AIE.buffer` `%buf`. -The final step is to tell our tools where to look for the object code that the function whose name we defined in `func.func`/ `func.call`. Using the additional operator definition `link_with="kernel.o"`, we point to the file `kernel.o` in the current directory and link it in to create the final kernel object file. +The final step is to tell our tools where to look for the object code for the function whose name we defined in `func.func`/ `func.call`. Using the `link_with` attribute on the `func.func` declaration (e.g. `attributes {link_with = "kernel.o"}`), we point to the file `kernel.o` in the current directory and link it in to create the final kernel object file. > Note that this allows us to call the function multiple times within the `AIE.core` or even separate functions in the same `AIE.core` if they are both defined within the single linked object file. 
## Kernel object file generation diff --git a/mlir_exercises/tutorial-9/aie.mlir b/mlir_exercises/tutorial-9/aie.mlir index 4544eb57168..31c02ce4a64 100644 --- a/mlir_exercises/tutorial-9/aie.mlir +++ b/mlir_exercises/tutorial-9/aie.mlir @@ -35,7 +35,7 @@ module @tutorial_9 { // declare kernel function name "extern_kernel" with one positional // function argument, in this case mapped to a memref - func.func private @extern_kernel(%b: memref<256xi32>) -> () + func.func private @extern_kernel(%b: memref<256xi32>) -> () attributes {link_with = "kernel.o"} // Define the algorithm for the core of tile(1, 4) // buf[3] = 14 @@ -52,6 +52,6 @@ module @tutorial_9 { // by acquiring this lock (with value 1). aie.use_lock(%lock14_0, "Release", 1) aie.end - } { link_with="kernel.o" } // indicate kernel object name used by this core + } // kernel object is named by link_with on the @extern_kernel declaration } diff --git a/mlir_exercises/tutorial-9/answers/aie_matmul.mlir b/mlir_exercises/tutorial-9/answers/aie_matmul.mlir index 6ab5b0aa15d..d4c898a0eba 100644 --- a/mlir_exercises/tutorial-9/answers/aie_matmul.mlir +++ b/mlir_exercises/tutorial-9/answers/aie_matmul.mlir @@ -38,7 +38,7 @@ module @tutorial_9 { // declare kernel function name "extern_kernel" with one positional // function argument, in this case mapped to a memref - func.func private @extern_kernel(%a: memref<32xi32>, %b: memref<32xi32>, %acc: memref<32xi32>, %c: memref<32xi32>) -> () + func.func private @extern_kernel(%a: memref<32xi32>, %b: memref<32xi32>, %acc: memref<32xi32>, %c: memref<32xi32>) -> () attributes {link_with = "kernel_matmul.o"} // Define the algorithm for the core of tile(1, 4) // buf[3] = 14 @@ -55,6 +55,6 @@ module @tutorial_9 { // by acquiring this lock (with value 1). 
aie.use_lock(%lock14_0, "Release", 1) aie.end - } { link_with="kernel_matmul.o" } // indicate kernel object name used by this core + } // kernel object is named by link_with on the @extern_kernel declaration } diff --git a/programming_examples/basic/event_trace/aie_trace.mlir b/programming_examples/basic/event_trace/aie_trace.mlir index 28a6ef5b3eb..c6bc449243f 100644 --- a/programming_examples/basic/event_trace/aie_trace.mlir +++ b/programming_examples/basic/event_trace/aie_trace.mlir @@ -20,7 +20,7 @@ module { aie.device(NPUDEVICE) { // External kernel function declaration - func.func private @vector_scalar_mul_aie_scalar(memref<1024xi32>, memref<1024xi32>, memref<1xi32>, i32) + func.func private @vector_scalar_mul_aie_scalar(memref<1024xi32>, memref<1024xi32>, memref<1xi32>, i32) attributes {link_with = "scale.o"} // Tile declarations %shim_noc_tile_0_0 = aie.tile(0, 0) @@ -55,7 +55,7 @@ module { aie.objectfifo.release @infactor(Consume, 1) } aie.end - } {link_with = "scale.o"} + } // ======================================================================== // TRACE CONFIGURATION diff --git a/programming_examples/basic/event_trace/aie_trace.py b/programming_examples/basic/event_trace/aie_trace.py index 652412fa98c..3b4bc6f7b9a 100644 --- a/programming_examples/basic/event_trace/aie_trace.py +++ b/programming_examples/basic/event_trace/aie_trace.py @@ -44,6 +44,7 @@ def device_body(): scale = external_func( "vector_scalar_mul_aie_scalar", inputs=[tile_ty, tile_ty, scalar_ty, np.int32], + link_with="scale.o", ) # Tile declarations @@ -56,7 +57,7 @@ def device_body(): of_out = object_fifo("out", tile_0_2, shim_noc_tile_0_0, 2, tile_ty) # Core computation - @core(tile_0_2, "scale.o") + @core(tile_0_2) def core_body(): for _ in range_(sys.maxsize): elem_factor = of_factor.acquire(ObjectFifoPort.Consume, 1) diff --git a/programming_examples/basic/matrix_multiplication/cascade/cascade.py b/programming_examples/basic/matrix_multiplication/cascade/cascade.py index 0d86477e8ae..bad349d14f1 100644 
--- a/programming_examples/basic/matrix_multiplication/cascade/cascade.py +++ b/programming_examples/basic/matrix_multiplication/cascade/cascade.py @@ -136,18 +136,25 @@ def device_body(): C_l1_ty = np.ndarray[(m, n), np.dtype[dtype_out]] # AIE Core Function declarations - zero_scalar = external_func(f"zero_scalar_{dtype_out_str}", inputs=[C_l1_ty]) + zero_scalar = external_func( + f"zero_scalar_{dtype_out_str}", + inputs=[C_l1_ty], + link_with=f"mm_{m}x{k}x{n}.o", + ) matmul_scalar_cascade_get_only = external_func( f"matmul_scalar_cascade_get_only_{dtype_in_str}_{dtype_out_str}", inputs=[A_l1_ty, B_l1_ty, C_l1_ty], + link_with=f"mm_{m}x{k}x{n}.o", ) matmul_scalar_cascade_put_only = external_func( f"matmul_scalar_cascade_put_only_{dtype_in_str}_{dtype_out_str}", inputs=[A_l1_ty, B_l1_ty, C_l1_ty], + link_with=f"mm_{m}x{k}x{n}.o", ) matmul_scalar_cascade_put_get = external_func( f"matmul_scalar_cascade_put_get_{dtype_in_str}_{dtype_out_str}", inputs=[A_l1_ty, B_l1_ty, C_l1_ty], + link_with=f"mm_{m}x{k}x{n}.o", ) # Tile declarations as tile[row][col] @@ -278,7 +285,7 @@ def device_body(): for row in range(n_aie_rows): for col in range(n_aie_cols): - @core(core_tiles[row][col], f"mm_{m}x{k}x{n}.o") + @core(core_tiles[row][col]) def core_body(): for _ in range_(0xFFFFFFFF): loop = ( diff --git a/programming_examples/basic/matrix_multiplication/cascade/cascade_placed.py b/programming_examples/basic/matrix_multiplication/cascade/cascade_placed.py index e36d7b169b9..361d3920b0a 100644 --- a/programming_examples/basic/matrix_multiplication/cascade/cascade_placed.py +++ b/programming_examples/basic/matrix_multiplication/cascade/cascade_placed.py @@ -163,18 +163,25 @@ def device_body(): C_l1_ty = np.ndarray[(m, n), np.dtype[dtype_out]] # AIE Core Function declarations - zero_scalar = external_func(f"zero_scalar_{dtype_out_str}", inputs=[C_l1_ty]) + zero_scalar = external_func( + f"zero_scalar_{dtype_out_str}", + inputs=[C_l1_ty], + link_with=f"mm_{m}x{k}x{n}.o", + ) 
matmul_scalar_cascade_get_only = external_func( f"matmul_scalar_cascade_get_only_{dtype_in_str}_{dtype_out_str}", inputs=[A_l1_ty, B_l1_ty, C_l1_ty], + link_with=f"mm_{m}x{k}x{n}.o", ) matmul_scalar_cascade_put_only = external_func( f"matmul_scalar_cascade_put_only_{dtype_in_str}_{dtype_out_str}", inputs=[A_l1_ty, B_l1_ty, C_l1_ty], + link_with=f"mm_{m}x{k}x{n}.o", ) matmul_scalar_cascade_put_get = external_func( f"matmul_scalar_cascade_put_get_{dtype_in_str}_{dtype_out_str}", inputs=[A_l1_ty, B_l1_ty, C_l1_ty], + link_with=f"mm_{m}x{k}x{n}.o", ) # Tile declarations as tile[row][col] @@ -305,7 +312,7 @@ def device_body(): for row in range(n_aie_rows): for col in range(n_aie_cols): - @core(core_tiles[row][col], f"mm_{m}x{k}x{n}.o") + @core(core_tiles[row][col]) def core_body(): for _ in range_(0xFFFFFFFF): loop = ( diff --git a/programming_examples/basic/matrix_multiplication/matrix_vector/matrix_vector.py b/programming_examples/basic/matrix_multiplication/matrix_vector/matrix_vector.py index 5d8f4463b3c..138d729736a 100644 --- a/programming_examples/basic/matrix_multiplication/matrix_vector/matrix_vector.py +++ b/programming_examples/basic/matrix_multiplication/matrix_vector/matrix_vector.py @@ -57,10 +57,15 @@ def device_body(): # AIE Core Function declarations func_type = "vectorized" if vectorized else "scalar" - zero = external_func(f"zero_{func_type}_{dtype_out_str}", inputs=[outC_ty]) + zero = external_func( + f"zero_{func_type}_{dtype_out_str}", + inputs=[outC_ty], + link_with=f"mv_{m}x{k}.o", + ) matvec = external_func( f"matvec_{func_type}_{dtype_in_str}_{dtype_out_str}", inputs=[A_ty, inB_ty, outC_ty], + link_with=f"mv_{m}x{k}.o", ) # Tile declarations @@ -122,7 +127,7 @@ def device_body(): # Set up compute tiles for i in range(n_cores): # Compute tile i - @core(cores[i], f"mv_{m}x{k}.o") + @core(cores[i]) def core_body(): for _ in range_(0xFFFFFFFF): elem_out = outC_fifos[i].acquire( diff --git 
a/programming_examples/basic/matrix_multiplication/matrix_vector/matrix_vector_placed.py b/programming_examples/basic/matrix_multiplication/matrix_vector/matrix_vector_placed.py index 3c7506dc897..0ae9a97216c 100644 --- a/programming_examples/basic/matrix_multiplication/matrix_vector/matrix_vector_placed.py +++ b/programming_examples/basic/matrix_multiplication/matrix_vector/matrix_vector_placed.py @@ -57,10 +57,15 @@ def device_body(): # AIE Core Function declarations func_type = "vectorized" if vectorized else "scalar" - zero = external_func(f"zero_{func_type}_{dtype_out_str}", inputs=[outC_ty]) + zero = external_func( + f"zero_{func_type}_{dtype_out_str}", + inputs=[outC_ty], + link_with=f"mv_{m}x{k}.o", + ) matvec = external_func( f"matvec_{func_type}_{dtype_in_str}_{dtype_out_str}", inputs=[A_ty, inB_ty, outC_ty], + link_with=f"mv_{m}x{k}.o", ) # Tile declarations @@ -122,7 +127,7 @@ def device_body(): # Set up compute tiles for i in range(n_cores): # Compute tile i - @core(cores[i], f"mv_{m}x{k}.o") + @core(cores[i]) def core_body(): for _ in range_(0xFFFFFFFF): elem_out = outC_fifos[i].acquire( diff --git a/programming_examples/basic/matrix_multiplication/single_core/single_core.py b/programming_examples/basic/matrix_multiplication/single_core/single_core.py index 0a9ccbbc2da..375a9f7b9fe 100644 --- a/programming_examples/basic/matrix_multiplication/single_core/single_core.py +++ b/programming_examples/basic/matrix_multiplication/single_core/single_core.py @@ -15,7 +15,6 @@ from aie.iron.controlflow import range_ from aie.iron.dtype import str_to_dtype - microkernel_mac_dim_map = { "npu": { "bf16": (4, 8, 4), @@ -146,11 +145,16 @@ def device_body(): # AIE Core Function declarations func_type = "" if vectorized else "scalar_" - zero = external_func(f"zero_{func_type}{dtype_out_str}", inputs=[c_ty]) + zero = external_func( + f"zero_{func_type}{dtype_out_str}", + inputs=[c_ty], + link_with=f"mm_{m}x{k}x{n}.o", + ) matmul_func_name = 
f"matmul_{func_type}{dtype_in_str}_{dtype_out_str}" matmul = external_func( matmul_func_name, inputs=[a_ty, b_ty, c_ty], + link_with=f"mm_{m}x{k}x{n}.o", ) # Tile declarations @@ -244,7 +248,7 @@ def device_body(): # Exceding the stack size leads to wrong results from the kernel, but no error is triggered. # Stack usage can be checked as explained here: # https://github.com/Xilinx/llvm-aie/issues/487#issuecomment-2969438585 - @core(compute_tile2, f"mm_{m}x{k}x{n}.o", stack_size=0xD00) + @core(compute_tile2, stack_size=0xD00) def core_body(): for _ in range_(0xFFFFFFFF): for _ in range_(tiles) if tiles > 1 else range(1): # issue #1547 diff --git a/programming_examples/basic/matrix_multiplication/single_core/single_core_placed.py b/programming_examples/basic/matrix_multiplication/single_core/single_core_placed.py index 373a2996238..857bc02fd08 100644 --- a/programming_examples/basic/matrix_multiplication/single_core/single_core_placed.py +++ b/programming_examples/basic/matrix_multiplication/single_core/single_core_placed.py @@ -19,7 +19,6 @@ from aie.iron.controlflow import range_ from aie.iron.dtype import str_to_dtype - microkernel_mac_dim_map = { "npu": { "bf16": (4, 8, 4), @@ -175,11 +174,16 @@ def device_body(): # AIE Core Function declarations func_type = "" if vectorized else "scalar_" - zero = external_func(f"zero_{func_type}{dtype_out_str}", inputs=[c_ty]) + zero = external_func( + f"zero_{func_type}{dtype_out_str}", + inputs=[c_ty], + link_with=f"mm_{m}x{k}x{n}.o", + ) matmul_func_name = f"matmul_{func_type}{dtype_in_str}_{dtype_out_str}" matmul = external_func( matmul_func_name, inputs=[a_ty, b_ty, c_ty], + link_with=f"mm_{m}x{k}x{n}.o", ) # Tile declarations @@ -269,7 +273,7 @@ def device_body(): # Set up compute tiles # Compute tile 2 - @core(compute_tile2, f"mm_{m}x{k}x{n}.o", stack_size=0xD00) + @core(compute_tile2, stack_size=0xD00) def core_body(): for _ in range_(0xFFFFFFFF): for _ in range_(tiles) if tiles > 1 else range(1): # issue #1547 diff 
--git a/programming_examples/basic/matrix_multiplication/whole_array/whole_array.py b/programming_examples/basic/matrix_multiplication/whole_array/whole_array.py index 394dc6165a8..1ce32e832a2 100644 --- a/programming_examples/basic/matrix_multiplication/whole_array/whole_array.py +++ b/programming_examples/basic/matrix_multiplication/whole_array/whole_array.py @@ -16,7 +16,6 @@ from aie.iron import str_to_dtype - microkernel_mac_dim_map = { "npu": { "bf16": (4, 8, 4), @@ -222,10 +221,15 @@ def device_body(): # AIE Core Function declarations scalar_suffix = "_scalar" if use_scalar else "" - zero = external_func(f"zero{scalar_suffix}_{dtype_out_str}", inputs=[C_l1_ty]) + zero = external_func( + f"zero{scalar_suffix}_{dtype_out_str}", + inputs=[C_l1_ty], + link_with=f"mm_{m}x{k}x{n}.o", + ) matmul = external_func( f"matmul{scalar_suffix}_{dtype_in_str}_{dtype_out_str}", inputs=[A_l1_ty, B_l1_ty, C_l1_ty], + link_with=f"mm_{m}x{k}x{n}.o", ) # Tile declarations as tile[row][col] @@ -397,7 +401,7 @@ def device_body(): # Exceding the stack size leads to wrong results from the kernel, but no error is triggered. 
# Stack usage can be checked as explained here: # https://github.com/Xilinx/llvm-aie/issues/487#issuecomment-2969438585 - @core(core_tiles[row][col], f"mm_{m}x{k}x{n}.o", stack_size=0xD00) + @core(core_tiles[row][col], stack_size=0xD00) def core_body(): for _ in range_(0xFFFFFFFF): loop = ( diff --git a/programming_examples/basic/matrix_multiplication/whole_array/whole_array_placed.py b/programming_examples/basic/matrix_multiplication/whole_array/whole_array_placed.py index 95703a66f79..85c6467f8c6 100644 --- a/programming_examples/basic/matrix_multiplication/whole_array/whole_array_placed.py +++ b/programming_examples/basic/matrix_multiplication/whole_array/whole_array_placed.py @@ -15,7 +15,6 @@ from aie.helpers.taplib import TensorTiler2D, TensorAccessSequence from aie.iron import str_to_dtype - microkernel_mac_dim_map = { "npu": { "bf16": (4, 8, 4), @@ -211,11 +210,16 @@ def device_body(): C_l1_ty = np.ndarray[(m, n), np.dtype[dtype_out]] # AIE Core Function declarations - zero = external_func(f"zero_{dtype_out_str}", inputs=[C_l1_ty]) + zero = external_func( + f"zero_{dtype_out_str}", + inputs=[C_l1_ty], + link_with=f"mm_{m}x{k}x{n}.o", + ) matmul_vectorized_func_name = f"matmul_{dtype_in_str}_{dtype_out_str}" matmul = external_func( matmul_vectorized_func_name, inputs=[A_l1_ty, B_l1_ty, C_l1_ty], + link_with=f"mm_{m}x{k}x{n}.o", ) # Tile declarations as tile[row][col] @@ -364,7 +368,7 @@ def device_body(): for row in range(n_aie_rows): for col in range(n_aie_cols): - @core(core_tiles[row][col], f"mm_{m}x{k}x{n}.o", stack_size=0xD00) + @core(core_tiles[row][col], stack_size=0xD00) def core_body(): for _ in range_(0xFFFFFFFF): loop = ( diff --git a/programming_examples/basic/packet_switch/aie_add_placed.py b/programming_examples/basic/packet_switch/aie_add_placed.py index 3386c74b127..ceb88f5e53f 100644 --- a/programming_examples/basic/packet_switch/aie_add_placed.py +++ b/programming_examples/basic/packet_switch/aie_add_placed.py @@ -36,8 +36,8 @@ def 
device_body(): # Size of input vector + 4 bytes for the packet header (used in memtile_0_1 DMA logic) vector_with_packet_ty = np.ndarray[(in_out_size + 4,), in_out_ty] - add_func = external_func("add", [vector_ty, vector_ty]) - mult_func = external_func("mul", [vector_ty, vector_ty]) + add_func = external_func("add", [vector_ty, vector_ty], link_with="add_mul.o") + mult_func = external_func("mul", [vector_ty, vector_ty], link_with="add_mul.o") ShimTile_0_0 = tile(0, 0) MemTile_0_1 = tile(0, 1) @@ -159,7 +159,7 @@ def device_body(): ) # core_0_2 compute - @core(CT_0_2, "add_mul.o") + @core(CT_0_2) def core_body(): for _ in range_(sys.maxsize): # Acquire locks to read core02_buff_in and write core02_buff_out @@ -193,7 +193,7 @@ def m(block): EndOp() # core_0_3 compute - @core(CT_0_3, "add_mul.o") + @core(CT_0_3) def core_body(): for _ in range_(sys.maxsize): # Acquire locks to read core03_buff_in and write core03_buff_out diff --git a/programming_examples/basic/packet_switch/aie_mul_placed.py b/programming_examples/basic/packet_switch/aie_mul_placed.py index 24f304ed4cd..2a03fdc8c23 100644 --- a/programming_examples/basic/packet_switch/aie_mul_placed.py +++ b/programming_examples/basic/packet_switch/aie_mul_placed.py @@ -36,8 +36,8 @@ def device_body(): # Size of input vector + 4 bytes for the packet header (used in memtile_0_1 DMA logic) vector_with_packet_ty = np.ndarray[(in_out_size + 4,), in_out_ty] - add_func = external_func("add", [vector_ty, vector_ty]) - mult_func = external_func("mul", [vector_ty, vector_ty]) + add_func = external_func("add", [vector_ty, vector_ty], link_with="add_mul.o") + mult_func = external_func("mul", [vector_ty, vector_ty], link_with="add_mul.o") ShimTile_0_0 = tile(0, 0) MemTile_0_1 = tile(0, 1) @@ -159,7 +159,7 @@ def device_body(): ) # core_0_2 compute - @core(CT_0_2, "add_mul.o") + @core(CT_0_2) def core_body(): for _ in range_(sys.maxsize): # Acquire locks to read core02_buff_in and write core02_buff_out @@ -193,7 +193,7 @@ def 
m(block): EndOp() # core_0_3 compute - @core(CT_0_3, "add_mul.o") + @core(CT_0_3) def core_body(): for _ in range_(sys.maxsize): # Acquire locks to read core03_buff_in and write core03_buff_out diff --git a/programming_examples/basic/passthrough_kernel/passthrough_kernel_placed.py b/programming_examples/basic/passthrough_kernel/passthrough_kernel_placed.py index a9fe504cced..aa752706e56 100644 --- a/programming_examples/basic/passthrough_kernel/passthrough_kernel_placed.py +++ b/programming_examples/basic/passthrough_kernel/passthrough_kernel_placed.py @@ -36,7 +36,9 @@ def device_body(): # AIE Core Function declarations passThroughLine = external_func( - "passThroughLine", inputs=[line_ty, line_ty, np.int32] + "passThroughLine", + inputs=[line_ty, line_ty, np.int32], + link_with="passThrough.cc.o", ) # Tile declarations @@ -55,7 +57,7 @@ def device_body(): # Set up compute tiles # Compute tile 2 - @core(ComputeTile2, "passThrough.cc.o") + @core(ComputeTile2) def core_body(): for _ in range_(sys.maxsize): elemOut = of_out.acquire(ObjectFifoPort.Produce, 1) diff --git a/programming_examples/basic/row_wise_bias_add/row_wise_bias_add_placed.py b/programming_examples/basic/row_wise_bias_add/row_wise_bias_add_placed.py index bc2acbdb574..d2ec2133ab4 100644 --- a/programming_examples/basic/row_wise_bias_add/row_wise_bias_add_placed.py +++ b/programming_examples/basic/row_wise_bias_add/row_wise_bias_add_placed.py @@ -26,7 +26,9 @@ def device_body(): bias_ty = np.ndarray[(n,), np.dtype[np.float32]] kernel_func = external_func( - f"row_wise_bias_add_f32_f32", inputs=[tensor_ty, bias_ty, tensor_ty] + f"row_wise_bias_add_f32_f32", + inputs=[tensor_ty, bias_ty, tensor_ty], + link_with="kernel.o", ) shim_tile = tile(0, 0) @@ -36,7 +38,7 @@ def device_body(): bias_fifo = object_fifo("bias_fifo", shim_tile, compute_tile, 2, bias_ty) out_fifo = object_fifo("out_fifo", compute_tile, shim_tile, 2, tensor_ty) - @core(compute_tile, "kernel.o") + @core(compute_tile) def core_body(): 
for _ in range_(0xFFFFFFFF): for _ in range_(N // n): diff --git a/programming_examples/basic/vector_exp/vector_exp_placed.py b/programming_examples/basic/vector_exp/vector_exp_placed.py index 11a1a711d09..cfb87b39925 100644 --- a/programming_examples/basic/vector_exp/vector_exp_placed.py +++ b/programming_examples/basic/vector_exp/vector_exp_placed.py @@ -60,7 +60,9 @@ def device_body(): # AIE Core Function declarations - exp_bf16_1024 = external_func("exp_bf16_1024", inputs=[tile_ty, tile_ty]) + exp_bf16_1024 = external_func( + "exp_bf16_1024", inputs=[tile_ty, tile_ty], link_with="kernels.a" + ) # Tile declarations ShimTile = tile(0, 0) @@ -105,7 +107,7 @@ def device_body(): # Compute tile bodies for i in range(n_cores): # Compute tile i - @core(cores[i], "kernels.a") + @core(cores[i]) def core_body(): for _ in range_(0xFFFFFFFF): for _ in range_(tiles): diff --git a/programming_examples/basic/vector_reduce_add/vector_reduce_add_placed.py b/programming_examples/basic/vector_reduce_add/vector_reduce_add_placed.py index 870e47b88b4..c6d84bed1b0 100644 --- a/programming_examples/basic/vector_reduce_add/vector_reduce_add_placed.py +++ b/programming_examples/basic/vector_reduce_add/vector_reduce_add_placed.py @@ -38,7 +38,9 @@ def device_body(): # AIE Core Function declarations reduce_add_vector = external_func( - "reduce_add_vector", inputs=[in_ty, out_ty, np.int32] + "reduce_add_vector", + inputs=[in_ty, out_ty, np.int32], + link_with="reduce_add.cc.o", ) # Tile declarations @@ -52,7 +54,7 @@ def device_body(): # Set up compute tiles # Compute tile 2 - @core(ComputeTile2, "reduce_add.cc.o") + @core(ComputeTile2) def core_body(): for _ in range_(0xFFFFFFFF): elem_out = of_out.acquire(ObjectFifoPort.Produce, 1) diff --git a/programming_examples/basic/vector_reduce_max/multi_column_designs/row_wise_vector_reduce_max_placed.py b/programming_examples/basic/vector_reduce_max/multi_column_designs/row_wise_vector_reduce_max_placed.py index 502e3fff26a..de4bea88913 100644 
--- a/programming_examples/basic/vector_reduce_max/multi_column_designs/row_wise_vector_reduce_max_placed.py +++ b/programming_examples/basic/vector_reduce_max/multi_column_designs/row_wise_vector_reduce_max_placed.py @@ -53,9 +53,15 @@ def device_body(): # AIE Core Function declarations suffix = "_bfloat16" if dtype_str == "bf16" else "" reduce_max_vector = external_func( - f"reduce_max_vector{suffix}", [op_ty, out_ty, np.int32] + f"reduce_max_vector{suffix}", + [op_ty, out_ty, np.int32], + link_with="reduce_max.cc.o", + ) + compute_max = external_func( + f"compute_max{suffix}", + [out_ty, out_ty, out_ty], + link_with="reduce_max.cc.o", ) - compute_max = external_func(f"compute_max{suffix}", [out_ty, out_ty, out_ty]) min_val = ( np.array([bfloat16(float("-inf"))], dtype=dtype) if dtype_str == "bf16" @@ -155,7 +161,7 @@ def device_body(): initial_value=min_val, ) - @core(cores[i], "reduce_max.cc.o") + @core(cores[i]) def core_body(): elem_out = out_fifos[i].acquire(ObjectFifoPort.Produce, 1) for _ in range_(num_iter): diff --git a/programming_examples/basic/vector_reduce_max/single_column_designs/vector_reduce_max_chained_placed.py b/programming_examples/basic/vector_reduce_max/single_column_designs/vector_reduce_max_chained_placed.py index 5cc0e56eff8..3b2d9fa037d 100644 --- a/programming_examples/basic/vector_reduce_max/single_column_designs/vector_reduce_max_chained_placed.py +++ b/programming_examples/basic/vector_reduce_max/single_column_designs/vector_reduce_max_chained_placed.py @@ -46,10 +46,14 @@ def device_body(): # AIE Core Function declarations suffix = "_bfloat16" if dtype_str == "bf16" else "" reduce_max_vector = external_func( - f"reduce_max_vector{suffix}", inputs=[op_ty, out_ty, np.int32] + f"reduce_max_vector{suffix}", + inputs=[op_ty, out_ty, np.int32], + link_with="reduce_max.cc.o", ) compute_max = external_func( - f"compute_max{suffix}", inputs=[out_ty, out_ty, out_ty] + f"compute_max{suffix}", + inputs=[out_ty, out_ty, out_ty], + 
link_with="reduce_max.cc.o", ) min_val = ( np.array([bfloat16(float("-inf"))], dtype=dtype) @@ -116,7 +120,7 @@ def device_body(): ) if i == n_cores - 1: - @core(cores[i], "reduce_max.cc.o") + @core(cores[i]) def core_body(): elem_out = out_fifos[i].acquire(ObjectFifoPort.Produce, 1) for _ in range_(num_iter): @@ -129,7 +133,7 @@ def core_body(): else: - @core(cores[i], "reduce_max.cc.o") + @core(cores[i]) def core_body(): for _ in range_(num_iter): elem_in = in_fifos[i].acquire(ObjectFifoPort.Consume, 1) diff --git a/programming_examples/basic/vector_reduce_max/single_column_designs/vector_reduce_max_memtile_placed.py b/programming_examples/basic/vector_reduce_max/single_column_designs/vector_reduce_max_memtile_placed.py index 96bc7b16e24..f941906caac 100644 --- a/programming_examples/basic/vector_reduce_max/single_column_designs/vector_reduce_max_memtile_placed.py +++ b/programming_examples/basic/vector_reduce_max/single_column_designs/vector_reduce_max_memtile_placed.py @@ -48,13 +48,19 @@ def device_body(): suffix = "_bfloat16" if dtype_str == "bf16" else "" reduce_max_vector = external_func( - f"reduce_max_vector{suffix}", inputs=[op_ty, out_ty, np.int32] + f"reduce_max_vector{suffix}", + inputs=[op_ty, out_ty, np.int32], + link_with="reduce_max.cc.o", ) reduce_max_scalar = external_func( - f"reduce_max_scalar{suffix}", inputs=[int_ty, out_ty, np.int32] + f"reduce_max_scalar{suffix}", + inputs=[int_ty, out_ty, np.int32], + link_with="reduce_max.cc.o", ) compute_max = external_func( - f"compute_max{suffix}", inputs=[out_ty, out_ty, out_ty] + f"compute_max{suffix}", + inputs=[out_ty, out_ty, out_ty], + link_with="reduce_max.cc.o", ) min_val = ( np.array([bfloat16(float("-inf"))], dtype=dtype) @@ -136,7 +142,7 @@ def device_body(): initial_value=min_val, ) - @core(cores[i], "reduce_max.cc.o") + @core(cores[i]) def core_body(): elem_out = out_fifos[i].acquire(ObjectFifoPort.Produce, 1) for _ in range_(num_iter): diff --git 
a/programming_examples/basic/vector_reduce_max/single_column_designs/vector_reduce_max_shared_placed.py b/programming_examples/basic/vector_reduce_max/single_column_designs/vector_reduce_max_shared_placed.py index feae246ea75..20816f6cd38 100644 --- a/programming_examples/basic/vector_reduce_max/single_column_designs/vector_reduce_max_shared_placed.py +++ b/programming_examples/basic/vector_reduce_max/single_column_designs/vector_reduce_max_shared_placed.py @@ -46,10 +46,14 @@ def device_body(): # AIE Core Function declarations suffix = "_bfloat16" if dtype_str == "bf16" else "" reduce_max_vector = external_func( - f"reduce_max_vector{suffix}", inputs=[op_ty, out_ty, np.int32] + f"reduce_max_vector{suffix}", + inputs=[op_ty, out_ty, np.int32], + link_with="reduce_max.cc.o", ) compute_max = external_func( - f"compute_max{suffix}", inputs=[out_ty, out_ty, out_ty] + f"compute_max{suffix}", + inputs=[out_ty, out_ty, out_ty], + link_with="reduce_max.cc.o", ) min_val = ( np.array([bfloat16(float("-inf"))], dtype=dtype) @@ -123,7 +127,7 @@ def device_body(): ) if i != 1: - @core(cores[i], "reduce_max.cc.o") + @core(cores[i]) def core_body(): elem_out = out_fifos[i].acquire(ObjectFifoPort.Produce, 1) for _ in range_(num_iter): @@ -136,7 +140,7 @@ def core_body(): else: - @core(cores[i], "reduce_max.cc.o") + @core(cores[i]) def core_body(): for _ in range_(num_iter): elem_in = in_fifos[i].acquire(ObjectFifoPort.Consume, 1) diff --git a/programming_examples/basic/vector_reduce_max/single_core_designs/vector_reduce_max_placed.py b/programming_examples/basic/vector_reduce_max/single_core_designs/vector_reduce_max_placed.py index f5c00b8e0ce..0d6514fff6f 100644 --- a/programming_examples/basic/vector_reduce_max/single_core_designs/vector_reduce_max_placed.py +++ b/programming_examples/basic/vector_reduce_max/single_core_designs/vector_reduce_max_placed.py @@ -38,11 +38,15 @@ def device_body(): # AIE Core Function declarations if dtype_str == "bf16": reduce_max_vector = 
external_func( - "reduce_max_vector_bfloat16", inputs=[in_ty, out_ty, np.int32] + "reduce_max_vector_bfloat16", + inputs=[in_ty, out_ty, np.int32], + link_with="reduce_max.cc.o", ) else: reduce_max_vector = external_func( - "reduce_max_vector", inputs=[in_ty, out_ty, np.int32] + "reduce_max_vector", + inputs=[in_ty, out_ty, np.int32], + link_with="reduce_max.cc.o", ) # Tile declarations @@ -61,7 +65,7 @@ def device_body(): # Set up compute tiles # Compute tile 2 - @core(ComputeTile2, "reduce_max.cc.o") + @core(ComputeTile2) def core_body(): for _ in range_(0xFFFFFFFF): elem_out = of_out.acquire(ObjectFifoPort.Produce, 1) diff --git a/programming_examples/basic/vector_reduce_min/vector_reduce_min_placed.py b/programming_examples/basic/vector_reduce_min/vector_reduce_min_placed.py index 5073caed50b..d4e5fd50e9f 100644 --- a/programming_examples/basic/vector_reduce_min/vector_reduce_min_placed.py +++ b/programming_examples/basic/vector_reduce_min/vector_reduce_min_placed.py @@ -38,7 +38,9 @@ def device_body(): # AIE Core Function declarations reduce_min_vector = external_func( - "reduce_min_vector", inputs=[in_ty, out_ty, np.int32] + "reduce_min_vector", + inputs=[in_ty, out_ty, np.int32], + link_with="reduce_min.cc.o", ) # Tile declarations @@ -52,7 +54,7 @@ def device_body(): # Set up compute tiles # Compute tile 2 - @core(ComputeTile2, "reduce_min.cc.o") + @core(ComputeTile2) def core_body(): for _ in range_(0xFFFFFFFF): elem_out = of_out.acquire(ObjectFifoPort.Produce, 1) diff --git a/programming_examples/basic/vector_scalar_mul/vector_scalar_mul_placed.py b/programming_examples/basic/vector_scalar_mul/vector_scalar_mul_placed.py index 3c31921abb0..21eebb35cb6 100644 --- a/programming_examples/basic/vector_scalar_mul/vector_scalar_mul_placed.py +++ b/programming_examples/basic/vector_scalar_mul/vector_scalar_mul_placed.py @@ -51,6 +51,7 @@ def device_body(): scale = external_func( f"vector_scalar_mul_{func_type}", inputs=[tile_ty, tile_ty, scalar_ty, np.int32], + 
link_with="scale.o", ) # Tile declarations @@ -65,7 +66,7 @@ def device_body(): # Set up compute tiles # Compute tile 2 - @core(ComputeTile2, "scale.o") + @core(ComputeTile2) def core_body(): # Effective while(1) for _ in range_(sys.maxsize): diff --git a/programming_examples/ml/block_datatypes/bfp_conversion/bfp_conversion_placed.py b/programming_examples/ml/block_datatypes/bfp_conversion/bfp_conversion_placed.py index 56651de802c..8b892f2ee74 100644 --- a/programming_examples/ml/block_datatypes/bfp_conversion/bfp_conversion_placed.py +++ b/programming_examples/ml/block_datatypes/bfp_conversion/bfp_conversion_placed.py @@ -39,10 +39,13 @@ def device_body(): conversion_func = external_func( "bf16_to_bfp_conversion", [tile_bf16_ty, tile_bf16_ty, tile_bfp16_ty, tile_bfp16_ty], + link_with="kernel.o", ) multiplication_func = external_func( - "bfp16_matrix_multiplication", [tile_bfp16_ty, tile_bfp16_ty, tile_bfp16_ty] + "bfp16_matrix_multiplication", + [tile_bfp16_ty, tile_bfp16_ty, tile_bfp16_ty], + link_with="kernel.o", ) # Tile declarations @@ -64,7 +67,7 @@ def device_body(): # Set up compute tiles # Compute tile 2 - @core(ComputeTile2, "kernel.o") + @core(ComputeTile2) def core_body(): for _ in range_(sys.maxsize): elem_in1 = of_in1.acquire(ObjectFifoPort.Consume, 1) @@ -80,7 +83,7 @@ def core_body(): of_intermediate2.release(ObjectFifoPort.Produce, 1) # Compute tile 3 - @core(ComputeTile3, "kernel.o") + @core(ComputeTile3) def core_body(): for _ in range_(sys.maxsize): elem_in1 = of_intermediate1.acquire(ObjectFifoPort.Consume, 1) diff --git a/programming_examples/ml/block_datatypes/vector_passthrough/vector_passthrough_placed.py b/programming_examples/ml/block_datatypes/vector_passthrough/vector_passthrough_placed.py index 671e7d1ec60..5c145e4105d 100644 --- a/programming_examples/ml/block_datatypes/vector_passthrough/vector_passthrough_placed.py +++ b/programming_examples/ml/block_datatypes/vector_passthrough/vector_passthrough_placed.py @@ -28,7 +28,9 @@ def 
device_body(): tensor_ty = np.ndarray[(N,), np.dtype[v8bfp16ebs8]] tile_ty = np.ndarray[(n,), np.dtype[v8bfp16ebs8]] - kernel_func = external_func("bfp16_passthrough_vectorized", [tile_ty, tile_ty]) + kernel_func = external_func( + "bfp16_passthrough_vectorized", [tile_ty, tile_ty], link_with="kernel.o" + ) # Tile declarations ShimTile = tile(int(sys.argv[1]), 0) @@ -41,7 +43,7 @@ def device_body(): # Set up compute tiles # Compute tile 2 - @core(ComputeTile2, "kernel.o") + @core(ComputeTile2) def core_body(): for _ in range_(sys.maxsize): elem_in1 = of_in1.acquire(ObjectFifoPort.Consume, 1) diff --git a/programming_examples/ml/bottleneck/bottleneck_placed.py b/programming_examples/ml/bottleneck/bottleneck_placed.py index cbf3388ef2a..61e8e37976b 100644 --- a/programming_examples/ml/bottleneck/bottleneck_placed.py +++ b/programming_examples/ml/bottleneck/bottleneck_placed.py @@ -105,6 +105,7 @@ def deviceBody(): np.int32, np.int32, ], + link_with="conv2dk1.o", ) conv2dk3 = external_func( "conv2dk3_ui8", @@ -123,6 +124,7 @@ def deviceBody(): np.int32, np.int32, ], + link_with="conv2dk3.o", ) conv2dk1_skip = external_func( "conv2dk1_skip_i8", @@ -138,6 +140,7 @@ def deviceBody(): np.int32, np.int32, ], + link_with="conv2dk1_skip.o", ) ShimTile = tile(0, 0) @@ -242,7 +245,7 @@ def deviceBody(): ) # 1x1 conv2d - @core(ComputeTile2, "conv2dk1.o") + @core(ComputeTile2) def core_body(): for _ in range_(sys.maxsize): use_lock(lock2, LockAction.Acquire, value=1) @@ -270,7 +273,7 @@ def core_body(): of_wts_buf_00.release(ObjectFifoPort.Consume, 1) # 3x3 conv2d OFM 0-31 - @core(ComputeTile3, "conv2dk3.o") + @core(ComputeTile3) def core_body(): scale = 11 for _ in range_(sys.maxsize): @@ -353,7 +356,7 @@ def core_body(): wts_buf_01.release(ObjectFifoPort.Consume, 1) # 3x3 conv2d OFM 32-63 - @core(ComputeTile5, "conv2dk3.o") + @core(ComputeTile5) def core_body(): scale = 11 for _ in range_(sys.maxsize): @@ -435,7 +438,7 @@ def core_body(): 
wts_buf_01.release(ObjectFifoPort.Consume, 1) # # 1x1 conv2d and add skip - @core(ComputeTile4, "conv2dk1_skip.o", stack_size=0xA00) + @core(ComputeTile4, stack_size=0xA00) def core_body(): for _ in range_(sys.maxsize): diff --git a/programming_examples/ml/conv2d/conv2d_placed.py b/programming_examples/ml/conv2d/conv2d_placed.py index 6455a8fc734..4f81d7856dc 100644 --- a/programming_examples/ml/conv2d/conv2d_placed.py +++ b/programming_examples/ml/conv2d/conv2d_placed.py @@ -57,6 +57,7 @@ def device_body(): np.int32, np.int32, ], + link_with="conv2dk1_i8.o", ) # Tile declarations @@ -99,7 +100,7 @@ def device_body(): ) # Compute tile 2 - @core(ComputeTile2, "conv2dk1_i8.o", stack_size=0x600) + @core(ComputeTile2, stack_size=0x600) def core_body(): y_dim = height x_dim = width diff --git a/programming_examples/ml/conv2d_14x14/conv2dk14_32core_placed.py b/programming_examples/ml/conv2d_14x14/conv2dk14_32core_placed.py index b28fdbbca2d..ad5d352a06f 100644 --- a/programming_examples/ml/conv2d_14x14/conv2dk14_32core_placed.py +++ b/programming_examples/ml/conv2d_14x14/conv2dk14_32core_placed.py @@ -87,6 +87,7 @@ def device_body(): np.int32, np.int32, ], + link_with="conv2dk14.o", ) # Tile declarations @@ -225,7 +226,7 @@ def device_body(): for i in range(n_aie_cols): for j in range(n_aie_rows): - @core(core_tiles[j][i], "conv2dk14.o", stack_size=0xC00) + @core(core_tiles[j][i], stack_size=0xC00) def core_body(): y_dim = height // (kernel_size * 4) x_blocks = 4 diff --git a/programming_examples/ml/conv2d_14x14/conv2dk14_placed.py b/programming_examples/ml/conv2d_14x14/conv2dk14_placed.py index ca2ee6cf6c8..5c4c44f2e3b 100644 --- a/programming_examples/ml/conv2d_14x14/conv2dk14_placed.py +++ b/programming_examples/ml/conv2d_14x14/conv2dk14_placed.py @@ -78,6 +78,7 @@ def device_body(): np.int32, np.int32, ], + link_with="conv2dk14.o", ) # Tile declarations @@ -169,7 +170,7 @@ def device_body(): ) # Compute tile 2 - @core(ComputeTile2, "conv2dk14.o", stack_size=0xC00) + 
@core(ComputeTile2, stack_size=0xC00) def core_body(): y_dim = height // kernel_size x_blocks = 4 diff --git a/programming_examples/ml/conv2d_fused_relu/conv2d_fused_relu_placed.py b/programming_examples/ml/conv2d_fused_relu/conv2d_fused_relu_placed.py index 2e9da2ae19f..06d1136e881 100644 --- a/programming_examples/ml/conv2d_fused_relu/conv2d_fused_relu_placed.py +++ b/programming_examples/ml/conv2d_fused_relu/conv2d_fused_relu_placed.py @@ -60,6 +60,7 @@ def device_body(): np.int32, np.int32, ], + link_with="conv2dk1.o", ) # Tile declarations @@ -98,7 +99,7 @@ def device_body(): ) # Compute tile 2 - @core(ComputeTile2, "conv2dk1.o", stack_size=0xA00) + @core(ComputeTile2, stack_size=0xA00) def core_body(): y_dim = 32 x_dim = 32 diff --git a/programming_examples/ml/magika/group0_placed.py b/programming_examples/ml/magika/group0_placed.py index 0ac1b667842..d735aceab33 100644 --- a/programming_examples/ml/magika/group0_placed.py +++ b/programming_examples/ml/magika/group0_placed.py @@ -60,9 +60,10 @@ def __init__( group0a_func = external_func( "group0a_kernel", inputs=[din_ty, dout_ty, lut0a_ty, scalar_ty, scalar_ty], + link_with=_objectArchive, ) - @core(self.computeTile, self.objectArchive, stack_size=4096) + @core(self.computeTile, stack_size=4096) def core_body(): for _ in range_(sys.maxsize): di = self.din.acquire(ObjectFifoPort.Consume, 1) @@ -123,9 +124,10 @@ def __init__( group0b_func = external_func( "group0b_kernel", inputs=[din_ty, dout_ty, lut0b_a_ty, lut0b_b_ty], + link_with=_objectArchive, ) - @core(self.computeTile, self.objectArchive) + @core(self.computeTile) def core_body(): for _ in range_(sys.maxsize): for ite in range_(32): # 256/8 diff --git a/programming_examples/ml/magika/group1_placed.py b/programming_examples/ml/magika/group1_placed.py index a2da7ed3d4f..8554f4cca64 100644 --- a/programming_examples/ml/magika/group1_placed.py +++ b/programming_examples/ml/magika/group1_placed.py @@ -46,9 +46,10 @@ def __init__( group1_func = external_func( 
f"group1_{id}_kernel", inputs=[din_ty, dout_ty], + link_with=_objectArchive, ) - @core(self.computeTile, self.objectArchive) + @core(self.computeTile) def core_body(): for _ in range_(sys.maxsize): do = self.dout.acquire(ObjectFifoPort.Produce, 1) @@ -104,14 +105,16 @@ def __init__( group1a_func = external_func( f"group1_{id1}_kernel", inputs=[din_ty, dout2_ty], + link_with=_objectArchive, ) group1b_func = external_func( f"group1_{id2}_kernel", inputs=[din_ty, dout2_ty, dout_ty], + link_with=_objectArchive, ) - @core(self.computeTile1, self.objectArchive) + @core(self.computeTile1) def core_body(): for _ in range_(sys.maxsize): do = self.of_int.acquire(ObjectFifoPort.Produce, 1) @@ -120,7 +123,7 @@ def core_body(): self.din.release(ObjectFifoPort.Consume, 1) self.of_int.release(ObjectFifoPort.Produce, 1) - @core(self.computeTile2, self.objectArchive) + @core(self.computeTile2) def core_body(): for _ in range_(sys.maxsize): do = self.dout.acquire(ObjectFifoPort.Produce, 1) @@ -188,19 +191,22 @@ def __init__( group1a_func = external_func( f"group1_{id1}_kernel", inputs=[din_ty, dout2_ty], + link_with=_objectArchive, ) group1b_func = external_func( f"group1_{id2}_kernel", inputs=[din_ty, dout2_ty, dout2_ty, dout_ty], + link_with=_objectArchive, ) group1c_func = external_func( f"group1_{id3}_kernel", inputs=[din_ty, dout2_ty], + link_with=_objectArchive, ) - @core(self.computeTile1, self.objectArchive) + @core(self.computeTile1) def core_body(): for _ in range_(sys.maxsize): do = self.of_int.acquire(ObjectFifoPort.Produce, 1) @@ -209,7 +215,7 @@ def core_body(): self.din.release(ObjectFifoPort.Consume, 1) self.of_int.release(ObjectFifoPort.Produce, 1) - @core(self.computeTile2, self.objectArchive) + @core(self.computeTile2) def core_body(): for _ in range_(sys.maxsize): do = self.dout.acquire(ObjectFifoPort.Produce, 1) @@ -222,7 +228,7 @@ def core_body(): self.of_int3.release(ObjectFifoPort.Consume, 1) self.dout.release(ObjectFifoPort.Produce, 1) - 
@core(self.computeTile3, self.objectArchive) + @core(self.computeTile3) def core_body(): for _ in range_(sys.maxsize): do = self.of_int2.acquire(ObjectFifoPort.Produce, 1) diff --git a/programming_examples/ml/magika/group2_placed.py b/programming_examples/ml/magika/group2_placed.py index a9f02af3841..b7394b9fc74 100644 --- a/programming_examples/ml/magika/group2_placed.py +++ b/programming_examples/ml/magika/group2_placed.py @@ -91,13 +91,14 @@ def __init__( group2_func = external_func( "group2_kernel", inputs=[din_ty, lut0_ty, lut1_ty, lut2_ty, lut3_ty], + link_with=_objectArchive, ) output_lock = lock( self.computeTile, lock_id=8, init=0 ) # chooose id=8, objfifo doesn't use it - @core(self.computeTile, self.objectArchive) + @core(self.computeTile) def core_body(): for _ in range_(sys.maxsize): di = self.din.acquire(ObjectFifoPort.Consume, 1) diff --git a/programming_examples/ml/resnet/layers_conv2_x/aie.mlir b/programming_examples/ml/resnet/layers_conv2_x/aie.mlir index 088bc9be826..191efe3020d 100755 --- a/programming_examples/ml/resnet/layers_conv2_x/aie.mlir +++ b/programming_examples/ml/resnet/layers_conv2_x/aie.mlir @@ -114,12 +114,12 @@ aie.device(npu1_3col) { aie.objectfifo @outOFL2L3(%tile24, {%tile10}, 2 : i32) : !aie.objectfifo> //32x1x64 // ___________________________Kernel Call___________________________ - func.func private @conv2dk1_i8(memref<32x1x64xi8>, memref<4096xi8>, memref<32x1x64xui8>,i32,i32,i32,i32) -> () - func.func private @conv2dk3_ui8(memref<32x1x64xui8>,memref<32x1x64xui8>, memref<32x1x64xui8>, memref<36864xi8>,memref<32x1x32xui8>,i32,i32,i32,i32,i32,i32,i32,i32) -> () - func.func private @conv2dk1_skip_init_i8(memref<32x1x32xui8>,memref<32x1x32xui8>, memref<32768xi8>,memref<32x1x256xui8>,memref<32x1x64xi8>,i32,i32,i32,i32,i32,i32,i32) -> () + func.func private @conv2dk1_i8(memref<32x1x64xi8>, memref<4096xi8>, memref<32x1x64xui8>,i32,i32,i32,i32) -> () attributes {link_with = "conv2dk1_i8.o"} + func.func private 
@conv2dk3_ui8(memref<32x1x64xui8>,memref<32x1x64xui8>, memref<32x1x64xui8>, memref<36864xi8>,memref<32x1x32xui8>,i32,i32,i32,i32,i32,i32,i32,i32) -> () attributes {link_with = "conv2dk3.o"} + func.func private @conv2dk1_skip_init_i8(memref<32x1x32xui8>,memref<32x1x32xui8>, memref<32768xi8>,memref<32x1x256xui8>,memref<32x1x64xi8>,i32,i32,i32,i32,i32,i32,i32) -> () attributes {link_with = "conv2dk1_skip_init.o"} - func.func private @conv2dk1_ui8(memref<32x1x256xui8>, memref<16384xi8>, memref<32x1x64xui8>,i32,i32,i32,i32) -> () - func.func private @conv2dk1_skip_ui8(memref<32x1x32xui8>,memref<32x1x32xui8>, memref<16384xi8>,memref<32x1x256xui8>,memref<32x1x256xui8>,i32,i32,i32,i32,i32) -> () + func.func private @conv2dk1_ui8(memref<32x1x256xui8>, memref<16384xi8>, memref<32x1x64xui8>,i32,i32,i32,i32) -> () attributes {link_with = "conv2dk1_ui8.o"} + func.func private @conv2dk1_skip_ui8(memref<32x1x32xui8>,memref<32x1x32xui8>, memref<16384xi8>,memref<32x1x256xui8>,memref<32x1x256xui8>,i32,i32,i32,i32,i32) -> () attributes {link_with = "conv2dk1_skip.o"} // ___________________________Bottleneck 1___________________________ // 1x1 conv aie.core(%tile02) { @@ -156,7 +156,7 @@ aie.device(npu1_3col) { aie.objectfifo.release @wts_buf_00(Consume, 1) } aie.end - } { link_with="conv2dk1_i8.o" } + } // 3x3 conv aie.core(%tile03) { @@ -239,7 +239,7 @@ aie.device(npu1_3col) { } // aie.objectfifo.release(%inOF_wts_0_L3L2 : !aie.objectfifo>, 1) aie.end - } { link_with="conv2dk3.o" } + } // 3x3 conv aie.core(%tile04) { @@ -321,7 +321,7 @@ aie.device(npu1_3col) { } aie.end - } { link_with="conv2dk3.o" } + } // 1x1 conv with skip aie.core(%tile05) { @@ -373,7 +373,7 @@ aie.device(npu1_3col) { aie.objectfifo.release @wts_buf_02(Consume, 1) } aie.end - } { link_with="conv2dk1_skip_init.o" } + } // ___________________________Bottleneck 2___________________________ // 1x1 conv aie.core(%tile15) { @@ -410,7 +410,7 @@ aie.device(npu1_3col) { aie.objectfifo.release @wts_buf_10(Consume, 1) } 
aie.end - } { link_with="conv2dk1_ui8.o" } + } // 3x3 conv aie.core(%tile12) { @@ -493,7 +493,7 @@ aie.device(npu1_3col) { } // aie.objectfifo.release(%inOF_wts_0_L3L2 : !aie.objectfifo>, 1) aie.end - } { link_with="conv2dk3.o" } + } // 3x3 conv aie.core(%tile14) { @@ -575,7 +575,7 @@ aie.device(npu1_3col) { } aie.end - } { link_with="conv2dk3.o" } + } // 1x1 conv with skip aie.core(%tile13) { @@ -624,7 +624,7 @@ aie.device(npu1_3col) { aie.objectfifo.release @wts_buf_12(Consume, 1) } aie.end - } { link_with="conv2dk1_skip.o" } + } // ___________________________Bottleneck 3___________________________ @@ -663,7 +663,7 @@ aie.device(npu1_3col) { aie.objectfifo.release @wts_buf_20(Consume, 1) } aie.end - } { link_with="conv2dk1_ui8.o" } + } // 3x3 conv aie.core(%tile23) { @@ -746,7 +746,7 @@ aie.device(npu1_3col) { } // aie.objectfifo.release(%inOF_wts_0_L3L2 : !aie.objectfifo>, 1) aie.end - } { link_with="conv2dk3.o" } + } // 3x3 conv aie.core(%tile25) { @@ -828,7 +828,7 @@ aie.device(npu1_3col) { } aie.end - } { link_with="conv2dk3.o" } + } // 1x1 conv with skip aie.core(%tile24) { @@ -877,7 +877,7 @@ aie.device(npu1_3col) { aie.objectfifo.release @wts_buf_22(Consume, 1) } aie.end - } { link_with="conv2dk1_skip.o" } + } aie.runtime_sequence(%in0 : memref<16384xi32>, %wts0 : memref<53248xi32>, %out : memref<65536xi32>) { diff --git a/programming_examples/ml/resnet/layers_conv2_x/resnet_placed.py b/programming_examples/ml/resnet/layers_conv2_x/resnet_placed.py index 2f29be4113b..26c55e22894 100755 --- a/programming_examples/ml/resnet/layers_conv2_x/resnet_placed.py +++ b/programming_examples/ml/resnet/layers_conv2_x/resnet_placed.py @@ -147,6 +147,7 @@ def deviceBody(): np.int32, np.int32, ], + link_with="conv2dk1_i8.o", ) conv2dk3 = external_func( "conv2dk3_ui8", @@ -165,6 +166,7 @@ def deviceBody(): np.int32, np.int32, ], + link_with="conv2dk3.o", ) conv2dk1_skip_init_i8 = external_func( "conv2dk1_skip_init_i8", @@ -182,6 +184,7 @@ def deviceBody(): np.int32, 
np.int32, ], + link_with="conv2dk1_skip_init.o", ) conv2dk1_ui8 = external_func( "conv2dk1_ui8", @@ -194,6 +197,7 @@ def deviceBody(): np.int32, np.int32, ], + link_with="conv2dk1_ui8.o", ) conv2dk1_skip_ui8 = external_func( @@ -210,6 +214,7 @@ def deviceBody(): np.int32, np.int32, ], + link_with="conv2dk1_skip.o", ) ShimTile00 = tile(0, 0) @@ -579,7 +584,7 @@ def deviceBody(): # # 1x1 conv2d for i in range(n_cols): - @core(cores[i][0], conv1_kernels[i]) + @core(cores[i][0]) def core_body(): for _ in range_(sys.maxsize): @@ -623,7 +628,7 @@ def core_body(): # 3x3 conv2d OFM 0-31 for i in range(n_cols): - @core(cores[i][1], "conv2dk3.o") + @core(cores[i][1]) def core_body(): scale = 1 for _ in range_(sys.maxsize): @@ -715,7 +720,7 @@ def core_body(): for i in range(n_cols): - @core(cores[i][3], "conv2dk3.o") + @core(cores[i][3]) def core_body(): scale = 1 for _ in range_(sys.maxsize): @@ -807,7 +812,7 @@ def core_body(): # # 1x1 conv2d and add skip for i in range(n_cols): - @core(cores[i][2], conv3_kernels[i], stack_size=0xA00) + @core(cores[i][2], stack_size=0xA00) def core_body(): for _ in range_(sys.maxsize): diff --git a/programming_examples/ml/softmax/softmax_placed.py b/programming_examples/ml/softmax/softmax_placed.py index ddf0e6485ac..456707ce10e 100755 --- a/programming_examples/ml/softmax/softmax_placed.py +++ b/programming_examples/ml/softmax/softmax_placed.py @@ -46,7 +46,7 @@ def device_body(): # AIE Core Function declarations softmax_bf16_vector = external_func( - "softmax_bf16", inputs=[tile_ty, tile_ty, np.int32] + "softmax_bf16", inputs=[tile_ty, tile_ty, np.int32], link_with="kernels.a" ) # Tile declarations @@ -94,7 +94,7 @@ def device_body(): # Set up compute tiles for i in range(n_cores): # Compute tile i - @core(cores[i], "kernels.a") + @core(cores[i]) def core_body(): for _ in range_(0xFFFFFFFF): for _ in range_(tiles): diff --git a/programming_examples/ml/softmax/softmax_whole_array_placed.py 
b/programming_examples/ml/softmax/softmax_whole_array_placed.py index bdb93ba812e..ebae88126de 100644 --- a/programming_examples/ml/softmax/softmax_whole_array_placed.py +++ b/programming_examples/ml/softmax/softmax_whole_array_placed.py @@ -61,7 +61,7 @@ def device_body(): # AIE Core Function declarations softmax_bf16_vector = external_func( - "softmax_bf16", inputs=[tile_ty, tile_ty, np.int32] + "softmax_bf16", inputs=[tile_ty, tile_ty, np.int32], link_with="kernels.a" ) # Tile declarations @@ -168,7 +168,7 @@ def device_body(): # Set up compute tiles for i in range(n_cores): # Compute tile i - @core(cores[i], "kernels.a") + @core(cores[i]) def core_body(): for _ in range_(0xFFFFFFFF): for _ in range_(tiles): diff --git a/programming_examples/mlir/MM_2x2/circuit_switched_version/aie.mlir b/programming_examples/mlir/MM_2x2/circuit_switched_version/aie.mlir index d600a0dee76..e7d6458df31 100755 --- a/programming_examples/mlir/MM_2x2/circuit_switched_version/aie.mlir +++ b/programming_examples/mlir/MM_2x2/circuit_switched_version/aie.mlir @@ -205,7 +205,7 @@ module @MM_2x2 { aie.end } - func.func private @extern_kernel(%A: memref<1024xi32>, %B: memref<1024xi32>, %acc: memref<1024xi32>, %C: memref<1024xi32>) -> () + func.func private @extern_kernel(%A: memref<1024xi32>, %B: memref<1024xi32>, %acc: memref<1024xi32>, %C: memref<1024xi32>) -> () attributes {link_with = "kernel.o"} %core63 = aie.core(%t63) { aie.use_lock(%lock63_0, "Acquire", 1) @@ -216,7 +216,7 @@ module @MM_2x2 { aie.use_lock(%lock63_1, "Release", 0) aie.use_lock(%lock63_0, "Release", 0) aie.end - } { link_with="kernel.o" } + } %core64 = aie.core(%t64) { aie.use_lock(%lock63_3, "Acquire", 1) @@ -229,7 +229,7 @@ module @MM_2x2 { aie.use_lock(%lock64_0, "Release", 0) aie.use_lock(%lock63_3, "Release", 0) aie.end - } { link_with="kernel.o" } + } %m73 = aie.mem(%t73) { aie.dma_start("S2MM", 0, ^bd0, ^dma0) @@ -283,7 +283,7 @@ module @MM_2x2 { aie.use_lock(%lock73_1, "Release", 0) aie.use_lock(%lock73_0, 
"Release", 0) aie.end - } { link_with="kernel.o" } + } %core74 = aie.core(%t74) { aie.use_lock(%lock73_2, "Acquire", 1) @@ -296,5 +296,5 @@ module @MM_2x2 { aie.use_lock(%lock74_0, "Release", 0) aie.use_lock(%lock73_2, "Release", 0) aie.end - } { link_with="kernel.o" } + } } diff --git a/programming_examples/mlir/MM_2x2/objectFifo_circuit_switched_version/aie.mlir b/programming_examples/mlir/MM_2x2/objectFifo_circuit_switched_version/aie.mlir index bcdcd511a33..458e72dfdf6 100755 --- a/programming_examples/mlir/MM_2x2/objectFifo_circuit_switched_version/aie.mlir +++ b/programming_examples/mlir/MM_2x2/objectFifo_circuit_switched_version/aie.mlir @@ -62,7 +62,7 @@ aie.device(xcvc1902) { %buf63 = aie.buffer(%t63) {sym_name = "buf63"} : memref<1024xi32> //Accumulator0 %buf73 = aie.buffer(%t73) {sym_name = "buf73"} : memref<1024xi32> //Accumulator1 - func.func private @extern_kernel(%A: memref<1024xi32>, %B: memref<1024xi32>, %acc: memref<1024xi32>, %C: memref<1024xi32>) -> () + func.func private @extern_kernel(%A: memref<1024xi32>, %B: memref<1024xi32>, %acc: memref<1024xi32>, %C: memref<1024xi32>) -> () attributes {link_with = "kernel.o"} %core63 = aie.core(%t63) { %LHS0Subview = aie.objectfifo.acquire @of_LHS0 (Consume, 1) : !aie.objectfifosubview> @@ -81,7 +81,7 @@ aie.device(xcvc1902) { aie.objectfifo.release @of_acc0 (Produce, 1) aie.end - } { link_with="kernel.o" } + } %core64 = aie.core(%t64) { %LHS1Subview = aie.objectfifo.acquire @of_LHS1 (Consume, 1) : !aie.objectfifosubview> @@ -104,7 +104,7 @@ aie.device(xcvc1902) { aie.objectfifo.release @of_out0 (Produce, 1) aie.end - } { link_with="kernel.o" } + } %core73 = aie.core(%t73) { %LHS0Subview = aie.objectfifo.acquire @of_LHS0 (Consume, 1) : !aie.objectfifosubview> @@ -123,7 +123,7 @@ aie.device(xcvc1902) { aie.objectfifo.release @of_acc1 (Produce, 1) aie.end - } { link_with="kernel.o" } + } %core74 = aie.core(%t74) { %LHS1Subview = aie.objectfifo.acquire @of_LHS1 (Consume, 1) : !aie.objectfifosubview> @@ 
-146,5 +146,5 @@ aie.device(xcvc1902) { aie.objectfifo.release @of_out1 (Produce, 1) aie.end - } { link_with="kernel.o" } + } } diff --git a/programming_examples/mlir/MM_2x2/packet_switched_version/aie.mlir b/programming_examples/mlir/MM_2x2/packet_switched_version/aie.mlir index dc23bc51357..ac750f5b9fe 100644 --- a/programming_examples/mlir/MM_2x2/packet_switched_version/aie.mlir +++ b/programming_examples/mlir/MM_2x2/packet_switched_version/aie.mlir @@ -207,7 +207,7 @@ module @MM_2x2 { aie.end } - func.func private @extern_kernel(%A: memref<1024xi32>, %B: memref<1024xi32>, %acc: memref<1024xi32>, %C: memref<1024xi32>) -> () + func.func private @extern_kernel(%A: memref<1024xi32>, %B: memref<1024xi32>, %acc: memref<1024xi32>, %C: memref<1024xi32>) -> () attributes {link_with = "kernel.o"} %lock63_3 = aie.lock(%t63, 3) @@ -221,7 +221,7 @@ module @MM_2x2 { aie.use_lock(%lock63_0, "Release", 0) aie.end - } { link_with="kernel.o" } + } %core64 = aie.core(%t64) { @@ -235,7 +235,7 @@ module @MM_2x2 { aie.use_lock(%lock64_0, "Release", 0) aie.use_lock(%lock63_3, "Release", 0) aie.end - } { link_with="kernel.o" } + } %lock73_0 = aie.lock(%t73, 0) @@ -300,7 +300,7 @@ module @MM_2x2 { aie.use_lock(%lock73_1, "Release", 0) aie.use_lock(%lock73_0, "Release", 0) aie.end - } { link_with="kernel.o" } + } %core74 = aie.core(%t74) { aie.use_lock(%lock73_2, "Acquire", 1) @@ -313,7 +313,7 @@ module @MM_2x2 { aie.use_lock(%lock74_0, "Release", 0) aie.use_lock(%lock73_2, "Release", 0) aie.end - } { link_with="kernel.o" } + } } diff --git a/programming_examples/mlir/horizontal_diffusion/HDIFF_dual_AIE_objectFIFO_ping_pong/aie.mlir b/programming_examples/mlir/horizontal_diffusion/HDIFF_dual_AIE_objectFIFO_ping_pong/aie.mlir index 985f4b38503..214061a0982 100644 --- a/programming_examples/mlir/horizontal_diffusion/HDIFF_dual_AIE_objectFIFO_ping_pong/aie.mlir +++ b/programming_examples/mlir/horizontal_diffusion/HDIFF_dual_AIE_objectFIFO_ping_pong/aie.mlir @@ -30,7 +30,7 @@ module 
@hdiff_multi_AIE{ aie.objectfifo.register_external_buffers @obj_in (%t70, {%ext_buffer_in0}) : (memref<1536xi32>) aie.objectfifo.register_external_buffers @obj_out_flux (%t70, {%ext_buffer_out}) : (memref<512xi32>) - func.func private @hdiff_lap(%AL: memref<256xi32>,%BL: memref<256xi32>, %CL: memref<256xi32>, %DL: memref<256xi32>, %EL: memref<256xi32>, %OLL1: memref<256xi32>, %OLL2: memref<256xi32>, %OLL3: memref<256xi32>, %OLL4: memref<256xi32>) -> () + func.func private @hdiff_lap(%AL: memref<256xi32>,%BL: memref<256xi32>, %CL: memref<256xi32>, %DL: memref<256xi32>, %EL: memref<256xi32>, %OLL1: memref<256xi32>, %OLL2: memref<256xi32>, %OLL3: memref<256xi32>, %OLL4: memref<256xi32>) -> () attributes {link_with = "hdiff_lap.o"} %c13 = aie.core(%t71) { %lb = arith.constant 0 : index @@ -59,9 +59,9 @@ module @hdiff_multi_AIE{ aie.objectfifo.release @obj_in (Consume, 4) aie.end - } { link_with="hdiff_lap.o" } + } -func.func private @hdiff_flux(%AF: memref<256xi32>,%BF: memref<256xi32>, %CF: memref<256xi32>, %OLF1: memref<256xi32>, %OLF2: memref<256xi32>, %OLF3: memref<256xi32>, %OLF4: memref<256xi32>, %OF: memref<256xi32>) -> () +func.func private @hdiff_flux(%AF: memref<256xi32>,%BF: memref<256xi32>, %CF: memref<256xi32>, %OLF1: memref<256xi32>, %OLF2: memref<256xi32>, %OLF3: memref<256xi32>, %OLF4: memref<256xi32>, %OF: memref<256xi32>) -> () attributes {link_with = "hdiff_flux.o"} %c14 = aie.core(%t72) { %lb = arith.constant 0 : index @@ -91,5 +91,5 @@ func.func private @hdiff_flux(%AF: memref<256xi32>,%BF: memref<256xi32>, %CF: m aie.objectfifo.release @obj_in (Consume, 4) aie.end - } { link_with="hdiff_flux.o" } + } } diff --git a/programming_examples/mlir/horizontal_diffusion/HDIFF_dual_AIE_objectFIFO_ping_pong/aie_fp32.mlir b/programming_examples/mlir/horizontal_diffusion/HDIFF_dual_AIE_objectFIFO_ping_pong/aie_fp32.mlir index 78a78e8eb68..a554c95c0e5 100644 --- a/programming_examples/mlir/horizontal_diffusion/HDIFF_dual_AIE_objectFIFO_ping_pong/aie_fp32.mlir 
+++ b/programming_examples/mlir/horizontal_diffusion/HDIFF_dual_AIE_objectFIFO_ping_pong/aie_fp32.mlir @@ -29,7 +29,7 @@ module @hdiff_multi_AIE{ aie.objectfifo.register_external_buffers @obj_in (%t70, {%ext_buffer_in0}) : (memref<1536xf32>) aie.objectfifo.register_external_buffers @obj_out_flux (%t70, {%ext_buffer_out}) : (memref<512xf32>) - func.func private @hdiff_lap_fp32(%AL: memref<256xf32>,%BL: memref<256xf32>, %CL: memref<256xf32>, %DL: memref<256xf32>, %EL: memref<256xf32>, %OLL1: memref<256xf32>, %OLL2: memref<256xf32>, %OLL3: memref<256xf32>, %OLL4: memref<256xf32>) -> () + func.func private @hdiff_lap_fp32(%AL: memref<256xf32>,%BL: memref<256xf32>, %CL: memref<256xf32>, %DL: memref<256xf32>, %EL: memref<256xf32>, %OLL1: memref<256xf32>, %OLL2: memref<256xf32>, %OLL3: memref<256xf32>, %OLL4: memref<256xf32>) -> () attributes {link_with = "hdiff_lap_fp32.o"} %c13 = aie.core(%t71) { @@ -60,9 +60,9 @@ module @hdiff_multi_AIE{ aie.objectfifo.release @obj_in (Consume, 4) aie.end - } { link_with="hdiff_lap_fp32.o" } + } -func.func private @hdiff_flux_fp32(%AF: memref<256xf32>,%BF: memref<256xf32>, %CF: memref<256xf32>, %OLF1: memref<256xf32>, %OLF2: memref<256xf32>, %OLF3: memref<256xf32>, %OLF4: memref<256xf32>, %OF: memref<256xf32>) -> () +func.func private @hdiff_flux_fp32(%AF: memref<256xf32>,%BF: memref<256xf32>, %CF: memref<256xf32>, %OLF1: memref<256xf32>, %OLF2: memref<256xf32>, %OLF3: memref<256xf32>, %OLF4: memref<256xf32>, %OF: memref<256xf32>) -> () attributes {link_with = "hdiff_flux_fp32.o"} %c14 = aie.core(%t72) { @@ -95,7 +95,7 @@ func.func private @hdiff_flux_fp32(%AF: memref<256xf32>,%BF: memref<256xf32>, %C aie.objectfifo.release @obj_in (Consume, 4) aie.end - } { link_with="hdiff_flux_fp32.o" } + } diff --git a/programming_examples/mlir/horizontal_diffusion/HDIFF_single_AIE_objectFIFO/aie.mlir b/programming_examples/mlir/horizontal_diffusion/HDIFF_single_AIE_objectFIFO/aie.mlir index 6b8907f080b..968ec361dcf 100644 --- 
a/programming_examples/mlir/horizontal_diffusion/HDIFF_single_AIE_objectFIFO/aie.mlir +++ b/programming_examples/mlir/horizontal_diffusion/HDIFF_single_AIE_objectFIFO/aie.mlir @@ -27,7 +27,7 @@ module @hdiff_single_AIE { aie.objectfifo.register_external_buffers @obj_in (%t70, {%ext_buffer_in0}) : (memref<1536xi32>) aie.objectfifo.register_external_buffers @obj_out (%t70, {%ext_buffer_out}) : (memref<512xi32>) - func.func private @vec_hdiff(%A: memref<256xi32>,%B: memref<256xi32>, %C: memref<256xi32>, %D: memref<256xi32>, %E: memref<256xi32>, %O: memref<256xi32>) -> () + func.func private @vec_hdiff(%A: memref<256xi32>,%B: memref<256xi32>, %C: memref<256xi32>, %D: memref<256xi32>, %E: memref<256xi32>, %O: memref<256xi32>) -> () attributes {link_with = "hdiff.o"} %c13 = aie.core(%t71) { %lb = arith.constant 0 : index @@ -53,5 +53,5 @@ module @hdiff_single_AIE { aie.objectfifo.release @obj_in (Consume, 4) aie.end - } { link_with="hdiff.o" } + } } diff --git a/programming_examples/mlir/horizontal_diffusion/HDIFF_single_AIE_objectFIFO/aie_fp32.mlir b/programming_examples/mlir/horizontal_diffusion/HDIFF_single_AIE_objectFIFO/aie_fp32.mlir index 1533c5052eb..10f6adc49df 100644 --- a/programming_examples/mlir/horizontal_diffusion/HDIFF_single_AIE_objectFIFO/aie_fp32.mlir +++ b/programming_examples/mlir/horizontal_diffusion/HDIFF_single_AIE_objectFIFO/aie_fp32.mlir @@ -27,7 +27,7 @@ module @hdiff_single_AIE_fp32{ aie.objectfifo.register_external_buffers @obj_in (%t70, {%ext_buffer_in0}) : (memref<1536xf32>) aie.objectfifo.register_external_buffers @obj_out (%t70, {%ext_buffer_out}) : (memref<512xf32>) - func.func private @vec_hdiff_fp32(%A: memref<256xf32>,%B: memref<256xf32>, %C: memref<256xf32>, %D: memref<256xf32>, %E: memref<256xf32>, %O: memref<256xf32>) -> () + func.func private @vec_hdiff_fp32(%A: memref<256xf32>,%B: memref<256xf32>, %C: memref<256xf32>, %D: memref<256xf32>, %E: memref<256xf32>, %O: memref<256xf32>) -> () attributes {link_with = "hdiff_fp32.o"} %c13 
= aie.core(%t71) { @@ -55,7 +55,7 @@ module @hdiff_single_AIE_fp32{ aie.objectfifo.release @obj_in (Consume, 4) aie.end - } { link_with="hdiff_fp32.o" } + } } diff --git a/programming_examples/mlir/horizontal_diffusion/HDIFF_single_AIE_objectFIFO_ping_pong/aie.mlir b/programming_examples/mlir/horizontal_diffusion/HDIFF_single_AIE_objectFIFO_ping_pong/aie.mlir index c3f43f522d8..455f6e9a3e5 100644 --- a/programming_examples/mlir/horizontal_diffusion/HDIFF_single_AIE_objectFIFO_ping_pong/aie.mlir +++ b/programming_examples/mlir/horizontal_diffusion/HDIFF_single_AIE_objectFIFO_ping_pong/aie.mlir @@ -26,7 +26,7 @@ module @hdiff_single_AIE { aie.objectfifo.register_external_buffers @obj_in (%t70, {%ext_buffer_in0}) : (memref<1536xi32>) aie.objectfifo.register_external_buffers @obj_out (%t70, {%ext_buffer_out}) : (memref<512xi32>) - func.func private @vec_hdiff(%A: memref<256xi32>, %B: memref<256xi32>, %C: memref<256xi32>, %D: memref<256xi32>, %E: memref<256xi32>, %O: memref<256xi32>) -> () + func.func private @vec_hdiff(%A: memref<256xi32>, %B: memref<256xi32>, %C: memref<256xi32>, %D: memref<256xi32>, %E: memref<256xi32>, %O: memref<256xi32>) -> () attributes {link_with = "hdiff.o"} %c13 = aie.core(%t71) { %lb = arith.constant 0 : index @@ -52,5 +52,5 @@ module @hdiff_single_AIE { aie.objectfifo.release @obj_in (Consume, 4) aie.end - } { link_with="hdiff.o" } + } } diff --git a/programming_examples/mlir/horizontal_diffusion/HDIFF_single_AIE_objectFIFO_ping_pong/aie_fp32.mlir b/programming_examples/mlir/horizontal_diffusion/HDIFF_single_AIE_objectFIFO_ping_pong/aie_fp32.mlir index 2384b35a6ec..75e61f7a774 100644 --- a/programming_examples/mlir/horizontal_diffusion/HDIFF_single_AIE_objectFIFO_ping_pong/aie_fp32.mlir +++ b/programming_examples/mlir/horizontal_diffusion/HDIFF_single_AIE_objectFIFO_ping_pong/aie_fp32.mlir @@ -28,7 +28,7 @@ module @hdiff_single_AIE_fp32{ aie.objectfifo.register_external_buffers @obj_in (%t70, {%ext_buffer_in0}) : (memref<1536xf32>) 
aie.objectfifo.register_external_buffers @obj_out (%t70, {%ext_buffer_out}) : (memref<512xf32>) - func.func private @vec_hdiff_fp32(%A: memref<256xf32>,%B: memref<256xf32>, %C: memref<256xf32>, %D: memref<256xf32>, %E: memref<256xf32>, %O: memref<256xf32>) -> () + func.func private @vec_hdiff_fp32(%A: memref<256xf32>,%B: memref<256xf32>, %C: memref<256xf32>, %D: memref<256xf32>, %E: memref<256xf32>, %O: memref<256xf32>) -> () attributes {link_with = "hdiff_fp32.o"} %c13 = aie.core(%t71) { @@ -56,7 +56,7 @@ module @hdiff_single_AIE_fp32{ aie.objectfifo.release @obj_in (Consume, 4) aie.end - } { link_with="hdiff_fp32.o" } + } } diff --git a/programming_examples/mlir/horizontal_diffusion/HDIFF_single_AIE_objectFIFO_ping_pong_scaled/aie.mlir b/programming_examples/mlir/horizontal_diffusion/HDIFF_single_AIE_objectFIFO_ping_pong_scaled/aie.mlir index a5700619bd0..5d86cfe169c 100644 --- a/programming_examples/mlir/horizontal_diffusion/HDIFF_single_AIE_objectFIFO_ping_pong_scaled/aie.mlir +++ b/programming_examples/mlir/horizontal_diffusion/HDIFF_single_AIE_objectFIFO_ping_pong_scaled/aie.mlir @@ -398,7 +398,7 @@ module @hdiff_large_0 { aie.objectfifo.register_external_buffers(%tile47_0, %buf_out_31_2_shim_47 : !aie.objectfifo>, {%ext_buffer_out_31_2}) : (memref<512xi32>) - func.func private @vec_hdiff(%A: memref<256xi32>,%B: memref<256xi32>, %C: memref<256xi32>, %D: memref<256xi32>, %E: memref<256xi32>, %O: memref<256xi32>) -> () + func.func private @vec_hdiff(%A: memref<256xi32>,%B: memref<256xi32>, %C: memref<256xi32>, %D: memref<256xi32>, %E: memref<256xi32>, %O: memref<256xi32>) -> () attributes {link_with = "hdiff.o"} %core0_2 = aie.core(%tile0_2) { %lb = arith.constant 0 : index @@ -420,7 +420,7 @@ module @hdiff_large_0 { aie.objectfifo.release(%buf_in_0_shim_2: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff.o" } + } %core1_2 = aie.core(%tile1_2) { %lb = arith.constant 0 : index @@ -442,7 +442,7 @@ module @hdiff_large_0 { 
aie.objectfifo.release(%buf_in_1_shim_2: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff.o" } + } %core2_2 = aie.core(%tile2_2) { %lb = arith.constant 0 : index @@ -464,7 +464,7 @@ module @hdiff_large_0 { aie.objectfifo.release(%buf_in_2_shim_3: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff.o" } + } %core3_2 = aie.core(%tile3_2) { %lb = arith.constant 0 : index @@ -486,7 +486,7 @@ module @hdiff_large_0 { aie.objectfifo.release(%buf_in_3_shim_3: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff.o" } + } %core4_2 = aie.core(%tile4_2) { %lb = arith.constant 0 : index @@ -508,7 +508,7 @@ module @hdiff_large_0 { aie.objectfifo.release(%buf_in_4_shim_6: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff.o" } + } %core5_2 = aie.core(%tile5_2) { %lb = arith.constant 0 : index @@ -530,7 +530,7 @@ module @hdiff_large_0 { aie.objectfifo.release(%buf_in_5_shim_6: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff.o" } + } %core6_2 = aie.core(%tile6_2) { %lb = arith.constant 0 : index @@ -552,7 +552,7 @@ module @hdiff_large_0 { aie.objectfifo.release(%buf_in_6_shim_7: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff.o" } + } %core7_2 = aie.core(%tile7_2) { %lb = arith.constant 0 : index @@ -574,7 +574,7 @@ module @hdiff_large_0 { aie.objectfifo.release(%buf_in_7_shim_7: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff.o" } + } %core8_2 = aie.core(%tile8_2) { %lb = arith.constant 0 : index @@ -596,7 +596,7 @@ module @hdiff_large_0 { aie.objectfifo.release(%buf_in_8_shim_10: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff.o" } + } %core9_2 = aie.core(%tile9_2) { %lb = arith.constant 0 : index @@ -618,7 +618,7 @@ module @hdiff_large_0 { aie.objectfifo.release(%buf_in_9_shim_10: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff.o" } + } %core10_2 = aie.core(%tile10_2) { %lb = arith.constant 0 : index @@ -640,7 +640,7 @@ module @hdiff_large_0 { aie.objectfifo.release(%buf_in_10_shim_11: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff.o" } + } %core11_2 
= aie.core(%tile11_2) { %lb = arith.constant 0 : index @@ -662,7 +662,7 @@ module @hdiff_large_0 { aie.objectfifo.release(%buf_in_11_shim_11: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff.o" } + } %core12_2 = aie.core(%tile12_2) { %lb = arith.constant 0 : index @@ -684,7 +684,7 @@ module @hdiff_large_0 { aie.objectfifo.release(%buf_in_12_shim_18: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff.o" } + } %core13_2 = aie.core(%tile13_2) { %lb = arith.constant 0 : index @@ -706,7 +706,7 @@ module @hdiff_large_0 { aie.objectfifo.release(%buf_in_13_shim_18: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff.o" } + } %core14_2 = aie.core(%tile14_2) { %lb = arith.constant 0 : index @@ -728,7 +728,7 @@ module @hdiff_large_0 { aie.objectfifo.release(%buf_in_14_shim_19: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff.o" } + } %core15_2 = aie.core(%tile15_2) { %lb = arith.constant 0 : index @@ -750,7 +750,7 @@ module @hdiff_large_0 { aie.objectfifo.release(%buf_in_15_shim_19: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff.o" } + } %core16_2 = aie.core(%tile16_2) { %lb = arith.constant 0 : index @@ -772,7 +772,7 @@ module @hdiff_large_0 { aie.objectfifo.release(%buf_in_16_shim_26: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff.o" } + } %core17_2 = aie.core(%tile17_2) { %lb = arith.constant 0 : index @@ -794,7 +794,7 @@ module @hdiff_large_0 { aie.objectfifo.release(%buf_in_17_shim_26: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff.o" } + } %core18_2 = aie.core(%tile18_2) { %lb = arith.constant 0 : index @@ -816,7 +816,7 @@ module @hdiff_large_0 { aie.objectfifo.release(%buf_in_18_shim_27: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff.o" } + } %core19_2 = aie.core(%tile19_2) { %lb = arith.constant 0 : index @@ -838,7 +838,7 @@ module @hdiff_large_0 { aie.objectfifo.release(%buf_in_19_shim_27: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff.o" } + } %core20_2 = aie.core(%tile20_2) { %lb = arith.constant 0 : index @@ -860,7 +860,7 @@ module 
@hdiff_large_0 { aie.objectfifo.release(%buf_in_20_shim_34: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff.o" } + } %core21_2 = aie.core(%tile21_2) { %lb = arith.constant 0 : index @@ -882,7 +882,7 @@ module @hdiff_large_0 { aie.objectfifo.release(%buf_in_21_shim_34: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff.o" } + } %core22_2 = aie.core(%tile22_2) { %lb = arith.constant 0 : index @@ -904,7 +904,7 @@ module @hdiff_large_0 { aie.objectfifo.release(%buf_in_22_shim_35: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff.o" } + } %core23_2 = aie.core(%tile23_2) { %lb = arith.constant 0 : index @@ -926,7 +926,7 @@ module @hdiff_large_0 { aie.objectfifo.release(%buf_in_23_shim_35: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff.o" } + } %core24_2 = aie.core(%tile24_2) { %lb = arith.constant 0 : index @@ -948,7 +948,7 @@ module @hdiff_large_0 { aie.objectfifo.release(%buf_in_24_shim_42: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff.o" } + } %core25_2 = aie.core(%tile25_2) { %lb = arith.constant 0 : index @@ -970,7 +970,7 @@ module @hdiff_large_0 { aie.objectfifo.release(%buf_in_25_shim_42: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff.o" } + } %core26_2 = aie.core(%tile26_2) { %lb = arith.constant 0 : index @@ -992,7 +992,7 @@ module @hdiff_large_0 { aie.objectfifo.release(%buf_in_26_shim_43: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff.o" } + } %core27_2 = aie.core(%tile27_2) { %lb = arith.constant 0 : index @@ -1014,7 +1014,7 @@ module @hdiff_large_0 { aie.objectfifo.release(%buf_in_27_shim_43: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff.o" } + } %core28_2 = aie.core(%tile28_2) { %lb = arith.constant 0 : index @@ -1036,7 +1036,7 @@ module @hdiff_large_0 { aie.objectfifo.release(%buf_in_28_shim_46: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff.o" } + } %core29_2 = aie.core(%tile29_2) { %lb = arith.constant 0 : index @@ -1058,7 +1058,7 @@ module @hdiff_large_0 { aie.objectfifo.release(%buf_in_29_shim_46: 
!aie.objectfifo>, 4) aie.end - } { link_with="hdiff.o" } + } %core30_2 = aie.core(%tile30_2) { %lb = arith.constant 0 : index @@ -1080,7 +1080,7 @@ module @hdiff_large_0 { aie.objectfifo.release(%buf_in_30_shim_47: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff.o" } + } %core31_2 = aie.core(%tile31_2) { %lb = arith.constant 0 : index @@ -1102,6 +1102,6 @@ module @hdiff_large_0 { aie.objectfifo.release(%buf_in_31_shim_47: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff.o" } + } } diff --git a/programming_examples/mlir/horizontal_diffusion/HDIFF_tri_AIE_objectFIFO_ping_pong/aie.mlir b/programming_examples/mlir/horizontal_diffusion/HDIFF_tri_AIE_objectFIFO_ping_pong/aie.mlir index 1566168882c..523a6cc6a80 100644 --- a/programming_examples/mlir/horizontal_diffusion/HDIFF_tri_AIE_objectFIFO_ping_pong/aie.mlir +++ b/programming_examples/mlir/horizontal_diffusion/HDIFF_tri_AIE_objectFIFO_ping_pong/aie.mlir @@ -32,9 +32,9 @@ module @hdiff_tri_AIE { aie.objectfifo.register_external_buffers @obj_in (%t70, {%ext_buffer_in0}) : (memref<1536xi32>) aie.objectfifo.register_external_buffers @obj_out_flux (%t70, {%ext_buffer_out}) : (memref<512xi32>) - func.func private @hdiff_lap(%AL: memref<256xi32>,%BL: memref<256xi32>, %CL: memref<256xi32>, %DL: memref<256xi32>, %EL: memref<256xi32>, %OLL1: memref<256xi32>, %OLL2: memref<256xi32>, %OLL3: memref<256xi32>, %OLL4: memref<256xi32>) -> () - func.func private @hdiff_flux1(%AF: memref<256xi32>,%BF: memref<256xi32>, %CF: memref<256xi32>, %OLF1: memref<256xi32>, %OLF2: memref<256xi32>, %OLF3: memref<256xi32>, %OLF4: memref<256xi32>, %OFI1: memref<512xi32>, %OFI2: memref<512xi32>, %OFI3: memref<512xi32>, %OFI4: memref<512xi32>, %OFI5: memref<512xi32>) -> () - func.func private @hdiff_flux2( %Inter1: memref<512xi32>,%Inter2: memref<512xi32>, %Inter3: memref<512xi32>,%Inter4: memref<512xi32>,%Inter5: memref<512xi32>, %Out: memref<256xi32>) -> () + func.func private @hdiff_lap(%AL: memref<256xi32>,%BL: memref<256xi32>, %CL: 
memref<256xi32>, %DL: memref<256xi32>, %EL: memref<256xi32>, %OLL1: memref<256xi32>, %OLL2: memref<256xi32>, %OLL3: memref<256xi32>, %OLL4: memref<256xi32>) -> () attributes {link_with = "hdiff_lap.o"} + func.func private @hdiff_flux1(%AF: memref<256xi32>,%BF: memref<256xi32>, %CF: memref<256xi32>, %OLF1: memref<256xi32>, %OLF2: memref<256xi32>, %OLF3: memref<256xi32>, %OLF4: memref<256xi32>, %OFI1: memref<512xi32>, %OFI2: memref<512xi32>, %OFI3: memref<512xi32>, %OFI4: memref<512xi32>, %OFI5: memref<512xi32>) -> () attributes {link_with = "hdiff_flux1.o"} + func.func private @hdiff_flux2( %Inter1: memref<512xi32>,%Inter2: memref<512xi32>, %Inter3: memref<512xi32>,%Inter4: memref<512xi32>,%Inter5: memref<512xi32>, %Out: memref<256xi32>) -> () attributes {link_with = "hdiff_flux2.o"} %c13 = aie.core(%t71) { %lb = arith.constant 0 : index @@ -62,7 +62,7 @@ module @hdiff_tri_AIE { aie.objectfifo.release @obj_in (Consume, 4) aie.end - } { link_with="hdiff_lap.o" } + } %c14 = aie.core(%t72) { %lb = arith.constant 0 : index @@ -97,7 +97,7 @@ module @hdiff_tri_AIE { aie.objectfifo.release @obj_in (Consume, 4) aie.end - } { link_with="hdiff_flux1.o" } + } %c15 = aie.core(%t73) { %lb = arith.constant 0 : index @@ -123,5 +123,5 @@ module @hdiff_tri_AIE { aie.use_lock(%lock73_14, "Acquire", 0) // stop the timer aie.end - } { link_with="hdiff_flux2.o" } + } } diff --git a/programming_examples/mlir/horizontal_diffusion/HDIFF_tri_AIE_objectFIFO_ping_pong/aie_fp32.mlir b/programming_examples/mlir/horizontal_diffusion/HDIFF_tri_AIE_objectFIFO_ping_pong/aie_fp32.mlir index e91361b78cd..3e55315f3c5 100644 --- a/programming_examples/mlir/horizontal_diffusion/HDIFF_tri_AIE_objectFIFO_ping_pong/aie_fp32.mlir +++ b/programming_examples/mlir/horizontal_diffusion/HDIFF_tri_AIE_objectFIFO_ping_pong/aie_fp32.mlir @@ -33,9 +33,9 @@ module @hdiff_tri_AIE{ aie.objectfifo.register_external_buffers @obj_out_flux (%t70, {%ext_buffer_out}) : (memref<512xf32>) - func.func private 
@hdiff_lap_fp32(%AL: memref<256xf32>,%BL: memref<256xf32>, %CL: memref<256xf32>, %DL: memref<256xf32>, %EL: memref<256xf32>, %OLL1: memref<256xf32>, %OLL2: memref<256xf32>, %OLL3: memref<256xf32>, %OLL4: memref<256xf32>) -> () - func.func private @hdiff_flux1_fp32(%AF: memref<256xf32>,%BF: memref<256xf32>, %CF: memref<256xf32>, %OLF1: memref<256xf32>, %OLF2: memref<256xf32>, %OLF3: memref<256xf32>, %OLF4: memref<256xf32>, %OFI1: memref<512xf32>, %OFI2: memref<512xf32>, %OFI3: memref<512xf32>, %OFI4: memref<512xf32>, %OFI5: memref<512xf32>) -> () - func.func private @hdiff_flux2_fp32( %Inter1: memref<512xf32>,%Inter2: memref<512xf32>, %Inter3: memref<512xf32>,%Inter4: memref<512xf32>,%Inter5: memref<512xf32>, %Out: memref<256xf32>) -> () + func.func private @hdiff_lap_fp32(%AL: memref<256xf32>,%BL: memref<256xf32>, %CL: memref<256xf32>, %DL: memref<256xf32>, %EL: memref<256xf32>, %OLL1: memref<256xf32>, %OLL2: memref<256xf32>, %OLL3: memref<256xf32>, %OLL4: memref<256xf32>) -> () attributes {link_with = "hdiff_lap_fp32.o"} + func.func private @hdiff_flux1_fp32(%AF: memref<256xf32>,%BF: memref<256xf32>, %CF: memref<256xf32>, %OLF1: memref<256xf32>, %OLF2: memref<256xf32>, %OLF3: memref<256xf32>, %OLF4: memref<256xf32>, %OFI1: memref<512xf32>, %OFI2: memref<512xf32>, %OFI3: memref<512xf32>, %OFI4: memref<512xf32>, %OFI5: memref<512xf32>) -> () attributes {link_with = "hdiff_flux1_fp32.o"} + func.func private @hdiff_flux2_fp32( %Inter1: memref<512xf32>,%Inter2: memref<512xf32>, %Inter3: memref<512xf32>,%Inter4: memref<512xf32>,%Inter5: memref<512xf32>, %Out: memref<256xf32>) -> () attributes {link_with = "hdiff_flux2_fp32.o"} %c13 = aie.core(%t71) { @@ -66,7 +66,7 @@ module @hdiff_tri_AIE{ aie.objectfifo.release @obj_in (Consume, 4) aie.end - } { link_with="hdiff_lap_fp32.o" } + } %c14 = aie.core(%t72) { @@ -103,7 +103,7 @@ module @hdiff_tri_AIE{ aie.objectfifo.release @obj_in (Consume, 4) aie.end - } { link_with="hdiff_flux1_fp32.o" } + } %c15 = aie.core(%t73) { %lb = 
arith.constant 0 : index @@ -131,7 +131,7 @@ module @hdiff_tri_AIE{ aie.use_lock(%lock73_14, "Acquire", 0) // stop the timer aie.end - } { link_with="hdiff_flux2_fp32.o" } + } diff --git a/programming_examples/mlir/horizontal_diffusion/HDIFF_tri_AIE_objectFIFO_ping_pong_scaled/aie.mlir b/programming_examples/mlir/horizontal_diffusion/HDIFF_tri_AIE_objectFIFO_ping_pong_scaled/aie.mlir index 78de65d0928..b313711740f 100644 --- a/programming_examples/mlir/horizontal_diffusion/HDIFF_tri_AIE_objectFIFO_ping_pong_scaled/aie.mlir +++ b/programming_examples/mlir/horizontal_diffusion/HDIFF_tri_AIE_objectFIFO_ping_pong_scaled/aie.mlir @@ -57,9 +57,9 @@ module @hdiff_bundle_1 { aie.objectfifo.register_external_buffers(%tile2_0, %block_0_buf_out_shim_2 : !aie.objectfifo>, {%ext_buffer_out_0}) : (memref<2048xi32>) - func.func private @hdiff_lap(%AL: memref<256xi32>,%BL: memref<256xi32>, %CL: memref<256xi32>, %DL: memref<256xi32>, %EL: memref<256xi32>, %OLL1: memref<256xi32>, %OLL2: memref<256xi32>, %OLL3: memref<256xi32>, %OLL4: memref<256xi32>) -> () - func.func private @hdiff_flux1(%AF: memref<256xi32>,%BF: memref<256xi32>, %CF: memref<256xi32>, %OLF1: memref<256xi32>, %OLF2: memref<256xi32>, %OLF3: memref<256xi32>, %OLF4: memref<256xi32>, %OFI1: memref<512xi32>, %OFI2: memref<512xi32>, %OFI3: memref<512xi32>, %OFI4: memref<512xi32>, %OFI5: memref<512xi32>) -> () - func.func private @hdiff_flux2( %Inter1: memref<512xi32>,%Inter2: memref<512xi32>, %Inter3: memref<512xi32>,%Inter4: memref<512xi32>,%Inter5: memref<512xi32>, %Out: memref<256xi32>) -> () + func.func private @hdiff_lap(%AL: memref<256xi32>,%BL: memref<256xi32>, %CL: memref<256xi32>, %DL: memref<256xi32>, %EL: memref<256xi32>, %OLL1: memref<256xi32>, %OLL2: memref<256xi32>, %OLL3: memref<256xi32>, %OLL4: memref<256xi32>) -> () attributes {link_with = "hdiff_lap.o"} + func.func private @hdiff_flux1(%AF: memref<256xi32>,%BF: memref<256xi32>, %CF: memref<256xi32>, %OLF1: memref<256xi32>, %OLF2: memref<256xi32>, %OLF3: 
memref<256xi32>, %OLF4: memref<256xi32>, %OFI1: memref<512xi32>, %OFI2: memref<512xi32>, %OFI3: memref<512xi32>, %OFI4: memref<512xi32>, %OFI5: memref<512xi32>) -> () attributes {link_with = "hdiff_flux1.o"} + func.func private @hdiff_flux2( %Inter1: memref<512xi32>,%Inter2: memref<512xi32>, %Inter3: memref<512xi32>,%Inter4: memref<512xi32>,%Inter5: memref<512xi32>, %Out: memref<256xi32>) -> () attributes {link_with = "hdiff_flux2.o"} %block_0_core0_1 = aie.core(%tile0_1) { %lb = arith.constant 0 : index @@ -87,7 +87,7 @@ module @hdiff_bundle_1 { } aie.objectfifo.release(%block_0_buf_in_shim_2: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_0_core1_1 = aie.core(%tile1_1) { %lb = arith.constant 0 : index @@ -120,7 +120,7 @@ module @hdiff_bundle_1 { } aie.objectfifo.release(%block_0_buf_in_shim_2: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_0_core2_1 = aie.core(%tile2_1) { %lb = arith.constant 0 : index @@ -144,7 +144,7 @@ module @hdiff_bundle_1 { } aie.use_lock(%lock21_14, "Acquire", 0) // stop the timer aie.end - } { link_with="hdiff_flux2.o" } + } %block_0_core0_2 = aie.core(%tile0_2) { %lb = arith.constant 0 : index @@ -171,7 +171,7 @@ module @hdiff_bundle_1 { } aie.objectfifo.release(%block_0_buf_in_shim_2: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_0_core1_2 = aie.core(%tile1_2) { %lb = arith.constant 0 : index @@ -204,7 +204,7 @@ module @hdiff_bundle_1 { } aie.objectfifo.release(%block_0_buf_in_shim_2: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } // Gathering Tile %block_0_core2_2 = aie.core(%tile2_2) { @@ -248,7 +248,7 @@ module @hdiff_bundle_1 { aie.objectfifo.release(%block_0_buf_out_shim_2:!aie.objectfifo>, 4) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_0_core0_3 = aie.core(%tile0_3) { %lb = arith.constant 0 : index @@ -275,7 +275,7 @@ module @hdiff_bundle_1 { } aie.objectfifo.release(%block_0_buf_in_shim_2: !aie.objectfifo>, 4) aie.end - } 
{ link_with="hdiff_lap.o" } + } %block_0_core1_3 = aie.core(%tile1_3) { %lb = arith.constant 0 : index @@ -308,7 +308,7 @@ module @hdiff_bundle_1 { } aie.objectfifo.release(%block_0_buf_in_shim_2: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_0_core2_3 = aie.core(%tile2_3) { %lb = arith.constant 0 : index @@ -331,7 +331,7 @@ module @hdiff_bundle_1 { aie.objectfifo.release(%block_0_buf_row_3_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_0_core0_4 = aie.core(%tile0_4) { %lb = arith.constant 0 : index @@ -358,7 +358,7 @@ module @hdiff_bundle_1 { } aie.objectfifo.release(%block_0_buf_in_shim_2: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_0_core1_4 = aie.core(%tile1_4) { %lb = arith.constant 0 : index @@ -391,7 +391,7 @@ module @hdiff_bundle_1 { } aie.objectfifo.release(%block_0_buf_in_shim_2: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_0_core2_4 = aie.core(%tile2_4) { %lb = arith.constant 0 : index @@ -414,6 +414,6 @@ module @hdiff_bundle_1 { aie.objectfifo.release(%block_0_buf_row_4_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } } diff --git a/programming_examples/mlir/horizontal_diffusion/HDIFF_tri_AIE_objectFIFO_ping_pong_scaled/aie_1.mlir b/programming_examples/mlir/horizontal_diffusion/HDIFF_tri_AIE_objectFIFO_ping_pong_scaled/aie_1.mlir index a4b619191ad..2a3dcddf6bc 100644 --- a/programming_examples/mlir/horizontal_diffusion/HDIFF_tri_AIE_objectFIFO_ping_pong_scaled/aie_1.mlir +++ b/programming_examples/mlir/horizontal_diffusion/HDIFF_tri_AIE_objectFIFO_ping_pong_scaled/aie_1.mlir @@ -57,9 +57,9 @@ module @hdiff_bundle_1 { aie.objectfifo.register_external_buffers(%tile2_0, %block_0_buf_out_shim_2 : !aie.objectfifo>, {%ext_buffer_out_0}) : (memref<2048xi32>) - func.func private @hdiff_lap(%AL: memref<256xi32>,%BL: memref<256xi32>, %CL: memref<256xi32>, %DL: memref<256xi32>, %EL: memref<256xi32>, %OLL1: 
memref<256xi32>, %OLL2: memref<256xi32>, %OLL3: memref<256xi32>, %OLL4: memref<256xi32>) -> () - func.func private @hdiff_flux1(%AF: memref<256xi32>,%BF: memref<256xi32>, %CF: memref<256xi32>, %OLF1: memref<256xi32>, %OLF2: memref<256xi32>, %OLF3: memref<256xi32>, %OLF4: memref<256xi32>, %OFI1: memref<512xi32>, %OFI2: memref<512xi32>, %OFI3: memref<512xi32>, %OFI4: memref<512xi32>, %OFI5: memref<512xi32>) -> () - func.func private @hdiff_flux2( %Inter1: memref<512xi32>,%Inter2: memref<512xi32>, %Inter3: memref<512xi32>,%Inter4: memref<512xi32>,%Inter5: memref<512xi32>, %Out: memref<256xi32>) -> () + func.func private @hdiff_lap(%AL: memref<256xi32>,%BL: memref<256xi32>, %CL: memref<256xi32>, %DL: memref<256xi32>, %EL: memref<256xi32>, %OLL1: memref<256xi32>, %OLL2: memref<256xi32>, %OLL3: memref<256xi32>, %OLL4: memref<256xi32>) -> () attributes {link_with = "hdiff_lap.o"} + func.func private @hdiff_flux1(%AF: memref<256xi32>,%BF: memref<256xi32>, %CF: memref<256xi32>, %OLF1: memref<256xi32>, %OLF2: memref<256xi32>, %OLF3: memref<256xi32>, %OLF4: memref<256xi32>, %OFI1: memref<512xi32>, %OFI2: memref<512xi32>, %OFI3: memref<512xi32>, %OFI4: memref<512xi32>, %OFI5: memref<512xi32>) -> () attributes {link_with = "hdiff_flux1.o"} + func.func private @hdiff_flux2( %Inter1: memref<512xi32>,%Inter2: memref<512xi32>, %Inter3: memref<512xi32>,%Inter4: memref<512xi32>,%Inter5: memref<512xi32>, %Out: memref<256xi32>) -> () attributes {link_with = "hdiff_flux2.o"} %block_0_core0_1 = aie.core(%tile0_1) { %lb = arith.constant 0 : index @@ -87,7 +87,7 @@ module @hdiff_bundle_1 { } aie.objectfifo.release(%block_0_buf_in_shim_2: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_0_core1_1 = aie.core(%tile1_1) { %lb = arith.constant 0 : index @@ -120,7 +120,7 @@ module @hdiff_bundle_1 { } aie.objectfifo.release(%block_0_buf_in_shim_2: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_0_core2_1 = aie.core(%tile2_1) { %lb = arith.constant 
0 : index @@ -144,7 +144,7 @@ module @hdiff_bundle_1 { } aie.use_lock(%lock21_14, "Acquire", 0) // stop the timer aie.end - } { link_with="hdiff_flux2.o" } + } %block_0_core0_2 = aie.core(%tile0_2) { %lb = arith.constant 0 : index @@ -171,7 +171,7 @@ module @hdiff_bundle_1 { } aie.objectfifo.release(%block_0_buf_in_shim_2: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_0_core1_2 = aie.core(%tile1_2) { %lb = arith.constant 0 : index @@ -204,7 +204,7 @@ module @hdiff_bundle_1 { } aie.objectfifo.release(%block_0_buf_in_shim_2: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } // Gathering Tile %block_0_core2_2 = aie.core(%tile2_2) { @@ -248,7 +248,7 @@ module @hdiff_bundle_1 { aie.objectfifo.release(%block_0_buf_out_shim_2:!aie.objectfifo>, 4) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_0_core0_3 = aie.core(%tile0_3) { %lb = arith.constant 0 : index @@ -275,7 +275,7 @@ module @hdiff_bundle_1 { } aie.objectfifo.release(%block_0_buf_in_shim_2: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_0_core1_3 = aie.core(%tile1_3) { %lb = arith.constant 0 : index @@ -308,7 +308,7 @@ module @hdiff_bundle_1 { } aie.objectfifo.release(%block_0_buf_in_shim_2: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_0_core2_3 = aie.core(%tile2_3) { %lb = arith.constant 0 : index @@ -331,7 +331,7 @@ module @hdiff_bundle_1 { aie.objectfifo.release(%block_0_buf_row_3_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_0_core0_4 = aie.core(%tile0_4) { %lb = arith.constant 0 : index @@ -358,7 +358,7 @@ module @hdiff_bundle_1 { } aie.objectfifo.release(%block_0_buf_in_shim_2: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_0_core1_4 = aie.core(%tile1_4) { %lb = arith.constant 0 : index @@ -391,7 +391,7 @@ module @hdiff_bundle_1 { } aie.objectfifo.release(%block_0_buf_in_shim_2: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + 
} %block_0_core2_4 = aie.core(%tile2_4) { %lb = arith.constant 0 : index @@ -414,6 +414,6 @@ module @hdiff_bundle_1 { aie.objectfifo.release(%block_0_buf_row_4_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } } diff --git a/programming_examples/mlir/horizontal_diffusion/HDIFF_tri_AIE_objectFIFO_ping_pong_scaled/aie_16.mlir b/programming_examples/mlir/horizontal_diffusion/HDIFF_tri_AIE_objectFIFO_ping_pong_scaled/aie_16.mlir index 588bcab596d..ea6abb91863 100644 --- a/programming_examples/mlir/horizontal_diffusion/HDIFF_tri_AIE_objectFIFO_ping_pong_scaled/aie_16.mlir +++ b/programming_examples/mlir/horizontal_diffusion/HDIFF_tri_AIE_objectFIFO_ping_pong_scaled/aie_16.mlir @@ -687,9 +687,9 @@ module @hdiff_bundle_16 { aie.objectfifo.register_external_buffers(%tile19_0, %block_15_buf_out_shim_19 : !aie.objectfifo>, {%ext_buffer_out_15}) : (memref<2048xi32>) - func.func private @hdiff_lap(%AL: memref<256xi32>,%BL: memref<256xi32>, %CL: memref<256xi32>, %DL: memref<256xi32>, %EL: memref<256xi32>, %OLL1: memref<256xi32>, %OLL2: memref<256xi32>, %OLL3: memref<256xi32>, %OLL4: memref<256xi32>) -> () - func.func private @hdiff_flux1(%AF: memref<256xi32>,%BF: memref<256xi32>, %CF: memref<256xi32>, %OLF1: memref<256xi32>, %OLF2: memref<256xi32>, %OLF3: memref<256xi32>, %OLF4: memref<256xi32>, %OFI1: memref<512xi32>, %OFI2: memref<512xi32>, %OFI3: memref<512xi32>, %OFI4: memref<512xi32>, %OFI5: memref<512xi32>) -> () - func.func private @hdiff_flux2( %Inter1: memref<512xi32>,%Inter2: memref<512xi32>, %Inter3: memref<512xi32>,%Inter4: memref<512xi32>,%Inter5: memref<512xi32>, %Out: memref<256xi32>) -> () + func.func private @hdiff_lap(%AL: memref<256xi32>,%BL: memref<256xi32>, %CL: memref<256xi32>, %DL: memref<256xi32>, %EL: memref<256xi32>, %OLL1: memref<256xi32>, %OLL2: memref<256xi32>, %OLL3: memref<256xi32>, %OLL4: memref<256xi32>) -> () attributes {link_with = "hdiff_lap.o"} + func.func private @hdiff_flux1(%AF: memref<256xi32>,%BF: 
memref<256xi32>, %CF: memref<256xi32>, %OLF1: memref<256xi32>, %OLF2: memref<256xi32>, %OLF3: memref<256xi32>, %OLF4: memref<256xi32>, %OFI1: memref<512xi32>, %OFI2: memref<512xi32>, %OFI3: memref<512xi32>, %OFI4: memref<512xi32>, %OFI5: memref<512xi32>) -> () attributes {link_with = "hdiff_flux1.o"} + func.func private @hdiff_flux2( %Inter1: memref<512xi32>,%Inter2: memref<512xi32>, %Inter3: memref<512xi32>,%Inter4: memref<512xi32>,%Inter5: memref<512xi32>, %Out: memref<256xi32>) -> () attributes {link_with = "hdiff_flux2.o"} %block_0_core0_1 = aie.core(%tile0_1) { %lb = arith.constant 0 : index @@ -716,7 +716,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_0_buf_in_shim_2: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_0_core1_1 = aie.core(%tile1_1) { %lb = arith.constant 0 : index @@ -749,7 +749,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_0_buf_in_shim_2: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_0_core2_1 = aie.core(%tile2_1) { %lb = arith.constant 0 : index @@ -772,7 +772,7 @@ module @hdiff_bundle_16 { aie.objectfifo.release(%block_0_buf_row_1_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_0_core0_2 = aie.core(%tile0_2) { %lb = arith.constant 0 : index @@ -800,7 +800,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_0_buf_in_shim_2: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_0_core1_2 = aie.core(%tile1_2) { %lb = arith.constant 0 : index @@ -833,7 +833,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_0_buf_in_shim_2: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } // Gathering Tile %block_0_core2_2 = aie.core(%tile2_2) { @@ -878,7 +878,7 @@ module @hdiff_bundle_16 { } aie.use_lock(%lock22_14, "Acquire", 0) // stop the timer aie.end - } { link_with="hdiff_flux2.o" } + } %block_0_core0_3 = aie.core(%tile0_3) { %lb = arith.constant 0 : index @@ -905,7 +905,7 @@ 
module @hdiff_bundle_16 { } aie.objectfifo.release(%block_0_buf_in_shim_2: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_0_core1_3 = aie.core(%tile1_3) { %lb = arith.constant 0 : index @@ -938,7 +938,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_0_buf_in_shim_2: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_0_core2_3 = aie.core(%tile2_3) { %lb = arith.constant 0 : index @@ -961,7 +961,7 @@ module @hdiff_bundle_16 { aie.objectfifo.release(%block_0_buf_row_3_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_0_core0_4 = aie.core(%tile0_4) { %lb = arith.constant 0 : index @@ -988,7 +988,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_0_buf_in_shim_2: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_0_core1_4 = aie.core(%tile1_4) { %lb = arith.constant 0 : index @@ -1021,7 +1021,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_0_buf_in_shim_2: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_0_core2_4 = aie.core(%tile2_4) { %lb = arith.constant 0 : index @@ -1044,7 +1044,7 @@ module @hdiff_bundle_16 { aie.objectfifo.release(%block_0_buf_row_4_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_1_core0_5 = aie.core(%tile0_5) { %lb = arith.constant 0 : index @@ -1071,7 +1071,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_1_buf_in_shim_2: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_1_core1_5 = aie.core(%tile1_5) { %lb = arith.constant 0 : index @@ -1104,7 +1104,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_1_buf_in_shim_2: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_1_core2_5 = aie.core(%tile2_5) { %lb = arith.constant 0 : index @@ -1127,7 +1127,7 @@ module @hdiff_bundle_16 { aie.objectfifo.release(%block_1_buf_row_5_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { 
link_with="hdiff_flux2.o" } + } %block_1_core0_6 = aie.core(%tile0_6) { %lb = arith.constant 0 : index @@ -1155,7 +1155,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_1_buf_in_shim_2: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_1_core1_6 = aie.core(%tile1_6) { %lb = arith.constant 0 : index @@ -1188,7 +1188,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_1_buf_in_shim_2: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } // Gathering Tile %block_1_core2_6 = aie.core(%tile2_6) { @@ -1233,7 +1233,7 @@ module @hdiff_bundle_16 { } aie.use_lock(%lock26_14, "Acquire", 0) // stop the timer aie.end - } { link_with="hdiff_flux2.o" } + } %block_1_core0_7 = aie.core(%tile0_7) { %lb = arith.constant 0 : index @@ -1260,7 +1260,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_1_buf_in_shim_2: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_1_core1_7 = aie.core(%tile1_7) { %lb = arith.constant 0 : index @@ -1293,7 +1293,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_1_buf_in_shim_2: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_1_core2_7 = aie.core(%tile2_7) { %lb = arith.constant 0 : index @@ -1316,7 +1316,7 @@ module @hdiff_bundle_16 { aie.objectfifo.release(%block_1_buf_row_7_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_1_core0_8 = aie.core(%tile0_8) { %lb = arith.constant 0 : index @@ -1343,7 +1343,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_1_buf_in_shim_2: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_1_core1_8 = aie.core(%tile1_8) { %lb = arith.constant 0 : index @@ -1376,7 +1376,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_1_buf_in_shim_2: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_1_core2_8 = aie.core(%tile2_8) { %lb = arith.constant 0 : index @@ -1399,7 +1399,7 @@ module 
@hdiff_bundle_16 { aie.objectfifo.release(%block_1_buf_row_8_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_2_core3_1 = aie.core(%tile3_1) { %lb = arith.constant 0 : index @@ -1426,7 +1426,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_2_buf_in_shim_3: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_2_core4_1 = aie.core(%tile4_1) { %lb = arith.constant 0 : index @@ -1459,7 +1459,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_2_buf_in_shim_3: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_2_core5_1 = aie.core(%tile5_1) { %lb = arith.constant 0 : index @@ -1482,7 +1482,7 @@ module @hdiff_bundle_16 { aie.objectfifo.release(%block_2_buf_row_1_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_2_core3_2 = aie.core(%tile3_2) { %lb = arith.constant 0 : index @@ -1510,7 +1510,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_2_buf_in_shim_3: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_2_core4_2 = aie.core(%tile4_2) { %lb = arith.constant 0 : index @@ -1543,7 +1543,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_2_buf_in_shim_3: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } // Gathering Tile %block_2_core5_2 = aie.core(%tile5_2) { @@ -1588,7 +1588,7 @@ module @hdiff_bundle_16 { } aie.use_lock(%lock52_14, "Acquire", 0) // stop the timer aie.end - } { link_with="hdiff_flux2.o" } + } %block_2_core3_3 = aie.core(%tile3_3) { %lb = arith.constant 0 : index @@ -1615,7 +1615,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_2_buf_in_shim_3: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_2_core4_3 = aie.core(%tile4_3) { %lb = arith.constant 0 : index @@ -1648,7 +1648,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_2_buf_in_shim_3: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } 
%block_2_core5_3 = aie.core(%tile5_3) { %lb = arith.constant 0 : index @@ -1671,7 +1671,7 @@ module @hdiff_bundle_16 { aie.objectfifo.release(%block_2_buf_row_3_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_2_core3_4 = aie.core(%tile3_4) { %lb = arith.constant 0 : index @@ -1698,7 +1698,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_2_buf_in_shim_3: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_2_core4_4 = aie.core(%tile4_4) { %lb = arith.constant 0 : index @@ -1731,7 +1731,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_2_buf_in_shim_3: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_2_core5_4 = aie.core(%tile5_4) { %lb = arith.constant 0 : index @@ -1754,7 +1754,7 @@ module @hdiff_bundle_16 { aie.objectfifo.release(%block_2_buf_row_4_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_3_core3_5 = aie.core(%tile3_5) { %lb = arith.constant 0 : index @@ -1781,7 +1781,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_3_buf_in_shim_3: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_3_core4_5 = aie.core(%tile4_5) { %lb = arith.constant 0 : index @@ -1814,7 +1814,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_3_buf_in_shim_3: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_3_core5_5 = aie.core(%tile5_5) { %lb = arith.constant 0 : index @@ -1837,7 +1837,7 @@ module @hdiff_bundle_16 { aie.objectfifo.release(%block_3_buf_row_5_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_3_core3_6 = aie.core(%tile3_6) { %lb = arith.constant 0 : index @@ -1865,7 +1865,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_3_buf_in_shim_3: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_3_core4_6 = aie.core(%tile4_6) { %lb = arith.constant 0 : index @@ -1898,7 +1898,7 @@ module 
@hdiff_bundle_16 { } aie.objectfifo.release(%block_3_buf_in_shim_3: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } // Gathering Tile %block_3_core5_6 = aie.core(%tile5_6) { @@ -1943,7 +1943,7 @@ module @hdiff_bundle_16 { } aie.use_lock(%lock56_14, "Acquire", 0) // stop the timer aie.end - } { link_with="hdiff_flux2.o" } + } %block_3_core3_7 = aie.core(%tile3_7) { %lb = arith.constant 0 : index @@ -1970,7 +1970,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_3_buf_in_shim_3: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_3_core4_7 = aie.core(%tile4_7) { %lb = arith.constant 0 : index @@ -2003,7 +2003,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_3_buf_in_shim_3: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_3_core5_7 = aie.core(%tile5_7) { %lb = arith.constant 0 : index @@ -2026,7 +2026,7 @@ module @hdiff_bundle_16 { aie.objectfifo.release(%block_3_buf_row_7_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_3_core3_8 = aie.core(%tile3_8) { %lb = arith.constant 0 : index @@ -2053,7 +2053,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_3_buf_in_shim_3: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_3_core4_8 = aie.core(%tile4_8) { %lb = arith.constant 0 : index @@ -2086,7 +2086,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_3_buf_in_shim_3: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_3_core5_8 = aie.core(%tile5_8) { %lb = arith.constant 0 : index @@ -2109,7 +2109,7 @@ module @hdiff_bundle_16 { aie.objectfifo.release(%block_3_buf_row_8_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_4_core6_1 = aie.core(%tile6_1) { %lb = arith.constant 0 : index @@ -2136,7 +2136,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_4_buf_in_shim_6: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } 
%block_4_core7_1 = aie.core(%tile7_1) { %lb = arith.constant 0 : index @@ -2169,7 +2169,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_4_buf_in_shim_6: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_4_core8_1 = aie.core(%tile8_1) { %lb = arith.constant 0 : index @@ -2192,7 +2192,7 @@ module @hdiff_bundle_16 { aie.objectfifo.release(%block_4_buf_row_1_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_4_core6_2 = aie.core(%tile6_2) { %lb = arith.constant 0 : index @@ -2220,7 +2220,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_4_buf_in_shim_6: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_4_core7_2 = aie.core(%tile7_2) { %lb = arith.constant 0 : index @@ -2253,7 +2253,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_4_buf_in_shim_6: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } // Gathering Tile %block_4_core8_2 = aie.core(%tile8_2) { @@ -2298,7 +2298,7 @@ module @hdiff_bundle_16 { } aie.use_lock(%lock82_14, "Acquire", 0) // stop the timer aie.end - } { link_with="hdiff_flux2.o" } + } %block_4_core6_3 = aie.core(%tile6_3) { %lb = arith.constant 0 : index @@ -2325,7 +2325,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_4_buf_in_shim_6: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_4_core7_3 = aie.core(%tile7_3) { %lb = arith.constant 0 : index @@ -2358,7 +2358,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_4_buf_in_shim_6: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_4_core8_3 = aie.core(%tile8_3) { %lb = arith.constant 0 : index @@ -2381,7 +2381,7 @@ module @hdiff_bundle_16 { aie.objectfifo.release(%block_4_buf_row_3_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_4_core6_4 = aie.core(%tile6_4) { %lb = arith.constant 0 : index @@ -2408,7 +2408,7 @@ module @hdiff_bundle_16 { } 
aie.objectfifo.release(%block_4_buf_in_shim_6: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_4_core7_4 = aie.core(%tile7_4) { %lb = arith.constant 0 : index @@ -2441,7 +2441,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_4_buf_in_shim_6: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_4_core8_4 = aie.core(%tile8_4) { %lb = arith.constant 0 : index @@ -2464,7 +2464,7 @@ module @hdiff_bundle_16 { aie.objectfifo.release(%block_4_buf_row_4_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_5_core6_5 = aie.core(%tile6_5) { %lb = arith.constant 0 : index @@ -2491,7 +2491,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_5_buf_in_shim_6: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_5_core7_5 = aie.core(%tile7_5) { %lb = arith.constant 0 : index @@ -2524,7 +2524,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_5_buf_in_shim_6: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_5_core8_5 = aie.core(%tile8_5) { %lb = arith.constant 0 : index @@ -2547,7 +2547,7 @@ module @hdiff_bundle_16 { aie.objectfifo.release(%block_5_buf_row_5_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_5_core6_6 = aie.core(%tile6_6) { %lb = arith.constant 0 : index @@ -2575,7 +2575,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_5_buf_in_shim_6: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_5_core7_6 = aie.core(%tile7_6) { %lb = arith.constant 0 : index @@ -2608,7 +2608,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_5_buf_in_shim_6: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } // Gathering Tile %block_5_core8_6 = aie.core(%tile8_6) { @@ -2653,7 +2653,7 @@ module @hdiff_bundle_16 { } aie.use_lock(%lock86_14, "Acquire", 0) // stop the timer aie.end - } { link_with="hdiff_flux2.o" } + } %block_5_core6_7 = 
aie.core(%tile6_7) { %lb = arith.constant 0 : index @@ -2680,7 +2680,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_5_buf_in_shim_6: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_5_core7_7 = aie.core(%tile7_7) { %lb = arith.constant 0 : index @@ -2713,7 +2713,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_5_buf_in_shim_6: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_5_core8_7 = aie.core(%tile8_7) { %lb = arith.constant 0 : index @@ -2736,7 +2736,7 @@ module @hdiff_bundle_16 { aie.objectfifo.release(%block_5_buf_row_7_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_5_core6_8 = aie.core(%tile6_8) { %lb = arith.constant 0 : index @@ -2763,7 +2763,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_5_buf_in_shim_6: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_5_core7_8 = aie.core(%tile7_8) { %lb = arith.constant 0 : index @@ -2796,7 +2796,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_5_buf_in_shim_6: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_5_core8_8 = aie.core(%tile8_8) { %lb = arith.constant 0 : index @@ -2819,7 +2819,7 @@ module @hdiff_bundle_16 { aie.objectfifo.release(%block_5_buf_row_8_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_6_core9_1 = aie.core(%tile9_1) { %lb = arith.constant 0 : index @@ -2846,7 +2846,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_6_buf_in_shim_7: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_6_core10_1 = aie.core(%tile10_1) { %lb = arith.constant 0 : index @@ -2879,7 +2879,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_6_buf_in_shim_7: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_6_core11_1 = aie.core(%tile11_1) { %lb = arith.constant 0 : index @@ -2902,7 +2902,7 @@ module @hdiff_bundle_16 { 
aie.objectfifo.release(%block_6_buf_row_1_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_6_core9_2 = aie.core(%tile9_2) { %lb = arith.constant 0 : index @@ -2930,7 +2930,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_6_buf_in_shim_7: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_6_core10_2 = aie.core(%tile10_2) { %lb = arith.constant 0 : index @@ -2963,7 +2963,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_6_buf_in_shim_7: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } // Gathering Tile %block_6_core11_2 = aie.core(%tile11_2) { @@ -3008,7 +3008,7 @@ module @hdiff_bundle_16 { } aie.use_lock(%lock112_14, "Acquire", 0) // stop the timer aie.end - } { link_with="hdiff_flux2.o" } + } %block_6_core9_3 = aie.core(%tile9_3) { %lb = arith.constant 0 : index @@ -3035,7 +3035,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_6_buf_in_shim_7: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_6_core10_3 = aie.core(%tile10_3) { %lb = arith.constant 0 : index @@ -3068,7 +3068,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_6_buf_in_shim_7: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_6_core11_3 = aie.core(%tile11_3) { %lb = arith.constant 0 : index @@ -3091,7 +3091,7 @@ module @hdiff_bundle_16 { aie.objectfifo.release(%block_6_buf_row_3_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_6_core9_4 = aie.core(%tile9_4) { %lb = arith.constant 0 : index @@ -3118,7 +3118,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_6_buf_in_shim_7: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_6_core10_4 = aie.core(%tile10_4) { %lb = arith.constant 0 : index @@ -3151,7 +3151,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_6_buf_in_shim_7: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } 
%block_6_core11_4 = aie.core(%tile11_4) { %lb = arith.constant 0 : index @@ -3174,7 +3174,7 @@ module @hdiff_bundle_16 { aie.objectfifo.release(%block_6_buf_row_4_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_7_core9_5 = aie.core(%tile9_5) { %lb = arith.constant 0 : index @@ -3201,7 +3201,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_7_buf_in_shim_7: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_7_core10_5 = aie.core(%tile10_5) { %lb = arith.constant 0 : index @@ -3234,7 +3234,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_7_buf_in_shim_7: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_7_core11_5 = aie.core(%tile11_5) { %lb = arith.constant 0 : index @@ -3257,7 +3257,7 @@ module @hdiff_bundle_16 { aie.objectfifo.release(%block_7_buf_row_5_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_7_core9_6 = aie.core(%tile9_6) { %lb = arith.constant 0 : index @@ -3285,7 +3285,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_7_buf_in_shim_7: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_7_core10_6 = aie.core(%tile10_6) { %lb = arith.constant 0 : index @@ -3318,7 +3318,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_7_buf_in_shim_7: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } // Gathering Tile %block_7_core11_6 = aie.core(%tile11_6) { @@ -3363,7 +3363,7 @@ module @hdiff_bundle_16 { } aie.use_lock(%lock116_14, "Acquire", 0) // stop the timer aie.end - } { link_with="hdiff_flux2.o" } + } %block_7_core9_7 = aie.core(%tile9_7) { %lb = arith.constant 0 : index @@ -3390,7 +3390,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_7_buf_in_shim_7: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_7_core10_7 = aie.core(%tile10_7) { %lb = arith.constant 0 : index @@ -3423,7 +3423,7 @@ module @hdiff_bundle_16 { } 
aie.objectfifo.release(%block_7_buf_in_shim_7: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_7_core11_7 = aie.core(%tile11_7) { %lb = arith.constant 0 : index @@ -3446,7 +3446,7 @@ module @hdiff_bundle_16 { aie.objectfifo.release(%block_7_buf_row_7_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_7_core9_8 = aie.core(%tile9_8) { %lb = arith.constant 0 : index @@ -3473,7 +3473,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_7_buf_in_shim_7: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_7_core10_8 = aie.core(%tile10_8) { %lb = arith.constant 0 : index @@ -3506,7 +3506,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_7_buf_in_shim_7: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_7_core11_8 = aie.core(%tile11_8) { %lb = arith.constant 0 : index @@ -3529,7 +3529,7 @@ module @hdiff_bundle_16 { aie.objectfifo.release(%block_7_buf_row_8_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_8_core12_1 = aie.core(%tile12_1) { %lb = arith.constant 0 : index @@ -3556,7 +3556,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_8_buf_in_shim_10: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_8_core13_1 = aie.core(%tile13_1) { %lb = arith.constant 0 : index @@ -3589,7 +3589,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_8_buf_in_shim_10: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_8_core14_1 = aie.core(%tile14_1) { %lb = arith.constant 0 : index @@ -3612,7 +3612,7 @@ module @hdiff_bundle_16 { aie.objectfifo.release(%block_8_buf_row_1_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_8_core12_2 = aie.core(%tile12_2) { %lb = arith.constant 0 : index @@ -3640,7 +3640,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_8_buf_in_shim_10: !aie.objectfifo>, 4) aie.end - } { 
link_with="hdiff_lap.o" } + } %block_8_core13_2 = aie.core(%tile13_2) { %lb = arith.constant 0 : index @@ -3673,7 +3673,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_8_buf_in_shim_10: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } // Gathering Tile %block_8_core14_2 = aie.core(%tile14_2) { @@ -3718,7 +3718,7 @@ module @hdiff_bundle_16 { } aie.use_lock(%lock142_14, "Acquire", 0) // stop the timer aie.end - } { link_with="hdiff_flux2.o" } + } %block_8_core12_3 = aie.core(%tile12_3) { %lb = arith.constant 0 : index @@ -3745,7 +3745,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_8_buf_in_shim_10: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_8_core13_3 = aie.core(%tile13_3) { %lb = arith.constant 0 : index @@ -3778,7 +3778,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_8_buf_in_shim_10: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_8_core14_3 = aie.core(%tile14_3) { %lb = arith.constant 0 : index @@ -3801,7 +3801,7 @@ module @hdiff_bundle_16 { aie.objectfifo.release(%block_8_buf_row_3_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_8_core12_4 = aie.core(%tile12_4) { %lb = arith.constant 0 : index @@ -3828,7 +3828,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_8_buf_in_shim_10: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_8_core13_4 = aie.core(%tile13_4) { %lb = arith.constant 0 : index @@ -3861,7 +3861,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_8_buf_in_shim_10: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_8_core14_4 = aie.core(%tile14_4) { %lb = arith.constant 0 : index @@ -3884,7 +3884,7 @@ module @hdiff_bundle_16 { aie.objectfifo.release(%block_8_buf_row_4_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_9_core12_5 = aie.core(%tile12_5) { %lb = arith.constant 0 : index @@ -3911,7 
+3911,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_9_buf_in_shim_10: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_9_core13_5 = aie.core(%tile13_5) { %lb = arith.constant 0 : index @@ -3944,7 +3944,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_9_buf_in_shim_10: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_9_core14_5 = aie.core(%tile14_5) { %lb = arith.constant 0 : index @@ -3967,7 +3967,7 @@ module @hdiff_bundle_16 { aie.objectfifo.release(%block_9_buf_row_5_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_9_core12_6 = aie.core(%tile12_6) { %lb = arith.constant 0 : index @@ -3995,7 +3995,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_9_buf_in_shim_10: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_9_core13_6 = aie.core(%tile13_6) { %lb = arith.constant 0 : index @@ -4028,7 +4028,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_9_buf_in_shim_10: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } // Gathering Tile %block_9_core14_6 = aie.core(%tile14_6) { @@ -4073,7 +4073,7 @@ module @hdiff_bundle_16 { } aie.use_lock(%lock146_14, "Acquire", 0) // stop the timer aie.end - } { link_with="hdiff_flux2.o" } + } %block_9_core12_7 = aie.core(%tile12_7) { %lb = arith.constant 0 : index @@ -4100,7 +4100,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_9_buf_in_shim_10: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_9_core13_7 = aie.core(%tile13_7) { %lb = arith.constant 0 : index @@ -4133,7 +4133,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_9_buf_in_shim_10: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_9_core14_7 = aie.core(%tile14_7) { %lb = arith.constant 0 : index @@ -4156,7 +4156,7 @@ module @hdiff_bundle_16 { aie.objectfifo.release(%block_9_buf_row_7_out_flx2 :!aie.objectfifo>, 1) } aie.end - 
} { link_with="hdiff_flux2.o" } + } %block_9_core12_8 = aie.core(%tile12_8) { %lb = arith.constant 0 : index @@ -4183,7 +4183,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_9_buf_in_shim_10: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_9_core13_8 = aie.core(%tile13_8) { %lb = arith.constant 0 : index @@ -4216,7 +4216,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_9_buf_in_shim_10: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_9_core14_8 = aie.core(%tile14_8) { %lb = arith.constant 0 : index @@ -4239,7 +4239,7 @@ module @hdiff_bundle_16 { aie.objectfifo.release(%block_9_buf_row_8_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_10_core15_1 = aie.core(%tile15_1) { %lb = arith.constant 0 : index @@ -4266,7 +4266,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_10_buf_in_shim_11: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_10_core16_1 = aie.core(%tile16_1) { %lb = arith.constant 0 : index @@ -4299,7 +4299,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_10_buf_in_shim_11: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_10_core17_1 = aie.core(%tile17_1) { %lb = arith.constant 0 : index @@ -4322,7 +4322,7 @@ module @hdiff_bundle_16 { aie.objectfifo.release(%block_10_buf_row_1_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_10_core15_2 = aie.core(%tile15_2) { %lb = arith.constant 0 : index @@ -4350,7 +4350,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_10_buf_in_shim_11: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_10_core16_2 = aie.core(%tile16_2) { %lb = arith.constant 0 : index @@ -4383,7 +4383,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_10_buf_in_shim_11: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } // Gathering Tile %block_10_core17_2 = 
aie.core(%tile17_2) { @@ -4428,7 +4428,7 @@ module @hdiff_bundle_16 { } aie.use_lock(%lock172_14, "Acquire", 0) // stop the timer aie.end - } { link_with="hdiff_flux2.o" } + } %block_10_core15_3 = aie.core(%tile15_3) { %lb = arith.constant 0 : index @@ -4455,7 +4455,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_10_buf_in_shim_11: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_10_core16_3 = aie.core(%tile16_3) { %lb = arith.constant 0 : index @@ -4488,7 +4488,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_10_buf_in_shim_11: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_10_core17_3 = aie.core(%tile17_3) { %lb = arith.constant 0 : index @@ -4511,7 +4511,7 @@ module @hdiff_bundle_16 { aie.objectfifo.release(%block_10_buf_row_3_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_10_core15_4 = aie.core(%tile15_4) { %lb = arith.constant 0 : index @@ -4538,7 +4538,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_10_buf_in_shim_11: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_10_core16_4 = aie.core(%tile16_4) { %lb = arith.constant 0 : index @@ -4571,7 +4571,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_10_buf_in_shim_11: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_10_core17_4 = aie.core(%tile17_4) { %lb = arith.constant 0 : index @@ -4594,7 +4594,7 @@ module @hdiff_bundle_16 { aie.objectfifo.release(%block_10_buf_row_4_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_11_core15_5 = aie.core(%tile15_5) { %lb = arith.constant 0 : index @@ -4621,7 +4621,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_11_buf_in_shim_11: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_11_core16_5 = aie.core(%tile16_5) { %lb = arith.constant 0 : index @@ -4654,7 +4654,7 @@ module @hdiff_bundle_16 { } 
aie.objectfifo.release(%block_11_buf_in_shim_11: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_11_core17_5 = aie.core(%tile17_5) { %lb = arith.constant 0 : index @@ -4677,7 +4677,7 @@ module @hdiff_bundle_16 { aie.objectfifo.release(%block_11_buf_row_5_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_11_core15_6 = aie.core(%tile15_6) { %lb = arith.constant 0 : index @@ -4705,7 +4705,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_11_buf_in_shim_11: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_11_core16_6 = aie.core(%tile16_6) { %lb = arith.constant 0 : index @@ -4738,7 +4738,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_11_buf_in_shim_11: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } // Gathering Tile %block_11_core17_6 = aie.core(%tile17_6) { @@ -4783,7 +4783,7 @@ module @hdiff_bundle_16 { } aie.use_lock(%lock176_14, "Acquire", 0) // stop the timer aie.end - } { link_with="hdiff_flux2.o" } + } %block_11_core15_7 = aie.core(%tile15_7) { %lb = arith.constant 0 : index @@ -4810,7 +4810,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_11_buf_in_shim_11: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_11_core16_7 = aie.core(%tile16_7) { %lb = arith.constant 0 : index @@ -4843,7 +4843,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_11_buf_in_shim_11: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_11_core17_7 = aie.core(%tile17_7) { %lb = arith.constant 0 : index @@ -4866,7 +4866,7 @@ module @hdiff_bundle_16 { aie.objectfifo.release(%block_11_buf_row_7_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_11_core15_8 = aie.core(%tile15_8) { %lb = arith.constant 0 : index @@ -4893,7 +4893,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_11_buf_in_shim_11: !aie.objectfifo>, 4) aie.end - } { 
link_with="hdiff_lap.o" } + } %block_11_core16_8 = aie.core(%tile16_8) { %lb = arith.constant 0 : index @@ -4926,7 +4926,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_11_buf_in_shim_11: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_11_core17_8 = aie.core(%tile17_8) { %lb = arith.constant 0 : index @@ -4949,7 +4949,7 @@ module @hdiff_bundle_16 { aie.objectfifo.release(%block_11_buf_row_8_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_12_core18_1 = aie.core(%tile18_1) { %lb = arith.constant 0 : index @@ -4976,7 +4976,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_12_buf_in_shim_18: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_12_core19_1 = aie.core(%tile19_1) { %lb = arith.constant 0 : index @@ -5009,7 +5009,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_12_buf_in_shim_18: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_12_core20_1 = aie.core(%tile20_1) { %lb = arith.constant 0 : index @@ -5032,7 +5032,7 @@ module @hdiff_bundle_16 { aie.objectfifo.release(%block_12_buf_row_1_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_12_core18_2 = aie.core(%tile18_2) { %lb = arith.constant 0 : index @@ -5060,7 +5060,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_12_buf_in_shim_18: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_12_core19_2 = aie.core(%tile19_2) { %lb = arith.constant 0 : index @@ -5093,7 +5093,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_12_buf_in_shim_18: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } // Gathering Tile %block_12_core20_2 = aie.core(%tile20_2) { @@ -5138,7 +5138,7 @@ module @hdiff_bundle_16 { } aie.use_lock(%lock202_14, "Acquire", 0) // stop the timer aie.end - } { link_with="hdiff_flux2.o" } + } %block_12_core18_3 = aie.core(%tile18_3) { %lb = arith.constant 0 : 
index @@ -5165,7 +5165,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_12_buf_in_shim_18: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_12_core19_3 = aie.core(%tile19_3) { %lb = arith.constant 0 : index @@ -5198,7 +5198,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_12_buf_in_shim_18: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_12_core20_3 = aie.core(%tile20_3) { %lb = arith.constant 0 : index @@ -5221,7 +5221,7 @@ module @hdiff_bundle_16 { aie.objectfifo.release(%block_12_buf_row_3_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_12_core18_4 = aie.core(%tile18_4) { %lb = arith.constant 0 : index @@ -5248,7 +5248,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_12_buf_in_shim_18: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_12_core19_4 = aie.core(%tile19_4) { %lb = arith.constant 0 : index @@ -5281,7 +5281,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_12_buf_in_shim_18: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_12_core20_4 = aie.core(%tile20_4) { %lb = arith.constant 0 : index @@ -5304,7 +5304,7 @@ module @hdiff_bundle_16 { aie.objectfifo.release(%block_12_buf_row_4_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_13_core18_5 = aie.core(%tile18_5) { %lb = arith.constant 0 : index @@ -5331,7 +5331,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_13_buf_in_shim_18: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_13_core19_5 = aie.core(%tile19_5) { %lb = arith.constant 0 : index @@ -5364,7 +5364,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_13_buf_in_shim_18: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_13_core20_5 = aie.core(%tile20_5) { %lb = arith.constant 0 : index @@ -5387,7 +5387,7 @@ module @hdiff_bundle_16 { 
aie.objectfifo.release(%block_13_buf_row_5_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_13_core18_6 = aie.core(%tile18_6) { %lb = arith.constant 0 : index @@ -5415,7 +5415,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_13_buf_in_shim_18: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_13_core19_6 = aie.core(%tile19_6) { %lb = arith.constant 0 : index @@ -5448,7 +5448,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_13_buf_in_shim_18: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } // Gathering Tile %block_13_core20_6 = aie.core(%tile20_6) { @@ -5493,7 +5493,7 @@ module @hdiff_bundle_16 { } aie.use_lock(%lock206_14, "Acquire", 0) // stop the timer aie.end - } { link_with="hdiff_flux2.o" } + } %block_13_core18_7 = aie.core(%tile18_7) { %lb = arith.constant 0 : index @@ -5520,7 +5520,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_13_buf_in_shim_18: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_13_core19_7 = aie.core(%tile19_7) { %lb = arith.constant 0 : index @@ -5553,7 +5553,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_13_buf_in_shim_18: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_13_core20_7 = aie.core(%tile20_7) { %lb = arith.constant 0 : index @@ -5576,7 +5576,7 @@ module @hdiff_bundle_16 { aie.objectfifo.release(%block_13_buf_row_7_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_13_core18_8 = aie.core(%tile18_8) { %lb = arith.constant 0 : index @@ -5603,7 +5603,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_13_buf_in_shim_18: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_13_core19_8 = aie.core(%tile19_8) { %lb = arith.constant 0 : index @@ -5636,7 +5636,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_13_buf_in_shim_18: !aie.objectfifo>, 7) aie.end - } { 
link_with="hdiff_flux1.o" } + } %block_13_core20_8 = aie.core(%tile20_8) { %lb = arith.constant 0 : index @@ -5659,7 +5659,7 @@ module @hdiff_bundle_16 { aie.objectfifo.release(%block_13_buf_row_8_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_14_core21_1 = aie.core(%tile21_1) { %lb = arith.constant 0 : index @@ -5686,7 +5686,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_14_buf_in_shim_19: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_14_core22_1 = aie.core(%tile22_1) { %lb = arith.constant 0 : index @@ -5719,7 +5719,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_14_buf_in_shim_19: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_14_core23_1 = aie.core(%tile23_1) { %lb = arith.constant 0 : index @@ -5742,7 +5742,7 @@ module @hdiff_bundle_16 { aie.objectfifo.release(%block_14_buf_row_1_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_14_core21_2 = aie.core(%tile21_2) { %lb = arith.constant 0 : index @@ -5770,7 +5770,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_14_buf_in_shim_19: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_14_core22_2 = aie.core(%tile22_2) { %lb = arith.constant 0 : index @@ -5803,7 +5803,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_14_buf_in_shim_19: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } // Gathering Tile %block_14_core23_2 = aie.core(%tile23_2) { @@ -5848,7 +5848,7 @@ module @hdiff_bundle_16 { } aie.use_lock(%lock232_14, "Acquire", 0) // stop the timer aie.end - } { link_with="hdiff_flux2.o" } + } %block_14_core21_3 = aie.core(%tile21_3) { %lb = arith.constant 0 : index @@ -5875,7 +5875,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_14_buf_in_shim_19: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_14_core22_3 = aie.core(%tile22_3) { %lb = arith.constant 0 : 
index @@ -5908,7 +5908,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_14_buf_in_shim_19: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_14_core23_3 = aie.core(%tile23_3) { %lb = arith.constant 0 : index @@ -5931,7 +5931,7 @@ module @hdiff_bundle_16 { aie.objectfifo.release(%block_14_buf_row_3_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_14_core21_4 = aie.core(%tile21_4) { %lb = arith.constant 0 : index @@ -5958,7 +5958,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_14_buf_in_shim_19: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_14_core22_4 = aie.core(%tile22_4) { %lb = arith.constant 0 : index @@ -5991,7 +5991,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_14_buf_in_shim_19: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_14_core23_4 = aie.core(%tile23_4) { %lb = arith.constant 0 : index @@ -6014,7 +6014,7 @@ module @hdiff_bundle_16 { aie.objectfifo.release(%block_14_buf_row_4_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_15_core21_5 = aie.core(%tile21_5) { %lb = arith.constant 0 : index @@ -6041,7 +6041,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_15_buf_in_shim_19: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_15_core22_5 = aie.core(%tile22_5) { %lb = arith.constant 0 : index @@ -6074,7 +6074,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_15_buf_in_shim_19: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_15_core23_5 = aie.core(%tile23_5) { %lb = arith.constant 0 : index @@ -6097,7 +6097,7 @@ module @hdiff_bundle_16 { aie.objectfifo.release(%block_15_buf_row_5_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_15_core21_6 = aie.core(%tile21_6) { %lb = arith.constant 0 : index @@ -6125,7 +6125,7 @@ module @hdiff_bundle_16 { } 
aie.objectfifo.release(%block_15_buf_in_shim_19: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_15_core22_6 = aie.core(%tile22_6) { %lb = arith.constant 0 : index @@ -6158,7 +6158,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_15_buf_in_shim_19: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } // Gathering Tile %block_15_core23_6 = aie.core(%tile23_6) { @@ -6203,7 +6203,7 @@ module @hdiff_bundle_16 { } aie.use_lock(%lock236_14, "Acquire", 0) // stop the timer aie.end - } { link_with="hdiff_flux2.o" } + } %block_15_core21_7 = aie.core(%tile21_7) { %lb = arith.constant 0 : index @@ -6230,7 +6230,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_15_buf_in_shim_19: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_15_core22_7 = aie.core(%tile22_7) { %lb = arith.constant 0 : index @@ -6263,7 +6263,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_15_buf_in_shim_19: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_15_core23_7 = aie.core(%tile23_7) { %lb = arith.constant 0 : index @@ -6286,7 +6286,7 @@ module @hdiff_bundle_16 { aie.objectfifo.release(%block_15_buf_row_7_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_15_core21_8 = aie.core(%tile21_8) { %lb = arith.constant 0 : index @@ -6313,7 +6313,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_15_buf_in_shim_19: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_15_core22_8 = aie.core(%tile22_8) { %lb = arith.constant 0 : index @@ -6346,7 +6346,7 @@ module @hdiff_bundle_16 { } aie.objectfifo.release(%block_15_buf_in_shim_19: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_15_core23_8 = aie.core(%tile23_8) { %lb = arith.constant 0 : index @@ -6369,6 +6369,6 @@ module @hdiff_bundle_16 { aie.objectfifo.release(%block_15_buf_row_8_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { 
link_with="hdiff_flux2.o" } + } } diff --git a/programming_examples/mlir/horizontal_diffusion/HDIFF_tri_AIE_objectFIFO_ping_pong_scaled/aie_2.mlir b/programming_examples/mlir/horizontal_diffusion/HDIFF_tri_AIE_objectFIFO_ping_pong_scaled/aie_2.mlir index e2c70180b01..4e77bd5bf6b 100644 --- a/programming_examples/mlir/horizontal_diffusion/HDIFF_tri_AIE_objectFIFO_ping_pong_scaled/aie_2.mlir +++ b/programming_examples/mlir/horizontal_diffusion/HDIFF_tri_AIE_objectFIFO_ping_pong_scaled/aie_2.mlir @@ -99,9 +99,9 @@ module @hdiff_bundle_2 { aie.objectfifo.register_external_buffers(%tile2_0, %block_1_buf_out_shim_2 : !aie.objectfifo>, {%ext_buffer_out_1}) : (memref<2048xi32>) - func.func private @hdiff_lap(%AL: memref<256xi32>,%BL: memref<256xi32>, %CL: memref<256xi32>, %DL: memref<256xi32>, %EL: memref<256xi32>, %OLL1: memref<256xi32>, %OLL2: memref<256xi32>, %OLL3: memref<256xi32>, %OLL4: memref<256xi32>) -> () - func.func private @hdiff_flux1(%AF: memref<256xi32>,%BF: memref<256xi32>, %CF: memref<256xi32>, %OLF1: memref<256xi32>, %OLF2: memref<256xi32>, %OLF3: memref<256xi32>, %OLF4: memref<256xi32>, %OFI1: memref<512xi32>, %OFI2: memref<512xi32>, %OFI3: memref<512xi32>, %OFI4: memref<512xi32>, %OFI5: memref<512xi32>) -> () - func.func private @hdiff_flux2( %Inter1: memref<512xi32>,%Inter2: memref<512xi32>, %Inter3: memref<512xi32>,%Inter4: memref<512xi32>,%Inter5: memref<512xi32>, %Out: memref<256xi32>) -> () + func.func private @hdiff_lap(%AL: memref<256xi32>,%BL: memref<256xi32>, %CL: memref<256xi32>, %DL: memref<256xi32>, %EL: memref<256xi32>, %OLL1: memref<256xi32>, %OLL2: memref<256xi32>, %OLL3: memref<256xi32>, %OLL4: memref<256xi32>) -> () attributes {link_with = "hdiff_lap.o"} + func.func private @hdiff_flux1(%AF: memref<256xi32>,%BF: memref<256xi32>, %CF: memref<256xi32>, %OLF1: memref<256xi32>, %OLF2: memref<256xi32>, %OLF3: memref<256xi32>, %OLF4: memref<256xi32>, %OFI1: memref<512xi32>, %OFI2: memref<512xi32>, %OFI3: memref<512xi32>, %OFI4: 
memref<512xi32>, %OFI5: memref<512xi32>) -> () attributes {link_with = "hdiff_flux1.o"} + func.func private @hdiff_flux2( %Inter1: memref<512xi32>,%Inter2: memref<512xi32>, %Inter3: memref<512xi32>,%Inter4: memref<512xi32>,%Inter5: memref<512xi32>, %Out: memref<256xi32>) -> () attributes {link_with = "hdiff_flux2.o"} %block_0_core0_1 = aie.core(%tile0_1) { %lb = arith.constant 0 : index @@ -128,7 +128,7 @@ module @hdiff_bundle_2 { } aie.objectfifo.release(%block_0_buf_in_shim_2: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_0_core1_1 = aie.core(%tile1_1) { %lb = arith.constant 0 : index @@ -161,7 +161,7 @@ module @hdiff_bundle_2 { } aie.objectfifo.release(%block_0_buf_in_shim_2: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_0_core2_1 = aie.core(%tile2_1) { %lb = arith.constant 0 : index @@ -184,7 +184,7 @@ module @hdiff_bundle_2 { aie.objectfifo.release(%block_0_buf_row_1_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_0_core0_2 = aie.core(%tile0_2) { %lb = arith.constant 0 : index @@ -212,7 +212,7 @@ module @hdiff_bundle_2 { } aie.objectfifo.release(%block_0_buf_in_shim_2: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_0_core1_2 = aie.core(%tile1_2) { %lb = arith.constant 0 : index @@ -245,7 +245,7 @@ module @hdiff_bundle_2 { } aie.objectfifo.release(%block_0_buf_in_shim_2: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } // Gathering Tile %block_0_core2_2 = aie.core(%tile2_2) { @@ -290,7 +290,7 @@ module @hdiff_bundle_2 { } aie.use_lock(%lock22_14, "Acquire", 0) // stop the timer aie.end - } { link_with="hdiff_flux2.o" } + } %block_0_core0_3 = aie.core(%tile0_3) { %lb = arith.constant 0 : index @@ -317,7 +317,7 @@ module @hdiff_bundle_2 { } aie.objectfifo.release(%block_0_buf_in_shim_2: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_0_core1_3 = aie.core(%tile1_3) { %lb = arith.constant 0 : index @@ -350,7 
+350,7 @@ module @hdiff_bundle_2 { } aie.objectfifo.release(%block_0_buf_in_shim_2: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_0_core2_3 = aie.core(%tile2_3) { %lb = arith.constant 0 : index @@ -373,7 +373,7 @@ module @hdiff_bundle_2 { aie.objectfifo.release(%block_0_buf_row_3_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_0_core0_4 = aie.core(%tile0_4) { %lb = arith.constant 0 : index @@ -400,7 +400,7 @@ module @hdiff_bundle_2 { } aie.objectfifo.release(%block_0_buf_in_shim_2: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_0_core1_4 = aie.core(%tile1_4) { %lb = arith.constant 0 : index @@ -433,7 +433,7 @@ module @hdiff_bundle_2 { } aie.objectfifo.release(%block_0_buf_in_shim_2: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_0_core2_4 = aie.core(%tile2_4) { %lb = arith.constant 0 : index @@ -456,7 +456,7 @@ module @hdiff_bundle_2 { aie.objectfifo.release(%block_0_buf_row_4_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_1_core0_5 = aie.core(%tile0_5) { %lb = arith.constant 0 : index @@ -483,7 +483,7 @@ module @hdiff_bundle_2 { } aie.objectfifo.release(%block_1_buf_in_shim_2: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_1_core1_5 = aie.core(%tile1_5) { %lb = arith.constant 0 : index @@ -516,7 +516,7 @@ module @hdiff_bundle_2 { } aie.objectfifo.release(%block_1_buf_in_shim_2: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_1_core2_5 = aie.core(%tile2_5) { %lb = arith.constant 0 : index @@ -539,7 +539,7 @@ module @hdiff_bundle_2 { aie.objectfifo.release(%block_1_buf_row_5_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_1_core0_6 = aie.core(%tile0_6) { %lb = arith.constant 0 : index @@ -567,7 +567,7 @@ module @hdiff_bundle_2 { } aie.objectfifo.release(%block_1_buf_in_shim_2: !aie.objectfifo>, 4) aie.end - } { 
link_with="hdiff_lap.o" } + } %block_1_core1_6 = aie.core(%tile1_6) { %lb = arith.constant 0 : index @@ -600,7 +600,7 @@ module @hdiff_bundle_2 { } aie.objectfifo.release(%block_1_buf_in_shim_2: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } // Gathering Tile %block_1_core2_6 = aie.core(%tile2_6) { @@ -645,7 +645,7 @@ module @hdiff_bundle_2 { } aie.use_lock(%lock26_14, "Acquire", 0) // stop the timer aie.end - } { link_with="hdiff_flux2.o" } + } %block_1_core0_7 = aie.core(%tile0_7) { %lb = arith.constant 0 : index @@ -672,7 +672,7 @@ module @hdiff_bundle_2 { } aie.objectfifo.release(%block_1_buf_in_shim_2: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_1_core1_7 = aie.core(%tile1_7) { %lb = arith.constant 0 : index @@ -705,7 +705,7 @@ module @hdiff_bundle_2 { } aie.objectfifo.release(%block_1_buf_in_shim_2: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_1_core2_7 = aie.core(%tile2_7) { %lb = arith.constant 0 : index @@ -728,7 +728,7 @@ module @hdiff_bundle_2 { aie.objectfifo.release(%block_1_buf_row_7_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_1_core0_8 = aie.core(%tile0_8) { %lb = arith.constant 0 : index @@ -755,7 +755,7 @@ module @hdiff_bundle_2 { } aie.objectfifo.release(%block_1_buf_in_shim_2: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_1_core1_8 = aie.core(%tile1_8) { %lb = arith.constant 0 : index @@ -788,7 +788,7 @@ module @hdiff_bundle_2 { } aie.objectfifo.release(%block_1_buf_in_shim_2: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_1_core2_8 = aie.core(%tile2_8) { %lb = arith.constant 0 : index @@ -811,6 +811,6 @@ module @hdiff_bundle_2 { aie.objectfifo.release(%block_1_buf_row_8_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } } diff --git a/programming_examples/mlir/horizontal_diffusion/HDIFF_tri_AIE_objectFIFO_ping_pong_scaled/aie_3.mlir 
b/programming_examples/mlir/horizontal_diffusion/HDIFF_tri_AIE_objectFIFO_ping_pong_scaled/aie_3.mlir index 284bcf77522..cf422d16767 100644 --- a/programming_examples/mlir/horizontal_diffusion/HDIFF_tri_AIE_objectFIFO_ping_pong_scaled/aie_3.mlir +++ b/programming_examples/mlir/horizontal_diffusion/HDIFF_tri_AIE_objectFIFO_ping_pong_scaled/aie_3.mlir @@ -141,9 +141,9 @@ module @hdiff_bundle_3 { aie.objectfifo.register_external_buffers(%tile3_0, %block_2_buf_out_shim_3 : !aie.objectfifo>, {%ext_buffer_out_2}) : (memref<2048xi32>) - func.func private @hdiff_lap(%AL: memref<256xi32>,%BL: memref<256xi32>, %CL: memref<256xi32>, %DL: memref<256xi32>, %EL: memref<256xi32>, %OLL1: memref<256xi32>, %OLL2: memref<256xi32>, %OLL3: memref<256xi32>, %OLL4: memref<256xi32>) -> () - func.func private @hdiff_flux1(%AF: memref<256xi32>,%BF: memref<256xi32>, %CF: memref<256xi32>, %OLF1: memref<256xi32>, %OLF2: memref<256xi32>, %OLF3: memref<256xi32>, %OLF4: memref<256xi32>, %OFI1: memref<512xi32>, %OFI2: memref<512xi32>, %OFI3: memref<512xi32>, %OFI4: memref<512xi32>, %OFI5: memref<512xi32>) -> () - func.func private @hdiff_flux2( %Inter1: memref<512xi32>,%Inter2: memref<512xi32>, %Inter3: memref<512xi32>,%Inter4: memref<512xi32>,%Inter5: memref<512xi32>, %Out: memref<256xi32>) -> () + func.func private @hdiff_lap(%AL: memref<256xi32>,%BL: memref<256xi32>, %CL: memref<256xi32>, %DL: memref<256xi32>, %EL: memref<256xi32>, %OLL1: memref<256xi32>, %OLL2: memref<256xi32>, %OLL3: memref<256xi32>, %OLL4: memref<256xi32>) -> () attributes {link_with = "hdiff_lap.o"} + func.func private @hdiff_flux1(%AF: memref<256xi32>,%BF: memref<256xi32>, %CF: memref<256xi32>, %OLF1: memref<256xi32>, %OLF2: memref<256xi32>, %OLF3: memref<256xi32>, %OLF4: memref<256xi32>, %OFI1: memref<512xi32>, %OFI2: memref<512xi32>, %OFI3: memref<512xi32>, %OFI4: memref<512xi32>, %OFI5: memref<512xi32>) -> () attributes {link_with = "hdiff_flux1.o"} + func.func private @hdiff_flux2( %Inter1: memref<512xi32>,%Inter2: 
memref<512xi32>, %Inter3: memref<512xi32>,%Inter4: memref<512xi32>,%Inter5: memref<512xi32>, %Out: memref<256xi32>) -> () attributes {link_with = "hdiff_flux2.o"} %block_0_core0_1 = aie.core(%tile0_1) { %lb = arith.constant 0 : index @@ -171,7 +171,7 @@ module @hdiff_bundle_3 { } aie.objectfifo.release(%block_0_buf_in_shim_2: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_0_core1_1 = aie.core(%tile1_1) { %lb = arith.constant 0 : index @@ -204,7 +204,7 @@ module @hdiff_bundle_3 { } aie.objectfifo.release(%block_0_buf_in_shim_2: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_0_core2_1 = aie.core(%tile2_1) { %lb = arith.constant 0 : index @@ -228,7 +228,7 @@ module @hdiff_bundle_3 { } aie.use_lock(%lock21_14, "Acquire", 0) // stop the timer aie.end - } { link_with="hdiff_flux2.o" } + } %block_0_core0_2 = aie.core(%tile0_2) { %lb = arith.constant 0 : index @@ -255,7 +255,7 @@ module @hdiff_bundle_3 { } aie.objectfifo.release(%block_0_buf_in_shim_2: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_0_core1_2 = aie.core(%tile1_2) { %lb = arith.constant 0 : index @@ -288,7 +288,7 @@ module @hdiff_bundle_3 { } aie.objectfifo.release(%block_0_buf_in_shim_2: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } // Gathering Tile %block_0_core2_2 = aie.core(%tile2_2) { @@ -332,7 +332,7 @@ module @hdiff_bundle_3 { aie.objectfifo.release(%block_0_buf_out_shim_2:!aie.objectfifo>, 4) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_0_core0_3 = aie.core(%tile0_3) { %lb = arith.constant 0 : index @@ -359,7 +359,7 @@ module @hdiff_bundle_3 { } aie.objectfifo.release(%block_0_buf_in_shim_2: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_0_core1_3 = aie.core(%tile1_3) { %lb = arith.constant 0 : index @@ -392,7 +392,7 @@ module @hdiff_bundle_3 { } aie.objectfifo.release(%block_0_buf_in_shim_2: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } 
%block_0_core2_3 = aie.core(%tile2_3) { %lb = arith.constant 0 : index @@ -415,7 +415,7 @@ module @hdiff_bundle_3 { aie.objectfifo.release(%block_0_buf_row_3_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_0_core0_4 = aie.core(%tile0_4) { %lb = arith.constant 0 : index @@ -442,7 +442,7 @@ module @hdiff_bundle_3 { } aie.objectfifo.release(%block_0_buf_in_shim_2: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_0_core1_4 = aie.core(%tile1_4) { %lb = arith.constant 0 : index @@ -475,7 +475,7 @@ module @hdiff_bundle_3 { } aie.objectfifo.release(%block_0_buf_in_shim_2: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_0_core2_4 = aie.core(%tile2_4) { %lb = arith.constant 0 : index @@ -498,7 +498,7 @@ module @hdiff_bundle_3 { aie.objectfifo.release(%block_0_buf_row_4_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_1_core0_5 = aie.core(%tile0_5) { %lb = arith.constant 0 : index @@ -526,7 +526,7 @@ module @hdiff_bundle_3 { } aie.objectfifo.release(%block_1_buf_in_shim_2: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_1_core1_5 = aie.core(%tile1_5) { %lb = arith.constant 0 : index @@ -559,7 +559,7 @@ module @hdiff_bundle_3 { } aie.objectfifo.release(%block_1_buf_in_shim_2: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_1_core2_5 = aie.core(%tile2_5) { %lb = arith.constant 0 : index @@ -583,7 +583,7 @@ module @hdiff_bundle_3 { } aie.use_lock(%lock25_14, "Acquire", 0) // stop the timer aie.end - } { link_with="hdiff_flux2.o" } + } %block_1_core0_6 = aie.core(%tile0_6) { %lb = arith.constant 0 : index @@ -610,7 +610,7 @@ module @hdiff_bundle_3 { } aie.objectfifo.release(%block_1_buf_in_shim_2: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_1_core1_6 = aie.core(%tile1_6) { %lb = arith.constant 0 : index @@ -643,7 +643,7 @@ module @hdiff_bundle_3 { } 
aie.objectfifo.release(%block_1_buf_in_shim_2: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } // Gathering Tile %block_1_core2_6 = aie.core(%tile2_6) { @@ -687,7 +687,7 @@ module @hdiff_bundle_3 { aie.objectfifo.release(%block_1_buf_out_shim_2:!aie.objectfifo>, 4) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_1_core0_7 = aie.core(%tile0_7) { %lb = arith.constant 0 : index @@ -714,7 +714,7 @@ module @hdiff_bundle_3 { } aie.objectfifo.release(%block_1_buf_in_shim_2: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_1_core1_7 = aie.core(%tile1_7) { %lb = arith.constant 0 : index @@ -747,7 +747,7 @@ module @hdiff_bundle_3 { } aie.objectfifo.release(%block_1_buf_in_shim_2: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_1_core2_7 = aie.core(%tile2_7) { %lb = arith.constant 0 : index @@ -770,7 +770,7 @@ module @hdiff_bundle_3 { aie.objectfifo.release(%block_1_buf_row_7_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_1_core0_8 = aie.core(%tile0_8) { %lb = arith.constant 0 : index @@ -797,7 +797,7 @@ module @hdiff_bundle_3 { } aie.objectfifo.release(%block_1_buf_in_shim_2: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_1_core1_8 = aie.core(%tile1_8) { %lb = arith.constant 0 : index @@ -830,7 +830,7 @@ module @hdiff_bundle_3 { } aie.objectfifo.release(%block_1_buf_in_shim_2: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_1_core2_8 = aie.core(%tile2_8) { %lb = arith.constant 0 : index @@ -853,7 +853,7 @@ module @hdiff_bundle_3 { aie.objectfifo.release(%block_1_buf_row_8_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_2_core3_1 = aie.core(%tile3_1) { %lb = arith.constant 0 : index @@ -881,7 +881,7 @@ module @hdiff_bundle_3 { } aie.objectfifo.release(%block_2_buf_in_shim_3: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_2_core4_1 = aie.core(%tile4_1) 
{ %lb = arith.constant 0 : index @@ -914,7 +914,7 @@ module @hdiff_bundle_3 { } aie.objectfifo.release(%block_2_buf_in_shim_3: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_2_core5_1 = aie.core(%tile5_1) { %lb = arith.constant 0 : index @@ -938,7 +938,7 @@ module @hdiff_bundle_3 { } aie.use_lock(%lock51_14, "Acquire", 0) // stop the timer aie.end - } { link_with="hdiff_flux2.o" } + } %block_2_core3_2 = aie.core(%tile3_2) { %lb = arith.constant 0 : index @@ -965,7 +965,7 @@ module @hdiff_bundle_3 { } aie.objectfifo.release(%block_2_buf_in_shim_3: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_2_core4_2 = aie.core(%tile4_2) { %lb = arith.constant 0 : index @@ -998,7 +998,7 @@ module @hdiff_bundle_3 { } aie.objectfifo.release(%block_2_buf_in_shim_3: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } // Gathering Tile %block_2_core5_2 = aie.core(%tile5_2) { @@ -1042,7 +1042,7 @@ module @hdiff_bundle_3 { aie.objectfifo.release(%block_2_buf_out_shim_3:!aie.objectfifo>, 4) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_2_core3_3 = aie.core(%tile3_3) { %lb = arith.constant 0 : index @@ -1069,7 +1069,7 @@ module @hdiff_bundle_3 { } aie.objectfifo.release(%block_2_buf_in_shim_3: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_2_core4_3 = aie.core(%tile4_3) { %lb = arith.constant 0 : index @@ -1102,7 +1102,7 @@ module @hdiff_bundle_3 { } aie.objectfifo.release(%block_2_buf_in_shim_3: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_2_core5_3 = aie.core(%tile5_3) { %lb = arith.constant 0 : index @@ -1125,7 +1125,7 @@ module @hdiff_bundle_3 { aie.objectfifo.release(%block_2_buf_row_3_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_2_core3_4 = aie.core(%tile3_4) { %lb = arith.constant 0 : index @@ -1152,7 +1152,7 @@ module @hdiff_bundle_3 { } aie.objectfifo.release(%block_2_buf_in_shim_3: !aie.objectfifo>, 4) aie.end 
- } { link_with="hdiff_lap.o" } + } %block_2_core4_4 = aie.core(%tile4_4) { %lb = arith.constant 0 : index @@ -1185,7 +1185,7 @@ module @hdiff_bundle_3 { } aie.objectfifo.release(%block_2_buf_in_shim_3: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_2_core5_4 = aie.core(%tile5_4) { %lb = arith.constant 0 : index @@ -1208,6 +1208,6 @@ module @hdiff_bundle_3 { aie.objectfifo.release(%block_2_buf_row_4_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } } diff --git a/programming_examples/mlir/horizontal_diffusion/HDIFF_tri_AIE_objectFIFO_ping_pong_scaled/aie_32.mlir b/programming_examples/mlir/horizontal_diffusion/HDIFF_tri_AIE_objectFIFO_ping_pong_scaled/aie_32.mlir index 7db60b8d6ae..bea715cd9e2 100644 --- a/programming_examples/mlir/horizontal_diffusion/HDIFF_tri_AIE_objectFIFO_ping_pong_scaled/aie_32.mlir +++ b/programming_examples/mlir/horizontal_diffusion/HDIFF_tri_AIE_objectFIFO_ping_pong_scaled/aie_32.mlir @@ -1358,9 +1358,9 @@ module @hdiff_bundle_32 { aie.objectfifo.register_external_buffers(%tile47_0, %block_31_buf_out_shim_47 : !aie.objectfifo>, {%ext_buffer_out_31}) : (memref<2048xi32>) - func.func private @hdiff_lap(%AL: memref<256xi32>,%BL: memref<256xi32>, %CL: memref<256xi32>, %DL: memref<256xi32>, %EL: memref<256xi32>, %OLL1: memref<256xi32>, %OLL2: memref<256xi32>, %OLL3: memref<256xi32>, %OLL4: memref<256xi32>) -> () - func.func private @hdiff_flux1(%AF: memref<256xi32>,%BF: memref<256xi32>, %CF: memref<256xi32>, %OLF1: memref<256xi32>, %OLF2: memref<256xi32>, %OLF3: memref<256xi32>, %OLF4: memref<256xi32>, %OFI1: memref<512xi32>, %OFI2: memref<512xi32>, %OFI3: memref<512xi32>, %OFI4: memref<512xi32>, %OFI5: memref<512xi32>) -> () - func.func private @hdiff_flux2( %Inter1: memref<512xi32>,%Inter2: memref<512xi32>, %Inter3: memref<512xi32>,%Inter4: memref<512xi32>,%Inter5: memref<512xi32>, %Out: memref<256xi32>) -> () + func.func private @hdiff_lap(%AL: memref<256xi32>,%BL: memref<256xi32>, %CL: 
memref<256xi32>, %DL: memref<256xi32>, %EL: memref<256xi32>, %OLL1: memref<256xi32>, %OLL2: memref<256xi32>, %OLL3: memref<256xi32>, %OLL4: memref<256xi32>) -> () attributes {link_with = "hdiff_lap.o"} + func.func private @hdiff_flux1(%AF: memref<256xi32>,%BF: memref<256xi32>, %CF: memref<256xi32>, %OLF1: memref<256xi32>, %OLF2: memref<256xi32>, %OLF3: memref<256xi32>, %OLF4: memref<256xi32>, %OFI1: memref<512xi32>, %OFI2: memref<512xi32>, %OFI3: memref<512xi32>, %OFI4: memref<512xi32>, %OFI5: memref<512xi32>) -> () attributes {link_with = "hdiff_flux1.o"} + func.func private @hdiff_flux2( %Inter1: memref<512xi32>,%Inter2: memref<512xi32>, %Inter3: memref<512xi32>,%Inter4: memref<512xi32>,%Inter5: memref<512xi32>, %Out: memref<256xi32>) -> () attributes {link_with = "hdiff_flux2.o"} %block_0_core0_1 = aie.core(%tile0_1) { %lb = arith.constant 0 : index @@ -1387,7 +1387,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_0_buf_in_shim_2: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_0_core1_1 = aie.core(%tile1_1) { %lb = arith.constant 0 : index @@ -1420,7 +1420,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_0_buf_in_shim_2: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_0_core2_1 = aie.core(%tile2_1) { %lb = arith.constant 0 : index @@ -1443,7 +1443,7 @@ module @hdiff_bundle_32 { aie.objectfifo.release(%block_0_buf_row_1_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_0_core0_2 = aie.core(%tile0_2) { %lb = arith.constant 0 : index @@ -1471,7 +1471,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_0_buf_in_shim_2: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_0_core1_2 = aie.core(%tile1_2) { %lb = arith.constant 0 : index @@ -1504,7 +1504,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_0_buf_in_shim_2: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } // Gathering Tile 
%block_0_core2_2 = aie.core(%tile2_2) { @@ -1549,7 +1549,7 @@ module @hdiff_bundle_32 { } aie.use_lock(%lock22_14, "Acquire", 0) // stop the timer aie.end - } { link_with="hdiff_flux2.o" } + } %block_0_core0_3 = aie.core(%tile0_3) { %lb = arith.constant 0 : index @@ -1576,7 +1576,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_0_buf_in_shim_2: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_0_core1_3 = aie.core(%tile1_3) { %lb = arith.constant 0 : index @@ -1609,7 +1609,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_0_buf_in_shim_2: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_0_core2_3 = aie.core(%tile2_3) { %lb = arith.constant 0 : index @@ -1632,7 +1632,7 @@ module @hdiff_bundle_32 { aie.objectfifo.release(%block_0_buf_row_3_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_0_core0_4 = aie.core(%tile0_4) { %lb = arith.constant 0 : index @@ -1659,7 +1659,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_0_buf_in_shim_2: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_0_core1_4 = aie.core(%tile1_4) { %lb = arith.constant 0 : index @@ -1692,7 +1692,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_0_buf_in_shim_2: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_0_core2_4 = aie.core(%tile2_4) { %lb = arith.constant 0 : index @@ -1715,7 +1715,7 @@ module @hdiff_bundle_32 { aie.objectfifo.release(%block_0_buf_row_4_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_1_core0_5 = aie.core(%tile0_5) { %lb = arith.constant 0 : index @@ -1742,7 +1742,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_1_buf_in_shim_2: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_1_core1_5 = aie.core(%tile1_5) { %lb = arith.constant 0 : index @@ -1775,7 +1775,7 @@ module @hdiff_bundle_32 { } 
aie.objectfifo.release(%block_1_buf_in_shim_2: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_1_core2_5 = aie.core(%tile2_5) { %lb = arith.constant 0 : index @@ -1798,7 +1798,7 @@ module @hdiff_bundle_32 { aie.objectfifo.release(%block_1_buf_row_5_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_1_core0_6 = aie.core(%tile0_6) { %lb = arith.constant 0 : index @@ -1826,7 +1826,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_1_buf_in_shim_2: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_1_core1_6 = aie.core(%tile1_6) { %lb = arith.constant 0 : index @@ -1859,7 +1859,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_1_buf_in_shim_2: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } // Gathering Tile %block_1_core2_6 = aie.core(%tile2_6) { @@ -1904,7 +1904,7 @@ module @hdiff_bundle_32 { } aie.use_lock(%lock26_14, "Acquire", 0) // stop the timer aie.end - } { link_with="hdiff_flux2.o" } + } %block_1_core0_7 = aie.core(%tile0_7) { %lb = arith.constant 0 : index @@ -1931,7 +1931,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_1_buf_in_shim_2: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_1_core1_7 = aie.core(%tile1_7) { %lb = arith.constant 0 : index @@ -1964,7 +1964,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_1_buf_in_shim_2: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_1_core2_7 = aie.core(%tile2_7) { %lb = arith.constant 0 : index @@ -1987,7 +1987,7 @@ module @hdiff_bundle_32 { aie.objectfifo.release(%block_1_buf_row_7_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_1_core0_8 = aie.core(%tile0_8) { %lb = arith.constant 0 : index @@ -2014,7 +2014,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_1_buf_in_shim_2: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_1_core1_8 = 
aie.core(%tile1_8) { %lb = arith.constant 0 : index @@ -2047,7 +2047,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_1_buf_in_shim_2: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_1_core2_8 = aie.core(%tile2_8) { %lb = arith.constant 0 : index @@ -2070,7 +2070,7 @@ module @hdiff_bundle_32 { aie.objectfifo.release(%block_1_buf_row_8_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_2_core3_1 = aie.core(%tile3_1) { %lb = arith.constant 0 : index @@ -2097,7 +2097,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_2_buf_in_shim_3: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_2_core4_1 = aie.core(%tile4_1) { %lb = arith.constant 0 : index @@ -2130,7 +2130,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_2_buf_in_shim_3: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_2_core5_1 = aie.core(%tile5_1) { %lb = arith.constant 0 : index @@ -2153,7 +2153,7 @@ module @hdiff_bundle_32 { aie.objectfifo.release(%block_2_buf_row_1_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_2_core3_2 = aie.core(%tile3_2) { %lb = arith.constant 0 : index @@ -2181,7 +2181,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_2_buf_in_shim_3: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_2_core4_2 = aie.core(%tile4_2) { %lb = arith.constant 0 : index @@ -2214,7 +2214,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_2_buf_in_shim_3: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } // Gathering Tile %block_2_core5_2 = aie.core(%tile5_2) { @@ -2259,7 +2259,7 @@ module @hdiff_bundle_32 { } aie.use_lock(%lock52_14, "Acquire", 0) // stop the timer aie.end - } { link_with="hdiff_flux2.o" } + } %block_2_core3_3 = aie.core(%tile3_3) { %lb = arith.constant 0 : index @@ -2286,7 +2286,7 @@ module @hdiff_bundle_32 { } 
aie.objectfifo.release(%block_2_buf_in_shim_3: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_2_core4_3 = aie.core(%tile4_3) { %lb = arith.constant 0 : index @@ -2319,7 +2319,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_2_buf_in_shim_3: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_2_core5_3 = aie.core(%tile5_3) { %lb = arith.constant 0 : index @@ -2342,7 +2342,7 @@ module @hdiff_bundle_32 { aie.objectfifo.release(%block_2_buf_row_3_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_2_core3_4 = aie.core(%tile3_4) { %lb = arith.constant 0 : index @@ -2369,7 +2369,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_2_buf_in_shim_3: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_2_core4_4 = aie.core(%tile4_4) { %lb = arith.constant 0 : index @@ -2402,7 +2402,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_2_buf_in_shim_3: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_2_core5_4 = aie.core(%tile5_4) { %lb = arith.constant 0 : index @@ -2425,7 +2425,7 @@ module @hdiff_bundle_32 { aie.objectfifo.release(%block_2_buf_row_4_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_3_core3_5 = aie.core(%tile3_5) { %lb = arith.constant 0 : index @@ -2452,7 +2452,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_3_buf_in_shim_3: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_3_core4_5 = aie.core(%tile4_5) { %lb = arith.constant 0 : index @@ -2485,7 +2485,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_3_buf_in_shim_3: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_3_core5_5 = aie.core(%tile5_5) { %lb = arith.constant 0 : index @@ -2508,7 +2508,7 @@ module @hdiff_bundle_32 { aie.objectfifo.release(%block_3_buf_row_5_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } 
+ } %block_3_core3_6 = aie.core(%tile3_6) { %lb = arith.constant 0 : index @@ -2536,7 +2536,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_3_buf_in_shim_3: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_3_core4_6 = aie.core(%tile4_6) { %lb = arith.constant 0 : index @@ -2569,7 +2569,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_3_buf_in_shim_3: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } // Gathering Tile %block_3_core5_6 = aie.core(%tile5_6) { @@ -2614,7 +2614,7 @@ module @hdiff_bundle_32 { } aie.use_lock(%lock56_14, "Acquire", 0) // stop the timer aie.end - } { link_with="hdiff_flux2.o" } + } %block_3_core3_7 = aie.core(%tile3_7) { %lb = arith.constant 0 : index @@ -2641,7 +2641,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_3_buf_in_shim_3: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_3_core4_7 = aie.core(%tile4_7) { %lb = arith.constant 0 : index @@ -2674,7 +2674,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_3_buf_in_shim_3: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_3_core5_7 = aie.core(%tile5_7) { %lb = arith.constant 0 : index @@ -2697,7 +2697,7 @@ module @hdiff_bundle_32 { aie.objectfifo.release(%block_3_buf_row_7_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_3_core3_8 = aie.core(%tile3_8) { %lb = arith.constant 0 : index @@ -2724,7 +2724,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_3_buf_in_shim_3: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_3_core4_8 = aie.core(%tile4_8) { %lb = arith.constant 0 : index @@ -2757,7 +2757,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_3_buf_in_shim_3: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_3_core5_8 = aie.core(%tile5_8) { %lb = arith.constant 0 : index @@ -2780,7 +2780,7 @@ module @hdiff_bundle_32 { 
aie.objectfifo.release(%block_3_buf_row_8_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_4_core6_1 = aie.core(%tile6_1) { %lb = arith.constant 0 : index @@ -2807,7 +2807,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_4_buf_in_shim_6: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_4_core7_1 = aie.core(%tile7_1) { %lb = arith.constant 0 : index @@ -2840,7 +2840,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_4_buf_in_shim_6: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_4_core8_1 = aie.core(%tile8_1) { %lb = arith.constant 0 : index @@ -2863,7 +2863,7 @@ module @hdiff_bundle_32 { aie.objectfifo.release(%block_4_buf_row_1_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_4_core6_2 = aie.core(%tile6_2) { %lb = arith.constant 0 : index @@ -2891,7 +2891,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_4_buf_in_shim_6: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_4_core7_2 = aie.core(%tile7_2) { %lb = arith.constant 0 : index @@ -2924,7 +2924,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_4_buf_in_shim_6: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } // Gathering Tile %block_4_core8_2 = aie.core(%tile8_2) { @@ -2969,7 +2969,7 @@ module @hdiff_bundle_32 { } aie.use_lock(%lock82_14, "Acquire", 0) // stop the timer aie.end - } { link_with="hdiff_flux2.o" } + } %block_4_core6_3 = aie.core(%tile6_3) { %lb = arith.constant 0 : index @@ -2996,7 +2996,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_4_buf_in_shim_6: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_4_core7_3 = aie.core(%tile7_3) { %lb = arith.constant 0 : index @@ -3029,7 +3029,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_4_buf_in_shim_6: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_4_core8_3 = 
aie.core(%tile8_3) { %lb = arith.constant 0 : index @@ -3052,7 +3052,7 @@ module @hdiff_bundle_32 { aie.objectfifo.release(%block_4_buf_row_3_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_4_core6_4 = aie.core(%tile6_4) { %lb = arith.constant 0 : index @@ -3079,7 +3079,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_4_buf_in_shim_6: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_4_core7_4 = aie.core(%tile7_4) { %lb = arith.constant 0 : index @@ -3112,7 +3112,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_4_buf_in_shim_6: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_4_core8_4 = aie.core(%tile8_4) { %lb = arith.constant 0 : index @@ -3135,7 +3135,7 @@ module @hdiff_bundle_32 { aie.objectfifo.release(%block_4_buf_row_4_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_5_core6_5 = aie.core(%tile6_5) { %lb = arith.constant 0 : index @@ -3162,7 +3162,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_5_buf_in_shim_6: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_5_core7_5 = aie.core(%tile7_5) { %lb = arith.constant 0 : index @@ -3195,7 +3195,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_5_buf_in_shim_6: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_5_core8_5 = aie.core(%tile8_5) { %lb = arith.constant 0 : index @@ -3218,7 +3218,7 @@ module @hdiff_bundle_32 { aie.objectfifo.release(%block_5_buf_row_5_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_5_core6_6 = aie.core(%tile6_6) { %lb = arith.constant 0 : index @@ -3246,7 +3246,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_5_buf_in_shim_6: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_5_core7_6 = aie.core(%tile7_6) { %lb = arith.constant 0 : index @@ -3279,7 +3279,7 @@ module @hdiff_bundle_32 { } 
aie.objectfifo.release(%block_5_buf_in_shim_6: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } // Gathering Tile %block_5_core8_6 = aie.core(%tile8_6) { @@ -3324,7 +3324,7 @@ module @hdiff_bundle_32 { } aie.use_lock(%lock86_14, "Acquire", 0) // stop the timer aie.end - } { link_with="hdiff_flux2.o" } + } %block_5_core6_7 = aie.core(%tile6_7) { %lb = arith.constant 0 : index @@ -3351,7 +3351,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_5_buf_in_shim_6: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_5_core7_7 = aie.core(%tile7_7) { %lb = arith.constant 0 : index @@ -3384,7 +3384,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_5_buf_in_shim_6: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_5_core8_7 = aie.core(%tile8_7) { %lb = arith.constant 0 : index @@ -3407,7 +3407,7 @@ module @hdiff_bundle_32 { aie.objectfifo.release(%block_5_buf_row_7_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_5_core6_8 = aie.core(%tile6_8) { %lb = arith.constant 0 : index @@ -3434,7 +3434,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_5_buf_in_shim_6: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_5_core7_8 = aie.core(%tile7_8) { %lb = arith.constant 0 : index @@ -3467,7 +3467,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_5_buf_in_shim_6: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_5_core8_8 = aie.core(%tile8_8) { %lb = arith.constant 0 : index @@ -3490,7 +3490,7 @@ module @hdiff_bundle_32 { aie.objectfifo.release(%block_5_buf_row_8_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_6_core9_1 = aie.core(%tile9_1) { %lb = arith.constant 0 : index @@ -3517,7 +3517,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_6_buf_in_shim_7: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_6_core10_1 = 
aie.core(%tile10_1) { %lb = arith.constant 0 : index @@ -3550,7 +3550,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_6_buf_in_shim_7: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_6_core11_1 = aie.core(%tile11_1) { %lb = arith.constant 0 : index @@ -3573,7 +3573,7 @@ module @hdiff_bundle_32 { aie.objectfifo.release(%block_6_buf_row_1_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_6_core9_2 = aie.core(%tile9_2) { %lb = arith.constant 0 : index @@ -3601,7 +3601,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_6_buf_in_shim_7: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_6_core10_2 = aie.core(%tile10_2) { %lb = arith.constant 0 : index @@ -3634,7 +3634,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_6_buf_in_shim_7: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } // Gathering Tile %block_6_core11_2 = aie.core(%tile11_2) { @@ -3679,7 +3679,7 @@ module @hdiff_bundle_32 { } aie.use_lock(%lock112_14, "Acquire", 0) // stop the timer aie.end - } { link_with="hdiff_flux2.o" } + } %block_6_core9_3 = aie.core(%tile9_3) { %lb = arith.constant 0 : index @@ -3706,7 +3706,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_6_buf_in_shim_7: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_6_core10_3 = aie.core(%tile10_3) { %lb = arith.constant 0 : index @@ -3739,7 +3739,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_6_buf_in_shim_7: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_6_core11_3 = aie.core(%tile11_3) { %lb = arith.constant 0 : index @@ -3762,7 +3762,7 @@ module @hdiff_bundle_32 { aie.objectfifo.release(%block_6_buf_row_3_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_6_core9_4 = aie.core(%tile9_4) { %lb = arith.constant 0 : index @@ -3789,7 +3789,7 @@ module @hdiff_bundle_32 { } 
aie.objectfifo.release(%block_6_buf_in_shim_7: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_6_core10_4 = aie.core(%tile10_4) { %lb = arith.constant 0 : index @@ -3822,7 +3822,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_6_buf_in_shim_7: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_6_core11_4 = aie.core(%tile11_4) { %lb = arith.constant 0 : index @@ -3845,7 +3845,7 @@ module @hdiff_bundle_32 { aie.objectfifo.release(%block_6_buf_row_4_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_7_core9_5 = aie.core(%tile9_5) { %lb = arith.constant 0 : index @@ -3872,7 +3872,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_7_buf_in_shim_7: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_7_core10_5 = aie.core(%tile10_5) { %lb = arith.constant 0 : index @@ -3905,7 +3905,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_7_buf_in_shim_7: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_7_core11_5 = aie.core(%tile11_5) { %lb = arith.constant 0 : index @@ -3928,7 +3928,7 @@ module @hdiff_bundle_32 { aie.objectfifo.release(%block_7_buf_row_5_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_7_core9_6 = aie.core(%tile9_6) { %lb = arith.constant 0 : index @@ -3956,7 +3956,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_7_buf_in_shim_7: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_7_core10_6 = aie.core(%tile10_6) { %lb = arith.constant 0 : index @@ -3989,7 +3989,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_7_buf_in_shim_7: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } // Gathering Tile %block_7_core11_6 = aie.core(%tile11_6) { @@ -4034,7 +4034,7 @@ module @hdiff_bundle_32 { } aie.use_lock(%lock116_14, "Acquire", 0) // stop the timer aie.end - } { link_with="hdiff_flux2.o" } + } 
%block_7_core9_7 = aie.core(%tile9_7) { %lb = arith.constant 0 : index @@ -4061,7 +4061,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_7_buf_in_shim_7: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_7_core10_7 = aie.core(%tile10_7) { %lb = arith.constant 0 : index @@ -4094,7 +4094,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_7_buf_in_shim_7: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_7_core11_7 = aie.core(%tile11_7) { %lb = arith.constant 0 : index @@ -4117,7 +4117,7 @@ module @hdiff_bundle_32 { aie.objectfifo.release(%block_7_buf_row_7_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_7_core9_8 = aie.core(%tile9_8) { %lb = arith.constant 0 : index @@ -4144,7 +4144,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_7_buf_in_shim_7: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_7_core10_8 = aie.core(%tile10_8) { %lb = arith.constant 0 : index @@ -4177,7 +4177,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_7_buf_in_shim_7: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_7_core11_8 = aie.core(%tile11_8) { %lb = arith.constant 0 : index @@ -4200,7 +4200,7 @@ module @hdiff_bundle_32 { aie.objectfifo.release(%block_7_buf_row_8_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_8_core12_1 = aie.core(%tile12_1) { %lb = arith.constant 0 : index @@ -4227,7 +4227,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_8_buf_in_shim_10: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_8_core13_1 = aie.core(%tile13_1) { %lb = arith.constant 0 : index @@ -4260,7 +4260,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_8_buf_in_shim_10: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_8_core14_1 = aie.core(%tile14_1) { %lb = arith.constant 0 : index @@ -4283,7 +4283,7 @@ module 
@hdiff_bundle_32 { aie.objectfifo.release(%block_8_buf_row_1_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_8_core12_2 = aie.core(%tile12_2) { %lb = arith.constant 0 : index @@ -4311,7 +4311,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_8_buf_in_shim_10: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_8_core13_2 = aie.core(%tile13_2) { %lb = arith.constant 0 : index @@ -4344,7 +4344,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_8_buf_in_shim_10: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } // Gathering Tile %block_8_core14_2 = aie.core(%tile14_2) { @@ -4389,7 +4389,7 @@ module @hdiff_bundle_32 { } aie.use_lock(%lock142_14, "Acquire", 0) // stop the timer aie.end - } { link_with="hdiff_flux2.o" } + } %block_8_core12_3 = aie.core(%tile12_3) { %lb = arith.constant 0 : index @@ -4416,7 +4416,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_8_buf_in_shim_10: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_8_core13_3 = aie.core(%tile13_3) { %lb = arith.constant 0 : index @@ -4449,7 +4449,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_8_buf_in_shim_10: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_8_core14_3 = aie.core(%tile14_3) { %lb = arith.constant 0 : index @@ -4472,7 +4472,7 @@ module @hdiff_bundle_32 { aie.objectfifo.release(%block_8_buf_row_3_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_8_core12_4 = aie.core(%tile12_4) { %lb = arith.constant 0 : index @@ -4499,7 +4499,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_8_buf_in_shim_10: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_8_core13_4 = aie.core(%tile13_4) { %lb = arith.constant 0 : index @@ -4532,7 +4532,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_8_buf_in_shim_10: !aie.objectfifo>, 7) aie.end - } { 
link_with="hdiff_flux1.o" } + } %block_8_core14_4 = aie.core(%tile14_4) { %lb = arith.constant 0 : index @@ -4555,7 +4555,7 @@ module @hdiff_bundle_32 { aie.objectfifo.release(%block_8_buf_row_4_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_9_core12_5 = aie.core(%tile12_5) { %lb = arith.constant 0 : index @@ -4582,7 +4582,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_9_buf_in_shim_10: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_9_core13_5 = aie.core(%tile13_5) { %lb = arith.constant 0 : index @@ -4615,7 +4615,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_9_buf_in_shim_10: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_9_core14_5 = aie.core(%tile14_5) { %lb = arith.constant 0 : index @@ -4638,7 +4638,7 @@ module @hdiff_bundle_32 { aie.objectfifo.release(%block_9_buf_row_5_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_9_core12_6 = aie.core(%tile12_6) { %lb = arith.constant 0 : index @@ -4666,7 +4666,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_9_buf_in_shim_10: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_9_core13_6 = aie.core(%tile13_6) { %lb = arith.constant 0 : index @@ -4699,7 +4699,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_9_buf_in_shim_10: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } // Gathering Tile %block_9_core14_6 = aie.core(%tile14_6) { @@ -4744,7 +4744,7 @@ module @hdiff_bundle_32 { } aie.use_lock(%lock146_14, "Acquire", 0) // stop the timer aie.end - } { link_with="hdiff_flux2.o" } + } %block_9_core12_7 = aie.core(%tile12_7) { %lb = arith.constant 0 : index @@ -4771,7 +4771,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_9_buf_in_shim_10: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_9_core13_7 = aie.core(%tile13_7) { %lb = arith.constant 0 : index @@ -4804,7 
+4804,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_9_buf_in_shim_10: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_9_core14_7 = aie.core(%tile14_7) { %lb = arith.constant 0 : index @@ -4827,7 +4827,7 @@ module @hdiff_bundle_32 { aie.objectfifo.release(%block_9_buf_row_7_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_9_core12_8 = aie.core(%tile12_8) { %lb = arith.constant 0 : index @@ -4854,7 +4854,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_9_buf_in_shim_10: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_9_core13_8 = aie.core(%tile13_8) { %lb = arith.constant 0 : index @@ -4887,7 +4887,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_9_buf_in_shim_10: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_9_core14_8 = aie.core(%tile14_8) { %lb = arith.constant 0 : index @@ -4910,7 +4910,7 @@ module @hdiff_bundle_32 { aie.objectfifo.release(%block_9_buf_row_8_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_10_core15_1 = aie.core(%tile15_1) { %lb = arith.constant 0 : index @@ -4937,7 +4937,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_10_buf_in_shim_11: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_10_core16_1 = aie.core(%tile16_1) { %lb = arith.constant 0 : index @@ -4970,7 +4970,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_10_buf_in_shim_11: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_10_core17_1 = aie.core(%tile17_1) { %lb = arith.constant 0 : index @@ -4993,7 +4993,7 @@ module @hdiff_bundle_32 { aie.objectfifo.release(%block_10_buf_row_1_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_10_core15_2 = aie.core(%tile15_2) { %lb = arith.constant 0 : index @@ -5021,7 +5021,7 @@ module @hdiff_bundle_32 { } 
aie.objectfifo.release(%block_10_buf_in_shim_11: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_10_core16_2 = aie.core(%tile16_2) { %lb = arith.constant 0 : index @@ -5054,7 +5054,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_10_buf_in_shim_11: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } // Gathering Tile %block_10_core17_2 = aie.core(%tile17_2) { @@ -5099,7 +5099,7 @@ module @hdiff_bundle_32 { } aie.use_lock(%lock172_14, "Acquire", 0) // stop the timer aie.end - } { link_with="hdiff_flux2.o" } + } %block_10_core15_3 = aie.core(%tile15_3) { %lb = arith.constant 0 : index @@ -5126,7 +5126,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_10_buf_in_shim_11: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_10_core16_3 = aie.core(%tile16_3) { %lb = arith.constant 0 : index @@ -5159,7 +5159,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_10_buf_in_shim_11: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_10_core17_3 = aie.core(%tile17_3) { %lb = arith.constant 0 : index @@ -5182,7 +5182,7 @@ module @hdiff_bundle_32 { aie.objectfifo.release(%block_10_buf_row_3_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_10_core15_4 = aie.core(%tile15_4) { %lb = arith.constant 0 : index @@ -5209,7 +5209,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_10_buf_in_shim_11: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_10_core16_4 = aie.core(%tile16_4) { %lb = arith.constant 0 : index @@ -5242,7 +5242,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_10_buf_in_shim_11: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_10_core17_4 = aie.core(%tile17_4) { %lb = arith.constant 0 : index @@ -5265,7 +5265,7 @@ module @hdiff_bundle_32 { aie.objectfifo.release(%block_10_buf_row_4_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { 
link_with="hdiff_flux2.o" } + } %block_11_core15_5 = aie.core(%tile15_5) { %lb = arith.constant 0 : index @@ -5292,7 +5292,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_11_buf_in_shim_11: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_11_core16_5 = aie.core(%tile16_5) { %lb = arith.constant 0 : index @@ -5325,7 +5325,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_11_buf_in_shim_11: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_11_core17_5 = aie.core(%tile17_5) { %lb = arith.constant 0 : index @@ -5348,7 +5348,7 @@ module @hdiff_bundle_32 { aie.objectfifo.release(%block_11_buf_row_5_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_11_core15_6 = aie.core(%tile15_6) { %lb = arith.constant 0 : index @@ -5376,7 +5376,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_11_buf_in_shim_11: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_11_core16_6 = aie.core(%tile16_6) { %lb = arith.constant 0 : index @@ -5409,7 +5409,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_11_buf_in_shim_11: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } // Gathering Tile %block_11_core17_6 = aie.core(%tile17_6) { @@ -5454,7 +5454,7 @@ module @hdiff_bundle_32 { } aie.use_lock(%lock176_14, "Acquire", 0) // stop the timer aie.end - } { link_with="hdiff_flux2.o" } + } %block_11_core15_7 = aie.core(%tile15_7) { %lb = arith.constant 0 : index @@ -5481,7 +5481,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_11_buf_in_shim_11: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_11_core16_7 = aie.core(%tile16_7) { %lb = arith.constant 0 : index @@ -5514,7 +5514,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_11_buf_in_shim_11: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_11_core17_7 = aie.core(%tile17_7) { %lb = arith.constant 0 : index 
@@ -5537,7 +5537,7 @@ module @hdiff_bundle_32 { aie.objectfifo.release(%block_11_buf_row_7_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_11_core15_8 = aie.core(%tile15_8) { %lb = arith.constant 0 : index @@ -5564,7 +5564,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_11_buf_in_shim_11: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_11_core16_8 = aie.core(%tile16_8) { %lb = arith.constant 0 : index @@ -5597,7 +5597,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_11_buf_in_shim_11: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_11_core17_8 = aie.core(%tile17_8) { %lb = arith.constant 0 : index @@ -5620,7 +5620,7 @@ module @hdiff_bundle_32 { aie.objectfifo.release(%block_11_buf_row_8_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_12_core18_1 = aie.core(%tile18_1) { %lb = arith.constant 0 : index @@ -5647,7 +5647,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_12_buf_in_shim_18: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_12_core19_1 = aie.core(%tile19_1) { %lb = arith.constant 0 : index @@ -5680,7 +5680,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_12_buf_in_shim_18: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_12_core20_1 = aie.core(%tile20_1) { %lb = arith.constant 0 : index @@ -5703,7 +5703,7 @@ module @hdiff_bundle_32 { aie.objectfifo.release(%block_12_buf_row_1_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_12_core18_2 = aie.core(%tile18_2) { %lb = arith.constant 0 : index @@ -5731,7 +5731,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_12_buf_in_shim_18: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_12_core19_2 = aie.core(%tile19_2) { %lb = arith.constant 0 : index @@ -5764,7 +5764,7 @@ module @hdiff_bundle_32 { } 
aie.objectfifo.release(%block_12_buf_in_shim_18: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } // Gathering Tile %block_12_core20_2 = aie.core(%tile20_2) { @@ -5809,7 +5809,7 @@ module @hdiff_bundle_32 { } aie.use_lock(%lock202_14, "Acquire", 0) // stop the timer aie.end - } { link_with="hdiff_flux2.o" } + } %block_12_core18_3 = aie.core(%tile18_3) { %lb = arith.constant 0 : index @@ -5836,7 +5836,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_12_buf_in_shim_18: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_12_core19_3 = aie.core(%tile19_3) { %lb = arith.constant 0 : index @@ -5869,7 +5869,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_12_buf_in_shim_18: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_12_core20_3 = aie.core(%tile20_3) { %lb = arith.constant 0 : index @@ -5892,7 +5892,7 @@ module @hdiff_bundle_32 { aie.objectfifo.release(%block_12_buf_row_3_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_12_core18_4 = aie.core(%tile18_4) { %lb = arith.constant 0 : index @@ -5919,7 +5919,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_12_buf_in_shim_18: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_12_core19_4 = aie.core(%tile19_4) { %lb = arith.constant 0 : index @@ -5952,7 +5952,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_12_buf_in_shim_18: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_12_core20_4 = aie.core(%tile20_4) { %lb = arith.constant 0 : index @@ -5975,7 +5975,7 @@ module @hdiff_bundle_32 { aie.objectfifo.release(%block_12_buf_row_4_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_13_core18_5 = aie.core(%tile18_5) { %lb = arith.constant 0 : index @@ -6002,7 +6002,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_13_buf_in_shim_18: !aie.objectfifo>, 4) aie.end - } { 
link_with="hdiff_lap.o" } + } %block_13_core19_5 = aie.core(%tile19_5) { %lb = arith.constant 0 : index @@ -6035,7 +6035,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_13_buf_in_shim_18: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_13_core20_5 = aie.core(%tile20_5) { %lb = arith.constant 0 : index @@ -6058,7 +6058,7 @@ module @hdiff_bundle_32 { aie.objectfifo.release(%block_13_buf_row_5_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_13_core18_6 = aie.core(%tile18_6) { %lb = arith.constant 0 : index @@ -6086,7 +6086,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_13_buf_in_shim_18: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_13_core19_6 = aie.core(%tile19_6) { %lb = arith.constant 0 : index @@ -6119,7 +6119,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_13_buf_in_shim_18: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } // Gathering Tile %block_13_core20_6 = aie.core(%tile20_6) { @@ -6164,7 +6164,7 @@ module @hdiff_bundle_32 { } aie.use_lock(%lock206_14, "Acquire", 0) // stop the timer aie.end - } { link_with="hdiff_flux2.o" } + } %block_13_core18_7 = aie.core(%tile18_7) { %lb = arith.constant 0 : index @@ -6191,7 +6191,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_13_buf_in_shim_18: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_13_core19_7 = aie.core(%tile19_7) { %lb = arith.constant 0 : index @@ -6224,7 +6224,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_13_buf_in_shim_18: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_13_core20_7 = aie.core(%tile20_7) { %lb = arith.constant 0 : index @@ -6247,7 +6247,7 @@ module @hdiff_bundle_32 { aie.objectfifo.release(%block_13_buf_row_7_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_13_core18_8 = aie.core(%tile18_8) { %lb = arith.constant 0 : 
index @@ -6274,7 +6274,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_13_buf_in_shim_18: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_13_core19_8 = aie.core(%tile19_8) { %lb = arith.constant 0 : index @@ -6307,7 +6307,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_13_buf_in_shim_18: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_13_core20_8 = aie.core(%tile20_8) { %lb = arith.constant 0 : index @@ -6330,7 +6330,7 @@ module @hdiff_bundle_32 { aie.objectfifo.release(%block_13_buf_row_8_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_14_core21_1 = aie.core(%tile21_1) { %lb = arith.constant 0 : index @@ -6357,7 +6357,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_14_buf_in_shim_19: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_14_core22_1 = aie.core(%tile22_1) { %lb = arith.constant 0 : index @@ -6390,7 +6390,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_14_buf_in_shim_19: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_14_core23_1 = aie.core(%tile23_1) { %lb = arith.constant 0 : index @@ -6413,7 +6413,7 @@ module @hdiff_bundle_32 { aie.objectfifo.release(%block_14_buf_row_1_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_14_core21_2 = aie.core(%tile21_2) { %lb = arith.constant 0 : index @@ -6441,7 +6441,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_14_buf_in_shim_19: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_14_core22_2 = aie.core(%tile22_2) { %lb = arith.constant 0 : index @@ -6474,7 +6474,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_14_buf_in_shim_19: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } // Gathering Tile %block_14_core23_2 = aie.core(%tile23_2) { @@ -6519,7 +6519,7 @@ module @hdiff_bundle_32 { } aie.use_lock(%lock232_14, 
"Acquire", 0) // stop the timer aie.end - } { link_with="hdiff_flux2.o" } + } %block_14_core21_3 = aie.core(%tile21_3) { %lb = arith.constant 0 : index @@ -6546,7 +6546,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_14_buf_in_shim_19: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_14_core22_3 = aie.core(%tile22_3) { %lb = arith.constant 0 : index @@ -6579,7 +6579,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_14_buf_in_shim_19: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_14_core23_3 = aie.core(%tile23_3) { %lb = arith.constant 0 : index @@ -6602,7 +6602,7 @@ module @hdiff_bundle_32 { aie.objectfifo.release(%block_14_buf_row_3_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_14_core21_4 = aie.core(%tile21_4) { %lb = arith.constant 0 : index @@ -6629,7 +6629,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_14_buf_in_shim_19: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_14_core22_4 = aie.core(%tile22_4) { %lb = arith.constant 0 : index @@ -6662,7 +6662,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_14_buf_in_shim_19: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_14_core23_4 = aie.core(%tile23_4) { %lb = arith.constant 0 : index @@ -6685,7 +6685,7 @@ module @hdiff_bundle_32 { aie.objectfifo.release(%block_14_buf_row_4_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_15_core21_5 = aie.core(%tile21_5) { %lb = arith.constant 0 : index @@ -6712,7 +6712,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_15_buf_in_shim_19: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_15_core22_5 = aie.core(%tile22_5) { %lb = arith.constant 0 : index @@ -6745,7 +6745,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_15_buf_in_shim_19: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } 
%block_15_core23_5 = aie.core(%tile23_5) { %lb = arith.constant 0 : index @@ -6768,7 +6768,7 @@ module @hdiff_bundle_32 { aie.objectfifo.release(%block_15_buf_row_5_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_15_core21_6 = aie.core(%tile21_6) { %lb = arith.constant 0 : index @@ -6796,7 +6796,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_15_buf_in_shim_19: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_15_core22_6 = aie.core(%tile22_6) { %lb = arith.constant 0 : index @@ -6829,7 +6829,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_15_buf_in_shim_19: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } // Gathering Tile %block_15_core23_6 = aie.core(%tile23_6) { @@ -6874,7 +6874,7 @@ module @hdiff_bundle_32 { } aie.use_lock(%lock236_14, "Acquire", 0) // stop the timer aie.end - } { link_with="hdiff_flux2.o" } + } %block_15_core21_7 = aie.core(%tile21_7) { %lb = arith.constant 0 : index @@ -6901,7 +6901,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_15_buf_in_shim_19: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_15_core22_7 = aie.core(%tile22_7) { %lb = arith.constant 0 : index @@ -6934,7 +6934,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_15_buf_in_shim_19: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_15_core23_7 = aie.core(%tile23_7) { %lb = arith.constant 0 : index @@ -6957,7 +6957,7 @@ module @hdiff_bundle_32 { aie.objectfifo.release(%block_15_buf_row_7_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_15_core21_8 = aie.core(%tile21_8) { %lb = arith.constant 0 : index @@ -6984,7 +6984,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_15_buf_in_shim_19: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_15_core22_8 = aie.core(%tile22_8) { %lb = arith.constant 0 : index @@ -7017,7 +7017,7 @@ module 
@hdiff_bundle_32 { } aie.objectfifo.release(%block_15_buf_in_shim_19: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_15_core23_8 = aie.core(%tile23_8) { %lb = arith.constant 0 : index @@ -7040,7 +7040,7 @@ module @hdiff_bundle_32 { aie.objectfifo.release(%block_15_buf_row_8_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_16_core24_1 = aie.core(%tile24_1) { %lb = arith.constant 0 : index @@ -7067,7 +7067,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_16_buf_in_shim_26: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_16_core25_1 = aie.core(%tile25_1) { %lb = arith.constant 0 : index @@ -7100,7 +7100,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_16_buf_in_shim_26: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_16_core26_1 = aie.core(%tile26_1) { %lb = arith.constant 0 : index @@ -7123,7 +7123,7 @@ module @hdiff_bundle_32 { aie.objectfifo.release(%block_16_buf_row_1_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_16_core24_2 = aie.core(%tile24_2) { %lb = arith.constant 0 : index @@ -7151,7 +7151,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_16_buf_in_shim_26: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_16_core25_2 = aie.core(%tile25_2) { %lb = arith.constant 0 : index @@ -7184,7 +7184,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_16_buf_in_shim_26: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } // Gathering Tile %block_16_core26_2 = aie.core(%tile26_2) { @@ -7229,7 +7229,7 @@ module @hdiff_bundle_32 { } aie.use_lock(%lock262_14, "Acquire", 0) // stop the timer aie.end - } { link_with="hdiff_flux2.o" } + } %block_16_core24_3 = aie.core(%tile24_3) { %lb = arith.constant 0 : index @@ -7256,7 +7256,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_16_buf_in_shim_26: !aie.objectfifo>, 4) aie.end - 
} { link_with="hdiff_lap.o" } + } %block_16_core25_3 = aie.core(%tile25_3) { %lb = arith.constant 0 : index @@ -7289,7 +7289,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_16_buf_in_shim_26: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_16_core26_3 = aie.core(%tile26_3) { %lb = arith.constant 0 : index @@ -7312,7 +7312,7 @@ module @hdiff_bundle_32 { aie.objectfifo.release(%block_16_buf_row_3_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_16_core24_4 = aie.core(%tile24_4) { %lb = arith.constant 0 : index @@ -7339,7 +7339,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_16_buf_in_shim_26: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_16_core25_4 = aie.core(%tile25_4) { %lb = arith.constant 0 : index @@ -7372,7 +7372,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_16_buf_in_shim_26: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_16_core26_4 = aie.core(%tile26_4) { %lb = arith.constant 0 : index @@ -7395,7 +7395,7 @@ module @hdiff_bundle_32 { aie.objectfifo.release(%block_16_buf_row_4_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_17_core24_5 = aie.core(%tile24_5) { %lb = arith.constant 0 : index @@ -7422,7 +7422,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_17_buf_in_shim_26: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_17_core25_5 = aie.core(%tile25_5) { %lb = arith.constant 0 : index @@ -7455,7 +7455,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_17_buf_in_shim_26: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_17_core26_5 = aie.core(%tile26_5) { %lb = arith.constant 0 : index @@ -7478,7 +7478,7 @@ module @hdiff_bundle_32 { aie.objectfifo.release(%block_17_buf_row_5_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_17_core24_6 = 
aie.core(%tile24_6) { %lb = arith.constant 0 : index @@ -7506,7 +7506,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_17_buf_in_shim_26: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_17_core25_6 = aie.core(%tile25_6) { %lb = arith.constant 0 : index @@ -7539,7 +7539,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_17_buf_in_shim_26: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } // Gathering Tile %block_17_core26_6 = aie.core(%tile26_6) { @@ -7584,7 +7584,7 @@ module @hdiff_bundle_32 { } aie.use_lock(%lock266_14, "Acquire", 0) // stop the timer aie.end - } { link_with="hdiff_flux2.o" } + } %block_17_core24_7 = aie.core(%tile24_7) { %lb = arith.constant 0 : index @@ -7611,7 +7611,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_17_buf_in_shim_26: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_17_core25_7 = aie.core(%tile25_7) { %lb = arith.constant 0 : index @@ -7644,7 +7644,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_17_buf_in_shim_26: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_17_core26_7 = aie.core(%tile26_7) { %lb = arith.constant 0 : index @@ -7667,7 +7667,7 @@ module @hdiff_bundle_32 { aie.objectfifo.release(%block_17_buf_row_7_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_17_core24_8 = aie.core(%tile24_8) { %lb = arith.constant 0 : index @@ -7694,7 +7694,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_17_buf_in_shim_26: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_17_core25_8 = aie.core(%tile25_8) { %lb = arith.constant 0 : index @@ -7727,7 +7727,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_17_buf_in_shim_26: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_17_core26_8 = aie.core(%tile26_8) { %lb = arith.constant 0 : index @@ -7750,7 +7750,7 @@ module @hdiff_bundle_32 { 
aie.objectfifo.release(%block_17_buf_row_8_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_18_core27_1 = aie.core(%tile27_1) { %lb = arith.constant 0 : index @@ -7777,7 +7777,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_18_buf_in_shim_27: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_18_core28_1 = aie.core(%tile28_1) { %lb = arith.constant 0 : index @@ -7810,7 +7810,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_18_buf_in_shim_27: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_18_core29_1 = aie.core(%tile29_1) { %lb = arith.constant 0 : index @@ -7833,7 +7833,7 @@ module @hdiff_bundle_32 { aie.objectfifo.release(%block_18_buf_row_1_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_18_core27_2 = aie.core(%tile27_2) { %lb = arith.constant 0 : index @@ -7861,7 +7861,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_18_buf_in_shim_27: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_18_core28_2 = aie.core(%tile28_2) { %lb = arith.constant 0 : index @@ -7894,7 +7894,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_18_buf_in_shim_27: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } // Gathering Tile %block_18_core29_2 = aie.core(%tile29_2) { @@ -7939,7 +7939,7 @@ module @hdiff_bundle_32 { } aie.use_lock(%lock292_14, "Acquire", 0) // stop the timer aie.end - } { link_with="hdiff_flux2.o" } + } %block_18_core27_3 = aie.core(%tile27_3) { %lb = arith.constant 0 : index @@ -7966,7 +7966,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_18_buf_in_shim_27: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_18_core28_3 = aie.core(%tile28_3) { %lb = arith.constant 0 : index @@ -7999,7 +7999,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_18_buf_in_shim_27: !aie.objectfifo>, 7) aie.end - } { 
link_with="hdiff_flux1.o" } + } %block_18_core29_3 = aie.core(%tile29_3) { %lb = arith.constant 0 : index @@ -8022,7 +8022,7 @@ module @hdiff_bundle_32 { aie.objectfifo.release(%block_18_buf_row_3_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_18_core27_4 = aie.core(%tile27_4) { %lb = arith.constant 0 : index @@ -8049,7 +8049,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_18_buf_in_shim_27: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_18_core28_4 = aie.core(%tile28_4) { %lb = arith.constant 0 : index @@ -8082,7 +8082,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_18_buf_in_shim_27: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_18_core29_4 = aie.core(%tile29_4) { %lb = arith.constant 0 : index @@ -8105,7 +8105,7 @@ module @hdiff_bundle_32 { aie.objectfifo.release(%block_18_buf_row_4_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_19_core27_5 = aie.core(%tile27_5) { %lb = arith.constant 0 : index @@ -8132,7 +8132,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_19_buf_in_shim_27: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_19_core28_5 = aie.core(%tile28_5) { %lb = arith.constant 0 : index @@ -8165,7 +8165,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_19_buf_in_shim_27: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_19_core29_5 = aie.core(%tile29_5) { %lb = arith.constant 0 : index @@ -8188,7 +8188,7 @@ module @hdiff_bundle_32 { aie.objectfifo.release(%block_19_buf_row_5_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_19_core27_6 = aie.core(%tile27_6) { %lb = arith.constant 0 : index @@ -8216,7 +8216,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_19_buf_in_shim_27: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_19_core28_6 = aie.core(%tile28_6) 
{ %lb = arith.constant 0 : index @@ -8249,7 +8249,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_19_buf_in_shim_27: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } // Gathering Tile %block_19_core29_6 = aie.core(%tile29_6) { @@ -8294,7 +8294,7 @@ module @hdiff_bundle_32 { } aie.use_lock(%lock296_14, "Acquire", 0) // stop the timer aie.end - } { link_with="hdiff_flux2.o" } + } %block_19_core27_7 = aie.core(%tile27_7) { %lb = arith.constant 0 : index @@ -8321,7 +8321,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_19_buf_in_shim_27: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_19_core28_7 = aie.core(%tile28_7) { %lb = arith.constant 0 : index @@ -8354,7 +8354,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_19_buf_in_shim_27: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_19_core29_7 = aie.core(%tile29_7) { %lb = arith.constant 0 : index @@ -8377,7 +8377,7 @@ module @hdiff_bundle_32 { aie.objectfifo.release(%block_19_buf_row_7_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_19_core27_8 = aie.core(%tile27_8) { %lb = arith.constant 0 : index @@ -8404,7 +8404,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_19_buf_in_shim_27: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_19_core28_8 = aie.core(%tile28_8) { %lb = arith.constant 0 : index @@ -8437,7 +8437,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_19_buf_in_shim_27: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_19_core29_8 = aie.core(%tile29_8) { %lb = arith.constant 0 : index @@ -8460,7 +8460,7 @@ module @hdiff_bundle_32 { aie.objectfifo.release(%block_19_buf_row_8_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_20_core30_1 = aie.core(%tile30_1) { %lb = arith.constant 0 : index @@ -8487,7 +8487,7 @@ module @hdiff_bundle_32 { } 
aie.objectfifo.release(%block_20_buf_in_shim_34: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_20_core31_1 = aie.core(%tile31_1) { %lb = arith.constant 0 : index @@ -8520,7 +8520,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_20_buf_in_shim_34: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_20_core32_1 = aie.core(%tile32_1) { %lb = arith.constant 0 : index @@ -8543,7 +8543,7 @@ module @hdiff_bundle_32 { aie.objectfifo.release(%block_20_buf_row_1_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_20_core30_2 = aie.core(%tile30_2) { %lb = arith.constant 0 : index @@ -8571,7 +8571,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_20_buf_in_shim_34: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_20_core31_2 = aie.core(%tile31_2) { %lb = arith.constant 0 : index @@ -8604,7 +8604,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_20_buf_in_shim_34: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } // Gathering Tile %block_20_core32_2 = aie.core(%tile32_2) { @@ -8649,7 +8649,7 @@ module @hdiff_bundle_32 { } aie.use_lock(%lock322_14, "Acquire", 0) // stop the timer aie.end - } { link_with="hdiff_flux2.o" } + } %block_20_core30_3 = aie.core(%tile30_3) { %lb = arith.constant 0 : index @@ -8676,7 +8676,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_20_buf_in_shim_34: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_20_core31_3 = aie.core(%tile31_3) { %lb = arith.constant 0 : index @@ -8709,7 +8709,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_20_buf_in_shim_34: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_20_core32_3 = aie.core(%tile32_3) { %lb = arith.constant 0 : index @@ -8732,7 +8732,7 @@ module @hdiff_bundle_32 { aie.objectfifo.release(%block_20_buf_row_3_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { 
link_with="hdiff_flux2.o" } + } %block_20_core30_4 = aie.core(%tile30_4) { %lb = arith.constant 0 : index @@ -8759,7 +8759,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_20_buf_in_shim_34: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_20_core31_4 = aie.core(%tile31_4) { %lb = arith.constant 0 : index @@ -8792,7 +8792,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_20_buf_in_shim_34: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_20_core32_4 = aie.core(%tile32_4) { %lb = arith.constant 0 : index @@ -8815,7 +8815,7 @@ module @hdiff_bundle_32 { aie.objectfifo.release(%block_20_buf_row_4_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_21_core30_5 = aie.core(%tile30_5) { %lb = arith.constant 0 : index @@ -8842,7 +8842,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_21_buf_in_shim_34: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_21_core31_5 = aie.core(%tile31_5) { %lb = arith.constant 0 : index @@ -8875,7 +8875,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_21_buf_in_shim_34: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_21_core32_5 = aie.core(%tile32_5) { %lb = arith.constant 0 : index @@ -8898,7 +8898,7 @@ module @hdiff_bundle_32 { aie.objectfifo.release(%block_21_buf_row_5_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_21_core30_6 = aie.core(%tile30_6) { %lb = arith.constant 0 : index @@ -8926,7 +8926,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_21_buf_in_shim_34: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_21_core31_6 = aie.core(%tile31_6) { %lb = arith.constant 0 : index @@ -8959,7 +8959,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_21_buf_in_shim_34: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } // Gathering Tile %block_21_core32_6 = 
aie.core(%tile32_6) { @@ -9004,7 +9004,7 @@ module @hdiff_bundle_32 { } aie.use_lock(%lock326_14, "Acquire", 0) // stop the timer aie.end - } { link_with="hdiff_flux2.o" } + } %block_21_core30_7 = aie.core(%tile30_7) { %lb = arith.constant 0 : index @@ -9031,7 +9031,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_21_buf_in_shim_34: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_21_core31_7 = aie.core(%tile31_7) { %lb = arith.constant 0 : index @@ -9064,7 +9064,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_21_buf_in_shim_34: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_21_core32_7 = aie.core(%tile32_7) { %lb = arith.constant 0 : index @@ -9087,7 +9087,7 @@ module @hdiff_bundle_32 { aie.objectfifo.release(%block_21_buf_row_7_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_21_core30_8 = aie.core(%tile30_8) { %lb = arith.constant 0 : index @@ -9114,7 +9114,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_21_buf_in_shim_34: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_21_core31_8 = aie.core(%tile31_8) { %lb = arith.constant 0 : index @@ -9147,7 +9147,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_21_buf_in_shim_34: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_21_core32_8 = aie.core(%tile32_8) { %lb = arith.constant 0 : index @@ -9170,7 +9170,7 @@ module @hdiff_bundle_32 { aie.objectfifo.release(%block_21_buf_row_8_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_22_core33_1 = aie.core(%tile33_1) { %lb = arith.constant 0 : index @@ -9197,7 +9197,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_22_buf_in_shim_35: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_22_core34_1 = aie.core(%tile34_1) { %lb = arith.constant 0 : index @@ -9230,7 +9230,7 @@ module @hdiff_bundle_32 { } 
aie.objectfifo.release(%block_22_buf_in_shim_35: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_22_core35_1 = aie.core(%tile35_1) { %lb = arith.constant 0 : index @@ -9253,7 +9253,7 @@ module @hdiff_bundle_32 { aie.objectfifo.release(%block_22_buf_row_1_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_22_core33_2 = aie.core(%tile33_2) { %lb = arith.constant 0 : index @@ -9281,7 +9281,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_22_buf_in_shim_35: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_22_core34_2 = aie.core(%tile34_2) { %lb = arith.constant 0 : index @@ -9314,7 +9314,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_22_buf_in_shim_35: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } // Gathering Tile %block_22_core35_2 = aie.core(%tile35_2) { @@ -9359,7 +9359,7 @@ module @hdiff_bundle_32 { } aie.use_lock(%lock352_14, "Acquire", 0) // stop the timer aie.end - } { link_with="hdiff_flux2.o" } + } %block_22_core33_3 = aie.core(%tile33_3) { %lb = arith.constant 0 : index @@ -9386,7 +9386,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_22_buf_in_shim_35: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_22_core34_3 = aie.core(%tile34_3) { %lb = arith.constant 0 : index @@ -9419,7 +9419,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_22_buf_in_shim_35: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_22_core35_3 = aie.core(%tile35_3) { %lb = arith.constant 0 : index @@ -9442,7 +9442,7 @@ module @hdiff_bundle_32 { aie.objectfifo.release(%block_22_buf_row_3_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_22_core33_4 = aie.core(%tile33_4) { %lb = arith.constant 0 : index @@ -9469,7 +9469,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_22_buf_in_shim_35: !aie.objectfifo>, 4) aie.end - } { 
link_with="hdiff_lap.o" } + } %block_22_core34_4 = aie.core(%tile34_4) { %lb = arith.constant 0 : index @@ -9502,7 +9502,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_22_buf_in_shim_35: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_22_core35_4 = aie.core(%tile35_4) { %lb = arith.constant 0 : index @@ -9525,7 +9525,7 @@ module @hdiff_bundle_32 { aie.objectfifo.release(%block_22_buf_row_4_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_23_core33_5 = aie.core(%tile33_5) { %lb = arith.constant 0 : index @@ -9552,7 +9552,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_23_buf_in_shim_35: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_23_core34_5 = aie.core(%tile34_5) { %lb = arith.constant 0 : index @@ -9585,7 +9585,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_23_buf_in_shim_35: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_23_core35_5 = aie.core(%tile35_5) { %lb = arith.constant 0 : index @@ -9608,7 +9608,7 @@ module @hdiff_bundle_32 { aie.objectfifo.release(%block_23_buf_row_5_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_23_core33_6 = aie.core(%tile33_6) { %lb = arith.constant 0 : index @@ -9636,7 +9636,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_23_buf_in_shim_35: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_23_core34_6 = aie.core(%tile34_6) { %lb = arith.constant 0 : index @@ -9669,7 +9669,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_23_buf_in_shim_35: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } // Gathering Tile %block_23_core35_6 = aie.core(%tile35_6) { @@ -9714,7 +9714,7 @@ module @hdiff_bundle_32 { } aie.use_lock(%lock356_14, "Acquire", 0) // stop the timer aie.end - } { link_with="hdiff_flux2.o" } + } %block_23_core33_7 = aie.core(%tile33_7) { %lb = arith.constant 0 : 
index @@ -9741,7 +9741,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_23_buf_in_shim_35: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_23_core34_7 = aie.core(%tile34_7) { %lb = arith.constant 0 : index @@ -9774,7 +9774,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_23_buf_in_shim_35: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_23_core35_7 = aie.core(%tile35_7) { %lb = arith.constant 0 : index @@ -9797,7 +9797,7 @@ module @hdiff_bundle_32 { aie.objectfifo.release(%block_23_buf_row_7_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_23_core33_8 = aie.core(%tile33_8) { %lb = arith.constant 0 : index @@ -9824,7 +9824,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_23_buf_in_shim_35: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_23_core34_8 = aie.core(%tile34_8) { %lb = arith.constant 0 : index @@ -9857,7 +9857,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_23_buf_in_shim_35: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_23_core35_8 = aie.core(%tile35_8) { %lb = arith.constant 0 : index @@ -9880,7 +9880,7 @@ module @hdiff_bundle_32 { aie.objectfifo.release(%block_23_buf_row_8_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_24_core36_1 = aie.core(%tile36_1) { %lb = arith.constant 0 : index @@ -9907,7 +9907,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_24_buf_in_shim_42: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_24_core37_1 = aie.core(%tile37_1) { %lb = arith.constant 0 : index @@ -9940,7 +9940,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_24_buf_in_shim_42: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_24_core38_1 = aie.core(%tile38_1) { %lb = arith.constant 0 : index @@ -9963,7 +9963,7 @@ module @hdiff_bundle_32 { 
aie.objectfifo.release(%block_24_buf_row_1_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_24_core36_2 = aie.core(%tile36_2) { %lb = arith.constant 0 : index @@ -9991,7 +9991,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_24_buf_in_shim_42: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_24_core37_2 = aie.core(%tile37_2) { %lb = arith.constant 0 : index @@ -10024,7 +10024,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_24_buf_in_shim_42: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } // Gathering Tile %block_24_core38_2 = aie.core(%tile38_2) { @@ -10069,7 +10069,7 @@ module @hdiff_bundle_32 { } aie.use_lock(%lock382_14, "Acquire", 0) // stop the timer aie.end - } { link_with="hdiff_flux2.o" } + } %block_24_core36_3 = aie.core(%tile36_3) { %lb = arith.constant 0 : index @@ -10096,7 +10096,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_24_buf_in_shim_42: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_24_core37_3 = aie.core(%tile37_3) { %lb = arith.constant 0 : index @@ -10129,7 +10129,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_24_buf_in_shim_42: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_24_core38_3 = aie.core(%tile38_3) { %lb = arith.constant 0 : index @@ -10152,7 +10152,7 @@ module @hdiff_bundle_32 { aie.objectfifo.release(%block_24_buf_row_3_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_24_core36_4 = aie.core(%tile36_4) { %lb = arith.constant 0 : index @@ -10179,7 +10179,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_24_buf_in_shim_42: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_24_core37_4 = aie.core(%tile37_4) { %lb = arith.constant 0 : index @@ -10212,7 +10212,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_24_buf_in_shim_42: !aie.objectfifo>, 7) aie.end - } { 
link_with="hdiff_flux1.o" } + } %block_24_core38_4 = aie.core(%tile38_4) { %lb = arith.constant 0 : index @@ -10235,7 +10235,7 @@ module @hdiff_bundle_32 { aie.objectfifo.release(%block_24_buf_row_4_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_25_core36_5 = aie.core(%tile36_5) { %lb = arith.constant 0 : index @@ -10262,7 +10262,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_25_buf_in_shim_42: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_25_core37_5 = aie.core(%tile37_5) { %lb = arith.constant 0 : index @@ -10295,7 +10295,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_25_buf_in_shim_42: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_25_core38_5 = aie.core(%tile38_5) { %lb = arith.constant 0 : index @@ -10318,7 +10318,7 @@ module @hdiff_bundle_32 { aie.objectfifo.release(%block_25_buf_row_5_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_25_core36_6 = aie.core(%tile36_6) { %lb = arith.constant 0 : index @@ -10346,7 +10346,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_25_buf_in_shim_42: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_25_core37_6 = aie.core(%tile37_6) { %lb = arith.constant 0 : index @@ -10379,7 +10379,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_25_buf_in_shim_42: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } // Gathering Tile %block_25_core38_6 = aie.core(%tile38_6) { @@ -10424,7 +10424,7 @@ module @hdiff_bundle_32 { } aie.use_lock(%lock386_14, "Acquire", 0) // stop the timer aie.end - } { link_with="hdiff_flux2.o" } + } %block_25_core36_7 = aie.core(%tile36_7) { %lb = arith.constant 0 : index @@ -10451,7 +10451,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_25_buf_in_shim_42: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_25_core37_7 = aie.core(%tile37_7) { %lb = 
arith.constant 0 : index @@ -10484,7 +10484,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_25_buf_in_shim_42: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_25_core38_7 = aie.core(%tile38_7) { %lb = arith.constant 0 : index @@ -10507,7 +10507,7 @@ module @hdiff_bundle_32 { aie.objectfifo.release(%block_25_buf_row_7_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_25_core36_8 = aie.core(%tile36_8) { %lb = arith.constant 0 : index @@ -10534,7 +10534,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_25_buf_in_shim_42: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_25_core37_8 = aie.core(%tile37_8) { %lb = arith.constant 0 : index @@ -10567,7 +10567,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_25_buf_in_shim_42: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_25_core38_8 = aie.core(%tile38_8) { %lb = arith.constant 0 : index @@ -10590,7 +10590,7 @@ module @hdiff_bundle_32 { aie.objectfifo.release(%block_25_buf_row_8_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_26_core39_1 = aie.core(%tile39_1) { %lb = arith.constant 0 : index @@ -10617,7 +10617,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_26_buf_in_shim_43: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_26_core40_1 = aie.core(%tile40_1) { %lb = arith.constant 0 : index @@ -10650,7 +10650,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_26_buf_in_shim_43: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_26_core41_1 = aie.core(%tile41_1) { %lb = arith.constant 0 : index @@ -10673,7 +10673,7 @@ module @hdiff_bundle_32 { aie.objectfifo.release(%block_26_buf_row_1_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_26_core39_2 = aie.core(%tile39_2) { %lb = arith.constant 0 : index @@ -10701,7 +10701,7 @@ module 
@hdiff_bundle_32 { } aie.objectfifo.release(%block_26_buf_in_shim_43: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_26_core40_2 = aie.core(%tile40_2) { %lb = arith.constant 0 : index @@ -10734,7 +10734,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_26_buf_in_shim_43: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } // Gathering Tile %block_26_core41_2 = aie.core(%tile41_2) { @@ -10779,7 +10779,7 @@ module @hdiff_bundle_32 { } aie.use_lock(%lock412_14, "Acquire", 0) // stop the timer aie.end - } { link_with="hdiff_flux2.o" } + } %block_26_core39_3 = aie.core(%tile39_3) { %lb = arith.constant 0 : index @@ -10806,7 +10806,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_26_buf_in_shim_43: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_26_core40_3 = aie.core(%tile40_3) { %lb = arith.constant 0 : index @@ -10839,7 +10839,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_26_buf_in_shim_43: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_26_core41_3 = aie.core(%tile41_3) { %lb = arith.constant 0 : index @@ -10862,7 +10862,7 @@ module @hdiff_bundle_32 { aie.objectfifo.release(%block_26_buf_row_3_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_26_core39_4 = aie.core(%tile39_4) { %lb = arith.constant 0 : index @@ -10889,7 +10889,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_26_buf_in_shim_43: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_26_core40_4 = aie.core(%tile40_4) { %lb = arith.constant 0 : index @@ -10922,7 +10922,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_26_buf_in_shim_43: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_26_core41_4 = aie.core(%tile41_4) { %lb = arith.constant 0 : index @@ -10945,7 +10945,7 @@ module @hdiff_bundle_32 { aie.objectfifo.release(%block_26_buf_row_4_out_flx2 :!aie.objectfifo>, 
1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_27_core39_5 = aie.core(%tile39_5) { %lb = arith.constant 0 : index @@ -10972,7 +10972,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_27_buf_in_shim_43: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_27_core40_5 = aie.core(%tile40_5) { %lb = arith.constant 0 : index @@ -11005,7 +11005,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_27_buf_in_shim_43: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_27_core41_5 = aie.core(%tile41_5) { %lb = arith.constant 0 : index @@ -11028,7 +11028,7 @@ module @hdiff_bundle_32 { aie.objectfifo.release(%block_27_buf_row_5_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_27_core39_6 = aie.core(%tile39_6) { %lb = arith.constant 0 : index @@ -11056,7 +11056,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_27_buf_in_shim_43: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_27_core40_6 = aie.core(%tile40_6) { %lb = arith.constant 0 : index @@ -11089,7 +11089,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_27_buf_in_shim_43: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } // Gathering Tile %block_27_core41_6 = aie.core(%tile41_6) { @@ -11134,7 +11134,7 @@ module @hdiff_bundle_32 { } aie.use_lock(%lock416_14, "Acquire", 0) // stop the timer aie.end - } { link_with="hdiff_flux2.o" } + } %block_27_core39_7 = aie.core(%tile39_7) { %lb = arith.constant 0 : index @@ -11161,7 +11161,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_27_buf_in_shim_43: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_27_core40_7 = aie.core(%tile40_7) { %lb = arith.constant 0 : index @@ -11194,7 +11194,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_27_buf_in_shim_43: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_27_core41_7 = 
aie.core(%tile41_7) { %lb = arith.constant 0 : index @@ -11217,7 +11217,7 @@ module @hdiff_bundle_32 { aie.objectfifo.release(%block_27_buf_row_7_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_27_core39_8 = aie.core(%tile39_8) { %lb = arith.constant 0 : index @@ -11244,7 +11244,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_27_buf_in_shim_43: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_27_core40_8 = aie.core(%tile40_8) { %lb = arith.constant 0 : index @@ -11277,7 +11277,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_27_buf_in_shim_43: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_27_core41_8 = aie.core(%tile41_8) { %lb = arith.constant 0 : index @@ -11300,7 +11300,7 @@ module @hdiff_bundle_32 { aie.objectfifo.release(%block_27_buf_row_8_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_28_core42_1 = aie.core(%tile42_1) { %lb = arith.constant 0 : index @@ -11327,7 +11327,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_28_buf_in_shim_46: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_28_core43_1 = aie.core(%tile43_1) { %lb = arith.constant 0 : index @@ -11360,7 +11360,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_28_buf_in_shim_46: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_28_core44_1 = aie.core(%tile44_1) { %lb = arith.constant 0 : index @@ -11383,7 +11383,7 @@ module @hdiff_bundle_32 { aie.objectfifo.release(%block_28_buf_row_1_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_28_core42_2 = aie.core(%tile42_2) { %lb = arith.constant 0 : index @@ -11411,7 +11411,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_28_buf_in_shim_46: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_28_core43_2 = aie.core(%tile43_2) { %lb = arith.constant 0 : index @@ 
-11444,7 +11444,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_28_buf_in_shim_46: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } // Gathering Tile %block_28_core44_2 = aie.core(%tile44_2) { @@ -11489,7 +11489,7 @@ module @hdiff_bundle_32 { } aie.use_lock(%lock442_14, "Acquire", 0) // stop the timer aie.end - } { link_with="hdiff_flux2.o" } + } %block_28_core42_3 = aie.core(%tile42_3) { %lb = arith.constant 0 : index @@ -11516,7 +11516,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_28_buf_in_shim_46: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_28_core43_3 = aie.core(%tile43_3) { %lb = arith.constant 0 : index @@ -11549,7 +11549,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_28_buf_in_shim_46: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_28_core44_3 = aie.core(%tile44_3) { %lb = arith.constant 0 : index @@ -11572,7 +11572,7 @@ module @hdiff_bundle_32 { aie.objectfifo.release(%block_28_buf_row_3_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_28_core42_4 = aie.core(%tile42_4) { %lb = arith.constant 0 : index @@ -11599,7 +11599,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_28_buf_in_shim_46: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_28_core43_4 = aie.core(%tile43_4) { %lb = arith.constant 0 : index @@ -11632,7 +11632,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_28_buf_in_shim_46: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_28_core44_4 = aie.core(%tile44_4) { %lb = arith.constant 0 : index @@ -11655,7 +11655,7 @@ module @hdiff_bundle_32 { aie.objectfifo.release(%block_28_buf_row_4_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_29_core42_5 = aie.core(%tile42_5) { %lb = arith.constant 0 : index @@ -11682,7 +11682,7 @@ module @hdiff_bundle_32 { } 
aie.objectfifo.release(%block_29_buf_in_shim_46: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_29_core43_5 = aie.core(%tile43_5) { %lb = arith.constant 0 : index @@ -11715,7 +11715,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_29_buf_in_shim_46: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_29_core44_5 = aie.core(%tile44_5) { %lb = arith.constant 0 : index @@ -11738,7 +11738,7 @@ module @hdiff_bundle_32 { aie.objectfifo.release(%block_29_buf_row_5_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_29_core42_6 = aie.core(%tile42_6) { %lb = arith.constant 0 : index @@ -11766,7 +11766,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_29_buf_in_shim_46: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_29_core43_6 = aie.core(%tile43_6) { %lb = arith.constant 0 : index @@ -11799,7 +11799,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_29_buf_in_shim_46: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } // Gathering Tile %block_29_core44_6 = aie.core(%tile44_6) { @@ -11844,7 +11844,7 @@ module @hdiff_bundle_32 { } aie.use_lock(%lock446_14, "Acquire", 0) // stop the timer aie.end - } { link_with="hdiff_flux2.o" } + } %block_29_core42_7 = aie.core(%tile42_7) { %lb = arith.constant 0 : index @@ -11871,7 +11871,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_29_buf_in_shim_46: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_29_core43_7 = aie.core(%tile43_7) { %lb = arith.constant 0 : index @@ -11904,7 +11904,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_29_buf_in_shim_46: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_29_core44_7 = aie.core(%tile44_7) { %lb = arith.constant 0 : index @@ -11927,7 +11927,7 @@ module @hdiff_bundle_32 { aie.objectfifo.release(%block_29_buf_row_7_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { 
link_with="hdiff_flux2.o" } + } %block_29_core42_8 = aie.core(%tile42_8) { %lb = arith.constant 0 : index @@ -11954,7 +11954,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_29_buf_in_shim_46: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_29_core43_8 = aie.core(%tile43_8) { %lb = arith.constant 0 : index @@ -11987,7 +11987,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_29_buf_in_shim_46: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_29_core44_8 = aie.core(%tile44_8) { %lb = arith.constant 0 : index @@ -12010,7 +12010,7 @@ module @hdiff_bundle_32 { aie.objectfifo.release(%block_29_buf_row_8_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_30_core45_1 = aie.core(%tile45_1) { %lb = arith.constant 0 : index @@ -12037,7 +12037,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_30_buf_in_shim_47: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_30_core46_1 = aie.core(%tile46_1) { %lb = arith.constant 0 : index @@ -12070,7 +12070,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_30_buf_in_shim_47: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_30_core47_1 = aie.core(%tile47_1) { %lb = arith.constant 0 : index @@ -12093,7 +12093,7 @@ module @hdiff_bundle_32 { aie.objectfifo.release(%block_30_buf_row_1_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_30_core45_2 = aie.core(%tile45_2) { %lb = arith.constant 0 : index @@ -12121,7 +12121,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_30_buf_in_shim_47: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_30_core46_2 = aie.core(%tile46_2) { %lb = arith.constant 0 : index @@ -12154,7 +12154,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_30_buf_in_shim_47: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } // Gathering Tile 
%block_30_core47_2 = aie.core(%tile47_2) { @@ -12199,7 +12199,7 @@ module @hdiff_bundle_32 { } aie.use_lock(%lock472_14, "Acquire", 0) // stop the timer aie.end - } { link_with="hdiff_flux2.o" } + } %block_30_core45_3 = aie.core(%tile45_3) { %lb = arith.constant 0 : index @@ -12226,7 +12226,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_30_buf_in_shim_47: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_30_core46_3 = aie.core(%tile46_3) { %lb = arith.constant 0 : index @@ -12259,7 +12259,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_30_buf_in_shim_47: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_30_core47_3 = aie.core(%tile47_3) { %lb = arith.constant 0 : index @@ -12282,7 +12282,7 @@ module @hdiff_bundle_32 { aie.objectfifo.release(%block_30_buf_row_3_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_30_core45_4 = aie.core(%tile45_4) { %lb = arith.constant 0 : index @@ -12309,7 +12309,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_30_buf_in_shim_47: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_30_core46_4 = aie.core(%tile46_4) { %lb = arith.constant 0 : index @@ -12342,7 +12342,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_30_buf_in_shim_47: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_30_core47_4 = aie.core(%tile47_4) { %lb = arith.constant 0 : index @@ -12365,7 +12365,7 @@ module @hdiff_bundle_32 { aie.objectfifo.release(%block_30_buf_row_4_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_31_core45_5 = aie.core(%tile45_5) { %lb = arith.constant 0 : index @@ -12392,7 +12392,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_31_buf_in_shim_47: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_31_core46_5 = aie.core(%tile46_5) { %lb = arith.constant 0 : index @@ -12425,7 +12425,7 @@ module 
@hdiff_bundle_32 { } aie.objectfifo.release(%block_31_buf_in_shim_47: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_31_core47_5 = aie.core(%tile47_5) { %lb = arith.constant 0 : index @@ -12448,7 +12448,7 @@ module @hdiff_bundle_32 { aie.objectfifo.release(%block_31_buf_row_5_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_31_core45_6 = aie.core(%tile45_6) { %lb = arith.constant 0 : index @@ -12476,7 +12476,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_31_buf_in_shim_47: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_31_core46_6 = aie.core(%tile46_6) { %lb = arith.constant 0 : index @@ -12509,7 +12509,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_31_buf_in_shim_47: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } // Gathering Tile %block_31_core47_6 = aie.core(%tile47_6) { @@ -12554,7 +12554,7 @@ module @hdiff_bundle_32 { } aie.use_lock(%lock476_14, "Acquire", 0) // stop the timer aie.end - } { link_with="hdiff_flux2.o" } + } %block_31_core45_7 = aie.core(%tile45_7) { %lb = arith.constant 0 : index @@ -12581,7 +12581,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_31_buf_in_shim_47: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_31_core46_7 = aie.core(%tile46_7) { %lb = arith.constant 0 : index @@ -12614,7 +12614,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_31_buf_in_shim_47: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_31_core47_7 = aie.core(%tile47_7) { %lb = arith.constant 0 : index @@ -12637,7 +12637,7 @@ module @hdiff_bundle_32 { aie.objectfifo.release(%block_31_buf_row_7_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_31_core45_8 = aie.core(%tile45_8) { %lb = arith.constant 0 : index @@ -12664,7 +12664,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_31_buf_in_shim_47: 
!aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_31_core46_8 = aie.core(%tile46_8) { %lb = arith.constant 0 : index @@ -12697,7 +12697,7 @@ module @hdiff_bundle_32 { } aie.objectfifo.release(%block_31_buf_in_shim_47: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_31_core47_8 = aie.core(%tile47_8) { %lb = arith.constant 0 : index @@ -12720,6 +12720,6 @@ module @hdiff_bundle_32 { aie.objectfifo.release(%block_31_buf_row_8_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } } diff --git a/programming_examples/mlir/horizontal_diffusion/HDIFF_tri_AIE_objectFIFO_ping_pong_scaled/aie_4.mlir b/programming_examples/mlir/horizontal_diffusion/HDIFF_tri_AIE_objectFIFO_ping_pong_scaled/aie_4.mlir index b1c53088895..1dd0746754a 100644 --- a/programming_examples/mlir/horizontal_diffusion/HDIFF_tri_AIE_objectFIFO_ping_pong_scaled/aie_4.mlir +++ b/programming_examples/mlir/horizontal_diffusion/HDIFF_tri_AIE_objectFIFO_ping_pong_scaled/aie_4.mlir @@ -183,9 +183,9 @@ module @hdiff_bundle_4 { aie.objectfifo.register_external_buffers(%tile3_0, %block_3_buf_out_shim_3 : !aie.objectfifo>, {%ext_buffer_out_3}) : (memref<2048xi32>) - func.func private @hdiff_lap(%AL: memref<256xi32>,%BL: memref<256xi32>, %CL: memref<256xi32>, %DL: memref<256xi32>, %EL: memref<256xi32>, %OLL1: memref<256xi32>, %OLL2: memref<256xi32>, %OLL3: memref<256xi32>, %OLL4: memref<256xi32>) -> () - func.func private @hdiff_flux1(%AF: memref<256xi32>,%BF: memref<256xi32>, %CF: memref<256xi32>, %OLF1: memref<256xi32>, %OLF2: memref<256xi32>, %OLF3: memref<256xi32>, %OLF4: memref<256xi32>, %OFI1: memref<512xi32>, %OFI2: memref<512xi32>, %OFI3: memref<512xi32>, %OFI4: memref<512xi32>, %OFI5: memref<512xi32>) -> () - func.func private @hdiff_flux2( %Inter1: memref<512xi32>,%Inter2: memref<512xi32>, %Inter3: memref<512xi32>,%Inter4: memref<512xi32>,%Inter5: memref<512xi32>, %Out: memref<256xi32>) -> () + func.func private @hdiff_lap(%AL: 
memref<256xi32>,%BL: memref<256xi32>, %CL: memref<256xi32>, %DL: memref<256xi32>, %EL: memref<256xi32>, %OLL1: memref<256xi32>, %OLL2: memref<256xi32>, %OLL3: memref<256xi32>, %OLL4: memref<256xi32>) -> () attributes {link_with = "hdiff_lap.o"} + func.func private @hdiff_flux1(%AF: memref<256xi32>,%BF: memref<256xi32>, %CF: memref<256xi32>, %OLF1: memref<256xi32>, %OLF2: memref<256xi32>, %OLF3: memref<256xi32>, %OLF4: memref<256xi32>, %OFI1: memref<512xi32>, %OFI2: memref<512xi32>, %OFI3: memref<512xi32>, %OFI4: memref<512xi32>, %OFI5: memref<512xi32>) -> () attributes {link_with = "hdiff_flux1.o"} + func.func private @hdiff_flux2( %Inter1: memref<512xi32>,%Inter2: memref<512xi32>, %Inter3: memref<512xi32>,%Inter4: memref<512xi32>,%Inter5: memref<512xi32>, %Out: memref<256xi32>) -> () attributes {link_with = "hdiff_flux2.o"} %block_0_core0_1 = aie.core(%tile0_1) { %lb = arith.constant 0 : index @@ -212,7 +212,7 @@ module @hdiff_bundle_4 { } aie.objectfifo.release(%block_0_buf_in_shim_2: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_0_core1_1 = aie.core(%tile1_1) { %lb = arith.constant 0 : index @@ -245,7 +245,7 @@ module @hdiff_bundle_4 { } aie.objectfifo.release(%block_0_buf_in_shim_2: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_0_core2_1 = aie.core(%tile2_1) { %lb = arith.constant 0 : index @@ -268,7 +268,7 @@ module @hdiff_bundle_4 { aie.objectfifo.release(%block_0_buf_row_1_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_0_core0_2 = aie.core(%tile0_2) { %lb = arith.constant 0 : index @@ -296,7 +296,7 @@ module @hdiff_bundle_4 { } aie.objectfifo.release(%block_0_buf_in_shim_2: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_0_core1_2 = aie.core(%tile1_2) { %lb = arith.constant 0 : index @@ -329,7 +329,7 @@ module @hdiff_bundle_4 { } aie.objectfifo.release(%block_0_buf_in_shim_2: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } 
// Gathering Tile %block_0_core2_2 = aie.core(%tile2_2) { @@ -374,7 +374,7 @@ module @hdiff_bundle_4 { } aie.use_lock(%lock22_14, "Acquire", 0) // stop the timer aie.end - } { link_with="hdiff_flux2.o" } + } %block_0_core0_3 = aie.core(%tile0_3) { %lb = arith.constant 0 : index @@ -401,7 +401,7 @@ module @hdiff_bundle_4 { } aie.objectfifo.release(%block_0_buf_in_shim_2: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_0_core1_3 = aie.core(%tile1_3) { %lb = arith.constant 0 : index @@ -434,7 +434,7 @@ module @hdiff_bundle_4 { } aie.objectfifo.release(%block_0_buf_in_shim_2: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_0_core2_3 = aie.core(%tile2_3) { %lb = arith.constant 0 : index @@ -457,7 +457,7 @@ module @hdiff_bundle_4 { aie.objectfifo.release(%block_0_buf_row_3_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_0_core0_4 = aie.core(%tile0_4) { %lb = arith.constant 0 : index @@ -484,7 +484,7 @@ module @hdiff_bundle_4 { } aie.objectfifo.release(%block_0_buf_in_shim_2: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_0_core1_4 = aie.core(%tile1_4) { %lb = arith.constant 0 : index @@ -517,7 +517,7 @@ module @hdiff_bundle_4 { } aie.objectfifo.release(%block_0_buf_in_shim_2: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_0_core2_4 = aie.core(%tile2_4) { %lb = arith.constant 0 : index @@ -540,7 +540,7 @@ module @hdiff_bundle_4 { aie.objectfifo.release(%block_0_buf_row_4_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_1_core0_5 = aie.core(%tile0_5) { %lb = arith.constant 0 : index @@ -567,7 +567,7 @@ module @hdiff_bundle_4 { } aie.objectfifo.release(%block_1_buf_in_shim_2: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_1_core1_5 = aie.core(%tile1_5) { %lb = arith.constant 0 : index @@ -600,7 +600,7 @@ module @hdiff_bundle_4 { } 
aie.objectfifo.release(%block_1_buf_in_shim_2: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_1_core2_5 = aie.core(%tile2_5) { %lb = arith.constant 0 : index @@ -623,7 +623,7 @@ module @hdiff_bundle_4 { aie.objectfifo.release(%block_1_buf_row_5_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_1_core0_6 = aie.core(%tile0_6) { %lb = arith.constant 0 : index @@ -651,7 +651,7 @@ module @hdiff_bundle_4 { } aie.objectfifo.release(%block_1_buf_in_shim_2: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_1_core1_6 = aie.core(%tile1_6) { %lb = arith.constant 0 : index @@ -684,7 +684,7 @@ module @hdiff_bundle_4 { } aie.objectfifo.release(%block_1_buf_in_shim_2: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } // Gathering Tile %block_1_core2_6 = aie.core(%tile2_6) { @@ -729,7 +729,7 @@ module @hdiff_bundle_4 { } aie.use_lock(%lock26_14, "Acquire", 0) // stop the timer aie.end - } { link_with="hdiff_flux2.o" } + } %block_1_core0_7 = aie.core(%tile0_7) { %lb = arith.constant 0 : index @@ -756,7 +756,7 @@ module @hdiff_bundle_4 { } aie.objectfifo.release(%block_1_buf_in_shim_2: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_1_core1_7 = aie.core(%tile1_7) { %lb = arith.constant 0 : index @@ -789,7 +789,7 @@ module @hdiff_bundle_4 { } aie.objectfifo.release(%block_1_buf_in_shim_2: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_1_core2_7 = aie.core(%tile2_7) { %lb = arith.constant 0 : index @@ -812,7 +812,7 @@ module @hdiff_bundle_4 { aie.objectfifo.release(%block_1_buf_row_7_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_1_core0_8 = aie.core(%tile0_8) { %lb = arith.constant 0 : index @@ -839,7 +839,7 @@ module @hdiff_bundle_4 { } aie.objectfifo.release(%block_1_buf_in_shim_2: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_1_core1_8 = aie.core(%tile1_8) { %lb = 
arith.constant 0 : index @@ -872,7 +872,7 @@ module @hdiff_bundle_4 { } aie.objectfifo.release(%block_1_buf_in_shim_2: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_1_core2_8 = aie.core(%tile2_8) { %lb = arith.constant 0 : index @@ -895,7 +895,7 @@ module @hdiff_bundle_4 { aie.objectfifo.release(%block_1_buf_row_8_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_2_core3_1 = aie.core(%tile3_1) { %lb = arith.constant 0 : index @@ -922,7 +922,7 @@ module @hdiff_bundle_4 { } aie.objectfifo.release(%block_2_buf_in_shim_3: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_2_core4_1 = aie.core(%tile4_1) { %lb = arith.constant 0 : index @@ -955,7 +955,7 @@ module @hdiff_bundle_4 { } aie.objectfifo.release(%block_2_buf_in_shim_3: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_2_core5_1 = aie.core(%tile5_1) { %lb = arith.constant 0 : index @@ -978,7 +978,7 @@ module @hdiff_bundle_4 { aie.objectfifo.release(%block_2_buf_row_1_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_2_core3_2 = aie.core(%tile3_2) { %lb = arith.constant 0 : index @@ -1006,7 +1006,7 @@ module @hdiff_bundle_4 { } aie.objectfifo.release(%block_2_buf_in_shim_3: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_2_core4_2 = aie.core(%tile4_2) { %lb = arith.constant 0 : index @@ -1039,7 +1039,7 @@ module @hdiff_bundle_4 { } aie.objectfifo.release(%block_2_buf_in_shim_3: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } // Gathering Tile %block_2_core5_2 = aie.core(%tile5_2) { @@ -1084,7 +1084,7 @@ module @hdiff_bundle_4 { } aie.use_lock(%lock52_14, "Acquire", 0) // stop the timer aie.end - } { link_with="hdiff_flux2.o" } + } %block_2_core3_3 = aie.core(%tile3_3) { %lb = arith.constant 0 : index @@ -1111,7 +1111,7 @@ module @hdiff_bundle_4 { } aie.objectfifo.release(%block_2_buf_in_shim_3: !aie.objectfifo>, 4) aie.end - } { 
link_with="hdiff_lap.o" } + } %block_2_core4_3 = aie.core(%tile4_3) { %lb = arith.constant 0 : index @@ -1144,7 +1144,7 @@ module @hdiff_bundle_4 { } aie.objectfifo.release(%block_2_buf_in_shim_3: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_2_core5_3 = aie.core(%tile5_3) { %lb = arith.constant 0 : index @@ -1167,7 +1167,7 @@ module @hdiff_bundle_4 { aie.objectfifo.release(%block_2_buf_row_3_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_2_core3_4 = aie.core(%tile3_4) { %lb = arith.constant 0 : index @@ -1194,7 +1194,7 @@ module @hdiff_bundle_4 { } aie.objectfifo.release(%block_2_buf_in_shim_3: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_2_core4_4 = aie.core(%tile4_4) { %lb = arith.constant 0 : index @@ -1227,7 +1227,7 @@ module @hdiff_bundle_4 { } aie.objectfifo.release(%block_2_buf_in_shim_3: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_2_core5_4 = aie.core(%tile5_4) { %lb = arith.constant 0 : index @@ -1250,7 +1250,7 @@ module @hdiff_bundle_4 { aie.objectfifo.release(%block_2_buf_row_4_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_3_core3_5 = aie.core(%tile3_5) { %lb = arith.constant 0 : index @@ -1277,7 +1277,7 @@ module @hdiff_bundle_4 { } aie.objectfifo.release(%block_3_buf_in_shim_3: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_3_core4_5 = aie.core(%tile4_5) { %lb = arith.constant 0 : index @@ -1310,7 +1310,7 @@ module @hdiff_bundle_4 { } aie.objectfifo.release(%block_3_buf_in_shim_3: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_3_core5_5 = aie.core(%tile5_5) { %lb = arith.constant 0 : index @@ -1333,7 +1333,7 @@ module @hdiff_bundle_4 { aie.objectfifo.release(%block_3_buf_row_5_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_3_core3_6 = aie.core(%tile3_6) { %lb = arith.constant 0 : index @@ -1361,7 
+1361,7 @@ module @hdiff_bundle_4 { } aie.objectfifo.release(%block_3_buf_in_shim_3: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_3_core4_6 = aie.core(%tile4_6) { %lb = arith.constant 0 : index @@ -1394,7 +1394,7 @@ module @hdiff_bundle_4 { } aie.objectfifo.release(%block_3_buf_in_shim_3: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } // Gathering Tile %block_3_core5_6 = aie.core(%tile5_6) { @@ -1439,7 +1439,7 @@ module @hdiff_bundle_4 { } aie.use_lock(%lock56_14, "Acquire", 0) // stop the timer aie.end - } { link_with="hdiff_flux2.o" } + } %block_3_core3_7 = aie.core(%tile3_7) { %lb = arith.constant 0 : index @@ -1466,7 +1466,7 @@ module @hdiff_bundle_4 { } aie.objectfifo.release(%block_3_buf_in_shim_3: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_3_core4_7 = aie.core(%tile4_7) { %lb = arith.constant 0 : index @@ -1499,7 +1499,7 @@ module @hdiff_bundle_4 { } aie.objectfifo.release(%block_3_buf_in_shim_3: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_3_core5_7 = aie.core(%tile5_7) { %lb = arith.constant 0 : index @@ -1522,7 +1522,7 @@ module @hdiff_bundle_4 { aie.objectfifo.release(%block_3_buf_row_7_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_3_core3_8 = aie.core(%tile3_8) { %lb = arith.constant 0 : index @@ -1549,7 +1549,7 @@ module @hdiff_bundle_4 { } aie.objectfifo.release(%block_3_buf_in_shim_3: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_3_core4_8 = aie.core(%tile4_8) { %lb = arith.constant 0 : index @@ -1582,7 +1582,7 @@ module @hdiff_bundle_4 { } aie.objectfifo.release(%block_3_buf_in_shim_3: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_3_core5_8 = aie.core(%tile5_8) { %lb = arith.constant 0 : index @@ -1605,6 +1605,6 @@ module @hdiff_bundle_4 { aie.objectfifo.release(%block_3_buf_row_8_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } 
+ } } diff --git a/programming_examples/mlir/horizontal_diffusion/HDIFF_tri_AIE_objectFIFO_ping_pong_scaled/aie_8.mlir b/programming_examples/mlir/horizontal_diffusion/HDIFF_tri_AIE_objectFIFO_ping_pong_scaled/aie_8.mlir index b0e1bbd3946..959f1923cb1 100644 --- a/programming_examples/mlir/horizontal_diffusion/HDIFF_tri_AIE_objectFIFO_ping_pong_scaled/aie_8.mlir +++ b/programming_examples/mlir/horizontal_diffusion/HDIFF_tri_AIE_objectFIFO_ping_pong_scaled/aie_8.mlir @@ -351,9 +351,9 @@ module @hdiff_bundle_8 { aie.objectfifo.register_external_buffers(%tile7_0, %block_7_buf_out_shim_7 : !aie.objectfifo>, {%ext_buffer_out_7}) : (memref<2048xi32>) - func.func private @hdiff_lap(%AL: memref<256xi32>,%BL: memref<256xi32>, %CL: memref<256xi32>, %DL: memref<256xi32>, %EL: memref<256xi32>, %OLL1: memref<256xi32>, %OLL2: memref<256xi32>, %OLL3: memref<256xi32>, %OLL4: memref<256xi32>) -> () - func.func private @hdiff_flux1(%AF: memref<256xi32>,%BF: memref<256xi32>, %CF: memref<256xi32>, %OLF1: memref<256xi32>, %OLF2: memref<256xi32>, %OLF3: memref<256xi32>, %OLF4: memref<256xi32>, %OFI1: memref<512xi32>, %OFI2: memref<512xi32>, %OFI3: memref<512xi32>, %OFI4: memref<512xi32>, %OFI5: memref<512xi32>) -> () - func.func private @hdiff_flux2( %Inter1: memref<512xi32>,%Inter2: memref<512xi32>, %Inter3: memref<512xi32>,%Inter4: memref<512xi32>,%Inter5: memref<512xi32>, %Out: memref<256xi32>) -> () + func.func private @hdiff_lap(%AL: memref<256xi32>,%BL: memref<256xi32>, %CL: memref<256xi32>, %DL: memref<256xi32>, %EL: memref<256xi32>, %OLL1: memref<256xi32>, %OLL2: memref<256xi32>, %OLL3: memref<256xi32>, %OLL4: memref<256xi32>) -> () attributes {link_with = "hdiff_lap.o"} + func.func private @hdiff_flux1(%AF: memref<256xi32>,%BF: memref<256xi32>, %CF: memref<256xi32>, %OLF1: memref<256xi32>, %OLF2: memref<256xi32>, %OLF3: memref<256xi32>, %OLF4: memref<256xi32>, %OFI1: memref<512xi32>, %OFI2: memref<512xi32>, %OFI3: memref<512xi32>, %OFI4: memref<512xi32>, %OFI5: memref<512xi32>) 
-> () attributes {link_with = "hdiff_flux1.o"} + func.func private @hdiff_flux2( %Inter1: memref<512xi32>,%Inter2: memref<512xi32>, %Inter3: memref<512xi32>,%Inter4: memref<512xi32>,%Inter5: memref<512xi32>, %Out: memref<256xi32>) -> () attributes {link_with = "hdiff_flux2.o"} %block_0_core0_1 = aie.core(%tile0_1) { %lb = arith.constant 0 : index @@ -380,7 +380,7 @@ module @hdiff_bundle_8 { } aie.objectfifo.release(%block_0_buf_in_shim_2: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_0_core1_1 = aie.core(%tile1_1) { %lb = arith.constant 0 : index @@ -413,7 +413,7 @@ module @hdiff_bundle_8 { } aie.objectfifo.release(%block_0_buf_in_shim_2: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_0_core2_1 = aie.core(%tile2_1) { %lb = arith.constant 0 : index @@ -436,7 +436,7 @@ module @hdiff_bundle_8 { aie.objectfifo.release(%block_0_buf_row_1_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_0_core0_2 = aie.core(%tile0_2) { %lb = arith.constant 0 : index @@ -464,7 +464,7 @@ module @hdiff_bundle_8 { } aie.objectfifo.release(%block_0_buf_in_shim_2: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_0_core1_2 = aie.core(%tile1_2) { %lb = arith.constant 0 : index @@ -497,7 +497,7 @@ module @hdiff_bundle_8 { } aie.objectfifo.release(%block_0_buf_in_shim_2: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } // Gathering Tile %block_0_core2_2 = aie.core(%tile2_2) { @@ -542,7 +542,7 @@ module @hdiff_bundle_8 { } aie.use_lock(%lock22_14, "Acquire", 0) // stop the timer aie.end - } { link_with="hdiff_flux2.o" } + } %block_0_core0_3 = aie.core(%tile0_3) { %lb = arith.constant 0 : index @@ -569,7 +569,7 @@ module @hdiff_bundle_8 { } aie.objectfifo.release(%block_0_buf_in_shim_2: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_0_core1_3 = aie.core(%tile1_3) { %lb = arith.constant 0 : index @@ -602,7 +602,7 @@ module @hdiff_bundle_8 { } 
aie.objectfifo.release(%block_0_buf_in_shim_2: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_0_core2_3 = aie.core(%tile2_3) { %lb = arith.constant 0 : index @@ -625,7 +625,7 @@ module @hdiff_bundle_8 { aie.objectfifo.release(%block_0_buf_row_3_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_0_core0_4 = aie.core(%tile0_4) { %lb = arith.constant 0 : index @@ -652,7 +652,7 @@ module @hdiff_bundle_8 { } aie.objectfifo.release(%block_0_buf_in_shim_2: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_0_core1_4 = aie.core(%tile1_4) { %lb = arith.constant 0 : index @@ -685,7 +685,7 @@ module @hdiff_bundle_8 { } aie.objectfifo.release(%block_0_buf_in_shim_2: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_0_core2_4 = aie.core(%tile2_4) { %lb = arith.constant 0 : index @@ -708,7 +708,7 @@ module @hdiff_bundle_8 { aie.objectfifo.release(%block_0_buf_row_4_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_1_core0_5 = aie.core(%tile0_5) { %lb = arith.constant 0 : index @@ -735,7 +735,7 @@ module @hdiff_bundle_8 { } aie.objectfifo.release(%block_1_buf_in_shim_2: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_1_core1_5 = aie.core(%tile1_5) { %lb = arith.constant 0 : index @@ -768,7 +768,7 @@ module @hdiff_bundle_8 { } aie.objectfifo.release(%block_1_buf_in_shim_2: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_1_core2_5 = aie.core(%tile2_5) { %lb = arith.constant 0 : index @@ -791,7 +791,7 @@ module @hdiff_bundle_8 { aie.objectfifo.release(%block_1_buf_row_5_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_1_core0_6 = aie.core(%tile0_6) { %lb = arith.constant 0 : index @@ -819,7 +819,7 @@ module @hdiff_bundle_8 { } aie.objectfifo.release(%block_1_buf_in_shim_2: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_1_core1_6 = 
aie.core(%tile1_6) { %lb = arith.constant 0 : index @@ -852,7 +852,7 @@ module @hdiff_bundle_8 { } aie.objectfifo.release(%block_1_buf_in_shim_2: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } // Gathering Tile %block_1_core2_6 = aie.core(%tile2_6) { @@ -897,7 +897,7 @@ module @hdiff_bundle_8 { } aie.use_lock(%lock26_14, "Acquire", 0) // stop the timer aie.end - } { link_with="hdiff_flux2.o" } + } %block_1_core0_7 = aie.core(%tile0_7) { %lb = arith.constant 0 : index @@ -924,7 +924,7 @@ module @hdiff_bundle_8 { } aie.objectfifo.release(%block_1_buf_in_shim_2: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_1_core1_7 = aie.core(%tile1_7) { %lb = arith.constant 0 : index @@ -957,7 +957,7 @@ module @hdiff_bundle_8 { } aie.objectfifo.release(%block_1_buf_in_shim_2: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_1_core2_7 = aie.core(%tile2_7) { %lb = arith.constant 0 : index @@ -980,7 +980,7 @@ module @hdiff_bundle_8 { aie.objectfifo.release(%block_1_buf_row_7_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_1_core0_8 = aie.core(%tile0_8) { %lb = arith.constant 0 : index @@ -1007,7 +1007,7 @@ module @hdiff_bundle_8 { } aie.objectfifo.release(%block_1_buf_in_shim_2: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_1_core1_8 = aie.core(%tile1_8) { %lb = arith.constant 0 : index @@ -1040,7 +1040,7 @@ module @hdiff_bundle_8 { } aie.objectfifo.release(%block_1_buf_in_shim_2: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_1_core2_8 = aie.core(%tile2_8) { %lb = arith.constant 0 : index @@ -1063,7 +1063,7 @@ module @hdiff_bundle_8 { aie.objectfifo.release(%block_1_buf_row_8_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_2_core3_1 = aie.core(%tile3_1) { %lb = arith.constant 0 : index @@ -1090,7 +1090,7 @@ module @hdiff_bundle_8 { } aie.objectfifo.release(%block_2_buf_in_shim_3: 
!aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_2_core4_1 = aie.core(%tile4_1) { %lb = arith.constant 0 : index @@ -1123,7 +1123,7 @@ module @hdiff_bundle_8 { } aie.objectfifo.release(%block_2_buf_in_shim_3: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_2_core5_1 = aie.core(%tile5_1) { %lb = arith.constant 0 : index @@ -1146,7 +1146,7 @@ module @hdiff_bundle_8 { aie.objectfifo.release(%block_2_buf_row_1_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_2_core3_2 = aie.core(%tile3_2) { %lb = arith.constant 0 : index @@ -1174,7 +1174,7 @@ module @hdiff_bundle_8 { } aie.objectfifo.release(%block_2_buf_in_shim_3: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_2_core4_2 = aie.core(%tile4_2) { %lb = arith.constant 0 : index @@ -1207,7 +1207,7 @@ module @hdiff_bundle_8 { } aie.objectfifo.release(%block_2_buf_in_shim_3: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } // Gathering Tile %block_2_core5_2 = aie.core(%tile5_2) { @@ -1252,7 +1252,7 @@ module @hdiff_bundle_8 { } aie.use_lock(%lock52_14, "Acquire", 0) // stop the timer aie.end - } { link_with="hdiff_flux2.o" } + } %block_2_core3_3 = aie.core(%tile3_3) { %lb = arith.constant 0 : index @@ -1279,7 +1279,7 @@ module @hdiff_bundle_8 { } aie.objectfifo.release(%block_2_buf_in_shim_3: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_2_core4_3 = aie.core(%tile4_3) { %lb = arith.constant 0 : index @@ -1312,7 +1312,7 @@ module @hdiff_bundle_8 { } aie.objectfifo.release(%block_2_buf_in_shim_3: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_2_core5_3 = aie.core(%tile5_3) { %lb = arith.constant 0 : index @@ -1335,7 +1335,7 @@ module @hdiff_bundle_8 { aie.objectfifo.release(%block_2_buf_row_3_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_2_core3_4 = aie.core(%tile3_4) { %lb = arith.constant 0 : index @@ 
-1362,7 +1362,7 @@ module @hdiff_bundle_8 { } aie.objectfifo.release(%block_2_buf_in_shim_3: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_2_core4_4 = aie.core(%tile4_4) { %lb = arith.constant 0 : index @@ -1395,7 +1395,7 @@ module @hdiff_bundle_8 { } aie.objectfifo.release(%block_2_buf_in_shim_3: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_2_core5_4 = aie.core(%tile5_4) { %lb = arith.constant 0 : index @@ -1418,7 +1418,7 @@ module @hdiff_bundle_8 { aie.objectfifo.release(%block_2_buf_row_4_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_3_core3_5 = aie.core(%tile3_5) { %lb = arith.constant 0 : index @@ -1445,7 +1445,7 @@ module @hdiff_bundle_8 { } aie.objectfifo.release(%block_3_buf_in_shim_3: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_3_core4_5 = aie.core(%tile4_5) { %lb = arith.constant 0 : index @@ -1478,7 +1478,7 @@ module @hdiff_bundle_8 { } aie.objectfifo.release(%block_3_buf_in_shim_3: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_3_core5_5 = aie.core(%tile5_5) { %lb = arith.constant 0 : index @@ -1501,7 +1501,7 @@ module @hdiff_bundle_8 { aie.objectfifo.release(%block_3_buf_row_5_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_3_core3_6 = aie.core(%tile3_6) { %lb = arith.constant 0 : index @@ -1529,7 +1529,7 @@ module @hdiff_bundle_8 { } aie.objectfifo.release(%block_3_buf_in_shim_3: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_3_core4_6 = aie.core(%tile4_6) { %lb = arith.constant 0 : index @@ -1562,7 +1562,7 @@ module @hdiff_bundle_8 { } aie.objectfifo.release(%block_3_buf_in_shim_3: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } // Gathering Tile %block_3_core5_6 = aie.core(%tile5_6) { @@ -1607,7 +1607,7 @@ module @hdiff_bundle_8 { } aie.use_lock(%lock56_14, "Acquire", 0) // stop the timer aie.end - } { 
link_with="hdiff_flux2.o" } + } %block_3_core3_7 = aie.core(%tile3_7) { %lb = arith.constant 0 : index @@ -1634,7 +1634,7 @@ module @hdiff_bundle_8 { } aie.objectfifo.release(%block_3_buf_in_shim_3: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_3_core4_7 = aie.core(%tile4_7) { %lb = arith.constant 0 : index @@ -1667,7 +1667,7 @@ module @hdiff_bundle_8 { } aie.objectfifo.release(%block_3_buf_in_shim_3: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_3_core5_7 = aie.core(%tile5_7) { %lb = arith.constant 0 : index @@ -1690,7 +1690,7 @@ module @hdiff_bundle_8 { aie.objectfifo.release(%block_3_buf_row_7_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_3_core3_8 = aie.core(%tile3_8) { %lb = arith.constant 0 : index @@ -1717,7 +1717,7 @@ module @hdiff_bundle_8 { } aie.objectfifo.release(%block_3_buf_in_shim_3: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_3_core4_8 = aie.core(%tile4_8) { %lb = arith.constant 0 : index @@ -1750,7 +1750,7 @@ module @hdiff_bundle_8 { } aie.objectfifo.release(%block_3_buf_in_shim_3: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_3_core5_8 = aie.core(%tile5_8) { %lb = arith.constant 0 : index @@ -1773,7 +1773,7 @@ module @hdiff_bundle_8 { aie.objectfifo.release(%block_3_buf_row_8_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_4_core6_1 = aie.core(%tile6_1) { %lb = arith.constant 0 : index @@ -1800,7 +1800,7 @@ module @hdiff_bundle_8 { } aie.objectfifo.release(%block_4_buf_in_shim_6: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_4_core7_1 = aie.core(%tile7_1) { %lb = arith.constant 0 : index @@ -1833,7 +1833,7 @@ module @hdiff_bundle_8 { } aie.objectfifo.release(%block_4_buf_in_shim_6: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_4_core8_1 = aie.core(%tile8_1) { %lb = arith.constant 0 : index @@ -1856,7 +1856,7 
@@ module @hdiff_bundle_8 { aie.objectfifo.release(%block_4_buf_row_1_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_4_core6_2 = aie.core(%tile6_2) { %lb = arith.constant 0 : index @@ -1884,7 +1884,7 @@ module @hdiff_bundle_8 { } aie.objectfifo.release(%block_4_buf_in_shim_6: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_4_core7_2 = aie.core(%tile7_2) { %lb = arith.constant 0 : index @@ -1917,7 +1917,7 @@ module @hdiff_bundle_8 { } aie.objectfifo.release(%block_4_buf_in_shim_6: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } // Gathering Tile %block_4_core8_2 = aie.core(%tile8_2) { @@ -1962,7 +1962,7 @@ module @hdiff_bundle_8 { } aie.use_lock(%lock82_14, "Acquire", 0) // stop the timer aie.end - } { link_with="hdiff_flux2.o" } + } %block_4_core6_3 = aie.core(%tile6_3) { %lb = arith.constant 0 : index @@ -1989,7 +1989,7 @@ module @hdiff_bundle_8 { } aie.objectfifo.release(%block_4_buf_in_shim_6: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_4_core7_3 = aie.core(%tile7_3) { %lb = arith.constant 0 : index @@ -2022,7 +2022,7 @@ module @hdiff_bundle_8 { } aie.objectfifo.release(%block_4_buf_in_shim_6: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_4_core8_3 = aie.core(%tile8_3) { %lb = arith.constant 0 : index @@ -2045,7 +2045,7 @@ module @hdiff_bundle_8 { aie.objectfifo.release(%block_4_buf_row_3_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_4_core6_4 = aie.core(%tile6_4) { %lb = arith.constant 0 : index @@ -2072,7 +2072,7 @@ module @hdiff_bundle_8 { } aie.objectfifo.release(%block_4_buf_in_shim_6: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_4_core7_4 = aie.core(%tile7_4) { %lb = arith.constant 0 : index @@ -2105,7 +2105,7 @@ module @hdiff_bundle_8 { } aie.objectfifo.release(%block_4_buf_in_shim_6: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } 
%block_4_core8_4 = aie.core(%tile8_4) { %lb = arith.constant 0 : index @@ -2128,7 +2128,7 @@ module @hdiff_bundle_8 { aie.objectfifo.release(%block_4_buf_row_4_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_5_core6_5 = aie.core(%tile6_5) { %lb = arith.constant 0 : index @@ -2155,7 +2155,7 @@ module @hdiff_bundle_8 { } aie.objectfifo.release(%block_5_buf_in_shim_6: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_5_core7_5 = aie.core(%tile7_5) { %lb = arith.constant 0 : index @@ -2188,7 +2188,7 @@ module @hdiff_bundle_8 { } aie.objectfifo.release(%block_5_buf_in_shim_6: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_5_core8_5 = aie.core(%tile8_5) { %lb = arith.constant 0 : index @@ -2211,7 +2211,7 @@ module @hdiff_bundle_8 { aie.objectfifo.release(%block_5_buf_row_5_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_5_core6_6 = aie.core(%tile6_6) { %lb = arith.constant 0 : index @@ -2239,7 +2239,7 @@ module @hdiff_bundle_8 { } aie.objectfifo.release(%block_5_buf_in_shim_6: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_5_core7_6 = aie.core(%tile7_6) { %lb = arith.constant 0 : index @@ -2272,7 +2272,7 @@ module @hdiff_bundle_8 { } aie.objectfifo.release(%block_5_buf_in_shim_6: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } // Gathering Tile %block_5_core8_6 = aie.core(%tile8_6) { @@ -2317,7 +2317,7 @@ module @hdiff_bundle_8 { } aie.use_lock(%lock86_14, "Acquire", 0) // stop the timer aie.end - } { link_with="hdiff_flux2.o" } + } %block_5_core6_7 = aie.core(%tile6_7) { %lb = arith.constant 0 : index @@ -2344,7 +2344,7 @@ module @hdiff_bundle_8 { } aie.objectfifo.release(%block_5_buf_in_shim_6: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_5_core7_7 = aie.core(%tile7_7) { %lb = arith.constant 0 : index @@ -2377,7 +2377,7 @@ module @hdiff_bundle_8 { } 
aie.objectfifo.release(%block_5_buf_in_shim_6: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_5_core8_7 = aie.core(%tile8_7) { %lb = arith.constant 0 : index @@ -2400,7 +2400,7 @@ module @hdiff_bundle_8 { aie.objectfifo.release(%block_5_buf_row_7_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_5_core6_8 = aie.core(%tile6_8) { %lb = arith.constant 0 : index @@ -2427,7 +2427,7 @@ module @hdiff_bundle_8 { } aie.objectfifo.release(%block_5_buf_in_shim_6: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_5_core7_8 = aie.core(%tile7_8) { %lb = arith.constant 0 : index @@ -2460,7 +2460,7 @@ module @hdiff_bundle_8 { } aie.objectfifo.release(%block_5_buf_in_shim_6: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_5_core8_8 = aie.core(%tile8_8) { %lb = arith.constant 0 : index @@ -2483,7 +2483,7 @@ module @hdiff_bundle_8 { aie.objectfifo.release(%block_5_buf_row_8_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_6_core9_1 = aie.core(%tile9_1) { %lb = arith.constant 0 : index @@ -2510,7 +2510,7 @@ module @hdiff_bundle_8 { } aie.objectfifo.release(%block_6_buf_in_shim_7: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_6_core10_1 = aie.core(%tile10_1) { %lb = arith.constant 0 : index @@ -2543,7 +2543,7 @@ module @hdiff_bundle_8 { } aie.objectfifo.release(%block_6_buf_in_shim_7: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_6_core11_1 = aie.core(%tile11_1) { %lb = arith.constant 0 : index @@ -2566,7 +2566,7 @@ module @hdiff_bundle_8 { aie.objectfifo.release(%block_6_buf_row_1_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_6_core9_2 = aie.core(%tile9_2) { %lb = arith.constant 0 : index @@ -2594,7 +2594,7 @@ module @hdiff_bundle_8 { } aie.objectfifo.release(%block_6_buf_in_shim_7: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } 
%block_6_core10_2 = aie.core(%tile10_2) { %lb = arith.constant 0 : index @@ -2627,7 +2627,7 @@ module @hdiff_bundle_8 { } aie.objectfifo.release(%block_6_buf_in_shim_7: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } // Gathering Tile %block_6_core11_2 = aie.core(%tile11_2) { @@ -2672,7 +2672,7 @@ module @hdiff_bundle_8 { } aie.use_lock(%lock112_14, "Acquire", 0) // stop the timer aie.end - } { link_with="hdiff_flux2.o" } + } %block_6_core9_3 = aie.core(%tile9_3) { %lb = arith.constant 0 : index @@ -2699,7 +2699,7 @@ module @hdiff_bundle_8 { } aie.objectfifo.release(%block_6_buf_in_shim_7: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_6_core10_3 = aie.core(%tile10_3) { %lb = arith.constant 0 : index @@ -2732,7 +2732,7 @@ module @hdiff_bundle_8 { } aie.objectfifo.release(%block_6_buf_in_shim_7: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_6_core11_3 = aie.core(%tile11_3) { %lb = arith.constant 0 : index @@ -2755,7 +2755,7 @@ module @hdiff_bundle_8 { aie.objectfifo.release(%block_6_buf_row_3_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_6_core9_4 = aie.core(%tile9_4) { %lb = arith.constant 0 : index @@ -2782,7 +2782,7 @@ module @hdiff_bundle_8 { } aie.objectfifo.release(%block_6_buf_in_shim_7: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_6_core10_4 = aie.core(%tile10_4) { %lb = arith.constant 0 : index @@ -2815,7 +2815,7 @@ module @hdiff_bundle_8 { } aie.objectfifo.release(%block_6_buf_in_shim_7: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_6_core11_4 = aie.core(%tile11_4) { %lb = arith.constant 0 : index @@ -2838,7 +2838,7 @@ module @hdiff_bundle_8 { aie.objectfifo.release(%block_6_buf_row_4_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_7_core9_5 = aie.core(%tile9_5) { %lb = arith.constant 0 : index @@ -2865,7 +2865,7 @@ module @hdiff_bundle_8 { } 
aie.objectfifo.release(%block_7_buf_in_shim_7: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_7_core10_5 = aie.core(%tile10_5) { %lb = arith.constant 0 : index @@ -2898,7 +2898,7 @@ module @hdiff_bundle_8 { } aie.objectfifo.release(%block_7_buf_in_shim_7: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_7_core11_5 = aie.core(%tile11_5) { %lb = arith.constant 0 : index @@ -2921,7 +2921,7 @@ module @hdiff_bundle_8 { aie.objectfifo.release(%block_7_buf_row_5_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_7_core9_6 = aie.core(%tile9_6) { %lb = arith.constant 0 : index @@ -2949,7 +2949,7 @@ module @hdiff_bundle_8 { } aie.objectfifo.release(%block_7_buf_in_shim_7: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_7_core10_6 = aie.core(%tile10_6) { %lb = arith.constant 0 : index @@ -2982,7 +2982,7 @@ module @hdiff_bundle_8 { } aie.objectfifo.release(%block_7_buf_in_shim_7: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } // Gathering Tile %block_7_core11_6 = aie.core(%tile11_6) { @@ -3027,7 +3027,7 @@ module @hdiff_bundle_8 { } aie.use_lock(%lock116_14, "Acquire", 0) // stop the timer aie.end - } { link_with="hdiff_flux2.o" } + } %block_7_core9_7 = aie.core(%tile9_7) { %lb = arith.constant 0 : index @@ -3054,7 +3054,7 @@ module @hdiff_bundle_8 { } aie.objectfifo.release(%block_7_buf_in_shim_7: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_7_core10_7 = aie.core(%tile10_7) { %lb = arith.constant 0 : index @@ -3087,7 +3087,7 @@ module @hdiff_bundle_8 { } aie.objectfifo.release(%block_7_buf_in_shim_7: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_7_core11_7 = aie.core(%tile11_7) { %lb = arith.constant 0 : index @@ -3110,7 +3110,7 @@ module @hdiff_bundle_8 { aie.objectfifo.release(%block_7_buf_row_7_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } %block_7_core9_8 = 
aie.core(%tile9_8) { %lb = arith.constant 0 : index @@ -3137,7 +3137,7 @@ module @hdiff_bundle_8 { } aie.objectfifo.release(%block_7_buf_in_shim_7: !aie.objectfifo>, 4) aie.end - } { link_with="hdiff_lap.o" } + } %block_7_core10_8 = aie.core(%tile10_8) { %lb = arith.constant 0 : index @@ -3170,7 +3170,7 @@ module @hdiff_bundle_8 { } aie.objectfifo.release(%block_7_buf_in_shim_7: !aie.objectfifo>, 7) aie.end - } { link_with="hdiff_flux1.o" } + } %block_7_core11_8 = aie.core(%tile11_8) { %lb = arith.constant 0 : index @@ -3193,6 +3193,6 @@ module @hdiff_bundle_8 { aie.objectfifo.release(%block_7_buf_row_8_out_flx2 :!aie.objectfifo>, 1) } aie.end - } { link_with="hdiff_flux2.o" } + } } diff --git a/programming_examples/mlir/idct/aie.mlir b/programming_examples/mlir/idct/aie.mlir index 1842fd6bbbe..fda6a60977c 100644 --- a/programming_examples/mlir/idct/aie.mlir +++ b/programming_examples/mlir/idct/aie.mlir @@ -59,9 +59,9 @@ module @idct { aie.flow(%t74, DMA : 1, %t75, DMA : 0) aie.flow(%t75, DMA : 1, %t70, DMA : 0) - func.func private @dequant_8x8(%A: memref<64xi16>, %B: memref<64xi16>) -> () - func.func private @idct_8x8_mmult_h(%A: memref<64xi16>, %B: memref<64xi16>) -> () - func.func private @idct_8x8_mmult_v(%A: memref<64xi16>, %B: memref<64xi16>) -> () + func.func private @dequant_8x8(%A: memref<64xi16>, %B: memref<64xi16>) -> () attributes {link_with = "dequant.o"} + func.func private @idct_8x8_mmult_h(%A: memref<64xi16>, %B: memref<64xi16>) -> () attributes {link_with = "idct_horizontal.o"} + func.func private @idct_8x8_mmult_v(%A: memref<64xi16>, %B: memref<64xi16>) -> () attributes {link_with = "idct_vertical.o"} %c13 = aie.core(%t73) { %lb = arith.constant 0 : index @@ -83,7 +83,7 @@ module @idct { } aie.end - } { link_with="dequant.o" } + } %c74 = aie.core(%t74) { %lb = arith.constant 0 : index @@ -105,7 +105,7 @@ module @idct { } aie.end - } { link_with="idct_horizontal.o" } + } %c75 = aie.core(%t75) { %lb = arith.constant 0 : index @@ -127,7 +127,7 @@ 
module @idct { } aie.end - } { link_with="idct_vertical.o" } + } // Tile DMA %m73 = aie.mem(%t73) { diff --git a/programming_examples/mlir/idct/objectFifo_circuit_switched_version/aie.mlir b/programming_examples/mlir/idct/objectFifo_circuit_switched_version/aie.mlir index 6956f394961..5a5b3d45d3b 100755 --- a/programming_examples/mlir/idct/objectFifo_circuit_switched_version/aie.mlir +++ b/programming_examples/mlir/idct/objectFifo_circuit_switched_version/aie.mlir @@ -34,9 +34,9 @@ module @idct { aie.objectfifo.register_external_buffers @of_in (%t70, {%buffer_in}) : (memref<512xi16>) aie.objectfifo.register_external_buffers @of_out (%t70, {%buffer_out}) : (memref<512xi16>) - func.func private @dequant_8x8(%A: memref<64xi16>, %B: memref<64xi16>) -> () - func.func private @idct_8x8_mmult_h(%A: memref<64xi16>, %B: memref<64xi16>) -> () - func.func private @idct_8x8_mmult_v(%A: memref<64xi16>, %B: memref<64xi16>) -> () + func.func private @dequant_8x8(%A: memref<64xi16>, %B: memref<64xi16>) -> () attributes {link_with = "dequant.o"} + func.func private @idct_8x8_mmult_h(%A: memref<64xi16>, %B: memref<64xi16>) -> () attributes {link_with = "idct_horizontal.o"} + func.func private @idct_8x8_mmult_v(%A: memref<64xi16>, %B: memref<64xi16>) -> () attributes {link_with = "idct_vertical.o"} func.func private @pass(%A: memref<64xi16>, %B: memref<64xi16>) -> () %c13 = aie.core(%t73) { @@ -63,7 +63,7 @@ module @idct { } aie.end - } { link_with="dequant.o" } + } %c74 = aie.core(%t74) { %lb = arith.constant 0 : index @@ -89,7 +89,7 @@ module @idct { } aie.end - } { link_with="idct_horizontal.o" } + } %c75 = aie.core(%t75) { %lb = arith.constant 0 : index @@ -115,5 +115,5 @@ module @idct { } aie.end - } { link_with="idct_vertical.o" } + } } diff --git a/programming_examples/vision/color_detect/Makefile b/programming_examples/vision/color_detect/Makefile index a64fe8bd4f5..808bf7118a4 100755 --- a/programming_examples/vision/color_detect/Makefile +++ 
b/programming_examples/vision/color_detect/Makefile @@ -45,15 +45,11 @@ else echo "Device type not supported" endif -build/combined_bitwiseOR_gray2rgba_bitwiseAND.a: build/bitwiseOR.cc.o build/gray2rgba.cc.o build/bitwiseAND.cc.o - mkdir -p ${@D} - ar rvs $@ $< $(word 2,$^) $(word 3,$^) - build/aie2_lineBased_8b_${COLORDETECT_WIDTH}.mlir: ${srcdir}/${aie_py_src} mkdir -p ${@D} python3 $< ${device} ${COLORDETECT_WIDTH} ${COLORDETECT_HEIGHT} > $@ -build/final_${COLORDETECT_WIDTH}.xclbin: build/aie2_lineBased_8b_${COLORDETECT_WIDTH}.mlir build/rgba2hue.cc.o build/threshold.cc.o build/combined_bitwiseOR_gray2rgba_bitwiseAND.a +build/final_${COLORDETECT_WIDTH}.xclbin: build/aie2_lineBased_8b_${COLORDETECT_WIDTH}.mlir build/rgba2hue.cc.o build/threshold.cc.o build/bitwiseOR.cc.o build/gray2rgba.cc.o build/bitwiseAND.cc.o mkdir -p ${@D} ifeq ($(device),npu) cd ${@D} && aiecc --aie-generate-xclbin --aie-generate-npu-insts --no-compile-host --alloc-scheme=basic-sequential \ diff --git a/programming_examples/vision/color_detect/color_detect.py b/programming_examples/vision/color_detect/color_detect.py index f98cd8b59c6..0b462ca7735 100644 --- a/programming_examples/vision/color_detect/color_detect.py +++ b/programming_examples/vision/color_detect/color_detect.py @@ -36,17 +36,17 @@ def color_detect(dev, width, height): ) bitwiseORLine = Kernel( "bitwiseORLine", - "combined_bitwiseOR_gray2rgba_bitwiseAND.a", + "bitwiseOR.cc.o", [line_ty, line_ty, line_ty, np.int32], ) gray2rgbaLine = Kernel( "gray2rgbaLine", - "combined_bitwiseOR_gray2rgba_bitwiseAND.a", + "gray2rgba.cc.o", [line_ty, line_bytes_ty, np.int32], ) bitwiseANDLine = Kernel( "bitwiseANDLine", - "combined_bitwiseOR_gray2rgba_bitwiseAND.a", + "bitwiseAND.cc.o", [line_bytes_ty, line_bytes_ty, line_bytes_ty, np.int32], ) diff --git a/programming_examples/vision/color_detect/color_detect_placed.py b/programming_examples/vision/color_detect/color_detect_placed.py index d0654fbd985..fb6844e7dc3 100644 --- 
a/programming_examples/vision/color_detect/color_detect_placed.py +++ b/programming_examples/vision/color_detect/color_detect_placed.py @@ -37,21 +37,29 @@ def deviceBody(): # AIE Core Function declarations rgba2hueLine = external_func( - "rgba2hueLine", inputs=[line_bytes_ty, line_ty, np.int32] + "rgba2hueLine", + inputs=[line_bytes_ty, line_ty, np.int32], + link_with="rgba2hue.cc.o", ) thresholdLine = external_func( "thresholdLine", inputs=[line_ty, line_ty, np.int32, np.int16, np.int16, np.int8], + link_with="threshold.cc.o", ) bitwiseORLine = external_func( - "bitwiseORLine", inputs=[line_ty, line_ty, line_ty, np.int32] + "bitwiseORLine", + inputs=[line_ty, line_ty, line_ty, np.int32], + link_with="bitwiseOR.cc.o", ) gray2rgbaLine = external_func( - "gray2rgbaLine", inputs=[line_ty, line_bytes_ty, np.int32] + "gray2rgbaLine", + inputs=[line_ty, line_bytes_ty, np.int32], + link_with="gray2rgba.cc.o", ) bitwiseANDLine = external_func( "bitwiseANDLine", inputs=[line_bytes_ty, line_bytes_ty, line_bytes_ty, np.int32], + link_with="bitwiseAND.cc.o", ) # Tile declarations @@ -94,7 +102,7 @@ def deviceBody(): # Set up compute tiles # Compute tile 2 - @core(ComputeTile2, "rgba2hue.cc.o") + @core(ComputeTile2) def coreBody(): for _ in range_(sys.maxsize): elemIn = inOF_L3L2.acquire(ObjectFifoPort.Consume, 1) @@ -104,7 +112,7 @@ def coreBody(): OF_2to34.release(ObjectFifoPort.Produce, 1) # Compute tile 3 - @core(ComputeTile3, "threshold.cc.o") + @core(ComputeTile3) def coreBody(): thresholdValueUpper1 = 40 thresholdValueLower1 = 30 @@ -138,7 +146,7 @@ def coreBody(): OF_3to5.release(ObjectFifoPort.Produce, 1) # Compute tile 4 - @core(ComputeTile4, "threshold.cc.o") + @core(ComputeTile4) def coreBody(): thresholdValueUpper1 = 160 thresholdValueLower1 = 90 @@ -172,7 +180,7 @@ def coreBody(): OF_4to5.release(ObjectFifoPort.Produce, 1) # Compute tile 5 - @core(ComputeTile5, "combined_bitwiseOR_gray2rgba_bitwiseAND.a") + @core(ComputeTile5) def coreBody(): for _ in 
range_(sys.maxsize): # bitwise OR diff --git a/programming_examples/vision/color_threshold/color_threshold_placed.py b/programming_examples/vision/color_threshold/color_threshold_placed.py index 77bb5bb2da8..4ebc50b6c07 100644 --- a/programming_examples/vision/color_threshold/color_threshold_placed.py +++ b/programming_examples/vision/color_threshold/color_threshold_placed.py @@ -29,6 +29,7 @@ def device_body(): thresholdLine = external_func( "thresholdLine", inputs=[line_ty, line_ty, np.int32, np.int16, np.int16, np.int8], + link_with="threshold.cc.o", ) # Tile declarations @@ -103,7 +104,7 @@ def device_body(): # Set up compute tiles # Compute tile 2 - @core(ComputeTile2, "threshold.cc.o") + @core(ComputeTile2) def core_body(): for _ in range_(sys.maxsize): # RTPs written from the instruction stream must be synchronized with the runtime sequence @@ -129,7 +130,7 @@ def core_body(): outOOB_L1L2_0.release(ObjectFifoPort.Produce, 1) # Compute tile 3 - @core(ComputeTile3, "threshold.cc.o") + @core(ComputeTile3) def core_body(): for _ in range_(sys.maxsize): # RTPs written from the instruction stream must be synchronized with the runtime sequence @@ -155,7 +156,7 @@ def core_body(): outOOB_L1L2_1.release(ObjectFifoPort.Produce, 1) # Compute tile 4 - @core(ComputeTile4, "threshold.cc.o") + @core(ComputeTile4) def core_body(): for _ in range_(sys.maxsize): # RTPs written from the instruction stream must be synchronized with the runtime sequence @@ -181,7 +182,7 @@ def core_body(): outOOB_L1L2_2.release(ObjectFifoPort.Produce, 1) # Compute tile 5 - @core(ComputeTile5, "threshold.cc.o") + @core(ComputeTile5) def core_body(): for _ in range_(sys.maxsize): # RTPs written from the instruction stream must be synchronized with the runtime sequence diff --git a/programming_examples/vision/edge_detect/Makefile b/programming_examples/vision/edge_detect/Makefile index 92a27e341fa..ad69129f1b8 100755 --- a/programming_examples/vision/edge_detect/Makefile +++ 
b/programming_examples/vision/edge_detect/Makefile @@ -52,15 +52,11 @@ else echo "Device type not supported" endif -build/combined_gray2rgba_addWeighted.a: build/gray2rgba.cc.o build/addWeighted.cc.o - mkdir -p ${@D} - ar rvs $@ $< $(word 2,$^) - build/aie2_lineBased_8b_${EDGEDETECT_WIDTH}.mlir: ${srcdir}/${aie_py_src} mkdir -p ${@D} python3 $< ${device} ${EDGEDETECT_WIDTH} ${EDGEDETECT_HEIGHT} > $@ -build/final_${EDGEDETECT_WIDTH}.xclbin: build/aie2_lineBased_8b_${EDGEDETECT_WIDTH}.mlir build/rgba2gray.cc.o build/gray2rgba.cc.o build/filter2d.cc.o build/threshold.cc.o build/addWeighted.cc.o build/combined_gray2rgba_addWeighted.a +build/final_${EDGEDETECT_WIDTH}.xclbin: build/aie2_lineBased_8b_${EDGEDETECT_WIDTH}.mlir build/rgba2gray.cc.o build/gray2rgba.cc.o build/filter2d.cc.o build/threshold.cc.o build/addWeighted.cc.o mkdir -p ${@D} cd ${@D} && aiecc --aie-generate-xclbin --aie-generate-npu-insts --no-compile-host --alloc-scheme=basic-sequential \ --no-xchesscc --no-xbridge \ diff --git a/programming_examples/vision/edge_detect/edge_detect.py b/programming_examples/vision/edge_detect/edge_detect.py index d6e2154c4a8..b6d3c189093 100644 --- a/programming_examples/vision/edge_detect/edge_detect.py +++ b/programming_examples/vision/edge_detect/edge_detect.py @@ -42,12 +42,12 @@ def edge_detect(dev, width, height): ) gray2rgba_line_kernel = Kernel( "gray2rgbaLine", - "combined_gray2rgba_addWeighted.a", + "gray2rgba.cc.o", [line_ty, line_bytes_ty, np.int32], ) add_weighted_line_kernel = Kernel( "addWeightedLine", - "combined_gray2rgba_addWeighted.a", + "addWeighted.cc.o", [ line_bytes_ty, line_bytes_ty, diff --git a/programming_examples/vision/edge_detect/edge_detect_placed.py b/programming_examples/vision/edge_detect/edge_detect_placed.py index 2dac6746da7..813e9e40fb6 100644 --- a/programming_examples/vision/edge_detect/edge_detect_placed.py +++ b/programming_examples/vision/edge_detect/edge_detect_placed.py @@ -34,18 +34,24 @@ def device_body(): # AIE Core 
Function declarations rgba2gray_line = external_func( - "rgba2grayLine", inputs=[line_bytes_ty, line_ty, np.int32] + "rgba2grayLine", + inputs=[line_bytes_ty, line_ty, np.int32], + link_with="rgba2gray.cc.o", ) filter2d_line = external_func( "filter2dLine", inputs=[line_ty, line_ty, line_ty, line_ty, np.int32, tensor_3x3_ty], + link_with="filter2d.cc.o", ) threshold_line = external_func( "thresholdLine", inputs=[line_ty, line_ty, np.int32, np.int16, np.int16, np.int8], + link_with="threshold.cc.o", ) gray2rgba_line = external_func( - "gray2rgbaLine", inputs=[line_ty, line_bytes_ty, np.int32] + "gray2rgbaLine", + inputs=[line_ty, line_bytes_ty, np.int32], + link_with="gray2rgba.cc.o", ) add_weighted_line = external_func( "addWeightedLine", @@ -58,6 +64,7 @@ def device_body(): np.int16, np.int8, ], + link_with="addWeighted.cc.o", ) # Tile declarations @@ -136,7 +143,7 @@ def device_body(): # Set up compute tiles # Compute tile 2 - @core(ComputeTile2, "rgba2gray.cc.o") + @core(ComputeTile2) def core_body(): for _ in range_(sys.maxsize): elem_in = inOF_L3L2.acquire(ObjectFifoPort.Consume, 1) @@ -148,7 +155,7 @@ def core_body(): OF_2to3.release(ObjectFifoPort.Produce, 1) # Compute tile 3 - @core(ComputeTile3, "filter2d.cc.o") + @core(ComputeTile3) def core_body(): v0 = 0 v1 = 4096 @@ -207,7 +214,7 @@ def core_body(): OF_3to4.release(ObjectFifoPort.Produce, 1) # Compute tile 4 - @core(ComputeTile4, "threshold.cc.o") + @core(ComputeTile4) def core_body(): v_thr = 10 v_max = 255 @@ -223,7 +230,7 @@ def core_body(): OF_4to5.release(ObjectFifoPort.Produce, 1) # Compute tile 5 - @core(ComputeTile5, "combined_gray2rgba_addWeighted.a") + @core(ComputeTile5) def core_body(): for _ in range_(sys.maxsize): elem_in = OF_4to5.acquire(ObjectFifoPort.Consume, 1) diff --git a/programming_examples/vision/vision_passthrough/aie2_lineBased_8b_1080.mlir b/programming_examples/vision/vision_passthrough/aie2_lineBased_8b_1080.mlir index af7a4f05e7e..8797d4688f3 100644 --- 
a/programming_examples/vision/vision_passthrough/aie2_lineBased_8b_1080.mlir +++ b/programming_examples/vision/vision_passthrough/aie2_lineBased_8b_1080.mlir @@ -14,7 +14,7 @@ module @passThroughLine_aie2 { aie.device(npu) { // declare kernel external kernel function - func.func private @passThroughLine(%in: memref<1920xui8>, %out: memref<1920xui8>, %tilewidth: i32) -> () + func.func private @passThroughLine(%in: memref<1920xui8>, %out: memref<1920xui8>, %tilewidth: i32) -> () attributes {link_with = "passThrough.cc.o"} // Declare tile object of the AIE class located at position col 1, row 4 %tile00 = aie.tile(0, 0) @@ -44,7 +44,7 @@ module @passThroughLine_aie2 { aie.objectfifo.release @outOF(Produce, 1) } aie.end - } { link_with="passThrough.cc.o" } // indicate kernel object name used by this core + } // indicate kernel object name used by this core aie.runtime_sequence(%in : memref<518400xi32>, %arg1 : memref<1xi32>, %out : memref<518400xi32>) { %c0 = arith.constant 0 : i64 diff --git a/programming_examples/vision/vision_passthrough/aie2_lineBased_8b_8k.mlir b/programming_examples/vision/vision_passthrough/aie2_lineBased_8b_8k.mlir index 12d2855713b..0a8bca4e01e 100644 --- a/programming_examples/vision/vision_passthrough/aie2_lineBased_8b_8k.mlir +++ b/programming_examples/vision/vision_passthrough/aie2_lineBased_8b_8k.mlir @@ -14,7 +14,7 @@ module @passThroughLine_aie2 { aie.device(npu) { // declare kernel external kernel function - func.func private @passThroughLine(%in: memref<7680xui8>, %out: memref<7680xui8>, %tilewidth: i32) -> () + func.func private @passThroughLine(%in: memref<7680xui8>, %out: memref<7680xui8>, %tilewidth: i32) -> () attributes {link_with = "passThrough.cc.o"} // Declare tile object of the AIE class located at position col 1, row 4 %tile00 = aie.tile(0, 0) @@ -44,7 +44,7 @@ module @passThroughLine_aie2 { aie.objectfifo.release @outOF(Produce, 1) } aie.end - } { link_with="passThrough.cc.o" } // indicate kernel object name used by this 
core + } // indicate kernel object name used by this core aie.runtime_sequence(%in : memref<2073600xi32>, %arg1 : memref<1xi32>, %out : memref<2073600xi32>) { %c0 = arith.constant 0 : i64 diff --git a/programming_examples/vision/vision_passthrough/aie2_lineBased_8b_tiny.mlir b/programming_examples/vision/vision_passthrough/aie2_lineBased_8b_tiny.mlir index d4929df35f7..7107263f8b6 100644 --- a/programming_examples/vision/vision_passthrough/aie2_lineBased_8b_tiny.mlir +++ b/programming_examples/vision/vision_passthrough/aie2_lineBased_8b_tiny.mlir @@ -14,7 +14,7 @@ module @passThroughLine_aie2 { aie.device(npu) { // declare kernel external kernel function - func.func private @passThroughLine(%in: memref<512xui8>, %out: memref<512xui8>, %tilewidth: i32) -> () + func.func private @passThroughLine(%in: memref<512xui8>, %out: memref<512xui8>, %tilewidth: i32) -> () attributes {link_with = "passThrough.cc.o"} // Declare tile object of the AIE class located at position col 1, row 4 %tile00 = aie.tile(0, 0) @@ -44,7 +44,7 @@ module @passThroughLine_aie2 { aie.objectfifo.release @outOF(Produce, 1) } aie.end - } { link_with="passThrough.cc.o" } // indicate kernel object name used by this core + } // indicate kernel object name used by this core aie.runtime_sequence(%in : memref<1152xi32>, %arg1 : memref<1xi32>, %out : memref<1152xi32>) { %c0 = arith.constant 0 : i64 diff --git a/programming_examples/vision/vision_passthrough/vision_passthrough_placed.py b/programming_examples/vision/vision_passthrough/vision_passthrough_placed.py index 681932c7f8b..5d72215d4fd 100644 --- a/programming_examples/vision/vision_passthrough/vision_passthrough_placed.py +++ b/programming_examples/vision/vision_passthrough/vision_passthrough_placed.py @@ -29,7 +29,9 @@ def device_body(): # AIE Core Function declarations passThroughLine = external_func( - "passThroughLine", inputs=[line_ty, line_ty, np.int32] + "passThroughLine", + inputs=[line_ty, line_ty, np.int32], + link_with="passThrough.cc.o", 
) # Tile declarations @@ -46,7 +48,7 @@ def device_body(): # Set up compute tiles # Compute tile 2 - @core(ComputeTile2, "passThrough.cc.o") + @core(ComputeTile2) def core_body(): for _ in range_(sys.maxsize): for _ in range_(height): diff --git a/programming_guide/quick_reference.md b/programming_guide/quick_reference.md index 97dbcb1233e..6fb43de5a8f 100644 --- a/programming_guide/quick_reference.md +++ b/programming_guide/quick_reference.md @@ -22,7 +22,7 @@ | Function Signature | Definition | Parameters | Return Type | Example | |---------------------|------------|------------|-------------|---------| | `tile(column, row)` | Declare AI Engine tile | `column`: column index number
`row`: row index number | `` | ComputeTile = tile(1,3) | -| `external_func(name, inputs, output)` | Declare external kernel function that will run on AIE Cores| `name`: external function name
`input`: list of input types
`output`: list of output types | `` | scale_scalar = external_func("vector_scalar_mul_aie_scalar", inputs=[tensor_ty, tensor_ty, np.ndarray[(1,), np.dtype[np.int32]]]) | | +| `external_func(name, inputs, outputs, link_with=None)` | Declare external kernel function that will run on AIE Cores. Multiple `external_func` declarations may share the same `link_with` object file; the compiler deduplicates automatically. | `name`: external function name
`inputs`: list of input types
`outputs`: list of output types
`link_with` (optional): path to the compiled object file (`.o`) that implements this function | `` | scale_scalar = external_func("vector_scalar_mul_aie_scalar", inputs=[tensor_ty, tensor_ty, np.ndarray[(1,), np.dtype[np.int32]]], link_with="scale.o") | | | `npu_dma_memcpy_nd(metadata, bd_id, mem, sizes)` | configure n-dimensional DMA accessing external memory | `metadata`: ObjectFifo python object or string with name of `object_fifo`
`bd_id`: Identifier number
`mem`: memory for transfer
`sizes`: 4-D transfer size in 4B granularity | `None` | npu_dma_memcpy_nd(metadata="out", bd_id=0, mem=C, sizes=[1, 1, 1, N]) | | `dma_wait(object_fifo, ...)` | configure host-ShimDMA synchronization for accessing external memory | `metadata`: Identifies the ObjectFifo (by Python object or name string) whose half-DMA completion we are waiting on. This is a variable argument function that can accept one or more metadatas at once, to be waited on in order given, | `None` | dma_wait(of_out) | | `npu_sync(column, row, direction, channel, column_num=1, row_num=1)` | alternative method to configure host-ShimDMA synchronization for accessing external memory | `column` and `row`: Specify the tile location for initiating the synchronization.
`direction`: Indicates the DMA direction (0 for write to host, 1 for read from host).
`channel`: Identifies the DMA channel (0 or 1) for the synchronization token
`column_num` and `row_num` (optional): Define the range of tiles to wait for synchronization| `None` | npu_sync(column=0, row=0, direction=0, channel=1) | diff --git a/programming_guide/section-4/section-4b/aie2_placed.py b/programming_guide/section-4/section-4b/aie2_placed.py index ff60baaab20..73b9d91c763 100644 --- a/programming_guide/section-4/section-4b/aie2_placed.py +++ b/programming_guide/section-4/section-4b/aie2_placed.py @@ -39,6 +39,7 @@ def device_body(): scale_scalar = external_func( "vector_scalar_mul_aie_scalar", inputs=[tile_ty, tile_ty, scalar_ty, in2_dtype], + link_with="scale.o", ) # Tile declarations @@ -52,7 +53,7 @@ def device_body(): # Set up compute tiles # Compute tile 2 - @core(ComputeTile2, "scale.o") + @core(ComputeTile2) def core_body(): # Effective while(1) for _ in range_(sys.maxsize): diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index c09b51ed147..12657ab44db 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -49,7 +49,6 @@ declare_mlir_python_sources(AIEPythonSources.Utils utils/hostruntime/xrtruntime/hostruntime.py utils/hostruntime/xrtruntime/tensor.py utils/compile/__init__.py - utils/compile/link.py utils/compile/utils.py utils/compile/cache/circular_cache.py utils/compile/cache/utils.py diff --git a/python/dialects/aie.py b/python/dialects/aie.py index ddff535e37f..43b6c1b26c1 100644 --- a/python/dialects/aie.py +++ b/python/dialects/aie.py @@ -88,7 +88,23 @@ def __init__(self, buffer, index, value, loc=None, ip=None): class external_func(FuncOp): - def __init__(self, name: str, inputs, outputs=None, visibility="private"): + """A ``func.func`` declaration for an externally-defined AIE core function. + + Args: + name: Symbol name of the function. + inputs: List of input types (numpy dtypes or MLIR types). + outputs: List of output types. Defaults to []. + visibility: MLIR symbol visibility. Defaults to ``"private"``. 
+ link_with: Optional path to the object file (``.o``) that implements + this function. Sets the ``link_with`` string attribute on the + generated ``func.func`` op; the ``aie-assign-core-link-files`` pass + reads this attribute and propagates it into the CoreOp's + ``link_files`` attribute for the linker. + """ + + def __init__( + self, name: str, inputs, outputs=None, visibility="private", link_with=None + ): if outputs is None: outputs = [] for i, ty in enumerate(inputs): @@ -102,6 +118,8 @@ def __init__(self, name: str, inputs, outputs=None, visibility="private"): super().__init__( name=name, type=FunctionType.get(inputs, outputs), visibility=visibility ) + if link_with is not None: + self.operation.attributes["link_with"] = StringAttr.get(link_with) def __call__(self, *call_args): return call(self, call_args) @@ -274,11 +292,17 @@ class Core(CoreOp): def __init__( self, tile, link_with=None, dynamic_objfifo_lowering=None, stack_size=None ): + if link_with is not None: + raise TypeError( + "Core() no longer accepts link_with. " + "Set link_with= on each external_func() declaration instead; " + "the aie-assign-core-link-files pass aggregates them onto the core." + ) super().__init__( result=T.index(), tile=tile, stack_size=stack_size, - link_with=link_with, + link_with=None, dynamic_objfifo_lowering=dynamic_objfifo_lowering, ) diff --git a/python/iron/kernel.py b/python/iron/kernel.py index 2641beb65cb..ec7c8183d69 100644 --- a/python/iron/kernel.py +++ b/python/iron/kernel.py @@ -21,55 +21,104 @@ class BaseKernel(Resolvable): - """Base class for kernel-like objects that resolve to FuncOp.""" + """Base class for AIE core functions that resolve to a func.func declaration. - def __init__(self, name: str, arg_types: list[type[np.ndarray] | np.dtype] = []): - """Initialize base kernel. + Subclasses: + Kernel: wraps a pre-compiled object file. + ExternalFunction: compiles C/C++ source at JIT time. 
+ """ + def __init__(self, name: str, arg_types: list[type[np.ndarray] | np.dtype] = []): + """ Args: - name (str): The name of the function - arg_types (list[type[np.ndarray] | np.dtype], optional): The type signature of the function. Defaults to []. + name: Symbol name of the function. + arg_types: Type signature of the function arguments. Defaults to []. """ if not name: - raise ValueError("The name of a kernel cannot be empty or null.") + raise ValueError("Kernel name cannot be empty.") self._name = name self._arg_types = arg_types self._op: FuncOp | None = None + def tile_size(self, arg_index: int = 0) -> int: + """Return the first dimension of the array argument at ``arg_index``. + + Args: + arg_index: Index into ``arg_types``. Defaults to 0. + """ + if not self._arg_types: + raise ValueError("No argument types defined.") + if arg_index >= len(self._arg_types): + raise ValueError( + f"Argument index {arg_index} out of range " + f"(max: {len(self._arg_types) - 1})" + ) + arg = self._arg_types[arg_index] + + # numpy array type, e.g. np.ndarray[(16,), np.dtype[np.int32]] + if hasattr(arg, "__args__") and len(arg.__args__) > 0: + shape_arg = arg.__args__[0] + if isinstance(shape_arg, tuple) and len(shape_arg) > 0: + return shape_arg[0] + + # MLIR MemRefType + if hasattr(arg, "shape") and len(arg.shape) > 0: + return arg.shape[0] + + raise ValueError( + f"Argument {arg_index} does not have a shape or is not an array type." 
+ ) + + def arg_types(self) -> list: + """Return a copy of the argument type list.""" + return self._arg_types.copy() + def __call__(self, *args, **kwargs): - """Call the kernel with the given arguments.""" + """Emit a func.call to this kernel, validating argument count.""" if not self._op: - raise ValueError("Need to resolve kernel before it can be called") - arg_ops = [] - for a in args: - if isinstance(a, Buffer): - arg_ops.append(a.op) - else: - arg_ops.append(a) + raise ValueError("Kernel must be resolved before it can be called.") + if len(args) != len(self._arg_types): + raise ValueError( + f"Kernel '{self._name}' expects {len(self._arg_types)} " + f"argument(s), but {len(args)} were provided." + ) + arg_ops = [a.op if isinstance(a, Buffer) else a for a in args] call(self._op, arg_ops, **kwargs) class Kernel(BaseKernel): + """An AIE core function backed by a pre-compiled object file. + + Use :class:`ExternalFunction` instead when you want to compile from + C/C++ source at JIT time. + + ``resolve()`` emits a ``func.func private`` declaration with a + ``link_with`` attribute naming ``object_file_name``. The + ``aie-assign-core-link-files`` pass propagates this into the CoreOp's + ``link_files`` attribute so the linker knows which file to include. + """ + def __init__( self, name: str, - bin_name: str, + object_file_name: str, arg_types: list[type[np.ndarray] | np.dtype] = [], ) -> None: - """A Kernel is an externally defined function that eventually resolves to a FuncOp. If it is called, - a CallOp will be generated. - + """ Args: - name (str): The name of the function - bin_name (str): The name of the binary (used for linking to a compute core) - arg_types (list[type[np.ndarray] | np.dtype], optional): The type signature of the function. Defaults to []. + name: Symbol name of the function as it appears in the object file. + object_file_name: Filename of the pre-compiled object file + (e.g. ``"add_one.o"``). Must be on the linker search path + at compile time. 
+ arg_types: Type signature of the function arguments. Defaults to []. """ super().__init__(name, arg_types) - self._bin_name = bin_name + self._object_file_name = object_file_name @property - def bin_name(self) -> str: - return self._bin_name + def object_file_name(self) -> str: + """Filename of the compiled object file.""" + return self._object_file_name def resolve( self, @@ -77,11 +126,25 @@ def resolve( ip: ir.InsertionPoint | None = None, ) -> None: if not self._op: - self._op = external_func(self._name, inputs=self._arg_types) + self._op = external_func( + self._name, inputs=self._arg_types, link_with=self._object_file_name + ) class ExternalFunction(Kernel): - _instances = set() + """An AIE core function compiled from C/C++ source at JIT time. + + Each instance is registered in ``_instances`` at construction time so that + the ``@jit`` decorator can discover and compile all source files before + invoking the MLIR compilation pipeline. ``_instances`` is cleared at the + start of each ``@jit`` call to prevent stale registrations from a previous + (possibly failed) run. + + Use the base :class:`Kernel` class instead when you have a pre-built + object file. + """ + + _instances: set = set() # Registry of all live ExternalFunction instances. def __init__( self, @@ -92,108 +155,51 @@ def __init__( arg_types: list[type[np.ndarray] | np.dtype] = [], include_dirs: list[str] = [], compile_flags: list[str] = [], - debug: bool = False, ) -> None: - """An ExternalFunction is a C/C++ source file that gets compiled to an object file and eventually resolves to a FuncOp. - If it is called, a CallOp will be generated. - + """ Args: - name (str): The name of the function - object_file_name (str, optional): The name of the object file. If None, it will be name.o. - source_file (str): Path to the C/C++ source file - source_string (str): C/C++ source code as a string - arg_types (list[type[np.ndarray] | np.dtype], optional): The type signature of the function. Defaults to []. 
- include_dirs (list[str], optional): Additional include directories. Defaults to []. - compile_flags (list[str], optional): Additional compilation flags. Defaults to []. - debug (bool, optional): Enable debug logging. Defaults to False. + name: Symbol name of the function as it will appear in the object + file. + object_file_name: Output object file name. Defaults to + ``.o``. + source_file: Path to a C/C++ source file on disk. Mutually + exclusive with ``source_string``. + source_string: Inline C/C++ source code. Mutually exclusive with + ``source_file``. + arg_types: Type signature of the function arguments. Defaults to + []. + include_dirs: Additional ``-I`` directories passed to the Peano + compiler. Defaults to []. + compile_flags: Additional flags passed verbatim to the Peano + compiler. Defaults to []. """ if not object_file_name: object_file_name = f"{name}.o" super().__init__(name, object_file_name, arg_types) - self._setup_source(source_file, source_string) - self._include_dirs = include_dirs - self._compile_flags = compile_flags - self._compiled = False - self._arg_types = arg_types - self._op: FuncOp | None = None - self._debug = debug - - if self._debug: - logger.debug("Initializing ExternalFunction: %s", name) - logger.debug("Source file: %s", source_file) - logger.debug("Include dirs: %s", include_dirs) - logger.debug("Compile flags: %s", compile_flags) - - # Track this instance for JIT compilation - ExternalFunction._instances.add(self) - - def _setup_source(self, source_file: str | None, source_string: str | None) -> None: - """Set up the source file for compilation.""" if source_file is not None: self._source_file = source_file self._source_string = None - else: - if source_string is None: - raise ValueError("source_file or source_string must be provided") + elif source_string is not None: self._source_file = None self._source_string = source_string + else: + raise ValueError("source_file or source_string must be provided.") - def 
__enter__(self): - """Enter the context.""" - return self - - def __exit__(self, exc_type, exc_value, traceback): - """Exit the context.""" - pass - - def tile_size(self, arg_index: int = 0) -> int: - """Get the tile size from the specified array argument type. - - Args: - arg_index (int): Index of the argument to get tile size from. Defaults to 0. - - Returns: - int: The tile size (first dimension) of the specified argument. - """ - if not self._arg_types: - raise ValueError("No argument types defined") - if arg_index >= len(self._arg_types): - raise ValueError( - f"Argument index {arg_index} out of range (max: {len(self._arg_types) - 1})" - ) - - arg = self._arg_types[arg_index] - - # Handle numpy array types like np.ndarray[(16,), np.dtype[np.int32]] - if hasattr(arg, "__args__") and len(arg.__args__) > 0: - # For types like np.ndarray[(16,), np.dtype[np.int32]], the shape is in __args__[0] - shape_arg = arg.__args__[0] - if isinstance(shape_arg, tuple) and len(shape_arg) > 0: - return shape_arg[0] - - # Handle MLIR types like MemRefType(memref<16xi32>) - if ( - hasattr(arg, "shape") - and hasattr(arg.shape, "__len__") - and len(arg.shape) > 0 - ): - return arg.shape[0] - - raise ValueError( - f"Argument {arg_index} does not have a shape or is not an array type" - ) + self._include_dirs = include_dirs + self._compile_flags = compile_flags + self._compiled = False - def arg_types(self) -> list: - """Get the argument types of the ExternalFunction.""" - return self._arg_types.copy() + # Register this instance so the @jit decorator can compile it. + ExternalFunction._instances.add(self) def __call__(self, *args, **kwargs): - """Call the ExternalFunction with argument validation.""" + """Call with argument count and type validation before emitting MLIR.""" if len(args) != len(self._arg_types): raise ValueError( - f"ExternalFunction '{self._name}' expects {len(self._arg_types)} argument(s), " - f"but {len(args)} were provided." 
+ f"ExternalFunction '{self._name}' expects " + f"{len(self._arg_types)} argument(s), but {len(args)} " + f"were provided." ) for i, (arg, expected_ty) in enumerate(zip(args, self._arg_types)): self._validate_arg(i, arg, expected_ty) @@ -201,15 +207,12 @@ def __call__(self, *args, **kwargs): def _validate_arg(self, index: int, arg, expected_ty) -> None: """Validate a single argument against its expected type.""" - # Scalar types (np.int32, np.float32, etc.) if isinstance(expected_ty, type) and issubclass(expected_ty, np.generic): if not isinstance(arg, (int, float, np.integer, np.floating)): raise ValueError( f"Argument {index}: expected scalar, got {type(arg).__name__}" ) return - - # Array types - check shape and dtype if hasattr(expected_ty, "__args__") and hasattr(arg, "shape"): expected_shape = expected_ty.__args__[0] expected_dtype = expected_ty.__args__[1].__args__[0] @@ -220,27 +223,18 @@ def _validate_arg(self, index: int, arg, expected_ty) -> None: ) def __hash__(self): - """ - Compute a hash for the ExternalFunction based on its properties. - This allows ExternalFunction instances to be used in cache keys. - """ - # Create a string representation of the function's key properties + """Hash based on source content and compiler options for cache keying.""" + # TODO: extend to cover included headers (issue #2543) hash_parts = [ self._name, str(self._arg_types), str(sorted(self._include_dirs)), str(sorted(self._compile_flags)), ] - - # Include source content for uniqueness - # TODO: This solution needs to be extended to handle headers. 
See https://github.com/Xilinx/mlir-aie/issues/2543 if self._source_string: hash_parts.append(self._source_string) elif self._source_file: with open(self._source_file, "r") as f: - file_content = f.read() - hash_parts.append(file_content) - - # Create hash from combined string + hash_parts.append(f.read()) combined = "|".join(hash_parts) return int(hashlib.sha256(combined.encode("utf-8")).hexdigest()[:8], 16) diff --git a/python/iron/worker.py b/python/iron/worker.py index ac75242a8c5..10f5225d056 100644 --- a/python/iron/worker.py +++ b/python/iron/worker.py @@ -15,7 +15,6 @@ from .device import PlacementTile, AnyComputeTile, Tile from .dataflow.objectfifo import ObjectFifoHandle, ObjectFifo from .dataflow.endpoint import ObjectFifoEndpoint -from .kernel import Kernel, ExternalFunction from .buffer import Buffer from .resolvable import Resolvable @@ -73,18 +72,14 @@ def do_nothing_core_fun(*args) -> None: self.core_fn = do_nothing_core_fun else: self.core_fn = core_fn - self.link_with: str | None = None self.fn_args = fn_args - bin_names = set() self._fifos = [] self._buffers = [] self._barriers = [] # Check arguments to the core. Some information is saved for resolution. for arg in self.fn_args: - if isinstance(arg, (Kernel, ExternalFunction)): - bin_names.add(arg.bin_name) - elif isinstance(arg, ObjectFifoHandle): + if isinstance(arg, ObjectFifoHandle): arg.endpoint = self self._fifos.append(arg) elif isinstance(arg, Buffer): @@ -98,17 +93,10 @@ def do_nothing_core_fun(*args) -> None: ) elif isinstance(arg, WorkerRuntimeBarrier): self._barriers.append(arg) - # We assume other arguments are metaprogramming (e.g, Python args) - # This could allow some errors to sink through, but we allow it for now. - # TODO: this could be cleaned up through creation of a MetaArgs struct, so you - # could access values through meta.my_var within the function. - - if len(bin_names) > 1: - raise ValueError( - f"Currently, only one binary per works is supported. 
Found: {bin_names}" - ) - if len(bin_names) == 1: - self.link_with = list(bin_names)[0] + # Kernel/ExternalFunction instances are valid fn_args — they resolve to + # func.call ops when invoked inside core_fn and carry link_with on their + # func.func declaration. Other unrecognized args are assumed to be + # metaprogramming values (Python scalars, etc.). def place(self, tile: Tile) -> None: """Set the placement of the Worker. @@ -145,7 +133,6 @@ def resolve( if not self._tile: raise ValueError("Must place Worker before it can be resolved.") my_tile = self._tile.op - my_link = self.link_with # Create the necessary locks for the core operation to synchronize with the runtime sequence # and register them in the corresponding barriers. @@ -153,7 +140,7 @@ def resolve( l = lock(my_tile) barrier._add_worker_lock(l) - @core(my_tile, link_with=my_link, stack_size=self.stack_size) + @core(my_tile, stack_size=self.stack_size) def core_body(): for _ in range_(sys.maxsize) if self._while_true else range(1): self.core_fn(*self.fn_args) diff --git a/python/utils/compile/__init__.py b/python/utils/compile/__init__.py index 701b4dd60d4..9206688a30e 100644 --- a/python/utils/compile/__init__.py +++ b/python/utils/compile/__init__.py @@ -9,7 +9,6 @@ import os from pathlib import Path -from .link import merge_object_files from .utils import ( compile_cxx_core_function, compile_mlir_module, diff --git a/python/utils/compile/link.py b/python/utils/compile/link.py deleted file mode 100644 index a78e0a62091..00000000000 --- a/python/utils/compile/link.py +++ /dev/null @@ -1,51 +0,0 @@ -# link.py -*- Python -*- -# -# This file is licensed under the Apache License v2.0 with LLVM Exceptions. -# See https://llvm.org/LICENSE.txt for license information. -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -# -# (c) Copyright 2025-2026 Advanced Micro Devices, Inc. 
- -import logging -import subprocess -from os import PathLike - -import aie.utils.config as config - -logger = logging.getLogger(__name__) - - -def merge_object_files( - object_paths: list[PathLike], - output_path: PathLike, - cwd=None, -) -> None: - """ - Merges multiple object files into a single output file. - - Args: - object_paths (list of PathLike): List of paths to object files to merge. - output_path (PathLike): Path to the output object file. - cwd (str, optional): Overrides the current working directory. - """ - cmd = [ - config.peano_linker_path(), - "-r", # relocatable output - "-o", - str(output_path), - *[str(obj) for obj in object_paths], - ] - logger.debug("Linking object files with: %s", " ".join(cmd)) - ret = subprocess.run( - cmd, - cwd=cwd, - check=False, - capture_output=True, - ) - if ret.stdout: - logger.debug("%s", ret.stdout.decode()) - if ret.returncode != 0: - if ret.stderr: - raise RuntimeError(f"[Peano] object linking failed:\n{ret.stderr.decode()}") - else: - raise RuntimeError("[Peano] object linking failed") diff --git a/python/utils/compile/utils.py b/python/utils/compile/utils.py index 3d49373f6c7..113567a3666 100644 --- a/python/utils/compile/utils.py +++ b/python/utils/compile/utils.py @@ -11,7 +11,6 @@ import subprocess import aie.compiler.aiecc.main as aiecc import aie.utils.config as config -from .link import merge_object_files logger = logging.getLogger(__name__) @@ -86,16 +85,25 @@ def compile_mlir_module( options=None, ): """ - Compile an MLIR module to instruction, PDI, and/or xbclbin files using the aiecc module. - This function supports only the Peano compiler. - Parameters: - mlir_module (str): MLIR module to compile. - insts_path (str): Path to the instructions binary file. - pdi_path (str): Path to the PDI file. - xclbin_path (str): Path to the xclbin file. - verbose (bool): If True, enable verbose output. - work_dir (str): Compilation working directory. - options (list[str]): List of additional options. 
+ Compile an MLIR module to instruction, PDI, and/or xclbin files using aiecc. + + By default uses the Peano compiler backend (--no-xchesscc --no-xbridge). + Pass additional flags via ``options`` to override. + + When ``work_dir`` is provided, the MLIR is written to a file inside that + directory so that the C++ aiecc binary resolves relative ``link_with`` + paths on ``func.func`` declarations against the same directory where + ``compile_external_kernel`` placed the compiled object files. + + Args: + mlir_module: MLIR module to compile. + insts_path: Output path for the NPU instruction binary. + pdi_path: Output path for the PDI file. + xclbin_path: Output path for the xclbin package. + verbose: If True, pass --verbose to aiecc. + work_dir: Compilation working directory; also determines where the + MLIR input file is written when invoking the C++ aiecc binary. + options: Additional aiecc command-line options. """ args = [ @@ -116,67 +124,90 @@ def compile_mlir_module( args.append("--verbose") if options: args.extend(options) - try: - aiecc.run(mlir_module, args) - except Exception as e: - raise RuntimeError("[aiecc] Compilation failed") from e + # When work_dir is provided, invoke the aiecc binary as a subprocess so + # that it resolves relative link_with paths (e.g. "add_one.o") against the + # same directory where compile_external_kernel placed the compiled objects. + # The MLIR file is written to work_dir/aie.mlir; callers (e.g. jit.py) + # may have already written it there, in which case this is a no-op write. + # If no work_dir is provided, fall back to aiecc.run() which writes to a + # temporary file internally. + if work_dir: + aiecc_bin = shutil.which("aiecc") + if not aiecc_bin: + raise RuntimeError( + "Could not find 'aiecc' binary. Ensure mlir-aie is installed " + "and its bin directory is in PATH." 
+ ) + mlir_file = os.path.join(work_dir, "aie.mlir") + with open(mlir_file, "w") as f: + f.write(str(mlir_module)) + result = subprocess.run( + [aiecc_bin, mlir_file] + args, capture_output=True, text=True + ) + if result.stdout: + logger.debug("%s", result.stdout) + if result.stderr: + logger.debug("%s", result.stderr) + if result.returncode != 0: + error_msg = result.stderr if result.stderr else result.stdout + raise RuntimeError( + f"[aiecc] Compilation failed with exit code {result.returncode}:\n" + f"{error_msg}" + ) + else: + try: + aiecc.run(mlir_module, args) + except Exception as e: + raise RuntimeError("[aiecc] Compilation failed") from e def compile_external_kernel(func, kernel_dir, target_arch): """ Compile an ExternalFunction to an object file in the kernel directory. + The output file is named ``func.object_file_name`` and placed in ``kernel_dir``. + If the object file already exists in ``kernel_dir``, compilation is skipped. + Args: - func: ExternalFunction instance to compile - kernel_dir: Directory to place the compiled object file - target_arch: Target architecture (e.g., "aie2" or "aie2p") + func: ExternalFunction instance to compile. + kernel_dir: Directory where the compiled object file will be placed. + Must be the same directory passed as ``work_dir`` to + ``compile_mlir_module`` so that relative link_with paths resolve + correctly. + target_arch: Peano target architecture string (e.g., "aie2", "aie2p"). """ - # Skip if already compiled - if hasattr(func, "_compiled") and func._compiled: + # Skip if already compiled in this session. + if func._compiled: return - # Check if object file already exists in kernel directory - output_file = os.path.join(kernel_dir, func.bin_name) + # Skip if the object file already exists (cache hit). 
+ output_file = os.path.join(kernel_dir, func.object_file_name) if os.path.exists(output_file): return - # Create source file in kernel directory source_file = os.path.join(kernel_dir, f"{func._name}.cc") - # Handle both source_string and source_file cases if func._source_string is not None: - # Use source_string (write to file) - try: - with open(source_file, "w") as f: - f.write(func._source_string) - except Exception as e: - raise + with open(source_file, "w") as f: + f.write(func._source_string) elif func._source_file is not None: # Use source_file (copy existing file) # Check if source file exists before copying if os.path.exists(func._source_file): - try: - shutil.copy2(func._source_file, source_file) - except Exception as e: - raise + shutil.copy2(func._source_file, source_file) else: return else: raise ValueError("Neither source_string nor source_file is provided") - try: - compile_cxx_core_function( - source_path=source_file, - target_arch=target_arch, - output_path=output_file, - include_dirs=func._include_dirs, - compile_args=func._compile_flags, - cwd=kernel_dir, - ) - except Exception as e: - raise - - # Mark the function as compiled + compile_cxx_core_function( + source_path=source_file, + target_arch=target_arch, + output_path=output_file, + include_dirs=func._include_dirs, + compile_args=func._compile_flags, + cwd=kernel_dir, + ) func._compiled = True diff --git a/python/utils/jit.py b/python/utils/jit.py index e1dab95074f..5a90b0cb6c0 100644 --- a/python/utils/jit.py +++ b/python/utils/jit.py @@ -19,7 +19,7 @@ from .compile.cache.utils import _create_function_cache_key, file_lock from .compile import NPU_CACHE_HOME from .compile.utils import _cleanup_failed_compilation -from aie.iron.kernel import ExternalFunction +from aie.iron.kernel import ExternalFunction, Kernel # Global cache for compiled kernels at the function level # Key: (function_name, args_signature) -> NPUKernel instance @@ -69,37 +69,51 @@ def decorator(*args, **kwargs): tensor_args = 
_filter_tensor_args(args) return cached_kernel(*tensor_args, **kwargs) - # Clear any instances from previous runs to make sure if the user provided any broken code we don't try to recompile it + # Collect ExternalFunction instances that need JIT compilation. + # Note: bare Kernel instances (pre-compiled .o) are intentionally + # excluded here — they require no compilation step. Both Kernel and + # ExternalFunction are stripped from the tensor args passed to the NPU + # kernel (see _filter_tensor_args). + # ExternalFunction.__init__ registers to _instances at construction time + # (before this JIT call), so they must be captured before the clear below. + external_kernels = [ + arg for arg in args if isinstance(arg, ExternalFunction) + ] + [v for v in kwargs.values() if isinstance(v, ExternalFunction)] + seen = set(id(k) for k in external_kernels) + + # Clear stale instances from previous (possibly failed) runs so that a + # broken kernel doesn't prevent a corrected one from being recompiled. ExternalFunction._instances.clear() - # Find ExternalFunction instances in arguments and kwargs - external_kernels = [] - for arg in args: - if isinstance(arg, ExternalFunction): - external_kernels.append(arg) - for value in kwargs.values(): - if isinstance(value, ExternalFunction): - external_kernels.append(value) - - # Execute the function to generate MLIR + # Execute the function to generate MLIR. + # Placed designs use the raw @device(...) DSL and populate the context + # as a side effect (returning nothing), so we must provide an outer + # mlir_mod_ctx() and capture ctx.module. + # Non-placed designs use Program.resolve_program(), which opens its own + # mlir_mod_ctx() internally and returns the module directly; wrapping + # them in an outer context would give an empty module. 
if is_placed: with mlir_mod_ctx() as ctx: function(*args, **kwargs) - assert ( - ctx.module.operation.verify() - ), f"Verification failed for '{function.__name__}'" + if not ctx.module.operation.verify(): + raise RuntimeError( + f"MLIR verification failed for '{function.__name__}'" + ) mlir_module = ctx.module else: mlir_module = function(*args, **kwargs) + if not mlir_module.operation.verify(): + raise RuntimeError( + f"MLIR verification failed for '{function.__name__}'" + ) - # Compile all ExternalFunction instances that were created during this JIT compilation + # Also collect ExternalFunction instances created during function() + # execution (e.g. inside algorithm helpers that construct them internally). for func in ExternalFunction._instances: - if ( - not hasattr(func, "_compiled") or not func._compiled - ): # Don't compile if already compiled + if not func._compiled and id(func) not in seen: external_kernels.append(func) + seen.add(id(func)) - # Determine target architecture based on device type current_device = DefaultNPURuntime.device() # Determine target architecture based on device type @@ -174,18 +188,21 @@ def decorator(*args, **kwargs): def _filter_tensor_args(args): """ - Filter out non-tensor arguments from args. Required for Algorithms because - they pass ExternalFunction and scalar values in their signature that should - not be interpreted as runtime sequence arguments. + Filter out non-tensor arguments from args. + + Algorithm functions may include Kernel/ExternalFunction instances and scalar + compile-time constants in their Python signature that must not be forwarded + to the NPU kernel as runtime buffer arguments. Removes: - - ExternalFunction instances - - Scalar values (int, float, np.integer, np.floating), embedded as MLIR constants + - Kernel and ExternalFunction instances (resolved at compile time via link_with) + - Scalar values (int, float, np.integer, np.floating) used as MLIR constants + - Callables (e.g. 
lambda configuration helpers) """ tensor_args = [] for arg in args: - # Skip ExternalFunction - if isinstance(arg, ExternalFunction): + # Skip any kernel handle (Kernel, ExternalFunction, or subclasses) + if isinstance(arg, Kernel): continue # Skip scalar types (MLIR constants) if isinstance(arg, (int, float, np.integer, np.floating)): diff --git a/test/Integration/julia_by_lines/aie.mlir b/test/Integration/julia_by_lines/aie.mlir index 6695dfe1c95..72acf9d224b 100644 --- a/test/Integration/julia_by_lines/aie.mlir +++ b/test/Integration/julia_by_lines/aie.mlir @@ -21,8 +21,8 @@ module @test { %lock13_3 = aie.lock(%tile13, 3) - func.func private @func(%A: memref<32x32xi32>, %MinRe : f32, %MaxRe : f32, %MinIm : f32, %MaxIm : f32) -> () - func.func private @do_line(%A: memref<32x32xi32>, %MinRe : f32, %StepRe : f32, %Im : f32, %cols : i32) -> () + func.func private @func(%A: memref<32x32xi32>, %MinRe : f32, %MaxRe : f32, %MinIm : f32, %MaxIm : f32) -> () attributes {link_with = "kernel.o"} + func.func private @do_line(%A: memref<32x32xi32>, %MinRe : f32, %StepRe : f32, %Im : f32, %cols : i32) -> () attributes {link_with = "kernel.o"} %core13 = aie.core(%tile13) { %MinRe = arith.constant -1.5 : f32 @@ -51,5 +51,5 @@ module @test { scf.yield %Im_next : f32 } aie.end - } { link_with="kernel.o" } + } } diff --git a/test/aiecc/cpp_link_with.mlir b/test/aiecc/cpp_link_with.mlir index 0e0a36cc83c..96952a40b2f 100644 --- a/test/aiecc/cpp_link_with.mlir +++ b/test/aiecc/cpp_link_with.mlir @@ -29,7 +29,7 @@ module { aie.objectfifo @of_in(%tile_0_0, {%tile_0_2}, 2 : i32) : !aie.objectfifo> aie.objectfifo @of_out(%tile_0_2, {%tile_0_0}, 2 : i32) : !aie.objectfifo> - func.func private @external_func(memref<16xi32>, memref<16xi32>) + func.func private @external_func(memref<16xi32>, memref<16xi32>) attributes {link_with = "external.o"} %core_0_2 = aie.core(%tile_0_2) { %subview_in = aie.objectfifo.acquire @of_in(Consume, 1) : !aie.objectfifosubview> @@ -43,7 +43,7 @@ module { 
aie.objectfifo.release @of_in(Consume, 1) aie.objectfifo.release @of_out(Produce, 1) aie.end - } {link_with = "external.o"} + } aie.runtime_sequence(%in : memref<16xi32>, %out : memref<16xi32>) { %c0 = arith.constant 0 : i64 diff --git a/test/aiecc/cpp_link_with_both_attrs.mlir b/test/aiecc/cpp_link_with_both_attrs.mlir new file mode 100644 index 00000000000..f4a40cae290 --- /dev/null +++ b/test/aiecc/cpp_link_with_both_attrs.mlir @@ -0,0 +1,26 @@ +//===- cpp_link_with_both_attrs.mlir ----------------------------*- MLIR -*-===// +// +// This file is licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// (c) Copyright 2026 Advanced Micro Devices, Inc. +// +//===----------------------------------------------------------------------===// + +// Test that a core with both the deprecated 'link_with' scalar attr AND the +// canonical 'link_files' array attr on the same CoreOp is rejected by the +// CoreOp verifier. + +// RUN: aie-opt --verify-diagnostics %s + +module { + aie.device(npu1_1col) { + %tile_0_2 = aie.tile(0, 2) + + // expected-error@+1 {{cannot specify both 'link_with' (deprecated) and 'link_files'}} + %core_0_2 = aie.core(%tile_0_2) { + aie.end + } {link_with = "a.o", link_files = ["b.o"]} + } +} diff --git a/test/aiecc/cpp_link_with_deprecation.mlir b/test/aiecc/cpp_link_with_deprecation.mlir new file mode 100644 index 00000000000..1d450676407 --- /dev/null +++ b/test/aiecc/cpp_link_with_deprecation.mlir @@ -0,0 +1,38 @@ +//===- cpp_link_with_deprecation.mlir --------------------------*- MLIR -*-===// +// +// This file is licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Copyright (C) 2026, Advanced Micro Devices, Inc. 
+// +//===----------------------------------------------------------------------===// + +// Test that core-level link_with still compiles but emits a deprecation warning, +// and that the pass migrates the attribute to link_files on the core. + +// RUN: aie-opt --verify-diagnostics --aie-assign-core-link-files %s +// RUN: aie-opt --aie-assign-core-link-files %s | FileCheck %s --check-prefix=MIGRATED + +// Verify the pass migrated the deprecated core-level attr into link_files and +// removed link_with from the core. +// MIGRATED: link_files = ["legacy.o"] +// MIGRATED-NOT: link_with = "legacy.o" + +module { + aie.device(npu1_1col) { + %tile_0_0 = aie.tile(0, 0) + %tile_0_2 = aie.tile(0, 2) + + aie.objectfifo @of(%tile_0_0, {%tile_0_2}, 2 : i32) : !aie.objectfifo> + + // expected-warning@+1 {{link_with on aie.core is deprecated; attach link_with to the func.func declaration instead}} + %core_0_2 = aie.core(%tile_0_2) { + %buf = aie.objectfifo.acquire @of(Consume, 1) : !aie.objectfifosubview> + aie.objectfifo.release @of(Consume, 1) + aie.end + } {link_with = "legacy.o"} + + aie.runtime_sequence() {} + } +} diff --git a/test/aiecc/cpp_link_with_emitter_fallback.mlir b/test/aiecc/cpp_link_with_emitter_fallback.mlir new file mode 100644 index 00000000000..fb432c76463 --- /dev/null +++ b/test/aiecc/cpp_link_with_emitter_fallback.mlir @@ -0,0 +1,34 @@ +//===- cpp_link_with_emitter_fallback.mlir ---------------------*- MLIR -*-===// +// +// This file is licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Copyright (C) 2026, Advanced Micro Devices, Inc. 
+// +//===----------------------------------------------------------------------===// + +// Test the deprecated fallback path in the ldscript and BCF emitters: +// when a core still has a core-level link_with (and no link_files), both +// emitters should still emit the correct entry without running +// aie-assign-core-link-files first. + +// RUN: aie-translate --aie-generate-ldscript --tilecol=0 --tilerow=2 %s | FileCheck %s --check-prefix=LDSCRIPT +// RUN: aie-translate --aie-generate-bcf --tilecol=0 --tilerow=2 %s | FileCheck %s --check-prefix=BCF + +// LDSCRIPT: INPUT(fallback.o) +// BCF: _include _file fallback.o + +// Use a bare core without objectfifo so no lowering is needed before +// aie-translate can generate the ldscript/BCF. + +module { + aie.device(npu1_1col) { + %tile_0_2 = aie.tile(0, 2) + + // Core keeps the old core-level link_with (no pass run, no link_files set). + %core_0_2 = aie.core(%tile_0_2) { + aie.end + } {link_with = "fallback.o"} + } +} diff --git a/test/aiecc/cpp_link_with_func_level.mlir b/test/aiecc/cpp_link_with_func_level.mlir new file mode 100644 index 00000000000..9a53f0f4313 --- /dev/null +++ b/test/aiecc/cpp_link_with_func_level.mlir @@ -0,0 +1,58 @@ +//===- cpp_link_with_func_level.mlir ---------------------------*- MLIR -*-===// +// +// This file is licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Copyright (C) 2026, Advanced Micro Devices, Inc. +// +//===----------------------------------------------------------------------===// + +// Canonical new style: link_with is on func.func, not on aie.core. +// Verify that AIEAssignCoreLinkFiles populates link_files on the core and +// that the ldscript/BCF emitters produce the correct entries. 
+ +// RUN: aie-opt --verify-diagnostics --aie-assign-core-link-files %s | FileCheck %s --check-prefix=OPT +// RUN: aie-opt --verify-diagnostics --aie-assign-core-link-files %s | aie-translate --aie-generate-ldscript --tilecol=0 --tilerow=2 | FileCheck %s --check-prefix=LDSCRIPT +// RUN: aie-opt --verify-diagnostics --aie-assign-core-link-files %s | aie-translate --aie-generate-bcf --tilecol=0 --tilerow=2 | FileCheck %s --check-prefix=BCF + +// OPT: link_files = ["f.o"] + +// LDSCRIPT: INPUT(f.o) + +// BCF: _include _file f.o + +module { + aie.device(npu1_1col) { + %tile_0_0 = aie.tile(0, 0) + %tile_0_2 = aie.tile(0, 2) + + aie.objectfifo @of_in(%tile_0_0, {%tile_0_2}, 2 : i32) : !aie.objectfifo> + aie.objectfifo @of_out(%tile_0_2, {%tile_0_0}, 2 : i32) : !aie.objectfifo> + + func.func private @f(memref<16xi32>, memref<16xi32>) attributes {link_with = "f.o"} + + %core_0_2 = aie.core(%tile_0_2) { + %subview_in = aie.objectfifo.acquire @of_in(Consume, 1) : !aie.objectfifosubview> + %elem_in = aie.objectfifo.subview.access %subview_in[0] : !aie.objectfifosubview> -> memref<16xi32> + + %subview_out = aie.objectfifo.acquire @of_out(Produce, 1) : !aie.objectfifosubview> + %elem_out = aie.objectfifo.subview.access %subview_out[0] : !aie.objectfifosubview> -> memref<16xi32> + + func.call @f(%elem_in, %elem_out) : (memref<16xi32>, memref<16xi32>) -> () + + aie.objectfifo.release @of_in(Consume, 1) + aie.objectfifo.release @of_out(Produce, 1) + aie.end + } + + aie.runtime_sequence(%in : memref<16xi32>, %out : memref<16xi32>) { + %c0 = arith.constant 0 : i64 + %c1 = arith.constant 1 : i64 + %c16 = arith.constant 16 : i64 + aiex.npu.dma_memcpy_nd(%out[%c0,%c0,%c0,%c0][%c1,%c1,%c1,%c16][%c0,%c0,%c0,%c1]) {metadata = @of_out, id = 1 : i64} : memref<16xi32> + aiex.npu.dma_memcpy_nd(%in[%c0,%c0,%c0,%c0][%c1,%c1,%c1,%c16][%c0,%c0,%c0,%c1]) {metadata = @of_in, id = 0 : i64, issue_token = true} : memref<16xi32> + aiex.npu.dma_wait {symbol = @of_out} + } + } +} diff --git 
a/test/aiecc/cpp_link_with_indirect_call.mlir b/test/aiecc/cpp_link_with_indirect_call.mlir new file mode 100644 index 00000000000..5d31d3e9fba --- /dev/null +++ b/test/aiecc/cpp_link_with_indirect_call.mlir @@ -0,0 +1,30 @@ +//===- cpp_link_with_indirect_call.mlir -------------------------*- MLIR -*-===// +// +// This file is licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// (c) Copyright 2026 Advanced Micro Devices, Inc. +// +//===----------------------------------------------------------------------===// + +// Test that an indirect call inside a core body triggers a warning from +// aie-assign-core-link-files, since link_with on indirectly-called funcs +// cannot be statically resolved. + +// RUN: aie-opt --verify-diagnostics --aie-assign-core-link-files %s + +module { + aie.device(npu1_1col) { + %tile_0_2 = aie.tile(0, 2) + + func.func private @some_helper() -> () + + %core_0_2 = aie.core(%tile_0_2) { + %fptr = func.constant @some_helper : () -> () + // expected-warning@+1 {{indirect call in core body — link_with attributes on indirectly-called functions are not automatically resolved}} + func.call_indirect %fptr() : () -> () + aie.end + } + } +} diff --git a/test/aiecc/cpp_link_with_mixed.mlir b/test/aiecc/cpp_link_with_mixed.mlir new file mode 100644 index 00000000000..57a25d366b1 --- /dev/null +++ b/test/aiecc/cpp_link_with_mixed.mlir @@ -0,0 +1,49 @@ +//===- cpp_link_with_mixed.mlir --------------------------------*- MLIR -*-===// +// +// This file is licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Copyright (C) 2026, Advanced Micro Devices, Inc. 
+// +//===----------------------------------------------------------------------===// + +// Test that a core with both a deprecated core-level link_with AND a call to +// a func.func with its own link_with produces a merged, deduplicated link_files +// set. The core-level attr is consumed (removed) and both .o paths appear +// exactly once in link_files. + +// RUN: aie-opt --verify-diagnostics --aie-assign-core-link-files %s | FileCheck %s --check-prefix=OPT +// RUN: aie-opt --verify-diagnostics --aie-assign-core-link-files %s | aie-translate --aie-generate-ldscript --tilecol=0 --tilerow=2 | FileCheck %s --check-prefix=LDSCRIPT + +// The merged set must contain both files. +// OPT-DAG: "core_only.o" +// OPT-DAG: "func_only.o" +// The deprecated core-level attr must be gone. +// OPT-NOT: link_with = "core_only.o" + +// LDSCRIPT-DAG: INPUT(core_only.o) +// LDSCRIPT-DAG: INPUT(func_only.o) + +module { + aie.device(npu1_1col) { + %tile_0_0 = aie.tile(0, 0) + %tile_0_2 = aie.tile(0, 2) + + aie.objectfifo @of(%tile_0_0, {%tile_0_2}, 2 : i32) : !aie.objectfifo> + + func.func private @ext(memref<16xi32>) attributes {link_with = "func_only.o"} + + // Core carries deprecated core-level link_with AND calls a func with its own. 
+ // expected-warning@+1 {{link_with on aie.core is deprecated; attach link_with to the func.func declaration instead}} + %core_0_2 = aie.core(%tile_0_2) { + %buf = aie.objectfifo.acquire @of(Consume, 1) : !aie.objectfifosubview> + %elem = aie.objectfifo.subview.access %buf[0] : !aie.objectfifosubview> -> memref<16xi32> + func.call @ext(%elem) : (memref<16xi32>) -> () + aie.objectfifo.release @of(Consume, 1) + aie.end + } {link_with = "core_only.o"} + + aie.runtime_sequence() {} + } +} diff --git a/test/aiecc/cpp_link_with_no_link_with.mlir b/test/aiecc/cpp_link_with_no_link_with.mlir new file mode 100644 index 00000000000..d55757e6a53 --- /dev/null +++ b/test/aiecc/cpp_link_with_no_link_with.mlir @@ -0,0 +1,28 @@ +//===- cpp_link_with_no_link_with.mlir -------------------------*- MLIR -*-===// +// +// This file is licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Copyright (C) 2026, Advanced Micro Devices, Inc. +// +//===----------------------------------------------------------------------===// + +// Verify that aie-assign-core-link-files is a no-op on designs that carry +// no link_with attributes anywhere — no link_files attribute should appear +// on any CoreOp, and no warnings should be emitted. 
+ +// RUN: aie-opt --verify-diagnostics --aie-assign-core-link-files %s | FileCheck %s + +// CHECK-NOT: link_files +// CHECK-NOT: link_with + +module { + aie.device(npu1_1col) { + %tile_0_2 = aie.tile(0, 2) + + %core_0_2 = aie.core(%tile_0_2) { + aie.end + } + } +} diff --git a/test/aiecc/cpp_link_with_shared_func.mlir b/test/aiecc/cpp_link_with_shared_func.mlir new file mode 100644 index 00000000000..4da67aa4892 --- /dev/null +++ b/test/aiecc/cpp_link_with_shared_func.mlir @@ -0,0 +1,57 @@ +//===- cpp_link_with_shared_func.mlir --------------------------*- MLIR -*-===// +// +// This file is licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Copyright (C) 2026, Advanced Micro Devices, Inc. +// +//===----------------------------------------------------------------------===// + +// Test that two cores each calling the same func.func @kernel {link_with="k.o"} +// each produce exactly one INPUT(k.o) / _include _file k.o entry (no +// duplication of the shared object file). + +// RUN: aie-opt --aie-assign-core-link-files %s | FileCheck %s --check-prefix=OPT +// RUN: aie-opt --aie-assign-core-link-files %s | aie-translate --aie-generate-ldscript --tilecol=0 --tilerow=2 | FileCheck %s --check-prefix=LDSCRIPT02 +// RUN: aie-opt --aie-assign-core-link-files %s | aie-translate --aie-generate-ldscript --tilecol=0 --tilerow=3 | FileCheck %s --check-prefix=LDSCRIPT03 + +// OPT-COUNT-2: link_files = ["k.o"] + +// LDSCRIPT02: INPUT(k.o) +// LDSCRIPT02-NOT: INPUT(k.o) + +// LDSCRIPT03: INPUT(k.o) +// LDSCRIPT03-NOT: INPUT(k.o) + +module { + aie.device(npu1_1col) { + %tile_0_0 = aie.tile(0, 0) + %tile_0_2 = aie.tile(0, 2) + %tile_0_3 = aie.tile(0, 3) + + // Declare objectfifos before the cores that reference them. 
+ aie.objectfifo @dummy_in(%tile_0_0, {%tile_0_2}, 2 : i32) : !aie.objectfifo> + aie.objectfifo @dummy_in2(%tile_0_0, {%tile_0_3}, 2 : i32) : !aie.objectfifo> + + func.func private @kernel(memref<16xi32>) attributes {link_with = "k.o"} + + %core_0_2 = aie.core(%tile_0_2) { + %buf = aie.objectfifo.acquire @dummy_in(Consume, 1) : !aie.objectfifosubview> + %elem = aie.objectfifo.subview.access %buf[0] : !aie.objectfifosubview> -> memref<16xi32> + func.call @kernel(%elem) : (memref<16xi32>) -> () + aie.objectfifo.release @dummy_in(Consume, 1) + aie.end + } + + %core_0_3 = aie.core(%tile_0_3) { + %buf = aie.objectfifo.acquire @dummy_in2(Consume, 1) : !aie.objectfifosubview> + %elem = aie.objectfifo.subview.access %buf[0] : !aie.objectfifosubview> -> memref<16xi32> + func.call @kernel(%elem) : (memref<16xi32>) -> () + aie.objectfifo.release @dummy_in2(Consume, 1) + aie.end + } + + aie.runtime_sequence() {} + } +} diff --git a/test/aiecc/cpp_link_with_unused_func.mlir b/test/aiecc/cpp_link_with_unused_func.mlir new file mode 100644 index 00000000000..1804bc6a033 --- /dev/null +++ b/test/aiecc/cpp_link_with_unused_func.mlir @@ -0,0 +1,27 @@ +//===- cpp_link_with_unused_func.mlir ---------------------------*- MLIR -*-===// +// +// This file is licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// (c) Copyright 2026 Advanced Micro Devices, Inc. +// +//===----------------------------------------------------------------------===// + +// Test that a func.func carrying link_with that is never called from any core +// produces a warning from aie-assign-core-link-files. 
+ +// RUN: aie-opt --verify-diagnostics --aie-assign-core-link-files %s + +module { + aie.device(npu1_1col) { + %tile_0_2 = aie.tile(0, 2) + + // expected-warning@+1 {{func 'never_called' has link_with but is never called from any core; its .o file will not be linked}} + func.func private @never_called(memref<16xi32>) attributes {link_with = "x.o"} + + %core_0_2 = aie.core(%tile_0_2) { + aie.end + } + } +} diff --git a/test/aiecc/cpp_multi_link_with.mlir b/test/aiecc/cpp_multi_link_with.mlir new file mode 100644 index 00000000000..db265d3034f --- /dev/null +++ b/test/aiecc/cpp_multi_link_with.mlir @@ -0,0 +1,62 @@ +//===- cpp_multi_link_with.mlir --------------------------------*- MLIR -*-===// +// +// This file is licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Copyright (C) 2026, Advanced Micro Devices, Inc. +// +//===----------------------------------------------------------------------===// + +// Test that one core calling two func.func declarations each with a distinct +// link_with attribute produces two INPUT() lines in the ldscript and two +// _include _file lines in the BCF. 
+ +// RUN: aie-opt --aie-assign-core-link-files %s | FileCheck %s --check-prefix=OPT +// RUN: aie-opt --aie-assign-core-link-files %s | aie-translate --aie-generate-ldscript --tilecol=0 --tilerow=2 | FileCheck %s --check-prefix=LDSCRIPT +// RUN: aie-opt --aie-assign-core-link-files %s | aie-translate --aie-generate-bcf --tilecol=0 --tilerow=2 | FileCheck %s --check-prefix=BCF + +// OPT: link_files = ["kernelA.o", "kernelB.o"] + +// LDSCRIPT: INPUT(kernelA.o) +// LDSCRIPT: INPUT(kernelB.o) + +// BCF: _include _file kernelA.o +// BCF: _include _file kernelB.o + +module { + aie.device(npu1_1col) { + %tile_0_0 = aie.tile(0, 0) + %tile_0_2 = aie.tile(0, 2) + + aie.objectfifo @of_in(%tile_0_0, {%tile_0_2}, 2 : i32) : !aie.objectfifo> + aie.objectfifo @of_out(%tile_0_2, {%tile_0_0}, 2 : i32) : !aie.objectfifo> + + func.func private @kernelA(memref<16xi32>) attributes {link_with = "kernelA.o"} + func.func private @kernelB(memref<16xi32>) attributes {link_with = "kernelB.o"} + + %core_0_2 = aie.core(%tile_0_2) { + %subview_in = aie.objectfifo.acquire @of_in(Consume, 1) : !aie.objectfifosubview> + %elem_in = aie.objectfifo.subview.access %subview_in[0] : !aie.objectfifosubview> -> memref<16xi32> + + %subview_out = aie.objectfifo.acquire @of_out(Produce, 1) : !aie.objectfifosubview> + %elem_out = aie.objectfifo.subview.access %subview_out[0] : !aie.objectfifosubview> -> memref<16xi32> + + func.call @kernelA(%elem_in) : (memref<16xi32>) -> () + func.call @kernelB(%elem_out) : (memref<16xi32>) -> () + + aie.objectfifo.release @of_in(Consume, 1) + aie.objectfifo.release @of_out(Produce, 1) + aie.end + } + + aie.runtime_sequence(%in : memref<16xi32>, %out : memref<16xi32>) { + %c0 = arith.constant 0 : i64 + %c1 = arith.constant 1 : i64 + %c16 = arith.constant 16 : i64 + aiex.npu.dma_memcpy_nd(%out[%c0,%c0,%c0,%c0][%c1,%c1,%c1,%c16][%c0,%c0,%c0,%c1]) {metadata = @of_out, id = 1 : i64} : memref<16xi32> + 
aiex.npu.dma_memcpy_nd(%in[%c0,%c0,%c0,%c0][%c1,%c1,%c1,%c16][%c0,%c0,%c0,%c1]) {metadata = @of_in, id = 0 : i64, issue_token = true} : memref<16xi32> + aiex.npu.dma_wait {symbol = @of_out} + } + } +} diff --git a/test/npu-xrt/add_one_func_link_with_chess/add_one_kernel.cc b/test/npu-xrt/add_one_func_link_with_chess/add_one_kernel.cc new file mode 100644 index 00000000000..8e12df48812 --- /dev/null +++ b/test/npu-xrt/add_one_func_link_with_chess/add_one_kernel.cc @@ -0,0 +1,28 @@ +//===- add_one_kernel.cc -----------------------------------------*- C++ +//-*-===// +// +// This file is licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Copyright (C) 2026, Advanced Micro Devices, Inc. +// +//===----------------------------------------------------------------------===// +// +// External AIE kernel compiled with xchesscc_wrapper and linked via func-level +// link_with on func.func. Increments every element of a buffer by 1. +// +//===----------------------------------------------------------------------===// + +#define NOCPP + +#include + +extern "C" { + +void add_one(int32_t *__restrict in, int32_t *__restrict out, int32_t n) { + for (int32_t i = 0; i < n; i++) + out[i] = in[i] + 1; +} + +} // extern "C" diff --git a/test/npu-xrt/add_one_func_link_with_chess/aie.mlir b/test/npu-xrt/add_one_func_link_with_chess/aie.mlir new file mode 100644 index 00000000000..03e67966a8e --- /dev/null +++ b/test/npu-xrt/add_one_func_link_with_chess/aie.mlir @@ -0,0 +1,60 @@ +//===- aie.mlir ------------------------------------------------*- MLIR -*-===// +// +// This file is licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Copyright (C) 2026, Advanced Micro Devices, Inc. 
+// +//===----------------------------------------------------------------------===// +// +// End-to-end test for func-level link_with (Chess/xbridge backend). +// +// A func.func declaration carries {link_with = "add_one_kernel.o"}. The +// aie-assign-core-link-files pass (run inside aiecc) traces the CallOp inside +// the core and populates the core's link_files attribute, which the BCF emitter +// turns into _include _file directives consumed by xbridge. +// +//===----------------------------------------------------------------------===// + +module { + aie.device(NPUDEVICE) { + %tile_0_0 = aie.tile(0, 0) + %tile_0_2 = aie.tile(0, 2) + + aie.objectfifo @of_in(%tile_0_0, {%tile_0_2}, 2 : i32) : !aie.objectfifo> + aie.objectfifo @of_out(%tile_0_2, {%tile_0_0}, 2 : i32) : !aie.objectfifo> + + // func-level link_with: the kernel .o is declared here, not on aie.core. + func.func private @add_one(memref<8xi32>, memref<8xi32>, i32) attributes {link_with = "add_one_kernel.o"} + + aie.core(%tile_0_2) { + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %c8 = arith.constant 8 : index + %n = arith.constant 8 : i32 + + scf.for %i = %c0 to %c8 step %c1 { + %sub_in = aie.objectfifo.acquire @of_in(Consume, 1) : !aie.objectfifosubview> + %elem_in = aie.objectfifo.subview.access %sub_in[0] : !aie.objectfifosubview> -> memref<8xi32> + %sub_out = aie.objectfifo.acquire @of_out(Produce, 1) : !aie.objectfifosubview> + %elem_out = aie.objectfifo.subview.access %sub_out[0] : !aie.objectfifosubview> -> memref<8xi32> + + func.call @add_one(%elem_in, %elem_out, %n) : (memref<8xi32>, memref<8xi32>, i32) -> () + + aie.objectfifo.release @of_in(Consume, 1) + aie.objectfifo.release @of_out(Produce, 1) + } + aie.end + } + + aie.runtime_sequence(%in : memref<64xi32>, %buf : memref<32xi32>, %out : memref<64xi32>) { + %c0 = arith.constant 0 : i64 + %c1 = arith.constant 1 : i64 + %c64 = arith.constant 64 : i64 + 
aiex.npu.dma_memcpy_nd(%out[%c0,%c0,%c0,%c0][%c1,%c1,%c1,%c64][%c0,%c0,%c0,%c1]) {metadata = @of_out, id = 1 : i64} : memref<64xi32> + aiex.npu.dma_memcpy_nd(%in[%c0,%c0,%c0,%c0][%c1,%c1,%c1,%c64][%c0,%c0,%c0,%c1]) {metadata = @of_in, id = 0 : i64, issue_token = true} : memref<64xi32> + aiex.npu.dma_wait {symbol = @of_out} + } + } +} diff --git a/test/npu-xrt/add_one_func_link_with_chess/run.lit b/test/npu-xrt/add_one_func_link_with_chess/run.lit new file mode 100644 index 00000000000..656c98d3972 --- /dev/null +++ b/test/npu-xrt/add_one_func_link_with_chess/run.lit @@ -0,0 +1,22 @@ +// Copyright (C) 2026, Advanced Micro Devices, Inc. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// REQUIRES: ryzen_ai, chess +// +// End-to-end test for func-level link_with using the Chess/xbridge backend. +// +// The kernel is compiled to a .o by xchesscc_wrapper, then linked via func-level +// link_with on the func.func declaration. aiecc (C++ driver) runs the +// aie-assign-core-link-files pass, which traces the func::CallOp inside the +// core and populates link_files on the CoreOp, which the BCF emitter turns into +// _include _file directives consumed by xbridge. 
+// +// RUN: cp %S/aie.mlir aie_arch.mlir +// RUN: %run_on_npu1% sed 's/NPUDEVICE/npu1_1col/g' -i aie_arch.mlir +// RUN: %run_on_npu2% sed 's/NPUDEVICE/npu2_1col/g' -i aie_arch.mlir +// RUN: %run_on_npu1% xchesscc_wrapper aie2 -I %aietools/include -c %S/add_one_kernel.cc -o ./add_one_kernel.o +// RUN: %run_on_npu2% xchesscc_wrapper aie2p -I %aietools/include -c %S/add_one_kernel.cc -o ./add_one_kernel.o +// RUN: aiecc --xchesscc --xbridge --aie-generate-xclbin --xclbin-name=aie.xclbin --aie-generate-npu-insts --npu-insts-name=insts.bin ./aie_arch.mlir +// RUN: clang %S/test.cpp -o test.exe -std=c++17 -Wall %xrt_flags -lrt -lstdc++ %test_utils_flags +// RUN: %run_on_npu1% ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.bin +// RUN: %run_on_npu2% ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.bin diff --git a/test/npu-xrt/add_one_func_link_with_chess/test.cpp b/test/npu-xrt/add_one_func_link_with_chess/test.cpp new file mode 100644 index 00000000000..2d6de2f4cd1 --- /dev/null +++ b/test/npu-xrt/add_one_func_link_with_chess/test.cpp @@ -0,0 +1,122 @@ +//===- test.cpp -------------------------------------------------*- C++ -*-===// +// +// This file is licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Copyright (C) 2026, Advanced Micro Devices, Inc. +// +//===----------------------------------------------------------------------===// +// +// Host test for add_one_func_link_with_chess. +// Sends 64 i32 values (1..64) through the AIE core; each is incremented by 1 +// externally by add_one_kernel.o, linked via func-level link_with. +// Expected output: 2..65. 
+// +//===----------------------------------------------------------------------===// + +#include +#include +#include +#include +#include +#include +#include + +#include "cxxopts.hpp" +#include "test_utils.h" +#include "xrt/xrt_bo.h" +#include "xrt/xrt_device.h" +#include "xrt/xrt_kernel.h" + +constexpr int IN_SIZE = 64; +constexpr int OUT_SIZE = 64; + +int main(int argc, const char *argv[]) { + cxxopts::Options options("add_one_func_link_with_chess"); + test_utils::add_default_options(options); + + cxxopts::ParseResult vm; + test_utils::parse_options(argc, argv, options, vm); + + std::vector instr_v = + test_utils::load_instr_binary(vm["instr"].as()); + + int verbosity = vm["verbosity"].as(); + if (verbosity >= 1) + std::cout << "Sequence instr count: " << instr_v.size() << "\n"; + + unsigned int device_index = 0; + auto device = xrt::device(device_index); + + if (verbosity >= 1) + std::cout << "Loading xclbin: " << vm["xclbin"].as() << "\n"; + auto xclbin = xrt::xclbin(vm["xclbin"].as()); + + std::string Node = vm["kernel"].as(); + auto xkernels = xclbin.get_kernels(); + auto xkernel = *std::find_if(xkernels.begin(), xkernels.end(), + [Node](xrt::xclbin::kernel &k) { + return k.get_name().rfind(Node, 0) == 0; + }); + auto kernelName = xkernel.get_name(); + + device.register_xclbin(xclbin); + xrt::hw_context context(device, xclbin.get_uuid()); + auto kernel = xrt::kernel(context, kernelName); + + auto bo_instr = xrt::bo(device, instr_v.size() * sizeof(int), + XCL_BO_FLAGS_CACHEABLE, kernel.group_id(1)); + auto bo_inA = xrt::bo(device, IN_SIZE * sizeof(int32_t), + XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(3)); + // bo_inB corresponds to the unused %buf memref in the runtime_sequence; it + // is passed to satisfy the kernel argument count but is never read by the + // device kernel, so no initialization or sync is needed. 
+ auto bo_inB = xrt::bo(device, IN_SIZE * sizeof(int32_t), + XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(4)); + auto bo_out = xrt::bo(device, OUT_SIZE * sizeof(int32_t), + XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(5)); + + // Fill input: values 1..64 + uint32_t *bufInA = bo_inA.map(); + for (int i = 0; i < IN_SIZE; i++) + bufInA[i] = i + 1; + + void *bufInstr = bo_instr.map(); + memcpy(bufInstr, instr_v.data(), instr_v.size() * sizeof(int)); + + bo_instr.sync(XCL_BO_SYNC_BO_TO_DEVICE); + bo_inA.sync(XCL_BO_SYNC_BO_TO_DEVICE); + + if (verbosity >= 1) + std::cout << "Running Kernel.\n"; + unsigned int opcode = 3; + auto run = kernel(opcode, bo_instr, instr_v.size(), bo_inA, bo_inB, bo_out); + ert_cmd_state r = run.wait(); + if (r != ERT_CMD_STATE_COMPLETED) { + std::cout << "Kernel did not complete. Returned status: " << r << "\n"; + return 1; + } + + bo_out.sync(XCL_BO_SYNC_BO_FROM_DEVICE); + uint32_t *bufOut = bo_out.map(); + + int errors = 0; + for (int i = 0; i < OUT_SIZE; i++) { + uint32_t expected = i + 2; // input i+1, add_one adds 1 → i+2 + if (bufOut[i] != expected) { + std::cout << "Error at [" << i << "]: got " << bufOut[i] << ", expected " + << expected << "\n"; + errors++; + } else if (verbosity >= 1) { + std::cout << "OK [" << i << "]: " << bufOut[i] << "\n"; + } + } + + if (!errors) { + std::cout << "\nPASS!\n\n"; + return 0; + } + std::cout << "\nfailed.\n\n"; + return 1; +} diff --git a/test/npu-xrt/add_one_func_link_with_peano/add_one_kernel.cc b/test/npu-xrt/add_one_func_link_with_peano/add_one_kernel.cc new file mode 100644 index 00000000000..1e28d014233 --- /dev/null +++ b/test/npu-xrt/add_one_func_link_with_peano/add_one_kernel.cc @@ -0,0 +1,29 @@ +//===- add_one_kernel.cc -----------------------------------------*- C++ +//-*-===// +// +// This file is licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Copyright (C) 2026, Advanced Micro Devices, Inc. +// +//===----------------------------------------------------------------------===// +// +// External AIE kernel: add 1 to every element of a buffer. +// Compiled to add_one_kernel.o and linked via func-level link_with on +// func.func. +// +//===----------------------------------------------------------------------===// + +#define NOCPP + +#include + +extern "C" { + +void add_one(int32_t *__restrict in, int32_t *__restrict out, int32_t n) { + for (int32_t i = 0; i < n; i++) + out[i] = in[i] + 1; +} + +} // extern "C" diff --git a/test/npu-xrt/add_one_func_link_with_peano/aie.mlir b/test/npu-xrt/add_one_func_link_with_peano/aie.mlir new file mode 100644 index 00000000000..576104bbd5b --- /dev/null +++ b/test/npu-xrt/add_one_func_link_with_peano/aie.mlir @@ -0,0 +1,62 @@ +//===- aie.mlir ------------------------------------------------*- MLIR -*-===// +// +// This file is licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Copyright (C) 2026, Advanced Micro Devices, Inc. +// +//===----------------------------------------------------------------------===// +// +// End-to-end test for func-level link_with (Peano/lld backend). +// +// A func.func declaration carries {link_with = "add_one_kernel.o"}. The +// aie-assign-core-link-files pass (run inside aiecc) traces the CallOp inside +// the core and populates the core's link_files attribute, which the ldscript +// emitter turns into INPUT() directives. The Peano copy loop copies the .o +// to the .prj directory so lld can find it. 
+// +//===----------------------------------------------------------------------===// + +module { + aie.device(NPUDEVICE) { + %tile_0_0 = aie.tile(0, 0) + %tile_0_2 = aie.tile(0, 2) + + aie.objectfifo @of_in(%tile_0_0, {%tile_0_2}, 2 : i32) : !aie.objectfifo> + aie.objectfifo @of_out(%tile_0_2, {%tile_0_0}, 2 : i32) : !aie.objectfifo> + + // func-level link_with: the kernel .o is declared here, not on aie.core. + func.func private @add_one(memref<8xi32>, memref<8xi32>, i32) attributes {link_with = "add_one_kernel.o"} + + aie.core(%tile_0_2) { + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %c8 = arith.constant 8 : index + %n = arith.constant 8 : i32 + + scf.for %i = %c0 to %c8 step %c1 { + %sub_in = aie.objectfifo.acquire @of_in(Consume, 1) : !aie.objectfifosubview> + %elem_in = aie.objectfifo.subview.access %sub_in[0] : !aie.objectfifosubview> -> memref<8xi32> + %sub_out = aie.objectfifo.acquire @of_out(Produce, 1) : !aie.objectfifosubview> + %elem_out = aie.objectfifo.subview.access %sub_out[0] : !aie.objectfifosubview> -> memref<8xi32> + + func.call @add_one(%elem_in, %elem_out, %n) : (memref<8xi32>, memref<8xi32>, i32) -> () + + aie.objectfifo.release @of_in(Consume, 1) + aie.objectfifo.release @of_out(Produce, 1) + } + aie.end + } + + aie.runtime_sequence(%in : memref<64xi32>, %buf : memref<32xi32>, %out : memref<64xi32>) { + %c0 = arith.constant 0 : i64 + %c1 = arith.constant 1 : i64 + %c8 = arith.constant 8 : i64 + %c64 = arith.constant 64 : i64 + aiex.npu.dma_memcpy_nd(%out[%c0,%c0,%c0,%c0][%c1,%c1,%c1,%c64][%c0,%c0,%c0,%c1]) {metadata = @of_out, id = 1 : i64} : memref<64xi32> + aiex.npu.dma_memcpy_nd(%in[%c0,%c0,%c0,%c0][%c1,%c1,%c1,%c64][%c0,%c0,%c0,%c1]) {metadata = @of_in, id = 0 : i64, issue_token = true} : memref<64xi32> + aiex.npu.dma_wait {symbol = @of_out} + } + } +} diff --git a/test/npu-xrt/add_one_func_link_with_peano/run.lit b/test/npu-xrt/add_one_func_link_with_peano/run.lit new file mode 100644 index 
00000000000..dab28dd7299 --- /dev/null +++ b/test/npu-xrt/add_one_func_link_with_peano/run.lit @@ -0,0 +1,23 @@ +// Copyright (C) 2026, Advanced Micro Devices, Inc. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// REQUIRES: ryzen_ai, peano +// +// End-to-end test for func-level link_with using the Peano/lld backend. +// +// The kernel is compiled to a .o by peano clang (target derived from the +// device), then linked via func-level link_with on the func.func declaration. +// aiecc (C++ driver) runs the aie-assign-core-link-files pass, which traces +// the func::CallOp inside the core and populates link_files on the CoreOp, +// which the ldscript emitter turns into an INPUT() directive. The Peano +// copy loop copies the .o to the .prj tmpdir so lld can find it. +// +// RUN: cp %S/aie.mlir aie_arch.mlir +// RUN: %run_on_npu1% sed 's/NPUDEVICE/npu1_1col/g' -i aie_arch.mlir +// RUN: %run_on_npu2% sed 's/NPUDEVICE/npu2_1col/g' -i aie_arch.mlir +// RUN: %run_on_npu1% %PEANO_INSTALL_DIR/bin/clang --target=aie2-none-unknown-elf -O2 -c %S/add_one_kernel.cc -o ./add_one_kernel.o +// RUN: %run_on_npu2% %PEANO_INSTALL_DIR/bin/clang --target=aie2p-none-unknown-elf -O2 -c %S/add_one_kernel.cc -o ./add_one_kernel.o +// RUN: aiecc --no-xchesscc --no-xbridge --aie-generate-xclbin --xclbin-name=aie.xclbin --aie-generate-npu-insts --npu-insts-name=insts.bin ./aie_arch.mlir +// RUN: clang %S/test.cpp -o test.exe -std=c++17 -Wall %xrt_flags -lrt -lstdc++ %test_utils_flags +// RUN: %run_on_npu1% ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.bin +// RUN: %run_on_npu2% ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.bin diff --git a/test/npu-xrt/add_one_func_link_with_peano/test.cpp b/test/npu-xrt/add_one_func_link_with_peano/test.cpp new file mode 100644 index 00000000000..c6d843886cd --- /dev/null +++ b/test/npu-xrt/add_one_func_link_with_peano/test.cpp @@ -0,0 +1,122 @@ +//===- test.cpp -------------------------------------------------*- C++ -*-===// +// +// This 
file is licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Copyright (C) 2026, Advanced Micro Devices, Inc. +// +//===----------------------------------------------------------------------===// +// +// Host test for add_one_func_link_with_peano. +// Sends 64 i32 values (1..64) through the AIE core; each is incremented by 1 +// externally by add_one_kernel.o, linked via func-level link_with. +// Expected output: 2..65. +// +//===----------------------------------------------------------------------===// + +#include +#include +#include +#include +#include +#include +#include + +#include "cxxopts.hpp" +#include "test_utils.h" +#include "xrt/xrt_bo.h" +#include "xrt/xrt_device.h" +#include "xrt/xrt_kernel.h" + +constexpr int IN_SIZE = 64; +constexpr int OUT_SIZE = 64; + +int main(int argc, const char *argv[]) { + cxxopts::Options options("add_one_func_link_with_peano"); + test_utils::add_default_options(options); + + cxxopts::ParseResult vm; + test_utils::parse_options(argc, argv, options, vm); + + std::vector instr_v = + test_utils::load_instr_binary(vm["instr"].as()); + + int verbosity = vm["verbosity"].as(); + if (verbosity >= 1) + std::cout << "Sequence instr count: " << instr_v.size() << "\n"; + + unsigned int device_index = 0; + auto device = xrt::device(device_index); + + if (verbosity >= 1) + std::cout << "Loading xclbin: " << vm["xclbin"].as() << "\n"; + auto xclbin = xrt::xclbin(vm["xclbin"].as()); + + std::string Node = vm["kernel"].as(); + auto xkernels = xclbin.get_kernels(); + auto xkernel = *std::find_if(xkernels.begin(), xkernels.end(), + [Node](xrt::xclbin::kernel &k) { + return k.get_name().rfind(Node, 0) == 0; + }); + auto kernelName = xkernel.get_name(); + + device.register_xclbin(xclbin); + xrt::hw_context context(device, xclbin.get_uuid()); + auto kernel = xrt::kernel(context, kernelName); + + auto bo_instr = 
xrt::bo(device, instr_v.size() * sizeof(int), + XCL_BO_FLAGS_CACHEABLE, kernel.group_id(1)); + auto bo_inA = xrt::bo(device, IN_SIZE * sizeof(int32_t), + XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(3)); + // bo_inB corresponds to the unused %buf memref in the runtime_sequence; it + // is passed to satisfy the kernel argument count but is never read by the + // device kernel, so no initialization or sync is needed. + auto bo_inB = xrt::bo(device, IN_SIZE * sizeof(int32_t), + XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(4)); + auto bo_out = xrt::bo(device, OUT_SIZE * sizeof(int32_t), + XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(5)); + + // Fill input: values 1..64 + uint32_t *bufInA = bo_inA.map(); + for (int i = 0; i < IN_SIZE; i++) + bufInA[i] = i + 1; + + void *bufInstr = bo_instr.map(); + memcpy(bufInstr, instr_v.data(), instr_v.size() * sizeof(int)); + + bo_instr.sync(XCL_BO_SYNC_BO_TO_DEVICE); + bo_inA.sync(XCL_BO_SYNC_BO_TO_DEVICE); + + if (verbosity >= 1) + std::cout << "Running Kernel.\n"; + unsigned int opcode = 3; + auto run = kernel(opcode, bo_instr, instr_v.size(), bo_inA, bo_inB, bo_out); + ert_cmd_state r = run.wait(); + if (r != ERT_CMD_STATE_COMPLETED) { + std::cout << "Kernel did not complete. 
Returned status: " << r << "\n"; + return 1; + } + + bo_out.sync(XCL_BO_SYNC_BO_FROM_DEVICE); + uint32_t *bufOut = bo_out.map(); + + int errors = 0; + for (int i = 0; i < OUT_SIZE; i++) { + uint32_t expected = i + 2; // input i+1, add_one adds 1 → i+2 + if (bufOut[i] != expected) { + std::cout << "Error at [" << i << "]: got " << bufOut[i] << ", expected " + << expected << "\n"; + errors++; + } else if (verbosity >= 1) { + std::cout << "OK [" << i << "]: " << bufOut[i] << "\n"; + } + } + + if (!errors) { + std::cout << "\nPASS!\n\n"; + return 0; + } + std::cout << "\nfailed.\n\n"; + return 1; +} diff --git a/test/npu-xrt/add_one_scale_func_link_with_chess/add_one_kernel.cc b/test/npu-xrt/add_one_scale_func_link_with_chess/add_one_kernel.cc new file mode 100644 index 00000000000..694ee947ecc --- /dev/null +++ b/test/npu-xrt/add_one_scale_func_link_with_chess/add_one_kernel.cc @@ -0,0 +1,28 @@ +//===- add_one_kernel.cc -----------------------------------------*- C++ +//-*-===// +// +// This file is licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Copyright (C) 2026, Advanced Micro Devices, Inc. +// +//===----------------------------------------------------------------------===// +// +// External AIE kernel: copy input to output, adding 1 to every element. +// Compiled to add_one_kernel.o and linked via func-level link_with. 
+// +//===----------------------------------------------------------------------===// + +#define NOCPP + +#include + +extern "C" { + +void add_one(int32_t *__restrict in, int32_t *__restrict out, int32_t n) { + for (int32_t i = 0; i < n; i++) + out[i] = in[i] + 1; +} + +} // extern "C" diff --git a/test/npu-xrt/add_one_scale_func_link_with_chess/aie.mlir b/test/npu-xrt/add_one_scale_func_link_with_chess/aie.mlir new file mode 100644 index 00000000000..05e2107a73f --- /dev/null +++ b/test/npu-xrt/add_one_scale_func_link_with_chess/aie.mlir @@ -0,0 +1,66 @@ +//===- aie.mlir ------------------------------------------------*- MLIR -*-===// +// +// This file is licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Copyright (C) 2026, Advanced Micro Devices, Inc. +// +//===----------------------------------------------------------------------===// +// +// End-to-end test for func-level link_with with multiple .o files (Chess/xbridge). +// +// Two func.func declarations each carry a distinct link_with attribute. +// aie-assign-core-link-files (run inside aiecc) traces both CallOps inside +// the core and produces link_files = ["add_one_kernel.o", "scale_kernel.o"] +// on the CoreOp. The BCF emitter turns each into an _include _file directive, +// and xbridge links both .o files into the core ELF. +// +//===----------------------------------------------------------------------===// + +module { + aie.device(NPUDEVICE) { + %tile_0_0 = aie.tile(0, 0) + %tile_0_2 = aie.tile(0, 2) + + aie.objectfifo @of_in(%tile_0_0, {%tile_0_2}, 2 : i32) : !aie.objectfifo> + aie.objectfifo @of_out(%tile_0_2, {%tile_0_0}, 2 : i32) : !aie.objectfifo> + + // Two func-level link_withs — each refers to a different .o file. + // aie-assign-core-link-files aggregates both into the core's link_files. 
+ func.func private @add_one(memref<8xi32>, memref<8xi32>, i32) attributes {link_with = "add_one_kernel.o"} + func.func private @scale_by_two(memref<8xi32>, memref<8xi32>, i32) attributes {link_with = "scale_kernel.o"} + + aie.core(%tile_0_2) { + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %c8 = arith.constant 8 : index + %n = arith.constant 8 : i32 + + scf.for %i = %c0 to %c8 step %c1 { + %sub_in = aie.objectfifo.acquire @of_in(Consume, 1) : !aie.objectfifosubview> + %elem_in = aie.objectfifo.subview.access %sub_in[0] : !aie.objectfifosubview> -> memref<8xi32> + %sub_out = aie.objectfifo.acquire @of_out(Produce, 1) : !aie.objectfifosubview> + %elem_out = aie.objectfifo.subview.access %sub_out[0] : !aie.objectfifosubview> -> memref<8xi32> + + // Step 1: add_one_kernel.o — out[i] = in[i] + 1 + func.call @add_one(%elem_in, %elem_out, %n) : (memref<8xi32>, memref<8xi32>, i32) -> () + // Step 2: scale_kernel.o — out[i] = out[i] * 2 (in-place via two-pointer form) + func.call @scale_by_two(%elem_out, %elem_out, %n) : (memref<8xi32>, memref<8xi32>, i32) -> () + + aie.objectfifo.release @of_in(Consume, 1) + aie.objectfifo.release @of_out(Produce, 1) + } + aie.end + } + + aie.runtime_sequence(%in : memref<64xi32>, %buf : memref<32xi32>, %out : memref<64xi32>) { + %c0 = arith.constant 0 : i64 + %c1 = arith.constant 1 : i64 + %c64 = arith.constant 64 : i64 + aiex.npu.dma_memcpy_nd(%out[%c0,%c0,%c0,%c0][%c1,%c1,%c1,%c64][%c0,%c0,%c0,%c1]) {metadata = @of_out, id = 1 : i64} : memref<64xi32> + aiex.npu.dma_memcpy_nd(%in[%c0,%c0,%c0,%c0][%c1,%c1,%c1,%c64][%c0,%c0,%c0,%c1]) {metadata = @of_in, id = 0 : i64, issue_token = true} : memref<64xi32> + aiex.npu.dma_wait {symbol = @of_out} + } + } +} diff --git a/test/npu-xrt/add_one_scale_func_link_with_chess/run.lit b/test/npu-xrt/add_one_scale_func_link_with_chess/run.lit new file mode 100644 index 00000000000..860745d1618 --- /dev/null +++ b/test/npu-xrt/add_one_scale_func_link_with_chess/run.lit @@ -0,0 +1,31 
@@ +// Copyright (C) 2026, Advanced Micro Devices, Inc. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// REQUIRES: ryzen_ai, chess +// +// End-to-end test for func-level link_with with MULTIPLE .o files (Chess/xbridge). +// +// Two func.func declarations each carry a distinct link_with attribute: +// @add_one → add_one_kernel.o +// @scale_by_two → scale_kernel.o +// aie-assign-core-link-files traces both CallOps inside the core and +// produces link_files = ["add_one_kernel.o", "scale_kernel.o"] on the +// CoreOp. The BCF emitter emits an _include _file directive for each, +// and xbridge links both .o files into the core ELF. +// +// The kernel pipeline per tile iteration: +// 1. add_one(in, out, n) — out[i] = in[i] + 1 +// 2. scale_by_two(out, out, n) — out[i] *= 2 (in-place, same buf for in and out) +// Expected output: (input + 1) * 2. +// +// RUN: cp %S/aie.mlir aie_arch.mlir +// RUN: %run_on_npu1% sed 's/NPUDEVICE/npu1_1col/g' -i aie_arch.mlir +// RUN: %run_on_npu2% sed 's/NPUDEVICE/npu2_1col/g' -i aie_arch.mlir +// RUN: %run_on_npu1% xchesscc_wrapper aie2 -I %aietools/include -c %S/add_one_kernel.cc -o ./add_one_kernel.o +// RUN: %run_on_npu2% xchesscc_wrapper aie2p -I %aietools/include -c %S/add_one_kernel.cc -o ./add_one_kernel.o +// RUN: %run_on_npu1% xchesscc_wrapper aie2 -I %aietools/include -c %S/scale_kernel.cc -o ./scale_kernel.o +// RUN: %run_on_npu2% xchesscc_wrapper aie2p -I %aietools/include -c %S/scale_kernel.cc -o ./scale_kernel.o +// RUN: aiecc --xchesscc --xbridge --aie-generate-xclbin --xclbin-name=aie.xclbin --aie-generate-npu-insts --npu-insts-name=insts.bin ./aie_arch.mlir +// RUN: clang %S/test.cpp -o test.exe -std=c++17 -Wall %xrt_flags -lrt -lstdc++ %test_utils_flags +// RUN: %run_on_npu1% ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.bin +// RUN: %run_on_npu2% ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.bin diff --git a/test/npu-xrt/add_one_scale_func_link_with_chess/scale_kernel.cc 
extern "C" {

/// Doubles the first eight elements of `in`, storing the results in `out`.
///
/// This test's aie.mlir calls the kernel with the same buffer for `in` and
/// `out` (in-place scaling), so the pointers must NOT be qualified
/// __restrict: promising the compiler that the two never alias while passing
/// aliased pointers is undefined behaviour. Each output element depends only
/// on the input element at the same index, so in-place use is well defined
/// once the qualifier is gone.
///
/// @param in   source buffer; must hold at least 8 elements.
/// @param out  destination buffer; may be the same buffer as `in`; must hold
///             at least 8 elements.
/// @param n    ignored; the body is fixed to the 8-element tile size.
void scale_by_two(int32_t *in, int32_t *out, int32_t n) {
  // Manually unrolled for n=8: avoids chess sw-pipeline bug (lc=1 for n<9).
  (void)n;
  out[0] = in[0] + in[0];
  out[1] = in[1] + in[1];
  out[2] = in[2] + in[2];
  out[3] = in[3] + in[3];
  out[4] = in[4] + in[4];
  out[5] = in[5] + in[5];
  out[6] = in[6] + in[6];
  out[7] = in[7] + in[7];
}

} // extern "C"
+// +//===----------------------------------------------------------------------===// + +#include +#include +#include +#include +#include +#include +#include + +#include "cxxopts.hpp" +#include "test_utils.h" +#include "xrt/xrt_bo.h" +#include "xrt/xrt_device.h" +#include "xrt/xrt_kernel.h" + +constexpr int IN_SIZE = 64; +constexpr int OUT_SIZE = 64; + +int main(int argc, const char *argv[]) { + cxxopts::Options options("add_one_scale_func_link_with_chess"); + test_utils::add_default_options(options); + + cxxopts::ParseResult vm; + test_utils::parse_options(argc, argv, options, vm); + + std::vector instr_v = + test_utils::load_instr_binary(vm["instr"].as()); + + int verbosity = vm["verbosity"].as(); + if (verbosity >= 1) + std::cout << "Sequence instr count: " << instr_v.size() << "\n"; + + unsigned int device_index = 0; + auto device = xrt::device(device_index); + + if (verbosity >= 1) + std::cout << "Loading xclbin: " << vm["xclbin"].as() << "\n"; + auto xclbin = xrt::xclbin(vm["xclbin"].as()); + + std::string Node = vm["kernel"].as(); + auto xkernels = xclbin.get_kernels(); + auto xkernel = *std::find_if(xkernels.begin(), xkernels.end(), + [Node](xrt::xclbin::kernel &k) { + return k.get_name().rfind(Node, 0) == 0; + }); + auto kernelName = xkernel.get_name(); + + device.register_xclbin(xclbin); + xrt::hw_context context(device, xclbin.get_uuid()); + auto kernel = xrt::kernel(context, kernelName); + + auto bo_instr = xrt::bo(device, instr_v.size() * sizeof(int), + XCL_BO_FLAGS_CACHEABLE, kernel.group_id(1)); + auto bo_inA = xrt::bo(device, IN_SIZE * sizeof(int32_t), + XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(3)); + auto bo_inB = xrt::bo(device, IN_SIZE * sizeof(int32_t), + XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(4)); + auto bo_out = xrt::bo(device, OUT_SIZE * sizeof(int32_t), + XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(5)); + + // Fill input: values 1..64 + uint32_t *bufInA = bo_inA.map(); + for (int i = 0; i < IN_SIZE; i++) + bufInA[i] = i + 1; + + void *bufInstr = 
bo_instr.map(); + memcpy(bufInstr, instr_v.data(), instr_v.size() * sizeof(int)); + + bo_instr.sync(XCL_BO_SYNC_BO_TO_DEVICE); + bo_inA.sync(XCL_BO_SYNC_BO_TO_DEVICE); + + if (verbosity >= 1) + std::cout << "Running Kernel.\n"; + unsigned int opcode = 3; + auto run = kernel(opcode, bo_instr, instr_v.size(), bo_inA, bo_inB, bo_out); + ert_cmd_state r = run.wait(); + if (r != ERT_CMD_STATE_COMPLETED) { + std::cout << "Kernel did not complete. Returned status: " << r << "\n"; + return 1; + } + + bo_out.sync(XCL_BO_SYNC_BO_FROM_DEVICE); + uint32_t *bufOut = bo_out.map(); + + int errors = 0; + for (int i = 0; i < OUT_SIZE; i++) { + // input[i] = i+1; add_one → i+2; scale_by_two → (i+2)*2 + uint32_t expected = (i + 2) * 2; + if (bufOut[i] != expected) { + std::cout << "Error at [" << i << "]: got " << bufOut[i] << ", expected " + << expected << "\n"; + errors++; + } else if (verbosity >= 1) { + std::cout << "OK [" << i << "]: " << bufOut[i] << "\n"; + } + } + + if (!errors) { + std::cout << "\nPASS!\n\n"; + return 0; + } + std::cout << "\nfailed.\n\n"; + return 1; +} diff --git a/test/npu-xrt/add_one_scale_func_link_with_peano/add_one_kernel.cc b/test/npu-xrt/add_one_scale_func_link_with_peano/add_one_kernel.cc new file mode 100644 index 00000000000..694ee947ecc --- /dev/null +++ b/test/npu-xrt/add_one_scale_func_link_with_peano/add_one_kernel.cc @@ -0,0 +1,28 @@ +//===- add_one_kernel.cc -----------------------------------------*- C++ +//-*-===// +// +// This file is licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Copyright (C) 2026, Advanced Micro Devices, Inc. +// +//===----------------------------------------------------------------------===// +// +// External AIE kernel: copy input to output, adding 1 to every element. +// Compiled to add_one_kernel.o and linked via func-level link_with. 
extern "C" {

/// Copies `n` elements from `in` to `out`, adding 1 to each value.
///
/// Elements are processed in ascending index order; a non-positive `n`
/// leaves `out` untouched. The __restrict qualifiers require `in` and `out`
/// to refer to non-overlapping buffers.
void add_one(int32_t *__restrict in, int32_t *__restrict out, int32_t n) {
  const int32_t *src = in;
  int32_t *dst = out;
  for (int32_t remaining = n; remaining > 0; --remaining) {
    *dst = *src + 1;
    ++src;
    ++dst;
  }
}

} // extern "C"
+ func.func private @add_one(memref<8xi32>, memref<8xi32>, i32) attributes {link_with = "add_one_kernel.o"} + func.func private @scale_by_two(memref<8xi32>, memref<8xi32>, i32) attributes {link_with = "scale_kernel.o"} + + aie.core(%tile_0_2) { + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %c8 = arith.constant 8 : index + %n = arith.constant 8 : i32 + + scf.for %i = %c0 to %c8 step %c1 { + %sub_in = aie.objectfifo.acquire @of_in(Consume, 1) : !aie.objectfifosubview> + %elem_in = aie.objectfifo.subview.access %sub_in[0] : !aie.objectfifosubview> -> memref<8xi32> + %sub_out = aie.objectfifo.acquire @of_out(Produce, 1) : !aie.objectfifosubview> + %elem_out = aie.objectfifo.subview.access %sub_out[0] : !aie.objectfifosubview> -> memref<8xi32> + + // Step 1: add_one_kernel.o — out[i] = in[i] + 1 + func.call @add_one(%elem_in, %elem_out, %n) : (memref<8xi32>, memref<8xi32>, i32) -> () + // Step 2: scale_kernel.o — out[i] = out[i] * 2 (in-place via two-pointer form) + func.call @scale_by_two(%elem_out, %elem_out, %n) : (memref<8xi32>, memref<8xi32>, i32) -> () + + aie.objectfifo.release @of_in(Consume, 1) + aie.objectfifo.release @of_out(Produce, 1) + } + aie.end + } + + aie.runtime_sequence(%in : memref<64xi32>, %buf : memref<32xi32>, %out : memref<64xi32>) { + %c0 = arith.constant 0 : i64 + %c1 = arith.constant 1 : i64 + %c8 = arith.constant 8 : i64 + %c64 = arith.constant 64 : i64 + aiex.npu.dma_memcpy_nd(%out[%c0,%c0,%c0,%c0][%c1,%c1,%c1,%c64][%c0,%c0,%c0,%c1]) {metadata = @of_out, id = 1 : i64} : memref<64xi32> + aiex.npu.dma_memcpy_nd(%in[%c0,%c0,%c0,%c0][%c1,%c1,%c1,%c64][%c0,%c0,%c0,%c1]) {metadata = @of_in, id = 0 : i64, issue_token = true} : memref<64xi32> + aiex.npu.dma_wait {symbol = @of_out} + } + } +} diff --git a/test/npu-xrt/add_one_scale_func_link_with_peano/run.lit b/test/npu-xrt/add_one_scale_func_link_with_peano/run.lit new file mode 100644 index 00000000000..a2ff6338849 --- /dev/null +++ 
b/test/npu-xrt/add_one_scale_func_link_with_peano/run.lit @@ -0,0 +1,31 @@ +// Copyright (C) 2026, Advanced Micro Devices, Inc. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// REQUIRES: ryzen_ai, peano +// +// End-to-end test for func-level link_with with MULTIPLE .o files (Peano/lld). +// +// Two func.func declarations each carry a distinct link_with attribute: +// @add_one → add_one_kernel.o +// @scale_by_two → scale_kernel.o +// aie-assign-core-link-files traces both CallOps inside the core and +// produces link_files = ["add_one_kernel.o", "scale_kernel.o"] on the +// CoreOp. The Peano copy loop copies both .o files to the .prj tmpdir so +// lld links them together. +// +// The kernel pipeline per tile iteration: +// 1. add_one(in, out, n) — out[i] = in[i] + 1 +// 2. scale_by_two(out, out, n) — out[i] *= 2 (in-place: same buffer for in and out) +// Expected output: (input + 1) * 2. +// +// RUN: cp %S/aie.mlir aie_arch.mlir +// RUN: %run_on_npu1% sed 's/NPUDEVICE/npu1_1col/g' -i aie_arch.mlir +// RUN: %run_on_npu2% sed 's/NPUDEVICE/npu2_1col/g' -i aie_arch.mlir +// RUN: %run_on_npu1% %PEANO_INSTALL_DIR/bin/clang --target=aie2-none-unknown-elf -O2 -c %S/add_one_kernel.cc -o ./add_one_kernel.o +// RUN: %run_on_npu2% %PEANO_INSTALL_DIR/bin/clang --target=aie2p-none-unknown-elf -O2 -c %S/add_one_kernel.cc -o ./add_one_kernel.o +// RUN: %run_on_npu1% %PEANO_INSTALL_DIR/bin/clang --target=aie2-none-unknown-elf -O2 -c %S/scale_kernel.cc -o ./scale_kernel.o +// RUN: %run_on_npu2% %PEANO_INSTALL_DIR/bin/clang --target=aie2p-none-unknown-elf -O2 -c %S/scale_kernel.cc -o ./scale_kernel.o +// RUN: aiecc --no-xchesscc --no-xbridge --aie-generate-xclbin --xclbin-name=aie.xclbin --aie-generate-npu-insts --npu-insts-name=insts.bin ./aie_arch.mlir +// RUN: clang %S/test.cpp -o test.exe -std=c++17 -Wall %xrt_flags -lrt -lstdc++ %test_utils_flags +// RUN: %run_on_npu1% ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.bin +// RUN: %run_on_npu2% ./test.exe -x 
extern "C" {

/// Writes twice the value of each of the first `n` elements of `in` to the
/// matching position in `out`.
///
/// The pointers are deliberately not __restrict, so `in` and `out` may be
/// the same buffer (in-place doubling). Elements are handled in ascending
/// index order; nothing is written when n <= 0.
void scale_by_two(int32_t *in, int32_t *out, int32_t n) {
  int32_t pos = 0;
  while (pos < n) {
    // Each element is doubled by adding it to itself.
    out[pos] = in[pos] + in[pos];
    ++pos;
  }
}

} // extern "C"
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Copyright (C) 2026, Advanced Micro Devices, Inc. +// +//===----------------------------------------------------------------------===// +// +// Host test for add_one_scale_func_link_with_peano. +// Sends 64 i32 values (1..64) through the AIE core; each is first +// incremented by 1 (add_one_kernel.o) then doubled (scale_kernel.o), +// both linked via func-level link_with. +// Expected output: (i + 2) * 2 for i in 0..63 (i.e. input i+1 → i+2 → +// 2*(i+2)). +// +//===----------------------------------------------------------------------===// + +#include +#include +#include +#include +#include +#include +#include + +#include "cxxopts.hpp" +#include "test_utils.h" +#include "xrt/xrt_bo.h" +#include "xrt/xrt_device.h" +#include "xrt/xrt_kernel.h" + +constexpr int IN_SIZE = 64; +constexpr int OUT_SIZE = 64; + +int main(int argc, const char *argv[]) { + cxxopts::Options options("add_one_scale_func_link_with_peano"); + test_utils::add_default_options(options); + + cxxopts::ParseResult vm; + test_utils::parse_options(argc, argv, options, vm); + + std::vector instr_v = + test_utils::load_instr_binary(vm["instr"].as()); + + int verbosity = vm["verbosity"].as(); + if (verbosity >= 1) + std::cout << "Sequence instr count: " << instr_v.size() << "\n"; + + unsigned int device_index = 0; + auto device = xrt::device(device_index); + + if (verbosity >= 1) + std::cout << "Loading xclbin: " << vm["xclbin"].as() << "\n"; + auto xclbin = xrt::xclbin(vm["xclbin"].as()); + + std::string Node = vm["kernel"].as(); + auto xkernels = xclbin.get_kernels(); + auto xkernel = *std::find_if(xkernels.begin(), xkernels.end(), + [Node](xrt::xclbin::kernel &k) { + return k.get_name().rfind(Node, 0) == 0; + }); + auto kernelName = xkernel.get_name(); + + device.register_xclbin(xclbin); + xrt::hw_context context(device, xclbin.get_uuid()); + auto kernel = xrt::kernel(context, 
kernelName); + + auto bo_instr = xrt::bo(device, instr_v.size() * sizeof(int), + XCL_BO_FLAGS_CACHEABLE, kernel.group_id(1)); + auto bo_inA = xrt::bo(device, IN_SIZE * sizeof(int32_t), + XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(3)); + auto bo_inB = xrt::bo(device, IN_SIZE * sizeof(int32_t), + XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(4)); + auto bo_out = xrt::bo(device, OUT_SIZE * sizeof(int32_t), + XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(5)); + + // Fill input: values 1..64 + uint32_t *bufInA = bo_inA.map(); + for (int i = 0; i < IN_SIZE; i++) + bufInA[i] = i + 1; + + void *bufInstr = bo_instr.map(); + memcpy(bufInstr, instr_v.data(), instr_v.size() * sizeof(int)); + + bo_instr.sync(XCL_BO_SYNC_BO_TO_DEVICE); + bo_inA.sync(XCL_BO_SYNC_BO_TO_DEVICE); + + if (verbosity >= 1) + std::cout << "Running Kernel.\n"; + unsigned int opcode = 3; + auto run = kernel(opcode, bo_instr, instr_v.size(), bo_inA, bo_inB, bo_out); + ert_cmd_state r = run.wait(); + if (r != ERT_CMD_STATE_COMPLETED) { + std::cout << "Kernel did not complete. 
Returned status: " << r << "\n"; + return 1; + } + + bo_out.sync(XCL_BO_SYNC_BO_FROM_DEVICE); + uint32_t *bufOut = bo_out.map(); + + int errors = 0; + for (int i = 0; i < OUT_SIZE; i++) { + // input[i] = i+1; add_one → i+2; scale_by_two → (i+2)*2 + uint32_t expected = (i + 2) * 2; + if (bufOut[i] != expected) { + std::cout << "Error at [" << i << "]: got " << bufOut[i] << ", expected " + << expected << "\n"; + errors++; + } else if (verbosity >= 1) { + std::cout << "OK [" << i << "]: " << bufOut[i] << "\n"; + } + } + + if (!errors) { + std::cout << "\nPASS!\n\n"; + return 0; + } + std::cout << "\nfailed.\n\n"; + return 1; +} diff --git a/test/npu-xrt/bd_chain_repeat_on_memtile/aie2.py b/test/npu-xrt/bd_chain_repeat_on_memtile/aie2.py index 4eaf1db77e6..3dc23da6f0b 100644 --- a/test/npu-xrt/bd_chain_repeat_on_memtile/aie2.py +++ b/test/npu-xrt/bd_chain_repeat_on_memtile/aie2.py @@ -60,7 +60,9 @@ def device_body(): # AIE Core Function declarations passThroughLine = external_func( - "passThroughLine", inputs=[core_chunk_ty, core_chunk_ty, np.int32] + "passThroughLine", + inputs=[core_chunk_ty, core_chunk_ty, np.int32], + link_with="kernel.cc.o", ) ShimTile = tile(0, 0) @@ -120,7 +122,7 @@ def device_body(): for i, compute_tile in enumerate(compute_tiles): def make_core_fn(idx): - @core(compute_tile, "kernel.cc.o") + @core(compute_tile) def core_body(): for _ in range_(sys.maxsize): elemOut = of_join[idx].acquire(ObjectFifoPort.Produce, 1) diff --git a/test/npu-xrt/cascade_flows/aie.mlir b/test/npu-xrt/cascade_flows/aie.mlir index 9fb6d3c6ddb..e89c78cd00b 100644 --- a/test/npu-xrt/cascade_flows/aie.mlir +++ b/test/npu-xrt/cascade_flows/aie.mlir @@ -27,10 +27,10 @@ module { aie.objectfifo @objFifo_out0(%t01, {%t00}, 1 : i32) : !aie.objectfifo> aie.objectfifo.link [@objFifo_out1] -> [@objFifo_out0] ([] []) - func.func private @extern_kernel1() -> () - func.func private @extern_kernel2() -> () - func.func private @extern_kernel3(%b: memref<64xi32>, %size: i32) -> () - + 
func.func private @extern_kernel1() -> () attributes {link_with = "kernel1.o"} + func.func private @extern_kernel2() -> () attributes {link_with = "kernel2.o"} + func.func private @extern_kernel3(%b: memref<64xi32>, %size: i32) -> () attributes {link_with = "kernel3.o"} + %core02 = aie.core(%t03) { %subview0 = aie.objectfifo.acquire @objFifo_in1(Consume, 1) : !aie.objectfifosubview> @@ -39,17 +39,17 @@ module { aie.objectfifo.release @objFifo_in1(Consume, 1) aie.end - } { link_with="kernel1.o" } + } %core13 = aie.core(%t13) { func.call @extern_kernel2() : () -> () aie.end - } { link_with="kernel2.o" } + } %core12 = aie.core(%t12) { %size = arith.constant 64 : i32 - + %subview1 = aie.objectfifo.acquire @objFifo_out1(Produce, 1) : !aie.objectfifosubview> %elem1 = aie.objectfifo.subview.access %subview1[0] : !aie.objectfifosubview> -> memref<64xi32> @@ -57,7 +57,7 @@ module { aie.objectfifo.release @objFifo_out1(Produce, 1) aie.end - } { link_with="kernel3.o" } + } aie.runtime_sequence(%in : memref<64xi32>, %buf : memref<32xi32>, %out : memref<64xi32>) { %c0 = arith.constant 0 : i64 diff --git a/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py b/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py index 4fb63781d22..2221a7bf392 100644 --- a/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py +++ b/test/npu-xrt/dynamic_object_fifo/nested_loops/aie2.py @@ -44,11 +44,13 @@ def device_body(): # AIE Core Function declarations passthrough_10_i32 = external_func( - "passthrough_10_i32", inputs=[tensor_ty, tensor_ty] + "passthrough_10_i32", + inputs=[tensor_ty, tensor_ty], + link_with="kernel.o", ) # Set up compute tiles - @core(ComputeTile, "kernel.o") + @core(ComputeTile) def core_body(): for _ in range_(5): elemIn = of_in.acquire(ObjectFifoPort.Consume, 1) diff --git a/test/npu-xrt/dynamic_object_fifo/ping_pong/aie2.py b/test/npu-xrt/dynamic_object_fifo/ping_pong/aie2.py index f79875a017e..299370178c0 100644 --- a/test/npu-xrt/dynamic_object_fifo/ping_pong/aie2.py 
+++ b/test/npu-xrt/dynamic_object_fifo/ping_pong/aie2.py @@ -42,11 +42,13 @@ def device_body(): # AIE Core Function declarations passthrough_64_i32 = external_func( - "passthrough_64_i32", inputs=[tensor_ty, tensor_ty] + "passthrough_64_i32", + inputs=[tensor_ty, tensor_ty], + link_with="kernel.o", ) # Set up compute tiles - @core(ComputeTile, "kernel.o") + @core(ComputeTile) def core_body(): for _ in range_(sys.maxsize): elemOut = of_out.acquire(ObjectFifoPort.Produce, 1) diff --git a/test/npu-xrt/dynamic_object_fifo/reduction/aie2.py b/test/npu-xrt/dynamic_object_fifo/reduction/aie2.py index 2b9e77194bf..ffd36c9f6aa 100644 --- a/test/npu-xrt/dynamic_object_fifo/reduction/aie2.py +++ b/test/npu-xrt/dynamic_object_fifo/reduction/aie2.py @@ -43,10 +43,12 @@ def device_body(): of_out = object_fifo("out", ComputeTile, ShimTile, 2, tile_ty) # AIE Core Function declarations - add_10_i32 = external_func("add_10_i32", inputs=[tile_ty, tile_ty, tile_ty]) + add_10_i32 = external_func( + "add_10_i32", inputs=[tile_ty, tile_ty, tile_ty], link_with="kernel.o" + ) # Set up compute tiles - @core(ComputeTile, "kernel.o") + @core(ComputeTile) def core_body(): for _ in range_(sys.maxsize): elemOut = of_out.acquire(ObjectFifoPort.Produce, 1) diff --git a/test/npu-xrt/dynamic_object_fifo/sliding_window/aie2.py b/test/npu-xrt/dynamic_object_fifo/sliding_window/aie2.py index 68e7f3bcb1e..79709877940 100644 --- a/test/npu-xrt/dynamic_object_fifo/sliding_window/aie2.py +++ b/test/npu-xrt/dynamic_object_fifo/sliding_window/aie2.py @@ -41,12 +41,14 @@ def device_body(): # AIE Core Function declarations add_10_i32 = external_func( - "add_10_i32", inputs=[memRef_ty, memRef_ty, memRef_ty] + "add_10_i32", + inputs=[memRef_ty, memRef_ty, memRef_ty], + link_with="kernel.o", ) # Set up compute tiles - @core(ComputeTile, "kernel.o") + @core(ComputeTile) def core_body(): elemOutPre = of_out.acquire(ObjectFifoPort.Produce, 1) elemInPre = of_in.acquire(ObjectFifoPort.Consume, 1) diff --git 
a/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/aie2.py b/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/aie2.py index 001752be2ad..0e43a7b0b99 100644 --- a/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/aie2.py +++ b/test/npu-xrt/dynamic_object_fifo/sliding_window_conditional/aie2.py @@ -45,11 +45,13 @@ def device_body(): # AIE Core Function declarations add_10_i32 = external_func( - "add_10_i32", inputs=[subtensor_ty, subtensor_ty, subtensor_ty] + "add_10_i32", + inputs=[subtensor_ty, subtensor_ty, subtensor_ty], + link_with="kernel.o", ) # Set up compute tiles - @core(ComputeTile, "kernel.o") + @core(ComputeTile) def core_body(): for i in range_(10): elemOut = of_out.acquire(ObjectFifoPort.Produce, 1) diff --git a/test/npu-xrt/dynamic_object_fifo/two_core_sliding_window/aie2.py b/test/npu-xrt/dynamic_object_fifo/two_core_sliding_window/aie2.py index 5e75e91291f..72fa89fe959 100644 --- a/test/npu-xrt/dynamic_object_fifo/two_core_sliding_window/aie2.py +++ b/test/npu-xrt/dynamic_object_fifo/two_core_sliding_window/aie2.py @@ -45,15 +45,19 @@ def device_body(): # AIE Core Function declarations passthrough_10_i32 = external_func( - "passthrough_10_i32", inputs=[subtensor_ty, subtensor_ty] + "passthrough_10_i32", + inputs=[subtensor_ty, subtensor_ty], + link_with="kernel.o", ) add_10_i32 = external_func( - "add_10_i32", inputs=[subtensor_ty, subtensor_ty, subtensor_ty] + "add_10_i32", + inputs=[subtensor_ty, subtensor_ty, subtensor_ty], + link_with="kernel.o", ) # Set up compute tiles - @core(ComputeTile, "kernel.o") + @core(ComputeTile) def core_body(): for _ in range_(10): elemOut = of_in2.acquire(ObjectFifoPort.Produce, 1) @@ -62,7 +66,7 @@ def core_body(): of_in.release(ObjectFifoPort.Consume, 1) of_in2.release(ObjectFifoPort.Produce, 1) - @core(ComputeTile2, "kernel.o") + @core(ComputeTile2) def core_body(): elemOutPre = of_out.acquire(ObjectFifoPort.Produce, 1) elemInPre = of_in2.acquire(ObjectFifoPort.Consume, 1) diff 
--git a/test/npu-xrt/matrix_multiplication_using_cascade/aie_bufferx4.mlir b/test/npu-xrt/matrix_multiplication_using_cascade/aie_bufferx4.mlir index c614012de9c..570692fe7f0 100644 --- a/test/npu-xrt/matrix_multiplication_using_cascade/aie_bufferx4.mlir +++ b/test/npu-xrt/matrix_multiplication_using_cascade/aie_bufferx4.mlir @@ -8,9 +8,9 @@ module { aie.device(npu1) { // - func.func private @flush_trace() - func.func private @event_0() - func.func private @event_1() + func.func private @flush_trace() attributes {link_with = "mm.o"} + func.func private @event_0() attributes {link_with = "mm.o"} + func.func private @event_1() attributes {link_with = "mm.o"} // %tile_0_0 = aie.tile(0, 0) %tile_0_1 = aie.tile(0, 1) @@ -150,7 +150,7 @@ module { func.call @flush_trace() : () -> () // cf.br ^bb1 - } {link_with = "mm.o"} + } %mem_1_2 = aie.mem(%tile_1_2) { %0 = aie.dma_start(S2MM, 0, ^bb1, ^bb3, repeat_count = 1) ^bb1: // 2 preds: ^bb0, ^bb1 @@ -243,7 +243,7 @@ module { func.call @flush_trace() : () -> () // cf.br ^bb1 - } {link_with = "mm.o"} + } %mem_2_2 = aie.mem(%tile_2_2) { %0 = aie.dma_start(S2MM, 0, ^bb1, ^bb3, repeat_count = 1) ^bb1: // 2 preds: ^bb0, ^bb1 @@ -336,7 +336,7 @@ module { func.call @flush_trace() : () -> () // cf.br ^bb1 - } {link_with = "mm.o"} + } %mem_3_2 = aie.mem(%tile_3_2) { %0 = aie.dma_start(S2MM, 0, ^bb1, ^bb5, repeat_count = 1) ^bb1: // 2 preds: ^bb0, ^bb1 @@ -430,7 +430,7 @@ module { func.call @flush_trace() : () -> () // cf.br ^bb1 - } {link_with = "mm.o"} + } aie.flow(%tile_0_0, DMA : 0, %tile_0_1, DMA : 0) aie.flow(%tile_0_0, DMA : 1, %tile_1_1, DMA : 0) aie.flow(%tile_0_1, DMA : 0, %tile_0_2, DMA : 0) diff --git a/test/npu-xrt/matrix_multiplication_using_cascade/aie_cascadex4.mlir b/test/npu-xrt/matrix_multiplication_using_cascade/aie_cascadex4.mlir index af5deeaf2e8..b7a0a33d43b 100644 --- a/test/npu-xrt/matrix_multiplication_using_cascade/aie_cascadex4.mlir +++ b/test/npu-xrt/matrix_multiplication_using_cascade/aie_cascadex4.mlir @@ 
-7,13 +7,13 @@ module { aie.device(npu1) { - func.func private @matmul_scalar_put_4x1x4_4x4x4_i32_i32(memref<1x4x4x4xi32, 2 : i32>, memref<4x1x4x4xi32, 2 : i32>, memref<4x4x4x4xi32, 2 : i32>) - func.func private @matmul_scalar_put_get_4x1x4_4x4x4_i32_i32(memref<1x4x4x4xi32, 2 : i32>, memref<4x1x4x4xi32, 2 : i32>, memref<4x4x4x4xi32, 2 : i32>) - func.func private @matmul_scalar_get_4x1x4_4x4x4_i32_i32(memref<1x4x4x4xi32, 2 : i32>, memref<4x1x4x4xi32, 2 : i32>, memref<4x4x4x4xi32, 2 : i32>) + func.func private @matmul_scalar_put_4x1x4_4x4x4_i32_i32(memref<1x4x4x4xi32, 2 : i32>, memref<4x1x4x4xi32, 2 : i32>, memref<4x4x4x4xi32, 2 : i32>) attributes {link_with = "mm.o"} + func.func private @matmul_scalar_put_get_4x1x4_4x4x4_i32_i32(memref<1x4x4x4xi32, 2 : i32>, memref<4x1x4x4xi32, 2 : i32>, memref<4x4x4x4xi32, 2 : i32>) attributes {link_with = "mm.o"} + func.func private @matmul_scalar_get_4x1x4_4x4x4_i32_i32(memref<1x4x4x4xi32, 2 : i32>, memref<4x1x4x4xi32, 2 : i32>, memref<4x4x4x4xi32, 2 : i32>) attributes {link_with = "mm.o"} // - func.func private @event_0() - func.func private @event_1() - func.func private @flush_trace() + func.func private @event_0() attributes {link_with = "mm.o"} + func.func private @event_1() attributes {link_with = "mm.o"} + func.func private @flush_trace() attributes {link_with = "mm.o"} // %tile_0_0 = aie.tile(0, 0) %tile_0_1 = aie.tile(0, 1) @@ -116,7 +116,7 @@ module { func.call @flush_trace() : () -> () // cf.br ^bb1 - } {link_with = "mm.o"} + } %mem_1_2 = aie.mem(%tile_1_2) { %0 = aie.dma_start(S2MM, 0, ^bb1, ^bb3, repeat_count = 1) ^bb1: // 2 preds: ^bb0, ^bb1 @@ -167,7 +167,7 @@ module { func.call @flush_trace() : () -> () // cf.br ^bb1 - } {link_with = "mm.o"} + } %mem_2_2 = aie.mem(%tile_2_2) { %0 = aie.dma_start(S2MM, 0, ^bb1, ^bb3, repeat_count = 1) ^bb1: // 2 preds: ^bb0, ^bb1 @@ -218,7 +218,7 @@ module { func.call @flush_trace() : () -> () // cf.br ^bb1 - } {link_with = "mm.o"} + } %mem_3_2 = aie.mem(%tile_3_2) { %0 = 
aie.dma_start(S2MM, 0, ^bb1, ^bb5, repeat_count = 1) ^bb1: // 2 preds: ^bb0, ^bb1 @@ -278,7 +278,7 @@ module { func.call @flush_trace() : () -> () // cf.br ^bb1 - } {link_with = "mm.o"} + } aie.flow(%tile_0_0, DMA : 0, %tile_0_1, DMA : 0) aie.flow(%tile_0_0, DMA : 1, %tile_1_1, DMA : 0) aie.flow(%tile_0_1, DMA : 0, %tile_0_2, DMA : 0) diff --git a/test/npu-xrt/matrix_multiplication_using_cascade/aie_plainx1.mlir b/test/npu-xrt/matrix_multiplication_using_cascade/aie_plainx1.mlir index c2610065706..129529356d2 100644 --- a/test/npu-xrt/matrix_multiplication_using_cascade/aie_plainx1.mlir +++ b/test/npu-xrt/matrix_multiplication_using_cascade/aie_plainx1.mlir @@ -1,10 +1,10 @@ module { aie.device(npu1) { - func.func private @matmul_scalar_4x2x4_4x8x4_i32_i32(memref<2x4x4x8xi32, 2 : i32>, memref<4x2x8x4xi32, 2 : i32>, memref<4x4x4x4xi32, 2 : i32>) + func.func private @matmul_scalar_4x2x4_4x8x4_i32_i32(memref<2x4x4x8xi32, 2 : i32>, memref<4x2x8x4xi32, 2 : i32>, memref<4x4x4x4xi32, 2 : i32>) attributes {link_with = "mm.o"} // - func.func private @event_0() - func.func private @event_1() - func.func private @flush_trace() + func.func private @event_0() attributes {link_with = "mm.o"} + func.func private @event_1() attributes {link_with = "mm.o"} + func.func private @flush_trace() attributes {link_with = "mm.o"} // %tile_0_0 = aie.tile(0, 0) %tile_0_1 = aie.tile(0, 1) @@ -88,7 +88,7 @@ module { func.call @flush_trace() : () -> () // cf.br ^bb1 - } {link_with = "mm.o"} + } aie.flow(%tile_0_0, DMA : 0, %tile_0_1, DMA : 0) aie.flow(%tile_0_0, DMA : 1, %tile_1_1, DMA : 0) aie.flow(%tile_2_1, DMA : 0, %tile_0_0, DMA : 0) diff --git a/test/npu-xrt/matrix_multiplication_using_cascade/aie_plainx4.mlir b/test/npu-xrt/matrix_multiplication_using_cascade/aie_plainx4.mlir index 1c3585ba28c..9fa6fa77d91 100644 --- a/test/npu-xrt/matrix_multiplication_using_cascade/aie_plainx4.mlir +++ b/test/npu-xrt/matrix_multiplication_using_cascade/aie_plainx4.mlir @@ -1,10 +1,10 @@ module { 
aie.device(npu1) { - func.func private @matmul_scalar_2x2x2_4x8x4_i32_i32(memref<2x2x4x8xi32, 2 : i32>, memref<2x2x8x4xi32, 2 : i32>, memref<2x2x4x4xi32, 2 : i32>) + func.func private @matmul_scalar_2x2x2_4x8x4_i32_i32(memref<2x2x4x8xi32, 2 : i32>, memref<2x2x8x4xi32, 2 : i32>, memref<2x2x4x4xi32, 2 : i32>) attributes {link_with = "mm.o"} // - func.func private @event_0() - func.func private @event_1() - func.func private @flush_trace() + func.func private @event_0() attributes {link_with = "mm.o"} + func.func private @event_1() attributes {link_with = "mm.o"} + func.func private @flush_trace() attributes {link_with = "mm.o"} // %tile_0_0 = aie.tile(0, 0) %tile_0_1 = aie.tile(0, 1) @@ -123,7 +123,7 @@ module { func.call @flush_trace() : () -> () // cf.br ^bb1 - } {link_with = "mm.o"} + } %mem_1_2 = aie.mem(%tile_1_2) { %0 = aie.dma_start(S2MM, 0, ^bb1, ^bb5, repeat_count = 1) ^bb1: // 2 preds: ^bb0, ^bb1 @@ -183,7 +183,7 @@ module { func.call @flush_trace() : () -> () // cf.br ^bb1 - } {link_with = "mm.o"} + } %mem_2_2 = aie.mem(%tile_2_2) { %0 = aie.dma_start(S2MM, 0, ^bb1, ^bb5, repeat_count = 1) ^bb1: // 2 preds: ^bb0, ^bb1 @@ -243,7 +243,7 @@ module { func.call @flush_trace() : () -> () // cf.br ^bb1 - } {link_with = "mm.o"} + } %mem_3_2 = aie.mem(%tile_3_2) { %0 = aie.dma_start(S2MM, 0, ^bb1, ^bb5, repeat_count = 1) ^bb1: // 2 preds: ^bb0, ^bb1 @@ -303,7 +303,7 @@ module { func.call @flush_trace() : () -> () // cf.br ^bb1 - } {link_with = "mm.o"} + } aie.flow(%tile_0_0, DMA : 0, %tile_0_1, DMA : 0) aie.flow(%tile_0_0, DMA : 1, %tile_1_1, DMA : 0) aie.flow(%tile_0_1, DMA : 0, %tile_0_2, DMA : 0) diff --git a/test/npu-xrt/matrix_transpose/aie2.py b/test/npu-xrt/matrix_transpose/aie2.py index 455aa3fb73e..9b10709adb3 100644 --- a/test/npu-xrt/matrix_transpose/aie2.py +++ b/test/npu-xrt/matrix_transpose/aie2.py @@ -33,7 +33,9 @@ def device_body(): matrix_ty = np.ndarray[(matrix_size,), np.dtype[np.int32]] passthrough_func = external_func( - "passthrough", 
inputs=[matrix_ty, matrix_ty, np.int32] + "passthrough", + inputs=[matrix_ty, matrix_ty, np.int32], + link_with="kernel.o", ) # Tile declarations as tile[row][col] @@ -46,7 +48,7 @@ def device_body(): fifo_out = object_fifo("fifo_out", tiles[2][0], tiles[0][0], 2, matrix_ty) # Core - @core(tiles[2][0], "kernel.o") + @core(tiles[2][0]) def core_body(): for _ in range_(0, 0xFFFFFFFF): elem_in = fifo_in.acquire(ObjectFifoPort.Consume, 1) diff --git a/test/npu-xrt/nd_memcpy_transforms/aie2.py b/test/npu-xrt/nd_memcpy_transforms/aie2.py index 462d4a7e7f3..4c7e14142d8 100644 --- a/test/npu-xrt/nd_memcpy_transforms/aie2.py +++ b/test/npu-xrt/nd_memcpy_transforms/aie2.py @@ -39,6 +39,7 @@ def device_body(): concat_func = external_func( "concat", inputs=[a_ty, b_ty, c_ty, np.int32, np.int32, np.int32], + link_with="kernel.o", ) # Tile declarations as tile[row][col] @@ -52,7 +53,7 @@ def device_body(): fifo_c = object_fifo("fifo_c", tiles[2][0], tiles[0][0], 2, c_ty) # Core - @core(tiles[2][0], "kernel.o") + @core(tiles[2][0]) def core_body(): for _ in range_(0, 0xFFFFFFFF): elem_c = fifo_c.acquire(ObjectFifoPort.Produce, 1) diff --git a/test/npu-xrt/runtime_cumsum/aie.mlir b/test/npu-xrt/runtime_cumsum/aie.mlir index 841def30859..fff697a574e 100644 --- a/test/npu-xrt/runtime_cumsum/aie.mlir +++ b/test/npu-xrt/runtime_cumsum/aie.mlir @@ -34,8 +34,8 @@ module { aie.device(npu1_1col) { // AIE Core Function declarations - func.func private @sum(memref<16xi32>, memref<16xi32>) - func.func private @zero(memref<16xi32>) + func.func private @sum(memref<16xi32>, memref<16xi32>) attributes {link_with = "sum.o"} + func.func private @zero(memref<16xi32>) attributes {link_with = "sum.o"} %shim_noc_tile_0_0 = aie.tile(0, 0) %mem_tile_0_1 = aie.tile(0, 1) @@ -87,7 +87,7 @@ module { aie.objectfifo.release @out(Produce, 1) } aie.end - } {link_with = "sum.o"} + } aie.runtime_sequence @sequence(%xy: memref<128xi32>) { aiex.npu.rtp_write(@rtp2, 0, 1) diff --git 
a/test/npu-xrt/tile_mapped_read/aie.mlir b/test/npu-xrt/tile_mapped_read/aie.mlir index 63aa6daaed9..50e32c02851 100644 --- a/test/npu-xrt/tile_mapped_read/aie.mlir +++ b/test/npu-xrt/tile_mapped_read/aie.mlir @@ -10,7 +10,7 @@ module { aie.device(npu1_1col) { - func.func private @read_processor_bus(memref<8xi32>, i32, i32, i32) + func.func private @read_processor_bus(memref<8xi32>, i32, i32, i32) attributes {link_with = "kernel.o"} %t00 = aie.tile(0, 0) %t01 = aie.tile(0, 1) %t02 = aie.tile(0, 2) @@ -51,7 +51,7 @@ module { aie.objectfifo.release @objFifo_out1(Produce, 1) } aie.end - } {link_with = "kernel.o"} + } aie.runtime_sequence(%in : memref<64xi32>, %out : memref<64xi32>) { %c0 = arith.constant 0 : i64 diff --git a/test/npu-xrt/two_col/aie.mlir b/test/npu-xrt/two_col/aie.mlir index 500a0e61e7e..199ef36d3db 100644 --- a/test/npu-xrt/two_col/aie.mlir +++ b/test/npu-xrt/two_col/aie.mlir @@ -31,7 +31,7 @@ module { aie.objectfifo @objFifo_out3(%4, {%1}, 2 : i32) : !aie.objectfifo> aie.objectfifo @objFifo_out4(%5, {%1}, 2 : i32) : !aie.objectfifo> aie.objectfifo.link [@objFifo_out1, @objFifo_out2, @objFifo_out3, @objFifo_out4] -> [@objFifo_out0] ([0, 128, 256, 384] []) - func.func private @thresholdLine(%in: memref<128xui8>, %out: memref<128xui8>, %lineWidth: i32, %thresholdValue: i32, %maxValue: i32, %thresholdType: i8) -> () + func.func private @thresholdLine(%in: memref<128xui8>, %out: memref<128xui8>, %lineWidth: i32, %thresholdValue: i32, %maxValue: i32, %thresholdType: i8) -> () attributes {link_with = "threshold.o"} %24 = aie.core(%2) { %c0 = arith.constant 0 : index %c1 = arith.constant 1 : index @@ -55,7 +55,7 @@ module { aie.objectfifo.release @objFifo_out1(Produce, 1) } aie.end - } {link_with = "threshold.o"} + } %34 = aie.core(%3) { %c0 = arith.constant 0 : index %c1 = arith.constant 1 : index @@ -79,7 +79,7 @@ module { aie.objectfifo.release @objFifo_out2(Produce, 1) } aie.end - } {link_with = "threshold.o"} + } %44 = aie.core(%4) { %c0 = 
arith.constant 0 : index %c1 = arith.constant 1 : index @@ -103,7 +103,7 @@ module { aie.objectfifo.release @objFifo_out3(Produce, 1) } aie.end - } {link_with = "threshold.o"} + } %54 = aie.core(%5) { %c0 = arith.constant 0 : index %c1 = arith.constant 1 : index @@ -127,7 +127,7 @@ module { aie.objectfifo.release @objFifo_out4(Produce, 1) } aie.end - } {link_with = "threshold.o"} + } aie.runtime_sequence(%in : memref<2048xi32>, %buf : memref<32xi32>, %out : memref<2048xi32>) { %c0 = arith.constant 0 : i64 %c1 = arith.constant 1 : i64 diff --git a/test/npu-xrt/vec_mul_event_trace/aie.mlir b/test/npu-xrt/vec_mul_event_trace/aie.mlir index 60585538a0e..bce20dcb959 100644 --- a/test/npu-xrt/vec_mul_event_trace/aie.mlir +++ b/test/npu-xrt/vec_mul_event_trace/aie.mlir @@ -21,7 +21,7 @@ module { aie.device(npu1_1col) { // External kernel function declaration - func.func private @vector_scalar_mul_aie_scalar(memref<1024xi32>, memref<1024xi32>, memref<1xi32>, i32) + func.func private @vector_scalar_mul_aie_scalar(memref<1024xi32>, memref<1024xi32>, memref<1xi32>, i32) attributes {link_with = "vector_scalar_mul.o"} // Tile declarations %shim_noc_tile_0_0 = aie.tile(0, 0) @@ -56,7 +56,7 @@ module { aie.objectfifo.release @infactor(Consume, 1) } aie.end - } {link_with = "vector_scalar_mul.o"} + } // ======================================================================== // Trace Packet Flow Configuration diff --git a/test/npu-xrt/vector_scalar_using_dma/aie.mlir b/test/npu-xrt/vector_scalar_using_dma/aie.mlir index 687882d3cc3..ce18c02e45d 100644 --- a/test/npu-xrt/vector_scalar_using_dma/aie.mlir +++ b/test/npu-xrt/vector_scalar_using_dma/aie.mlir @@ -11,7 +11,7 @@ module { aie.device(npu1_1col) { - func.func private @scale_int32(memref<1024xi32>, memref<1024xi32>) + func.func private @scale_int32(memref<1024xi32>, memref<1024xi32>) attributes {link_with = "scale.o"} %tile_0_0 = aie.tile(0, 0) %tile_0_2 = aie.tile(0, 2) @@ -56,7 +56,7 @@ module { } } aie.end - } {link_with = 
"scale.o"} + } aie.shim_dma_allocation @in (%tile_0_0, MM2S, 0) diff --git a/test/parse-trace/test1/aie_test1.mlir b/test/parse-trace/test1/aie_test1.mlir index 2fc666f6660..ae2ffe8acef 100644 --- a/test/parse-trace/test1/aie_test1.mlir +++ b/test/parse-trace/test1/aie_test1.mlir @@ -14,7 +14,7 @@ module { aie.objectfifo @infactor(%shim_noc_tile_0_0, {%tile_0_2}, 2 : i32) : !aie.objectfifo> aie.objectfifo @in(%shim_noc_tile_0_0, {%tile_0_2}, 2 : i32) : !aie.objectfifo> aie.objectfifo @out(%tile_0_2, {%shim_noc_tile_0_0}, 2 : i32) : !aie.objectfifo> - func.func private @vector_scalar_mul_vector(memref<1024xi16>, memref<1024xi16>, memref<1xi32>, i32) + func.func private @vector_scalar_mul_vector(memref<1024xi16>, memref<1024xi16>, memref<1xi32>, i32) attributes {link_with = "scale.o"} %core_0_2 = aie.core(%tile_0_2) { %c0 = arith.constant 0 : index %c9223372036854775807 = arith.constant 9223372036854775807 : index @@ -38,7 +38,7 @@ module { aie.objectfifo.release @infactor(Consume, 1) } aie.end - } {link_with = "scale.o"} + } aie.packet_flow(1) { aie.packet_source<%tile_0_2, Trace : 0> aie.packet_dest<%shim_noc_tile_0_0, DMA : 1> diff --git a/test/parse-trace/test2/aie_test2.mlir b/test/parse-trace/test2/aie_test2.mlir index ef67fbd99d4..c6221fe58a9 100644 --- a/test/parse-trace/test2/aie_test2.mlir +++ b/test/parse-trace/test2/aie_test2.mlir @@ -9,7 +9,7 @@ module { aie.device(npu1_1col) { - func.func private @vector_scalar_mul_vector(memref<1024xi16>, memref<1024xi16>, memref<1xi32>, i32) + func.func private @vector_scalar_mul_vector(memref<1024xi16>, memref<1024xi16>, memref<1xi32>, i32) attributes {link_with = "scale.o"} %shim_noc_tile_0_0 = aie.tile(0, 0) %tile_0_2 = aie.tile(0, 2) aie.objectfifo @in(%shim_noc_tile_0_0, {%tile_0_2}, 2 : i32) : !aie.objectfifo> @@ -38,7 +38,7 @@ module { aie.objectfifo.release @infactor(Consume, 1) } aie.end - } {link_with = "scale.o"} + } aie.packet_flow(1) { aie.packet_source<%tile_0_2, Trace : 0> 
aie.packet_dest<%shim_noc_tile_0_0, DMA : 1> diff --git a/test/python/aie_ops.py b/test/python/aie_ops.py index 1ec19ad6f00..d01456cb6be 100644 --- a/test/python/aie_ops.py +++ b/test/python/aie_ops.py @@ -62,11 +62,11 @@ def coreOp(): # CHECK: %[[VAL1:.*]] = aie.tile(1, 1) # CHECK: %[[VAL2:.*]] = aie.core(%[[VAL1]]) { # CHECK: aie.end -# CHECK: } {dynamic_objfifo_lowering = false, link_with = "test.elf", stack_size = 2048 : i32} +# CHECK: } {dynamic_objfifo_lowering = false, stack_size = 2048 : i32} @construct_and_print_module def coreOpParameters(): t = tile(col=1, row=1) - c = Core(t, link_with="test.elf", dynamic_objfifo_lowering=False, stack_size=2048) + c = Core(t, dynamic_objfifo_lowering=False, stack_size=2048) bb = Block.create_at_start(c.body) with InsertionPoint(bb): end() diff --git a/test/python/code_region.py b/test/python/code_region.py index 2a2786e6986..72740978efd 100644 --- a/test/python/code_region.py +++ b/test/python/code_region.py @@ -20,7 +20,7 @@ # CHECK: module { # CHECK: aie.device(xcve2802) { -# CHECK: func.func private @test_func(memref<8x8xi32>) -> i32 +# CHECK: func.func private @test_func(memref<8x8xi32>) -> i32 attributes {link_with = "test.o"} # CHECK: %{{.*}}tile_0_2 = aie.tile(0, 2) # CHECK: %{{.*}}tile_1_2 = aie.tile(1, 2) # CHECK: %{{.*}}tile_3_3 = aie.tile(3, 3) @@ -38,7 +38,7 @@ # CHECK: aie.objectfifo.release @of1(Consume, 1) # CHECK: } # CHECK: aie.end -# CHECK: } {link_with = "test.o"} +# CHECK: } # CHECK: } # CHECK: } @construct_and_print_module @@ -46,7 +46,10 @@ def codeRegion(): @device(AIEDevice.xcve2802) def device_body(): test_func = external_func( - "test_func", inputs=[T.memref(8, 8, T.i32())], outputs=[np.int32] + "test_func", + inputs=[T.memref(8, 8, T.i32())], + outputs=[np.int32], + link_with="test.o", ) S = tile(0, 2) @@ -57,7 +60,7 @@ def device_body(): of1 = object_fifo("of1", M, N, 2, T.memref(8, 8, T.i32())) object_fifo_link(of0, of1) - @core(N, "test.o") + @core(N) def core_body(): for _ in range_(10): 
elem0 = of1.acquire(ObjectFifoPort.Consume, 1) diff --git a/test/python/core_ext_kernel.py b/test/python/core_ext_kernel.py index 231774ea9e0..3b3dc51ade6 100644 --- a/test/python/core_ext_kernel.py +++ b/test/python/core_ext_kernel.py @@ -23,7 +23,7 @@ # CHECK: module { # CHECK: aie.device(xcve2802) { -# CHECK: func.func private @test_func(memref<8x8xi32>, i32) -> i32 +# CHECK: func.func private @test_func(memref<8x8xi32>, i32) -> i32 attributes {link_with = "test.o"} # CHECK: %{{.*}}tile_0_2 = aie.tile(0, 2) # CHECK: %{{.*}}tile_1_2 = aie.tile(1, 2) # CHECK: %{{.*}}tile_3_3 = aie.tile(3, 3) @@ -42,7 +42,7 @@ # CHECK: aie.objectfifo.release @of1(Consume, 1) # CHECK: } # CHECK: aie.end -# CHECK: } {link_with = "test.o"} +# CHECK: } # CHECK: } # CHECK: } @construct_and_print_module @@ -54,6 +54,7 @@ def core_ext_kernel(): "test_func", inputs=[np.ndarray[(8, 8), np.dtype[np.int32]], np.int32], outputs=[T.i32()], + link_with="test.o", ) S = tile(0, 2) @@ -64,7 +65,7 @@ def core_ext_kernel(): of1 = object_fifo("of1", M, N, 2, T.memref(8, 8, T.i32())) object_fifo_link(of0, of1) - C = Core(N, "test.o") + C = Core(N) bb = Block.create_at_start(C.body) with InsertionPoint(bb): for _ in range_(10): diff --git a/test/python/core_link_with_removed.py b/test/python/core_link_with_removed.py new file mode 100644 index 00000000000..1632bcff1f9 --- /dev/null +++ b/test/python/core_link_with_removed.py @@ -0,0 +1,29 @@ +# Copyright (C) 2026, Advanced Micro Devices, Inc. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +# Verify that Core() raises TypeError when link_with is passed. +# link_with must be set on external_func() declarations instead. 
+ +# RUN: %python %s + +import pytest +from aie.dialects.aie import AIEDevice, Core, Device, end, tile +from aie.ir import Block, InsertionPoint +from aie.extras.context import mlir_mod_ctx + + +def _make_core_with_link_with(): + with mlir_mod_ctx(): + dev = Device(AIEDevice.npu1_1col) + dev_block = Block.create_at_start(dev.body_region) + with InsertionPoint(dev_block): + t = tile(col=0, row=2) + Core(t, link_with="test.o") + + +# Core(link_with=...) must raise TypeError with a message directing users +# to external_func(). +with pytest.raises(TypeError, match="link_with"): + _make_core_with_link_with() + +print("PASS: Core(link_with=...) correctly raises TypeError") diff --git a/test/python/external_func_link_with.py b/test/python/external_func_link_with.py new file mode 100644 index 00000000000..ed72c5ddf3c --- /dev/null +++ b/test/python/external_func_link_with.py @@ -0,0 +1,106 @@ +# Copyright (C) 2026, Advanced Micro Devices, Inc. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +# Verify that the link_with keyword argument on external_func produces the +# expected func.func attribute in the emitted MLIR. + +# RUN: %python %s | FileCheck %s + +import numpy as np +from aie.dialects.aie import AIEDevice, Device, external_func, tile, end +from aie.ir import Block, InsertionPoint + +from util import construct_and_print_module + + +# Single external_func with link_with produces a func.func with the attribute. +# CHECK-LABEL: TEST: single_func_link_with +# CHECK: func.func private @scale({{.*}}) attributes {link_with = "scale.o"} +@construct_and_print_module +def single_func_link_with(): + dev = Device(AIEDevice.npu1_1col) + dev_block = Block.create_at_start(dev.body_region) + with InsertionPoint(dev_block): + external_func( + "scale", + inputs=[np.ndarray[(16,), np.dtype[np.int32]]], + link_with="scale.o", + ) + tile(0, 2) + end() + + +# Two external_func declarations sharing the same object file each carry +# their own link_with attribute. 
+# CHECK-LABEL: TEST: two_funcs_same_object_file +# CHECK-DAG: func.func private @add_one({{.*}}) attributes {link_with = "kernel.o"} +# CHECK-DAG: func.func private @scale_by_two({{.*}}) attributes {link_with = "kernel.o"} +@construct_and_print_module +def two_funcs_same_object_file(): + dev = Device(AIEDevice.npu1_1col) + dev_block = Block.create_at_start(dev.body_region) + with InsertionPoint(dev_block): + external_func( + "add_one", + inputs=[ + np.ndarray[(16,), np.dtype[np.int32]], + np.ndarray[(16,), np.dtype[np.int32]], + ], + link_with="kernel.o", + ) + external_func( + "scale_by_two", + inputs=[ + np.ndarray[(16,), np.dtype[np.int32]], + np.ndarray[(16,), np.dtype[np.int32]], + ], + link_with="kernel.o", + ) + tile(0, 2) + end() + + +# Two external_func declarations pointing to different object files. +# CHECK-LABEL: TEST: two_funcs_different_object_files +# CHECK-DAG: func.func private @add_one({{.*}}) attributes {link_with = "add_one.o"} +# CHECK-DAG: func.func private @scale_by_two({{.*}}) attributes {link_with = "scale_by_two.o"} +@construct_and_print_module +def two_funcs_different_object_files(): + dev = Device(AIEDevice.npu1_1col) + dev_block = Block.create_at_start(dev.body_region) + with InsertionPoint(dev_block): + external_func( + "add_one", + inputs=[ + np.ndarray[(16,), np.dtype[np.int32]], + np.ndarray[(16,), np.dtype[np.int32]], + ], + link_with="add_one.o", + ) + external_func( + "scale_by_two", + inputs=[ + np.ndarray[(16,), np.dtype[np.int32]], + np.ndarray[(16,), np.dtype[np.int32]], + ], + link_with="scale_by_two.o", + ) + tile(0, 2) + end() + + +# external_func without link_with produces no link_with attribute. 
+# CHECK-LABEL: TEST: func_without_link_with +# CHECK: func.func private @helper({{.*}}) +# CHECK-NOT: link_with +@construct_and_print_module +def func_without_link_with(): + dev = Device(AIEDevice.npu1_1col) + dev_block = Block.create_at_start(dev.body_region) + with InsertionPoint(dev_block): + external_func( + "helper", + inputs=[np.ndarray[(16,), np.dtype[np.int32]]], + ) + tile(0, 2) + end() diff --git a/test/python/npu-xrt/test_compile_link.py b/test/python/npu-xrt/test_compile_link.py index fbf8db4dd3f..5b1fcb542e6 100644 --- a/test/python/npu-xrt/test_compile_link.py +++ b/test/python/npu-xrt/test_compile_link.py @@ -12,7 +12,6 @@ import tempfile from aie.utils.compile import compile_cxx_core_function -from aie.utils.compile import merge_object_files SOURCE_STRING1 = """ extern "C" { @@ -23,15 +22,6 @@ } }""" -SOURCE_STRING2 = """ -extern "C" { -void add_two(int* input, int* output, int tile_size) { - for (int i = 0; i < tile_size; i++) { - output[i] = input[i] + 2; - } -} -}""" - def test_compile(): """Test compilation of a C++ source file to an object file.""" @@ -52,44 +42,3 @@ def test_compile(): compile_args=["-DTEST"], ) assert os.path.getsize(output_path) > 0 - - -def test_compile_and_link(): - """Test compilation of two C++ source files and link them.""" - with tempfile.TemporaryDirectory() as tmpdir: - source_path1 = os.path.join(tmpdir, "source1.cpp") - source_path2 = os.path.join(tmpdir, "source2.cpp") - output_path1 = os.path.join(tmpdir, "output1.o") - output_path2 = os.path.join(tmpdir, "output2.o") - combined_output_path = os.path.join(tmpdir, "combined.o") - - with open(source_path1, "w") as f: - f.write(SOURCE_STRING1) - assert os.path.getsize(source_path1) > 0 - - with open(source_path2, "w") as f: - f.write(SOURCE_STRING2) - assert os.path.getsize(source_path2) > 0 - - assert not os.path.exists(output_path1) - compile_cxx_core_function( - source_path=source_path1, - target_arch="aie2", - output_path=output_path1, - ) - assert 
os.path.getsize(output_path1) > 0 - - assert not os.path.exists(output_path2) - compile_cxx_core_function( - source_path=source_path2, - target_arch="aie2", - output_path=output_path2, - ) - assert os.path.getsize(output_path2) > 0 - - assert not os.path.exists(combined_output_path) - merge_object_files( - object_paths=[output_path1, output_path2], - output_path=combined_output_path, - ) - assert os.path.getsize(combined_output_path) > 0 diff --git a/test/python/npu-xrt/test_jit_extern_functions.py b/test/python/npu-xrt/test_jit_extern_functions.py index 049031eb96d..4dac11ed75b 100644 --- a/test/python/npu-xrt/test_jit_extern_functions.py +++ b/test/python/npu-xrt/test_jit_extern_functions.py @@ -272,16 +272,14 @@ def test_include_directories(): # Create a header file header_file = os.path.join(temp_dir, "math_ops.h") with open(header_file, "w") as f: - f.write( - """ + f.write(""" #ifndef MATH_OPS_H #define MATH_OPS_H #define ADD_VALUE 42 #endif -""" - ) +""") # Create input and output tensors input_tensor = iron.randint(0, 100, (1024,), dtype=np.int32, device="npu") @@ -415,18 +413,16 @@ def test_caching_same_source(): np.testing.assert_array_equal(result1, result2) -def test_context_manager(): - """Test ExternalFunction with context manager syntax.""" - # Create input and output tensors +def test_inline_source_string(): + """Test ExternalFunction constructed inline with a source string.""" input_tensor = iron.randint(0, 100, (1024,), dtype=np.int32, device="npu") output_tensor = iron.zeros((1024,), dtype=np.int32, device="npu") initial_tensor = input_tensor.numpy().copy() - # Create ExternalFunction and use it with context manager - with ExternalFunction( - "add_one_context", + add_one = ExternalFunction( + "add_one_inline", source_string="""extern "C" { - void add_one_context(int* input, int* output, int tile_size) { + void add_one_inline(int* input, int* output, int tile_size) { for (int i = 0; i < tile_size; i++) { output[i] = input[i] + 1; } @@ -437,28 
+433,23 @@ def test_context_manager(): np.ndarray[(16,), np.dtype[np.int32]], np.int32, ], - ) as add_one: - # Apply the transform - transform(input_tensor, output_tensor, add_one) + ) + transform(input_tensor, output_tensor, add_one) - # Verify results expected = initial_tensor + 1 - actual = output_tensor.numpy() - np.testing.assert_array_equal(actual, expected) + np.testing.assert_array_equal(output_tensor.numpy(), expected) -def test_context_manager_with_compiler_options(): - """Test ExternalFunction with context manager and compiler options.""" - # Create input and output tensors +def test_inline_source_string_with_compiler_options(): + """Test ExternalFunction constructed inline with compile flags.""" input_tensor = iron.randint(0, 100, (1024,), dtype=np.int32, device="npu") output_tensor = iron.zeros((1024,), dtype=np.int32, device="npu") initial_tensor = input_tensor.numpy().copy() - # Create ExternalFunction with compiler options using context manager - with ExternalFunction( - "add_value_context", + add_value = ExternalFunction( + "add_value_inline", source_string="""extern "C" { - void add_value_context(int* input, int* output, int tile_size) { + void add_value_inline(int* input, int* output, int tile_size) { for (int i = 0; i < tile_size; i++) { output[i] = input[i] + ADD_VALUE; } @@ -470,14 +461,11 @@ def test_context_manager_with_compiler_options(): np.int32, ], compile_flags=["-DADD_VALUE=42"], - ) as add_value: - # Apply the transform - transform(input_tensor, output_tensor, add_value) + ) + transform(input_tensor, output_tensor, add_value) - # Verify results expected = initial_tensor + 42 - actual = output_tensor.numpy() - np.testing.assert_array_equal(actual, expected) + np.testing.assert_array_equal(output_tensor.numpy(), expected) def test_source_file(): diff --git a/test/python/npu-xrt/test_jit_two_extern_functions.py b/test/python/npu-xrt/test_jit_two_extern_functions.py new file mode 100644 index 00000000000..6da87b52add --- /dev/null +++ 
b/test/python/npu-xrt/test_jit_two_extern_functions.py @@ -0,0 +1,162 @@ +# This file is licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# (c) Copyright 2026 AMD Inc. + +# RUN: %run_on_npu1% %pytest %s +# RUN: %run_on_npu2% %pytest %s +# REQUIRES: xrt_python_bindings + +# End-to-end test for the core new capability: a single Worker calling TWO +# distinct ExternalFunction instances, each compiled to its own object file. +# This exercises the full multi-.o JIT pipeline: +# 1. Two separate source compilations (two .o files in the cache dir) +# 2. aie-assign-core-link-files traces both func.call ops and emits +# link_files = ["add_one.o", "scale_by_two.o"] on the CoreOp +# 3. Two INPUT() directives in the linker script (Peano path) +# 4. Successful lld link with both object files +# 5. Core executes both functions: output[i] = (input[i] + 1) * 2 + +import numpy as np +import pytest + +import aie.iron as iron +from aie.iron import ExternalFunction, jit +from aie.iron import ObjectFifo, Worker, Runtime, Program +from aie.iron.placers import SequentialPlacer +from aie.iron.controlflow import range_ + + +@jit(is_placed=False) +def add_then_scale(input, output, add_func, scale_func): + """Apply add_func then scale_func sequentially on each tile.""" + num_elements = np.size(input) + tile_size = add_func.tile_size(0) + num_tiles = num_elements // tile_size + dtype = input.dtype + + tensor_ty = np.ndarray[(num_elements,), np.dtype[dtype]] + tile_ty = np.ndarray[(tile_size,), np.dtype[dtype]] + + of_in = ObjectFifo(tile_ty, name="in") + of_out = ObjectFifo(tile_ty, name="out") + + def core_body(of_in, of_out, add_fn, scale_fn): + for _ in range_(num_tiles): + elem_in = of_in.acquire(1) + elem_out = of_out.acquire(1) + # Apply add_fn first, writing result into elem_out as a temporary, + # then apply scale_fn in-place on elem_out. 
+ add_fn(elem_in, elem_out, tile_size) + scale_fn(elem_out, elem_out, tile_size) + of_in.release(1) + of_out.release(1) + + worker = Worker( + core_body, + fn_args=[of_in.cons(), of_out.prod(), add_func, scale_func], + ) + + rt = Runtime() + with rt.sequence(tensor_ty, tensor_ty) as (A, B): + rt.start(worker) + rt.fill(of_in.prod(), A) + rt.drain(of_out.cons(), B, wait=True) + + return Program(iron.get_current_device(), rt).resolve_program(SequentialPlacer()) + + +def test_two_external_functions_different_objects(): + """ + One core calls two ExternalFunctions compiled to separate object files. + Expected result: output[i] = (input[i] + 1) * 2. + """ + add_one = ExternalFunction( + "add_one", + source_string="""extern "C" { + void add_one(int* in, int* out, int n) { + for (int i = 0; i < n; i++) out[i] = in[i] + 1; + } + }""", + arg_types=[ + np.ndarray[(16,), np.dtype[np.int32]], + np.ndarray[(16,), np.dtype[np.int32]], + np.int32, + ], + ) + + scale_by_two = ExternalFunction( + "scale_by_two", + source_string="""extern "C" { + void scale_by_two(int* in, int* out, int n) { + for (int i = 0; i < n; i++) out[i] = in[i] * 2; + } + }""", + arg_types=[ + np.ndarray[(16,), np.dtype[np.int32]], + np.ndarray[(16,), np.dtype[np.int32]], + np.int32, + ], + ) + + input_tensor = iron.arange(32, dtype=np.int32) + output_tensor = iron.zeros((32,), dtype=np.int32) + + add_then_scale(input_tensor, output_tensor, add_one, scale_by_two) + + expected = (np.arange(32, dtype=np.int32) + 1) * 2 + np.testing.assert_array_equal(output_tensor.numpy(), expected) + + +def test_two_external_functions_same_object(): + """ + One core calls two ExternalFunctions that share the same compiled object + file. The aie-assign-core-link-files pass must deduplicate the .o path + so it appears only once in link_files and is linked exactly once. + Expected result: output[i] = (input[i] + 1) * 2 (same computation, shared .o). + """ + # Both functions come from the same translation unit / object file name. 
+ add_one = ExternalFunction( + "add_one_shared", + object_file_name="shared_kernel.o", + source_string="""extern "C" { + void add_one_shared(int* in, int* out, int n) { + for (int i = 0; i < n; i++) out[i] = in[i] + 1; + } + void scale_by_two_shared(int* in, int* out, int n) { + for (int i = 0; i < n; i++) out[i] = in[i] * 2; + } + }""", + arg_types=[ + np.ndarray[(16,), np.dtype[np.int32]], + np.ndarray[(16,), np.dtype[np.int32]], + np.int32, + ], + ) + + scale_by_two = ExternalFunction( + "scale_by_two_shared", + object_file_name="shared_kernel.o", + source_string="""extern "C" { + void add_one_shared(int* in, int* out, int n) { + for (int i = 0; i < n; i++) out[i] = in[i] + 1; + } + void scale_by_two_shared(int* in, int* out, int n) { + for (int i = 0; i < n; i++) out[i] = in[i] * 2; + } + }""", + arg_types=[ + np.ndarray[(16,), np.dtype[np.int32]], + np.ndarray[(16,), np.dtype[np.int32]], + np.int32, + ], + ) + + input_tensor = iron.arange(32, dtype=np.int32) + output_tensor = iron.zeros((32,), dtype=np.int32) + + add_then_scale(input_tensor, output_tensor, add_one, scale_by_two) + + expected = (np.arange(32, dtype=np.int32) + 1) * 2 + np.testing.assert_array_equal(output_tensor.numpy(), expected) diff --git a/test/python/npu.py b/test/python/npu.py index 666206aa21e..32e8d773c49 100644 --- a/test/python/npu.py +++ b/test/python/npu.py @@ -46,7 +46,9 @@ def my_vector_scalar(module): def device_body(): n_ty = np.ndarray[(n,), np.dtype[np.int32]] N_ty = np.ndarray[(N,), np.dtype[np.int32]] - scale_int32 = external_func("scale_int32", inputs=[n_ty, n_ty]) + scale_int32 = external_func( + "scale_int32", inputs=[n_ty, n_ty], link_with="scale.o" + ) S = tile(0, 0) M = tile(0, 2) @@ -54,7 +56,7 @@ def device_body(): of_in = object_fifo("in", S, M, buffer_depth, n_ty) of_out = object_fifo("out", M, S, buffer_depth, n_ty) - @core(M, "scale.o") + @core(M) def core_body(): # Effective while(1) for _ in range_(0xFFFFFFFF): @@ -117,7 +119,9 @@ def my_matmul(module): def 
device_body(): func_type = "" if vectorized else "scalar_" zero = external_func( - f"zero_{func_type}i16", inputs=[np.ndarray[(m, n), np.dtype[np.int16]]] + f"zero_{func_type}i16", + inputs=[np.ndarray[(m, n), np.dtype[np.int16]]], + link_with="mm.o", ) matmul = external_func( f"matmul_{func_type}i16_i16", @@ -126,6 +130,7 @@ def device_body(): np.ndarray[(k, n), np.dtype[np.int16]], np.ndarray[(m, n), np.dtype[np.int16]], ], + link_with="mm.o", ) S = tile(0, 0) @@ -135,7 +140,7 @@ def device_body(): of_inB = object_fifo("inB", S, M, 2, np.ndarray[(k, n), np.dtype[np.int16]]) of_outC = object_fifo("outC", M, S, 2, np.ndarray[(m, n), np.dtype[np.int16]]) - @core(M, "mm.o") + @core(M) def core_body(): for _ in range_(0xFFFFFFFF): for _ in range_(tiles): @@ -211,7 +216,9 @@ def device_body(): vec64_ty = np.ndarray[(64,), np.dtype[np.uint8]] vec256_ty = np.ndarray[(256,), np.dtype[np.uint8]] rgba2gray_line = external_func( - "rgba2gray_line", inputs=[vec256_ty, vec64_ty, np.int32] + "rgba2gray_line", + inputs=[vec256_ty, vec64_ty, np.int32], + link_with="rgba2gray.cc.o", ) filter2d_line = external_func( "filter2d_line", @@ -223,6 +230,7 @@ def device_body(): np.int32, np.ndarray[(3, 3), np.dtype[np.int16]], ], + link_with="filter2d.cc.o", ) threshold_line = external_func( "threshold_line", @@ -234,9 +242,12 @@ def device_body(): np.int16, np.int8, ], + link_with="threshold.cc.o", ) gray2rgba_line = external_func( - "gray2rgba_line", inputs=[vec64_ty, vec256_ty, np.int32] + "gray2rgba_line", + inputs=[vec64_ty, vec256_ty, np.int32], + link_with="gray2rgba.cc.o", ) add_weighted_line = external_func( "add_weighted_line", @@ -249,6 +260,7 @@ def device_body(): np.int16, np.int8, ], + link_with="addWeighted.cc.o", ) S = tile(0, 0) @@ -271,7 +283,7 @@ def device_body(): OF_4to5 = object_fifo("OF_4to5", T4, T5, 2, vec64_ty) OF_5to5 = object_fifo("OF_5to5", T5, T5, 1, vec256_ty) - @core(T2, "rgba2gray.cc.o") + @core(T2) def core_body(): for _ in range_(36): elem_in = 
inOF_L2L1.acquire(ObjectFifoPort.Consume, 1) @@ -282,7 +294,7 @@ def core_body(): inOF_L2L1.release(ObjectFifoPort.Consume, 1) OF_2to3.release(ObjectFifoPort.Produce, 1) - @core(T3, "filter2d.cc.o") + @core(T3) def core_body(): kernel = memref.alloc((3, 3), T.i16()) v0 = 0 @@ -335,7 +347,7 @@ def core_body(): OF_2to3.release(ObjectFifoPort.Consume, 2) OF_3to4.release(ObjectFifoPort.Produce, 1) - @core(T4, "threshold.cc.o") + @core(T4) def core_body(): v_thr = 10 v_max = 255 @@ -348,7 +360,7 @@ def core_body(): OF_3to4.release(ObjectFifoPort.Consume, 1) OF_4to5.release(ObjectFifoPort.Produce, 1) - @core(T5, "combined_gray2rgba_addWeighted.a") + @core(T5) def core_body(): for _ in range_(36): elem_in = OF_4to5.acquire(ObjectFifoPort.Consume, 1) diff --git a/test/python/trace_utils.py b/test/python/trace_utils.py index 33d1c348de6..9b690d32123 100644 --- a/test/python/trace_utils.py +++ b/test/python/trace_utils.py @@ -49,7 +49,9 @@ def device_body(): # AIE Core Function declarations passThroughLine = external_func( - "passThroughLine", inputs=[memRef_ty, memRef_ty, T.i32()] + "passThroughLine", + inputs=[memRef_ty, memRef_ty, T.i32()], + link_with="passThrough.cc.o", ) # Tile declarations @@ -66,7 +68,7 @@ def device_body(): # Set up compute tiles # Compute tile 2 - @core(ComputeTile2, "passThrough.cc.o") + @core(ComputeTile2) def core_body(): for _ in range_(sys.maxsize): elemOut = of_out.acquire(ObjectFifoPort.Produce, 1) diff --git a/test/unit_tests/aie/12_julia/aie.mlir b/test/unit_tests/aie/12_julia/aie.mlir index ab51d46de49..8e296ee5e1a 100644 --- a/test/unit_tests/aie/12_julia/aie.mlir +++ b/test/unit_tests/aie/12_julia/aie.mlir @@ -23,14 +23,14 @@ aie.device(xcvc1902) { %buf13_1 = aie.buffer(%tile13) { sym_name = "b" } : memref<4096xi32> %lock13_3 = aie.lock(%tile13, 3) { sym_name = "output_lock" } - func.func private @func(%A: memref<2xi32>, %B: memref<4096xi32>) -> () + func.func private @func(%A: memref<2xi32>, %B: memref<4096xi32>) -> () attributes 
{link_with = "kernel.o"} %core13 = aie.core(%tile13) { aie.use_lock(%lock13_3, "Acquire", 1) // acquire func.call @func(%buf13_0, %buf13_1) : (memref<2xi32>, memref<4096xi32>) -> () aie.use_lock(%lock13_3, "Release", 0) // release for write aie.end - } { link_with="kernel.o" } + } } } diff --git a/test/unit_tests/aie/13_julia_fp/aie.mlir b/test/unit_tests/aie/13_julia_fp/aie.mlir index 90bf1c370b8..9af18d571e2 100644 --- a/test/unit_tests/aie/13_julia_fp/aie.mlir +++ b/test/unit_tests/aie/13_julia_fp/aie.mlir @@ -23,14 +23,14 @@ aie.device(xcvc1902) { %lock13_3 = aie.lock(%tile13, 3) { sym_name = "inout_lock" } - func.func private @func(%A: memref<256xf32>, %B: memref<256xf32>) -> () + func.func private @func(%A: memref<256xf32>, %B: memref<256xf32>) -> () attributes {link_with = "kernel.o"} %core13 = aie.core(%tile13) { aie.use_lock(%lock13_3, "Acquire", 1) // acquire func.call @func(%buf13_0, %buf13_1) : (memref<256xf32>, memref<256xf32>) -> () aie.use_lock(%lock13_3, "Release", 0) // release for write aie.end - } { link_with="kernel.o" } + } } } diff --git a/test/unit_tests/aie2/01_precompiled_core_function/aie.mlir b/test/unit_tests/aie2/01_precompiled_core_function/aie.mlir index a962626acd0..f603f512a5e 100644 --- a/test/unit_tests/aie2/01_precompiled_core_function/aie.mlir +++ b/test/unit_tests/aie2/01_precompiled_core_function/aie.mlir @@ -22,7 +22,7 @@ module @test_chesss_01_precompiled_core_function { %lock13_3 = aie.lock(%tile13, 3) { sym_name = "input_lock" } %lock13_5 = aie.lock(%tile13, 5) { sym_name = "output_lock" } - func.func private @func(%A: memref<256xi32>, %B: memref<256xi32>) -> () + func.func private @func(%A: memref<256xi32>, %B: memref<256xi32>) -> () attributes {link_with = "kernel.o"} %core13 = aie.core(%tile13) { aie.use_lock(%lock13_3, "Acquire", 1) // acquire for read(e.g. 
input ping) @@ -31,6 +31,6 @@ module @test_chesss_01_precompiled_core_function { aie.use_lock(%lock13_3, "Release", 0) // release for write aie.use_lock(%lock13_5, "Release", 1) // release for read aie.end - } { link_with="kernel.o" } + } } } diff --git a/test/unit_tests/aie2/03_cascade_core_functions/aie.mlir b/test/unit_tests/aie2/03_cascade_core_functions/aie.mlir index 0c6cb5645f4..7c8b727ec8a 100644 --- a/test/unit_tests/aie2/03_cascade_core_functions/aie.mlir +++ b/test/unit_tests/aie2/03_cascade_core_functions/aie.mlir @@ -25,14 +25,14 @@ module { %lock13_3 = aie.lock(%tile13, 3) { sym_name = "input_lock" } // input buffer lock %lock23_7 = aie.lock(%tile23, 7) { sym_name = "output_lock" } // output buffer lock - func.func private @do_mul(%A: memref<256xi32>) -> () - func.func private @do_mac(%A: memref<256xi32>) -> () + func.func private @do_mul(%A: memref<256xi32>) -> () attributes {link_with = "kernel.o"} + func.func private @do_mac(%A: memref<256xi32>) -> () attributes {link_with = "kernel.o"} %core13 = aie.core(%tile13) { aie.use_lock(%lock13_3, AcquireGreaterEqual, 1) // acquire for read(e.g. 
input ping) func.call @do_mul(%buf13_0) : (memref<256xi32>) -> () aie.end - } { link_with="kernel.o" } + } %core23 = aie.core(%tile23) { // %val1 = arith.constant 7 : i32 @@ -41,6 +41,6 @@ module { func.call @do_mac(%buf23_0) : (memref<256xi32>) -> () aie.use_lock(%lock23_7, Release, 1) // release for read aie.end - } { link_with="kernel.o" } + } } } diff --git a/test/unit_tests/aie2/05_shim_dma_core_function/aie.mlir b/test/unit_tests/aie2/05_shim_dma_core_function/aie.mlir index c98e1743242..f90f5fc16e9 100644 --- a/test/unit_tests/aie2/05_shim_dma_core_function/aie.mlir +++ b/test/unit_tests/aie2/05_shim_dma_core_function/aie.mlir @@ -31,7 +31,7 @@ module @test_chess_05_shim_dma_core_function { %lock_b_read = aie.lock(%t73, 6) %lock_done = aie.lock(%t73, 7) - func.func private @func(%A: memref<16xi32>, %B: memref<16xi32>) -> () + func.func private @func(%A: memref<16xi32>, %B: memref<16xi32>) -> () attributes {link_with = "kernel.o"} %c13 = aie.core(%t73) { @@ -54,7 +54,7 @@ module @test_chess_05_shim_dma_core_function { } aie.end - } { link_with="kernel.o" } + } // Tile DMA %m73 = aie.mem(%t73) { diff --git a/test/unit_tests/aie2/07_shim_dma_core_function_with_loop/aie.mlir b/test/unit_tests/aie2/07_shim_dma_core_function_with_loop/aie.mlir index ce1d0c15c20..900607ab488 100644 --- a/test/unit_tests/aie2/07_shim_dma_core_function_with_loop/aie.mlir +++ b/test/unit_tests/aie2/07_shim_dma_core_function_with_loop/aie.mlir @@ -29,7 +29,7 @@ module @test_chess_04_deprecated_shim_dma_precompiled_kernel{ %lock_b_ping = aie.lock(%t73, 5) // b_ping %lock_b_pong = aie.lock(%t73, 6) // b_pong - func.func private @func(%A: memref<64xi32>, %B: memref<64xi32>, %C: i32) -> () + func.func private @func(%A: memref<64xi32>, %B: memref<64xi32>, %C: i32) -> () attributes {link_with = "kernel.o"} %c13 = aie.core(%t73) { %buffer_size = arith.constant 64 : i32 @@ -59,7 +59,7 @@ module @test_chess_04_deprecated_shim_dma_precompiled_kernel{ } aie.end - } { link_with="kernel.o" } + } // 
Tile DMA %m73 = aie.mem(%t73) { diff --git a/test/unit_tests/chess_compiler_tests/01_precompiled_core_function/aie.mlir b/test/unit_tests/chess_compiler_tests/01_precompiled_core_function/aie.mlir index 790397c0211..bf6a3859a47 100644 --- a/test/unit_tests/chess_compiler_tests/01_precompiled_core_function/aie.mlir +++ b/test/unit_tests/chess_compiler_tests/01_precompiled_core_function/aie.mlir @@ -28,7 +28,7 @@ aie.device(xcvc1902) { %lock13_3 = aie.lock(%tile13, 3) { sym_name = "input_lock" } %lock13_5 = aie.lock(%tile13, 5) { sym_name = "output_lock" } - func.func private @func(%A: memref<256xi32>, %B: memref<256xi32>) -> () + func.func private @func(%A: memref<256xi32>, %B: memref<256xi32>) -> () attributes {link_with = "kernel.o"} %core13 = aie.core(%tile13) { aie.use_lock(%lock13_3, "Acquire", 1) // acquire for read(e.g. input ping) @@ -37,7 +37,7 @@ aie.device(xcvc1902) { aie.use_lock(%lock13_3, "Release", 0) // release for write aie.use_lock(%lock13_5, "Release", 1) // release for read aie.end - } { link_with="kernel.o" } + } } } diff --git a/test/unit_tests/chess_compiler_tests/03_cascade_core_functions/aie.mlir b/test/unit_tests/chess_compiler_tests/03_cascade_core_functions/aie.mlir index ef6dc31097d..e7989b81570 100644 --- a/test/unit_tests/chess_compiler_tests/03_cascade_core_functions/aie.mlir +++ b/test/unit_tests/chess_compiler_tests/03_cascade_core_functions/aie.mlir @@ -29,16 +29,16 @@ aie.device(xcvc1902) { %lock13_3 = aie.lock(%tile13, 3) { sym_name = "input_lock" } // input buffer lock %lock23_7 = aie.lock(%tile23, 7) { sym_name = "output_lock" } // output buffer lock - func.func private @do_mul(%A: memref<256xi32>) -> () - func.func private @do_mac(%A: memref<256xi32>) -> () - + func.func private @do_mul(%A: memref<256xi32>) -> () attributes {link_with = "kernel.o"} + func.func private @do_mac(%A: memref<256xi32>) -> () attributes {link_with = "kernel.o"} + %core13 = aie.core(%tile13) { aie.use_lock(%lock13_3, "Acquire", 1) // acquire for 
read(e.g. input ping) func.call @do_mul(%buf13_0) : (memref<256xi32>) -> () aie.use_lock(%lock13_3, "Release", 0) // release for write aie.end - } { link_with="kernel.o" } - + } + %core23 = aie.core(%tile23) { // %val1 = arith.constant 7 : i32 // %idx1 = arith.constant 0 : index @@ -47,7 +47,7 @@ aie.device(xcvc1902) { func.call @do_mac(%buf23_0) : (memref<256xi32>) -> () aie.use_lock(%lock23_7, "Release", 1) // release for read aie.end - } { link_with="kernel.o" } + } } } diff --git a/test/unit_tests/chess_compiler_tests/05_shim_dma_core_function/aie.mlir b/test/unit_tests/chess_compiler_tests/05_shim_dma_core_function/aie.mlir index 1dcacc1319b..4102158c46f 100644 --- a/test/unit_tests/chess_compiler_tests/05_shim_dma_core_function/aie.mlir +++ b/test/unit_tests/chess_compiler_tests/05_shim_dma_core_function/aie.mlir @@ -35,7 +35,7 @@ aie.device(xcvc1902) { %lock_b_ping = aie.lock(%t73, 5) // b_ping %lock_b_pong = aie.lock(%t73, 6) // b_pong - func.func private @func(%A: memref<256xi32>, %B: memref<256xi32>) -> () + func.func private @func(%A: memref<256xi32>, %B: memref<256xi32>) -> () attributes {link_with = "kernel.o"} %c13 = aie.core(%t73) { @@ -60,7 +60,7 @@ aie.device(xcvc1902) { } aie.end - } { link_with="kernel.o" } + } // Tile DMA %m73 = aie.mem(%t73) { diff --git a/test/unit_tests/chess_compiler_tests/07_shim_dma_core_function_with_loop/aie.mlir b/test/unit_tests/chess_compiler_tests/07_shim_dma_core_function_with_loop/aie.mlir index 46a82f997c8..23a54d6521d 100644 --- a/test/unit_tests/chess_compiler_tests/07_shim_dma_core_function_with_loop/aie.mlir +++ b/test/unit_tests/chess_compiler_tests/07_shim_dma_core_function_with_loop/aie.mlir @@ -38,7 +38,7 @@ aie.device(xcvc1902) { %lock_b_ping = aie.lock(%t73, 5) // b_ping %lock_b_pong = aie.lock(%t73, 6) // b_pong - func.func private @func(%A: memref<64xi32>, %B: memref<64xi32>, %C: i32) -> () + func.func private @func(%A: memref<64xi32>, %B: memref<64xi32>, %C: i32) -> () attributes {link_with = 
"kernel.o"} %c13 = aie.core(%t73) { %buffer_size = arith.constant 64 : i32 @@ -68,7 +68,7 @@ aie.device(xcvc1902) { } aie.end - } { link_with="kernel.o" } + } // Tile DMA %m73 = aie.mem(%t73) { diff --git a/test/unit_tests/chess_compiler_tests_aie2/01_precompiled_core_function/aie.mlir b/test/unit_tests/chess_compiler_tests_aie2/01_precompiled_core_function/aie.mlir index efb4aa07444..18c588dc24f 100644 --- a/test/unit_tests/chess_compiler_tests_aie2/01_precompiled_core_function/aie.mlir +++ b/test/unit_tests/chess_compiler_tests_aie2/01_precompiled_core_function/aie.mlir @@ -27,7 +27,7 @@ module @test_chesss_01_precompiled_core_function { %lock13_3 = aie.lock(%tile13, 3) { sym_name = "input_lock" } %lock13_5 = aie.lock(%tile13, 5) { sym_name = "output_lock" } - func.func private @func(%A: memref<256xi32>, %B: memref<256xi32>) -> () + func.func private @func(%A: memref<256xi32>, %B: memref<256xi32>) -> () attributes {link_with = "kernel.o"} %core13 = aie.core(%tile13) { aie.use_lock(%lock13_3, "Acquire", 1) // acquire for read(e.g. 
input ping) @@ -36,6 +36,6 @@ module @test_chesss_01_precompiled_core_function { aie.use_lock(%lock13_3, "Release", 0) // release for write aie.use_lock(%lock13_5, "Release", 1) // release for read aie.end - } { link_with="kernel.o" } + } } } diff --git a/test/unit_tests/chess_compiler_tests_aie2/03_cascade_core_functions/aie.mlir b/test/unit_tests/chess_compiler_tests_aie2/03_cascade_core_functions/aie.mlir index d1234e54bda..4013037f04f 100644 --- a/test/unit_tests/chess_compiler_tests_aie2/03_cascade_core_functions/aie.mlir +++ b/test/unit_tests/chess_compiler_tests_aie2/03_cascade_core_functions/aie.mlir @@ -28,14 +28,14 @@ module { %lock13_3 = aie.lock(%tile13, 3) { sym_name = "input_lock" } // input buffer lock %lock23_7 = aie.lock(%tile23, 7) { sym_name = "output_lock" } // output buffer lock - func.func private @do_mul(%A: memref<256xi32>) -> () - func.func private @do_mac(%A: memref<256xi32>) -> () + func.func private @do_mul(%A: memref<256xi32>) -> () attributes {link_with = "kernel.o"} + func.func private @do_mac(%A: memref<256xi32>) -> () attributes {link_with = "kernel.o"} %core13 = aie.core(%tile13) { aie.use_lock(%lock13_3, AcquireGreaterEqual, 1) // acquire for read(e.g. 
input ping) func.call @do_mul(%buf13_0) : (memref<256xi32>) -> () aie.end - } { link_with="kernel.o" } + } %core23 = aie.core(%tile23) { // %val1 = arith.constant 7 : i32 @@ -44,6 +44,6 @@ module { func.call @do_mac(%buf23_0) : (memref<256xi32>) -> () aie.use_lock(%lock23_7, Release, 1) // release for read aie.end - } { link_with="kernel.o" } + } } } diff --git a/test/unit_tests/chess_compiler_tests_aie2/05_shim_dma_core_function/aie.mlir b/test/unit_tests/chess_compiler_tests_aie2/05_shim_dma_core_function/aie.mlir index f9d28035713..d546d5a4325 100644 --- a/test/unit_tests/chess_compiler_tests_aie2/05_shim_dma_core_function/aie.mlir +++ b/test/unit_tests/chess_compiler_tests_aie2/05_shim_dma_core_function/aie.mlir @@ -38,7 +38,7 @@ module @test_chess_05_shim_dma_core_function { %lock_b_read = aie.lock(%t73, 6) %lock_done = aie.lock(%t73, 7) - func.func private @func(%A: memref<16xi32>, %B: memref<16xi32>) -> () + func.func private @func(%A: memref<16xi32>, %B: memref<16xi32>) -> () attributes {link_with = "kernel.o"} %c13 = aie.core(%t73) { @@ -61,7 +61,7 @@ module @test_chess_05_shim_dma_core_function { } aie.end - } { link_with="kernel.o" } + } // Tile DMA %m73 = aie.mem(%t73) { diff --git a/test/unit_tests/chess_compiler_tests_aie2/07_shim_dma_core_function_with_loop/aie.mlir b/test/unit_tests/chess_compiler_tests_aie2/07_shim_dma_core_function_with_loop/aie.mlir index 71f48b56609..93ebe7fc28e 100644 --- a/test/unit_tests/chess_compiler_tests_aie2/07_shim_dma_core_function_with_loop/aie.mlir +++ b/test/unit_tests/chess_compiler_tests_aie2/07_shim_dma_core_function_with_loop/aie.mlir @@ -34,7 +34,7 @@ module @test_chess_04_deprecated_shim_dma_precompiled_kernel{ %lock_b_ping = aie.lock(%t73, 5) // b_ping %lock_b_pong = aie.lock(%t73, 6) // b_pong - func.func private @func(%A: memref<64xi32>, %B: memref<64xi32>, %C: i32) -> () + func.func private @func(%A: memref<64xi32>, %B: memref<64xi32>, %C: i32) -> () attributes {link_with = "kernel.o"} %c13 = 
aie.core(%t73) { %buffer_size = arith.constant 64 : i32 @@ -64,7 +64,7 @@ module @test_chess_04_deprecated_shim_dma_precompiled_kernel{ } aie.end - } { link_with="kernel.o" } + } // Tile DMA %m73 = aie.mem(%t73) { diff --git a/tools/aiecc/aiecc.cpp b/tools/aiecc/aiecc.cpp index a2188900b30..4e42067a0aa 100644 --- a/tools/aiecc/aiecc.cpp +++ b/tools/aiecc/aiecc.cpp @@ -117,8 +117,6 @@ #include #include -#include "aiecc_aiesim.h" - using namespace llvm; using namespace mlir; @@ -1080,14 +1078,22 @@ static std::string getAIETargetForDevice(ModuleOp moduleOp, // AIE Device and Core Discovery //===----------------------------------------------------------------------===// +/// Per-core metadata extracted from a CoreOp before the compilation pipeline +/// begins. All fields are populated by getCoreInfo(). struct CoreInfo { - std::int32_t col; - std::int32_t row; - std::string linkWith; // External object files to link - std::string elfFile; // Generated ELF path (if already specified) + std::int32_t col = 0; ///< Tile column (from TileOp). + std::int32_t row = 0; ///< Tile row (from TileOp). + /// External object files to link into this core's ELF. Populated from + /// CoreOp::getLinkFiles() (canonical) or CoreOp::getLinkWith() (deprecated + /// fallback when aie-assign-core-link-files was not run). + SmallVector linkFiles; + /// If non-empty, the ELF was provided via the elf_file attribute; no + /// compilation is needed. + std::string elfFile; }; -/// Check if a CoreOp has a non-empty body (more than just aie.end). +/// Returns true if the CoreOp has a non-empty body (i.e., anything beyond the +/// mandatory aie.end terminator). 
static bool coreHasNonemptyBody(xilinx::AIE::CoreOp coreOp) { for (auto &block : coreOp.getBody()) { if (block.getOperations().size() > 1) @@ -1096,7 +1102,20 @@ static bool coreHasNonemptyBody(xilinx::AIE::CoreOp coreOp) { return false; } -// Helper to extract core info from a CoreOp +/// Returns true if a CoreOp requires compilation or linking. +/// +/// Skips hollow cores created by --expand-load-pdis (empty body, no elf_file, +/// no link files), which exist only to satisfy structural constraints. +static bool coreNeedsCompilation(xilinx::AIE::CoreOp coreOp) { + return coreOp.getElfFileAttr() || coreOp.getLinkWithAttr() || + coreOp.getLinkFiles() || coreHasNonemptyBody(coreOp); +} + +/// Extracts tile coordinates and link-file metadata from a CoreOp. +/// +/// Prefers the canonical link_files attribute (set by +/// aie-assign-core-link-files). Falls back to the deprecated core-level +/// link_with attribute if link_files is absent (e.g., the pass was not run). static CoreInfo getCoreInfo(xilinx::AIE::CoreOp coreOp) { CoreInfo info; auto tileOp = dyn_cast(coreOp.getTile().getDefiningOp()); @@ -1105,8 +1124,15 @@ static CoreInfo getCoreInfo(xilinx::AIE::CoreOp coreOp) { info.row = tileOp.getRow(); } - if (auto linkWithAttr = coreOp.getLinkWithAttr()) { - info.linkWith = linkWithAttr.getValue().str(); + // Prefer canonical link_files ArrayAttr (populated by AIEAssignCoreLinkFiles, + // which runs as part of the resource-allocation pipeline). + if (auto filesAttr = coreOp.getLinkFiles()) { + for (auto f : filesAttr->getAsRange()) + info.linkFiles.push_back(f.getValue().str()); + } else if (auto linkWithAttr = coreOp.getLinkWithAttr()) { + // Fallback: deprecated core-level link_with was not migrated by the pass + // (e.g., pipeline was not run). Treat it as a single-element list. 
+ info.linkFiles.push_back(linkWithAttr.getValue().str()); } if (auto elfAttr = coreOp.getElfFileAttr()) { @@ -1396,6 +1422,9 @@ static LogicalResult runResourceAllocationPipeline(ModuleOp moduleOp, bufferOpts.clAllocScheme = allocScheme.getValue(); devicePm.addPass(xilinx::AIE::createAIEAssignBufferAddressesPass(bufferOpts)); + // Infer per-core link_files from func-level link_with attributes + devicePm.addPass(xilinx::AIE::createAIEAssignCoreLinkFilesPass()); + devicePm.addPass(xilinx::AIE::createAIEVectorTransferLoweringPass()); // Step 5: Convert SCF to CF (module-level pass) @@ -1801,6 +1830,46 @@ static LogicalResult runUnifiedLLVMLoweringPipeline(ModuleOp moduleOp, return success(); } +/// Copy \p src to \p destDir / \p destBasename atomically by writing to a +/// sibling temp file first, then renaming. On POSIX, rename(2) is atomic +/// within the same filesystem, so parallel compilations sharing the same +/// destination filename do not corrupt each other's copy. +static LogicalResult atomicCopyFile(StringRef src, StringRef destDir, + StringRef destBasename) { + SmallString<256> dest(destDir); + sys::path::append(dest, destBasename); + + // Write to a sibling temp file in destDir, then rename atomically. + // Keeping the temp in the same directory ensures they share a filesystem, + // so rename(2) is never cross-device (no EXDEV failure). 
+ SmallString<256> tmpModel(destDir); + SmallString<64> tmpFilename; + tmpFilename += sys::path::stem(destBasename); + tmpFilename += "-%%%%%%"; + tmpFilename += sys::path::extension(destBasename); + sys::path::append(tmpModel, tmpFilename); + SmallString<256> tmpPath; + if (sys::fs::createUniqueFile(tmpModel, tmpPath)) { + llvm::errs() << "Error: could not create temp file in " << destDir << "\n"; + return failure(); + } + + if (std::error_code ec = sys::fs::copy_file(src, tmpPath)) { + llvm::errs() << "Error: could not copy " << src << " to " << tmpPath << ": " + << ec.message() << "\n"; + sys::fs::remove(tmpPath); + return failure(); + } + + if (std::error_code ec = sys::fs::rename(tmpPath, dest)) { + llvm::errs() << "Error: could not rename " << tmpPath << " to " << dest + << ": " << ec.message() << "\n"; + sys::fs::remove(tmpPath); + return failure(); + } + return success(); +} + //===----------------------------------------------------------------------===// // Core Compilation //===----------------------------------------------------------------------===// @@ -1899,7 +1968,32 @@ static LogicalResult compileCore(MLIRContext &context, ModuleOp moduleOp, std::to_string(core.row) + ".ld.script"); if (!xbridge) { - // Generate linker script to file using the original (unmodified) module + // Clone the pre-lowering module for ldscript generation. We need a + // separate clone here because coreModule will be destructively lowered to + // LLVM IR by runLLVMLoweringPipeline below, making it unsuitable for + // AIETranslateToLdScript. We also cannot mutate the shared moduleOp + // (data race with parallel core threads), so this per-thread clone is the + // correct place to rewrite link_files to absolute paths. 
+ OwningOpRef ldScriptModule = moduleOp.clone(); + ldScriptModule->walk([&](xilinx::AIE::CoreOp coreOp) { + auto tileOp = + dyn_cast(coreOp.getTile().getDefiningOp()); + if (!tileOp || tileOp.getCol() != core.col || tileOp.getRow() != core.row) + return; + if (auto filesAttr = coreOp.getLinkFiles()) { + SmallVector absFiles; + for (auto f : filesAttr->getAsRange()) { + SmallString<256> absPath(tmpDirName); + sys::path::append(absPath, sys::path::filename(f.getValue())); + absFiles.push_back( + mlir::StringAttr::get(ldScriptModule->getContext(), absPath)); + } + coreOp.setLinkFilesAttr( + mlir::ArrayAttr::get(ldScriptModule->getContext(), absFiles)); + } + }); + + // Generate linker script from the pre-lowering clone with absolute paths. std::error_code ec; raw_fd_ostream ldScriptFile(ldScriptPath, ec); if (ec) { @@ -1910,7 +2004,7 @@ static LogicalResult compileCore(MLIRContext &context, ModuleOp moduleOp, } if (failed(xilinx::AIE::AIETranslateToLdScript( - moduleOp, ldScriptFile, core.col, core.row, deviceName))) { + *ldScriptModule, ldScriptFile, core.col, core.row, deviceName))) { std::lock_guard lock(outputMutex); llvm::errs() << "Error generating linker script\n"; return failure(); @@ -2146,11 +2240,10 @@ static LogicalResult compileCore(MLIRContext &context, ModuleOp moduleOp, llvm::outs() << "Generated BCF: " << bcfPath << "\n"; } - // Extract link_with files from BCF + // Extract external object files listed in the BCF's _include _file + // directives. Search order: current working directory, then tmpDirName (JIT + // cache), then the directory containing the input MLIR file. 
std::vector linkWithFiles = extractInputFilesFromBCF(bcfPath); - - // Handle link_with files: copy to .prj directory if needed - // Search order: current working directory, then input file directory std::string linkWithArgs; for (const auto &linkWithFile : linkWithFiles) { SmallString<256> srcPath; @@ -2183,30 +2276,19 @@ static LogicalResult compileCore(MLIRContext &context, ModuleOp moduleOp, } } - // Copy to .prj directory + // Copy to .prj directory atomically to avoid races between parallel + // cores. SmallString<256> destPath(tmpDirName); sys::path::append(destPath, sys::path::filename(linkWithFile)); - - if (srcPath == destPath) { - if (verbose) { - std::lock_guard lock(outputMutex); - llvm::outs() << "link_with file already in place: " << srcPath - << "\n"; - } - } else { - sys::fs::remove(destPath); - std::error_code ec = sys::fs::copy_file(srcPath, destPath); - if (ec) { - std::lock_guard lock(outputMutex); - llvm::errs() << "Error: Could not copy link_with file: " << srcPath - << " to " << destPath << ": " << ec.message() << "\n"; + if (srcPath != destPath) { + if (failed(atomicCopyFile(srcPath, tmpDirName, + sys::path::filename(linkWithFile)))) return failure(); - } if (verbose) { std::lock_guard lock(outputMutex); - llvm::outs() << "Copied link_with: " << srcPath << " -> " << destPath - << "\n"; + llvm::outs() << "Copied external object: " << srcPath << " -> " + << destPath << "\n"; } } @@ -2236,7 +2318,8 @@ static LogicalResult compileCore(MLIRContext &context, ModuleOp moduleOp, std::string(workDir), "-d", "-f", std::string(objPath)}; - // Add link_with files if any + // Append external object files (previously copied to tmpDirName) to the + // xchesscc_wrapper link command. 
for (const auto &linkWithFile : linkWithFiles) { SmallString<256> localPath(tmpDirName); sys::path::append(localPath, sys::path::filename(linkWithFile)); @@ -2299,29 +2382,25 @@ static LogicalResult compileCore(MLIRContext &context, ModuleOp moduleOp, linkCmd.push_back(std::string(objPath)); - // Handle external object file if link_with attribute is specified - // The linker script generated by aie-translate will include an INPUT() - // directive for the link_with file, but it uses a relative path. - // We need to copy the file to the .prj directory so the linker can find it. - if (!core.linkWith.empty()) { - // Resolve the link_with path - check multiple locations: - // 1. If absolute, use as-is - // 2. Relative to current working directory (common for test cases) - // 3. Relative to input file directory (common for installed examples) + // Handle external object files specified via link_files (or deprecated + // link_with). The linker script generated by aie-translate will include an + // INPUT() directive for each file, but uses a relative path. We copy every + // file to the .prj directory so the linker can find them. 
+ for (const auto &lf : core.linkFiles) { SmallString<256> srcLinkWith; - if (sys::path::is_absolute(core.linkWith)) { - srcLinkWith = core.linkWith; + if (sys::path::is_absolute(lf)) { + srcLinkWith = lf; } else { // First try current working directory SmallString<256> cwdPath; sys::fs::current_path(cwdPath); - sys::path::append(cwdPath, core.linkWith); + sys::path::append(cwdPath, lf); if (sys::fs::exists(cwdPath)) { srcLinkWith = cwdPath; } else { // Try tmpDirName (used in JIT where .o is pre-compiled there) SmallString<256> tmpPath(tmpDirName); - sys::path::append(tmpPath, core.linkWith); + sys::path::append(tmpPath, lf); if (sys::fs::exists(tmpPath)) { srcLinkWith = tmpPath; } else { @@ -2332,45 +2411,35 @@ static LogicalResult compileCore(MLIRContext &context, ModuleOp moduleOp, sys::fs::current_path(inputDir); } srcLinkWith = inputDir; - sys::path::append(srcLinkWith, core.linkWith); + sys::path::append(srcLinkWith, lf); sys::path::remove_dots(srcLinkWith, /*remove_dot_dot=*/true); } } } // Copy the object file to the .prj directory so the linker script's - // INPUT() directive can find it + // INPUT() directive can find it. Copy atomically to avoid races between + // parallel cores that share the same .o filename. 
SmallString<256> destLinkWith(tmpDirName); - sys::path::append(destLinkWith, sys::path::filename(core.linkWith)); - - if (srcLinkWith == destLinkWith) { - if (verbose) { - std::lock_guard lock(outputMutex); - llvm::outs() << "link_with file already in place: " << srcLinkWith - << "\n"; - } - } else { - // Remove destination file first if it exists (to ensure overwrite) - sys::fs::remove(destLinkWith); - - std::error_code ec = sys::fs::copy_file(srcLinkWith, destLinkWith); - if (ec) { - std::lock_guard lock(outputMutex); - llvm::errs() << "Error: Could not copy link_with file: " - << srcLinkWith << " to " << destLinkWith << "\n"; - llvm::errs() << "Error: " << ec.message() << "\n"; + sys::path::append(destLinkWith, sys::path::filename(lf)); + if (srcLinkWith != destLinkWith) { + if (failed(atomicCopyFile(srcLinkWith, tmpDirName, + sys::path::filename(lf)))) return failure(); - } if (verbose) { std::lock_guard lock(outputMutex); - llvm::outs() << "Copied link_with object: " << srcLinkWith << " -> " + llvm::outs() << "Copied external object: " << srcLinkWith << " -> " << destLinkWith << "\n"; } + } else if (verbose) { + std::lock_guard lock(outputMutex); + llvm::outs() << "External object already in place: " << srcLinkWith + << "\n"; } - // Note: We don't add the object file to linkStrs because the linker - // script already has an INPUT() directive for it + // Note: We don't add the object file to linkCmd because the linker + // script already has INPUT() directives for each file } // Make linker script path absolute @@ -2430,12 +2499,8 @@ compileCores(MLIRContext &context, ModuleOp moduleOp, Operation *deviceOp, SmallVector cores; deviceOp->walk([&](xilinx::AIE::CoreOp coreOp) { - // Skip cores with no elf_file, no link_with, and empty body - // (e.g., @empty device ops created by --expand-load-pdis) - if (coreOp.getElfFileAttr() || coreOp.getLinkWithAttr() || - coreHasNonemptyBody(coreOp)) { + if (coreNeedsCompilation(coreOp)) cores.push_back(getCoreInfo(coreOp)); 
- } }); if (cores.empty()) { @@ -2603,12 +2668,8 @@ compileCoresUnified(MLIRContext &context, ModuleOp moduleOp, SmallVector cores; deviceOp->walk([&](xilinx::AIE::CoreOp coreOp) { - // Skip cores with no elf_file, no link_with, and empty body - // (e.g., @empty device ops created by --expand-load-pdis) - if (coreOp.getElfFileAttr() || coreOp.getLinkWithAttr() || - coreHasNonemptyBody(coreOp)) { + if (coreNeedsCompilation(coreOp)) cores.push_back(getCoreInfo(coreOp)); - } }); if (cores.empty()) { @@ -2924,15 +2985,9 @@ compileCoresUnified(MLIRContext &context, ModuleOp moduleOp, SmallString<256> destPath(tmpDirName); sys::path::append(destPath, sys::path::filename(linkWithFile)); - if (srcPath == destPath) { - continue; - } - sys::fs::remove(destPath); - std::error_code ec = sys::fs::copy_file(srcPath, destPath); - if (ec) { - llvm::errs() << "Error copying link_with file: " << srcPath << "\n"; + if (failed(atomicCopyFile(srcPath, tmpDirName, + sys::path::filename(linkWithFile)))) return failure(); - } } auto xchessccWrapperPath = sys::findProgramByName("xchesscc_wrapper"); @@ -3011,24 +3066,23 @@ compileCoresUnified(MLIRContext &context, ModuleOp moduleOp, SmallString<256> peanoLld(peanoBinDir); sys::path::append(peanoLld, "ld.lld"); - // Handle link_with if specified - // Search order: current working directory, tmpDirName, input file - // directory - if (!core.linkWith.empty()) { + // Handle external object files specified via link_files (or deprecated + // link_with). Search order: absolute, cwd, tmpDirName, input file dir. 
+ for (const auto &lf : core.linkFiles) { SmallString<256> srcLinkWith; - if (sys::path::is_absolute(core.linkWith)) { - srcLinkWith = core.linkWith; + if (sys::path::is_absolute(lf)) { + srcLinkWith = lf; } else { // First try current working directory SmallString<256> cwdPath; sys::fs::current_path(cwdPath); - sys::path::append(cwdPath, core.linkWith); + sys::path::append(cwdPath, lf); if (sys::fs::exists(cwdPath)) { srcLinkWith = cwdPath; } else { // Try tmpDirName (used in JIT where .o is pre-compiled there) SmallString<256> tmpPath(tmpDirName); - sys::path::append(tmpPath, core.linkWith); + sys::path::append(tmpPath, lf); if (sys::fs::exists(tmpPath)) { srcLinkWith = tmpPath; } else { @@ -3039,22 +3093,24 @@ compileCoresUnified(MLIRContext &context, ModuleOp moduleOp, sys::fs::current_path(inputDir); } srcLinkWith = inputDir; - sys::path::append(srcLinkWith, core.linkWith); + sys::path::append(srcLinkWith, lf); sys::path::remove_dots(srcLinkWith, /*remove_dot_dot=*/true); } } } SmallString<256> destLinkWith(tmpDirName); - sys::path::append(destLinkWith, sys::path::filename(core.linkWith)); + sys::path::append(destLinkWith, sys::path::filename(lf)); if (srcLinkWith != destLinkWith) { - sys::fs::remove(destLinkWith); - std::error_code ec = sys::fs::copy_file(srcLinkWith, destLinkWith); - if (ec) { - llvm::errs() << "Error copying link_with file: " << srcLinkWith - << "\n"; + if (failed(atomicCopyFile(srcLinkWith, tmpDirName, + sys::path::filename(lf)))) return failure(); - } + if (verbose) + llvm::outs() << "Copied link_with object: " << srcLinkWith << " -> " + << destLinkWith << "\n"; + } else if (verbose) { + llvm::outs() << "link_with object already in place: " << srcLinkWith + << "\n"; } }