From 934ae3b98653feb17233175ca4d8f125fdc59d15 Mon Sep 17 00:00:00 2001 From: Patrick Lavin Date: Fri, 10 Sep 2021 12:59:32 -0400 Subject: [PATCH 1/3] add preliminary tracing capabilities --- src/sst/elements/ariel/arielcore.cc | 12 ++++++++++++ src/sst/elements/ariel/arielcore.h | 5 +++++ src/sst/elements/ariel/arielcpu.cc | 1 + 3 files changed, 18 insertions(+) diff --git a/src/sst/elements/ariel/arielcore.cc b/src/sst/elements/ariel/arielcore.cc index 4d4eb8aea9..54e6d0da6a 100644 --- a/src/sst/elements/ariel/arielcore.cc +++ b/src/sst/elements/ariel/arielcore.cc @@ -15,6 +15,7 @@ #include #include "arielcore.h" +#include #ifdef HAVE_CUDA #include <../balar/balar_event.h> @@ -59,6 +60,11 @@ ArielCore::ArielCore(ComponentId_t id, ArielTunnel *tunnel, pendingTransactions = new std::unordered_map(); pending_transaction_count = 0; + tracefile = "/tmp/arielcore"; + tracefile = tracefile + std::to_string((uint64_t)id) + ".ssv"; + + tracefp.open(tracefile, std::ios::out | std::ios::trunc); + #ifdef HAVE_CUDA midTransfer = false; remainingTransfer = 0; @@ -131,6 +137,8 @@ ArielCore::~ArielCore() { if(enableTracing && traceGen) { delete traceGen; } + + tracefp.close(); } void ArielCore::setCacheLink(SimpleMem* newLink) { @@ -835,10 +843,14 @@ bool ArielCore::refillQueue() { switch(ac.command) { case ARIEL_PERFORM_READ: + // PAT + //printf("0 %"PRIu64" %"PRIu64"\n", ac.instPtr, ac.inst.addr); + tracefp << "0 " << ac.instPtr << " " << ac.inst.addr << std::endl; createReadEvent(ac.inst.addr, ac.inst.size); break; case ARIEL_PERFORM_WRITE: + tracefp << "1 " << ac.instPtr << " " << ac.inst.addr << std::endl; createWriteEvent(ac.inst.addr, ac.inst.size, &ac.inst.payload[0]); break; diff --git a/src/sst/elements/ariel/arielcore.h b/src/sst/elements/ariel/arielcore.h index b2199b7de0..59e0d3d3f9 100644 --- a/src/sst/elements/ariel/arielcore.h +++ b/src/sst/elements/ariel/arielcore.h @@ -32,6 +32,8 @@ #include #include +#include + #include #include #include @@ -180,6 +182,9 @@ 
class ArielCore : public ComponentExtension { uint32_t coreID; uint32_t maxPendingTransactions; + std::string tracefile; + std::ofstream tracefp; + #ifdef HAVE_CUDA size_t totalTransfer; bool gpu_enabled; diff --git a/src/sst/elements/ariel/arielcpu.cc b/src/sst/elements/ariel/arielcpu.cc index f35c53bf6b..dcb0f238cd 100644 --- a/src/sst/elements/ariel/arielcpu.cc +++ b/src/sst/elements/ariel/arielcpu.cc @@ -188,6 +188,7 @@ ArielCPU::ArielCPU(ComponentId_t id, Params& params) : // Set max number of instructions cpu_cores[i]->setMaxInsts(max_insts); + } // Find all the components loaded into the "memory" slot From 85a5f0eb12fcdb01ca1084efa9725c0872fcd138 Mon Sep 17 00:00:00 2001 From: Patrick Lavin Date: Thu, 2 Dec 2021 15:08:06 -0500 Subject: [PATCH 2/3] same group can be both source and dest --- src/sst/elements/memHierarchy/memNICBase.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/sst/elements/memHierarchy/memNICBase.h b/src/sst/elements/memHierarchy/memNICBase.h index 8b990ea726..356c6c32a4 100644 --- a/src/sst/elements/memHierarchy/memNICBase.h +++ b/src/sst/elements/memHierarchy/memNICBase.h @@ -180,7 +180,8 @@ class MemNICBase : public MemLinkBase { if (sourceIDs.find(imre->info.id) != sourceIDs.end()) { addSource(imre->info); dbg.debug(_L10_, "\tAdding to sourceEndpointInfo. %zu sources found\n", sourceEndpointInfo.size()); - } else if (destIDs.find(imre->info.id) != destIDs.end()) { + } + if (destIDs.find(imre->info.id) != destIDs.end()) { addDest(imre->info); dbg.debug(_L10_, "\tAdding to destEndpointInfo. 
%zu destinations found\n", destEndpointInfo.size()); } From abcc740778850ae34af963547a5c21f4ddbcf583 Mon Sep 17 00:00:00 2001 From: Patrick Lavin Date: Tue, 15 Mar 2022 10:58:19 -0400 Subject: [PATCH 3/3] add model time stat and send conditional branch IP to arielcore --- src/sst/elements/ariel/ariel_shmem.h | 1 + src/sst/elements/ariel/arielcore.cc | 33 ++++++++++++++++++- src/sst/elements/ariel/arielcore.h | 1 + src/sst/elements/ariel/arielcpu.h | 1 + .../elements/ariel/frontend/pin3/fesimple.cc | 22 +++++++++++++ 5 files changed, 57 insertions(+), 1 deletion(-) diff --git a/src/sst/elements/ariel/ariel_shmem.h b/src/sst/elements/ariel/ariel_shmem.h index d5c40a69f0..7a340a2d3d 100644 --- a/src/sst/elements/ariel/ariel_shmem.h +++ b/src/sst/elements/ariel/ariel_shmem.h @@ -64,6 +64,7 @@ enum ArielShmemCmd_t { ARIEL_ISSUE_CUDA = 144, ARIEL_FLUSHLINE_INSTRUCTION = 154, ARIEL_FENCE_INSTRUCTION = 155, + ARIEL_CONDITIONAL_BRANCH = 156, }; #ifdef HAVE_CUDA diff --git a/src/sst/elements/ariel/arielcore.cc b/src/sst/elements/ariel/arielcore.cc index 54e6d0da6a..9135765809 100644 --- a/src/sst/elements/ariel/arielcore.cc +++ b/src/sst/elements/ariel/arielcore.cc @@ -16,6 +16,7 @@ #include #include "arielcore.h" #include +#include #ifdef HAVE_CUDA #include <../balar/balar_event.h> @@ -106,6 +107,10 @@ ArielCore::ArielCore(ComponentId_t id, ArielTunnel *tunnel, statFPSPOps = registerStatistic("fp_sp_ops", subID); statFPDPOps = registerStatistic("fp_dp_ops", subID); + // Model time + model_time = registerStatistic("model_time", subID); + + free(subID); memmgr->registerInterruptHandler(coreID, new ArielMemoryManager::InterruptHandler(this, &ArielCore::handleInterrupt)); @@ -296,7 +301,7 @@ void ArielCore::handleEvent(SimpleMem::Request* event) { if( verbosity >= 16) { for(int i = 0; i < getPageTransfer(); i++) output->verbose(CALL_INFO, 16, 0, "%" PRIu32 ", ", - getDataAddress()[i]); + getDataAddress()[i]); output->verbose(CALL_INFO, 16, 0, "\n"); } @@ -790,6 +795,9 @@ bool 
ArielCore::hasDrainCompleted() const { } bool ArielCore::refillQueue() { + + auto model_start = std::chrono::steady_clock::now(); + ARIEL_CORE_VERBOSE(16, output->verbose(CALL_INFO, 16, 0, "Refilling event queue for core %" PRIu32 "...\n", coreID)); while(coreQ->size() < maxQLength) { @@ -812,6 +820,10 @@ bool ArielCore::refillQueue() { fprintf(stdout, "Performing statistics output at simulation time = %" PRIu64 " cycles\n", getCurrentSimTimeNano()); performGlobalStatisticOutput(); break; + case ARIEL_CONDITIONAL_BRANCH: + //printf("Received info on conditional branch: %ld\n", ac.instPtr); + // TODO: Send info to phase detector + break; case ARIEL_START_INSTRUCTION: if(ARIEL_INST_SP_FP == ac.inst.instClass) { @@ -913,6 +925,9 @@ bool ArielCore::refillQueue() { } ARIEL_CORE_VERBOSE(16, output->verbose(CALL_INFO, 16, 0, "Refilling event queue for core %" PRIu32 " is complete\n", coreID)); + auto model_stop = std::chrono::steady_clock::now(); + auto model_delta = std::chrono::duration_cast(model_stop - model_start); + model_time->addData(model_delta.count()); return true; } @@ -923,6 +938,8 @@ void ArielCore::handleFreeEvent(ArielFreeEvent* rFE) { } void ArielCore::handleReadRequest(ArielReadEvent* rEv) { + auto model_start = std::chrono::steady_clock::now(); + ARIEL_CORE_VERBOSE(4, output->verbose(CALL_INFO, 4, 0, "Core %" PRIu32 " processing a read event...\n", coreID)); const uint64_t readAddress = rEv->getAddress(); @@ -994,9 +1011,13 @@ void ArielCore::handleReadRequest(ArielReadEvent* rEv) { statReadRequests->addData(1); statReadRequestSizes->addData(readLength); + auto model_stop = std::chrono::steady_clock::now(); + auto model_delta = std::chrono::duration_cast(model_stop - model_start); + model_time->addData(model_delta.count()); } void ArielCore::handleWriteRequest(ArielWriteEvent* wEv) { + auto model_start = std::chrono::steady_clock::now(); ARIEL_CORE_VERBOSE(4, output->verbose(CALL_INFO, 4, 0, "Core %" PRIu32 " processing a write event...\n", coreID)); 
const uint64_t writeAddress = wEv->getAddress(); @@ -1082,6 +1103,9 @@ void ArielCore::handleWriteRequest(ArielWriteEvent* wEv) { statWriteRequests->addData(1); statWriteRequestSizes->addData(writeLength); + auto model_stop = std::chrono::steady_clock::now(); + auto model_delta = std::chrono::duration_cast(model_stop - model_start); + model_time->addData(model_delta.count()); } @@ -1324,6 +1348,9 @@ void ArielCore::printCoreStatistics() { bool ArielCore::processNextEvent() { + auto model_start = std::chrono::steady_clock::now(); + + // Upon every call, check if the core is drained and we are fenced. If so, unfence // return true; /* Todo: reevaluate if this is needed */ // Attempt to refill the queue @@ -1461,6 +1488,10 @@ bool ArielCore::processNextEvent() { break; } + auto model_stop = std::chrono::steady_clock::now(); + auto model_delta = std::chrono::duration_cast(model_stop - model_start); + model_time->addData(model_delta.count()); + // If the event has actually been processed this cycle then remove it from the queue if(removeEvent) { ARIEL_CORE_VERBOSE(8, output->verbose(CALL_INFO, 8, 0, "Removing event from pending queue, there are %" PRIu32 " events in the queue before deletion.\n", diff --git a/src/sst/elements/ariel/arielcore.h b/src/sst/elements/ariel/arielcore.h index 59e0d3d3f9..8ad3de4cd9 100644 --- a/src/sst/elements/ariel/arielcore.h +++ b/src/sst/elements/ariel/arielcore.h @@ -260,6 +260,7 @@ class ArielCore : public ComponentExtension { Statistic* statFPSPSIMDIns; Statistic* statFPSPScalarIns; Statistic* statFPSPOps; + Statistic* model_time; uint32_t pending_transaction_count; uint32_t pending_gpu_transaction_count; diff --git a/src/sst/elements/ariel/arielcpu.h b/src/sst/elements/ariel/arielcpu.h index 63151183de..e6a1055efb 100644 --- a/src/sst/elements/ariel/arielcpu.h +++ b/src/sst/elements/ariel/arielcpu.h @@ -101,6 +101,7 @@ class ArielCPU : public SST::Component { { "fp_sp_scalar_ins", "Statistic for counting SP-FP Non-SIMD instructons", 
"instructions", 1 }, { "fp_sp_ops", "Statistic for counting SP-FP operations (inst * SIMD width)", "instructions", 1 }, { "cycles", "Statistic for counting cycles of the Ariel core.", "cycles", 1 }, + { "model_time", "Time spent in modeling", "ns", 1}, { "active_cycles", "Statistic for counting active cycles (cycles not idle) of the Ariel core.", "cycles", 1 }) SST_ELI_DOCUMENT_SUBCOMPONENT_SLOTS( diff --git a/src/sst/elements/ariel/frontend/pin3/fesimple.cc b/src/sst/elements/ariel/frontend/pin3/fesimple.cc index cc9cde55d0..aa0a0f3626 100644 --- a/src/sst/elements/ariel/frontend/pin3/fesimple.cc +++ b/src/sst/elements/ariel/frontend/pin3/fesimple.cc @@ -471,6 +471,22 @@ VOID WriteInstructionWriteOnly(THREADID thr, ADDRINT* writeAddr, UINT32 writeSiz } +VOID WriteInstructionConditionalBranch(THREADID thr, ADDRINT ip) +{ + + // TODO: Currently, we just get the IP of the branch. We can + // get the target info in the future. + if(enable_output) { + if(thr < core_count) { + ArielCommand ac; + ac.command = ARIEL_CONDITIONAL_BRANCH; + ac.instPtr = (uint64_t) ip; + tunnel->writeMessage(thr, ac); + } + } + +} + VOID IncrementFunctionRecord(VOID* funcRecord) { ArielFunctionRecord* arielFuncRec = (ArielFunctionRecord*) funcRecord; @@ -570,6 +586,12 @@ VOID InstrumentInstruction(INS ins, VOID *v) IARG_UINT32, instClass, IARG_UINT32, simdOpWidth, IARG_END); + } else if( INS_IsBranch(ins) && INS_HasFallThrough(ins) ) { + INS_InsertPredicatedCall(ins, IPOINT_BEFORE, (AFUNPTR) + WriteInstructionConditionalBranch, + IARG_THREAD_ID, + IARG_INST_PTR, + IARG_END); } else { INS_InsertPredicatedCall(ins, IPOINT_BEFORE, (AFUNPTR) WriteNoOp,