diff --git a/cheshire.mk b/cheshire.mk index f838911b7..709fc9a94 100644 --- a/cheshire.mk +++ b/cheshire.mk @@ -63,7 +63,7 @@ chs-clean-deps: ###################### CHS_NONFREE_REMOTE ?= git@iis-git.ee.ethz.ch:pulp-restricted/cheshire-nonfree.git -CHS_NONFREE_COMMIT ?= 92f6f02 +CHS_NONFREE_COMMIT ?= a111e47 CHS_PHONY += chs-nonfree-init chs-nonfree-init: @@ -82,17 +82,26 @@ include $(CHS_ROOT)/sw/sw.mk # Generate HW # ############### +# `CHS_NUM_IRQ_HARTS` and `CHS_NUM_PLIC_SRCS` are used to generate register files. +# They must match the corresponding SystemVerilog parameters. +CHS_NUM_IRQ_HARTS ?= 1 +CHS_NUM_PLIC_SRCS ?= 58 +CHS_NUM_PLIC_PRIOW ?= 7 + # SoC registers $(CHS_ROOT)/hw/regs/cheshire_reg_pkg.sv $(CHS_ROOT)/hw/regs/cheshire_reg_top.sv: $(CHS_ROOT)/hw/regs/cheshire_regs.hjson $(REGTOOL) -r $< --outdir $(dir $@) # CLINT -CLINTCORES ?= 1 +CLINTCORES ?= $(CHS_NUM_IRQ_HARTS) include $(CLINTROOT)/clint.mk $(CLINTROOT)/.generated: flock -x $@ $(MAKE) clint && touch $@ # OpenTitan peripherals +$(CHS_ROOT)/hw/rv_plic.cfg.hjson: $(CHS_ROOT)/util/gen_pliccfg.py + $< $(CHS_NUM_IRQ_HARTS) $(CHS_NUM_PLIC_SRCS) $(CHS_NUM_PLIC_PRIOW) > $@ + include $(OTPROOT)/otp.mk $(OTPROOT)/.generated: $(CHS_ROOT)/hw/rv_plic.cfg.hjson flock -x $@ sh -c "cp $< $(dir $@)/src/rv_plic/; $(MAKE) -j1 otp" && touch $@ diff --git a/docs/um/arch.md b/docs/um/arch.md index cc5ea9390..730a15689 100644 --- a/docs/um/arch.md +++ b/docs/um/arch.md @@ -185,7 +185,7 @@ Cheshire provides a flexible RISC-V interrupt architecture that can route and mu First, all internal (`intr.intn`) and external (`intr_ext_i`) interrupt sources are collected (`intr`). From here, they either pass through an *interrupt router* if enabled (`IrqRouter`) or are simply fanned out to interrupt *targets*, which may support as many or fewer interrupt sources as provided by `intr`. If a target supports fewer sources, its interrupt sources are *truncated*. 
-Cheshire provides both a core-local interruptor (CLINT), grouping all per-core interrupts in one module, and a shared platform-level interrupt controller (PLIC). The former is used only for inter-processor and timer interrupts, while the latter is a proper interrupt target. If enabled (`Clic`), each CVA6 core also has a core-local interrupt controller (CLIC), another interrupt target. In addition to the PLIC and CLICs, any number external interrupt targets may be defined (`NumExtOutIntrTgts`) with their own number of incoming sources (`NumExtIrqHarts`). +Cheshire provides both a core-local interruptor (CLINT), grouping all per-core interrupts in one module, and a shared platform-level interrupt controller (PLIC). The former is used only for inter-processor and timer interrupts, while the latter is a proper interrupt target. If enabled (`Clic`), each CVA6 core also has a core-local interrupt controller (CLIC), another interrupt target. In addition to the PLIC and CLICs, any number of external interrupt targets may be defined (`NumExtOutIntrTgts`) with their own number of incoming sources (`NumExtOutIntrs`). Finally, the PLIC and grouped CLINT also support allocating external harts for which to manage interrupts (`NumExtIrqHarts`), i.e. harts without interrupt controllers of themselves. diff --git a/hw/bootrom/cheshire_bootrom.S b/hw/bootrom/cheshire_bootrom.S index 7680a9343..ff2e288f8 100644 --- a/hw/bootrom/cheshire_bootrom.S +++ b/hw/bootrom/cheshire_bootrom.S @@ -5,17 +5,20 @@ // Nicole Narr // Christopher Reinwardt // Paul Scheffler +// Enrico Zelioli -// TODO: Avoid hardcoding in addresses and offsets - -#include "smp.h" - +#include +#include .section .text._start // Minimal bootrom loader .global _start .align 4 _start: + // Globally disable Machine and Supervisor interrupts (MIE, SIE = 0). + // Note that this will *not* stop WFI from resuming on enabled interrupts. + csrrc zero, mstatus, 10 + // Reset all integer GPRs; we do *not* assume FP in the boot ROM. 
li x1, 0 li x4, 0 @@ -47,8 +50,12 @@ _start: li x30, 0 li x31, 0 - // Pause SMP harts - smp_pause(t0, t1) + // Pause SMP harts: enable only MSIE, disable all other interrupt sources + li t1, 0x8 + csrw mie, t1 + li t0, 0 + csrr t1, mhartid + bne t0, t1, _smp_wait // Init stack and global pointer with safe, linked values la sp, __stack_pointer$ @@ -58,33 +65,35 @@ _start: .option pop // If LLC present: Wait for end of BIST, then extend stack and set to all SPM - la t0, __base_regs - lw t0, 80(t0) // regs.HW_FEATURES - andi t0, t0, 2 // regs.HW_FEATURES.llc + la t0, __base_regs + lwu t0, CHESHIRE_HW_FEATURES_REG_OFFSET(t0) + andi t0, t0, (1 << CHESHIRE_HW_FEATURES_LLC_BIT) beqz t0, _prom_check_run - la t0, __base_llc + la t0, __base_llc _wait_llc_bist: - lw t1, 72(t0) // llc.BIST_STATUS_DONE_BIT + lwu t1, AXI_LLC_BIST_STATUS_REG_OFFSET(t0) // Check BIST status done bit beqz t1, _wait_llc_bist - li t1, -1 - sw t1, 0(t0) // llc.CFG_SPM_LOW - sw t1, 4(t0) // llc.CFG_SPM_HIGH - li t1, 1 - sw t1, 16(t0) // llc.CFG_COMMIT + li t1, -1 + sw t1, AXI_LLC_CFG_SPM_LOW_REG_OFFSET(t0) + sw t1, AXI_LLC_CFG_SPM_HIGH_REG_OFFSET(t0) + li t1, 1 + sw t1, AXI_LLC_COMMIT_CFG_REG_OFFSET(t0) // Correct stack to start at end of SPM - la t0, __base_regs - la sp, __base_spm - lw t0, 84(t0) // regs.LLC_SIZE - add sp, sp, t0 + la t0, __base_regs + la sp, __base_spm + lwu t0, CHESHIRE_LLC_SIZE_REG_OFFSET(t0) + add sp, sp, t0 addi sp, sp, -8 -// Enter Platform ROM if present. +// Enter Platform ROM if present. Falls through to `_boot`. _prom_check_run: // Note that we have internal access to SPM here *if and only if* there is an LLC. 
- la t0, __base_regs - lw t0, 72(t0) // regs.PLATFORM_ROM - beqz t0, _boot - jalr t0 + li t0, 0 + li t1, 0 + la ra, __base_regs + lwu ra, CHESHIRE_PLATFORM_ROM_REG_OFFSET(ra) + beqz ra, _boot + jalr ra, 0(ra) // Reset regs, full fence, then jump to main _boot: @@ -99,34 +108,44 @@ _boot: .align 4 _exit: // Save the return value to scratch register 2, try `ebreak`, then wait forever + // Set bit 0 to signal that the execution is done. slli a0, a0, 1 ori a0, a0, 1 la t0, __base_regs - sw a0, 8(t0) // regs.SCRATCH[2] + sw a0, CHESHIRE_SCRATCH_2_REG_OFFSET(t0) ebreak 1: wfi j 1b -.global boot_next_stage -.align 4 -boot_next_stage: - // Non-SMP hart: Write boot address into global scratch registers - la t0, __base_regs - sw a0, 16(t0) // regs.SCRATCH[4] - srli a0, a0, 32 - sw a0, 20(t0) // regs.SCRATCH[5] - fence - // Resume SMP harts - smp_resume(t0, t1, t2) - // Load boot address from global scratch registers - la t0, __base_regs - lwu t1, 20(t0) // regs.SCRATCH[5] +// Nonzero harts wait for and handle wakeup here +_smp_wait: + // Set mtvec, sleep, and check MSIP; repeat unless MSIP is set (M IPI is pending). + la t0, 1f + csrw mtvec, t0 + wfi +1: csrr t0, mip + andi t0, t0, 0x8 + beqz t0, _smp_wait + // Clear CLINT M IPI register for this hart + la t0, __base_clint + csrr ra, mhartid + slli t1, ra, 2 + add t1, t1, t0 + sw zero, 0(t1) // *(CLINT_BASE + hart_id * 4) = 0 + // Check SMP enable (scratch[6][0]). + // If this is not set, go back to sleep, as we are not meant to resume. + la t0, __base_regs + lwu t0, CHESHIRE_SCRATCH_6_REG_OFFSET(t0) + andi t0, t0, 1 + beqz t0, _smp_wait + // Jump to scratch[5:4]. Synchronization, if any, should be done there. 
+ la t1, __base_regs + lwu t0, CHESHIRE_SCRATCH_4_REG_OFFSET(t1) + lwu t1, CHESHIRE_SCRATCH_5_REG_OFFSET(t1) slli t1, t1, 32 - lwu t0, 16(t0) // regs.SCRATCH[4] - or t0, t0, t1 - // Store hartid to a0 - csrr a0, mhartid - // Jump to boot address - jalr ra, 0(t0) - // We should never get here - ret + or ra, t1, t0 + li t0, 0 + li t1, 1 + jalr ra, 0(ra) + // Should we return, go back to `_start` to be parked again. + j _start diff --git a/hw/bootrom/cheshire_bootrom.c b/hw/bootrom/cheshire_bootrom.c index a1de1e1c2..1693e040b 100644 --- a/hw/bootrom/cheshire_bootrom.c +++ b/hw/bootrom/cheshire_bootrom.c @@ -18,8 +18,6 @@ #include "hal/uart_debug.h" #include "gpt.h" -extern int boot_next_stage(void *); - int boot_passive(uint64_t core_freq) { // Initialize UART with debug settings uart_debug_init(&__base_uart, core_freq); @@ -30,7 +28,7 @@ int boot_passive(uint64_t core_freq) { if (uart_debug_check(&__base_uart)) return uart_debug_serve(&__base_uart); // No UART (or JTAG) requests came in, but scratch[2][2] was set --> run code at scratch[1:0] scratch[2] = 0; - return boot_next_stage((void *)(uintptr_t)(((uint64_t)scratch[1] << 32) | scratch[0])); + return invoke((void *)(uintptr_t)(((uint64_t)scratch[1] << 32) | scratch[0])); } int boot_spi_sdcard(uint64_t core_freq, uint64_t rtc_freq) { diff --git a/hw/bootrom/cheshire_bootrom.sv b/hw/bootrom/cheshire_bootrom.sv index 38be46b01..1c6224238 100644 --- a/hw/bootrom/cheshire_bootrom.sv +++ b/hw/bootrom/cheshire_bootrom.sv @@ -29,7 +29,7 @@ module cheshire_bootrom #( always_comb begin data_o = '0; unique case (word) - 000: data_o = 32'h42014081 /* 0x0000 */; + 000: data_o = 32'h42014081 /* 0x0000 */; 001: data_o = 32'h43014281 /* 0x0004 */; 002: data_o = 32'h44014381 /* 0x0008 */; 003: data_o = 32'h45014481 /* 0x000c */; diff --git a/hw/cheshire_pkg.sv b/hw/cheshire_pkg.sv index 0163af701..02602dfd6 100644 --- a/hw/cheshire_pkg.sv +++ b/hw/cheshire_pkg.sv @@ -30,7 +30,6 @@ package cheshire_pkg; localparam int unsigned 
SlinkNumChan = serial_link_single_channel_reg_pkg::NumChannels; localparam int unsigned SlinkNumLanes = serial_link_single_channel_reg_pkg::NumBits/2; localparam int unsigned SlinkMaxClkDiv = 1 << serial_link_single_channel_reg_pkg::Log2MaxClkDiv; - localparam int unsigned ClintNumCores = clint_reg_pkg::NumCores; localparam int unsigned UsbNumPorts = spinal_usb_ohci_pkg::NumPhyPorts; // Default JTAG ID code type diff --git a/hw/cheshire_soc.sv b/hw/cheshire_soc.sv index b3b625443..af51d97c3 100644 --- a/hw/cheshire_soc.sv +++ b/hw/cheshire_soc.sv @@ -1714,19 +1714,25 @@ module cheshire_soc import cheshire_pkg::*; #( end - ////////////////// - // Assertions // - ////////////////// + /////////////////////////////// + // Elaboration-Time Checks // + /////////////////////////////// + + // Check that CLINT core count is equal to `NumIrqHarts` + if (clint_reg_pkg::NumCores != NumIrqHarts) + $fatal(1, "CLINT core count (%d) does not match `NumIrqHarts` (%d)", + clint_reg_pkg::NumCores, NumIrqHarts); + + // Check that PLIC target count is equal to `2*NumIrqHarts` (two privilege levels) + if (rv_plic_reg_pkg::NumTarget != 2 * NumIrqHarts) + $fatal(1, "PLIC target count (%d) does not match `2*NumIrqHarts` (%d)", + rv_plic_reg_pkg::NumTarget, 2 * NumIrqHarts); - // TODO: check that CVA6 and Cheshire config agree // TODO: check that all interconnect params agree // TODO: check that params with min/max values are within legal range - // TODO: check that CLINT and PLIC target counts are both `NumIntHarts + Cfg.NumExtHarts` - // TODO: check that (for now) `NumIntHarts == 1` // TODO: check that available user bits suffice to identify all masters // TODO: check that atomics user domain is nonzero // TODO: check that `ext` (IO) and internal types agree - // TODO: many other things I most likely forgot // TODO: check that LLC only exists if its output is connected (the reverse is allowed) endmodule diff --git a/hw/rv_plic.cfg.hjson b/hw/rv_plic.cfg.hjson index 865be3a9b..c476c7ad3 
100644 --- a/hw/rv_plic.cfg.hjson +++ b/hw/rv_plic.cfg.hjson @@ -3,13 +3,15 @@ // SPDX-License-Identifier: Apache-2.0 // // Paul Scheffler +// Enrico Zelioli +// AUTOMATICALLY GENERATED by gen_pliccfg.py; edit the script instead. { instance_name: "rv_plic", param_values: { src: 58, - target: 2, // We need *two targets* per hart: M and S modes + target: 2, prio: 7, - nonstd_regs: 0 // Do *not* include these: MSIPs are not used and we use a 64 MiB address space + nonstd_regs: 0 }, } diff --git a/sw/boot/zsl.c b/sw/boot/zsl.c index ab2edb655..ed682f7bb 100644 --- a/sw/boot/zsl.c +++ b/sw/boot/zsl.c @@ -14,6 +14,7 @@ #include "gpt.h" #include "dif/uart.h" #include "printf.h" +#include "smp.h" // Type for firmware payload typedef int (*payload_t)(uint64_t, uint64_t, uint64_t); @@ -51,40 +52,48 @@ static inline void load_part_or_spin(void *priv, const uint64_t *pguid, void *co } int main(void) { - // Get system parameters - uint32_t bootmode = *reg32(&__base_regs, CHESHIRE_BOOT_MODE_REG_OFFSET); - uint32_t rtc_freq = *reg32(&__base_regs, CHESHIRE_RTC_FREQ_REG_OFFSET); - uint64_t core_freq = clint_get_core_freq(rtc_freq, 2500); - rgp = (void *)(uintptr_t)*reg32(&__base_regs, CHESHIRE_SCRATCH_3_REG_OFFSET); - uint32_t read = *reg32(&__base_regs, CHESHIRE_SCRATCH_0_REG_OFFSET); - void *priv = (void *)(uintptr_t)*reg32(&__base_regs, CHESHIRE_SCRATCH_1_REG_OFFSET); - // Initialize UART - uart_init(&__base_uart, core_freq, __BOOT_BAUDRATE); + uint64_t hart_id = get_mhartid(); - // Print boot-critical cat, and also parameters - printf(" /\\___/\\ Boot mode: %d\r\n" - "( o o ) Real-time clock: %d Hz\r\n" - "( =^= ) System clock: %d Hz\r\n" - "( ) Read global ptr: 0x%08x\r\n" - "( P ) Read pointer: 0x%08x\r\n" - "( U # L ) Read argument: 0x%08x\r\n" - "( P )\r\n" - "( ))))))))))\r\n\r\n", - bootmode, rtc_freq, core_freq, rgp, read, priv); + if (hart_id == 0) { + // Get system parameters + uint32_t bootmode = *reg32(&__base_regs, CHESHIRE_BOOT_MODE_REG_OFFSET); + uint32_t rtc_freq 
= *reg32(&__base_regs, CHESHIRE_RTC_FREQ_REG_OFFSET); + uint64_t core_freq = clint_get_core_freq(rtc_freq, 2500); + rgp = (void *)(uintptr_t)*reg32(&__base_regs, CHESHIRE_SCRATCH_3_REG_OFFSET); + uint32_t read = *reg32(&__base_regs, CHESHIRE_SCRATCH_0_REG_OFFSET); + void *priv = (void *)(uintptr_t)*reg32(&__base_regs, CHESHIRE_SCRATCH_1_REG_OFFSET); - // If this is a GPT disk boot, load payload and device tree - if (read & 1) { - rread = (gpt_read_t)(void *)(uintptr_t)(read & ~1); - load_part_or_spin(priv, __BOOT_DTB_TYPE_GUID, __BOOT_ZSL_DTB, "device tree", 64); - load_part_or_spin(priv, __BOOT_FW_TYPE_GUID, __BOOT_ZSL_FW, "firmware", 8192); + // Initialize UART + uart_init(&__base_uart, core_freq, __BOOT_BAUDRATE); + + // Print boot-critical cat, and also parameters + printf(" /\\___/\\ Boot mode: %d\r\n" + "( o o ) Real-time clock: %d Hz\r\n" + "( =^= ) System clock: %d Hz\r\n" + "( ) Read global ptr: 0x%08x\r\n" + "( P ) Read pointer: 0x%08x\r\n" + "( U # L ) Read argument: 0x%08x\r\n" + "( P )\r\n" + "( ))))))))))\r\n\r\n", + bootmode, rtc_freq, core_freq, rgp, read, priv); + + // If this is a GPT disk boot, load payload and device tree + if (read & 1) { + rread = (gpt_read_t)(void *)(uintptr_t)(read & ~1); + load_part_or_spin(priv, __BOOT_DTB_TYPE_GUID, __BOOT_ZSL_DTB, "device tree", 64); + load_part_or_spin(priv, __BOOT_FW_TYPE_GUID, __BOOT_ZSL_FW, "firmware", 8192); + } + + // Launch payload + printf("[ZSL] Launch firmware at %lx with device tree at %lx\r\n", __BOOT_ZSL_FW, + __BOOT_ZSL_DTB); + smp_resume(); } - // Launch payload payload_t fw = __BOOT_ZSL_FW; - printf("[ZSL] Launch firmware at %lx with device tree at %lx\r\n", fw, __BOOT_ZSL_DTB); fencei(); - return fw(0, (uintptr_t)__BOOT_ZSL_DTB, 0); + return fw(hart_id, (uintptr_t)__BOOT_ZSL_DTB, 0); } // On trap, report relevant CSRs and spin diff --git a/sw/include/gpt.h b/sw/include/gpt.h index 0db228f91..aa62ca108 100644 --- a/sw/include/gpt.h +++ b/sw/include/gpt.h @@ -10,8 +10,6 @@ #include -extern 
int boot_next_stage(void *); - typedef int (*gpt_read_t)(void *priv, void *buf, uint64_t addr, uint64_t len); int gpt_check_signature(gpt_read_t read, void *priv); diff --git a/sw/include/smp.h b/sw/include/smp.h index d13d87579..e6870d793 100644 --- a/sw/include/smp.h +++ b/sw/include/smp.h @@ -1,49 +1,22 @@ -// Copyright 2023 ETH Zurich and University of Bologna. +// Copyright 2022 ETH Zurich and University of Bologna. // Licensed under the Apache License, Version 2.0, see LICENSE for details. // SPDX-License-Identifier: Apache-2.0 +// +// Emanuele Parisi +// Enrico Zelioli #pragma once -// The hart that non-SMP tests should run on -#ifndef NONSMP_HART -#define NONSMP_HART 0 -#endif +#include -// Let non-SMP hart continue and all other harts jump (and loop) in smp_resume -#define smp_pause(reg1, reg2) \ - li reg2, 0x8; \ - csrw mie, reg2; \ - li reg1, NONSMP_HART; \ - csrr reg2, mhartid; \ - bne reg1, reg2, 2f +// Abstract type for shared atomic semaphore backed by an uncached platform register. +typedef volatile uint32_t* smp_sema_t; -#define smp_resume(reg1, reg2, reg3) \ - la reg1, __base_clint; \ - la reg3, __base_regs; \ - lw reg3, 76(reg3); /* regs.NUM_INT_HARTS */ \ - slli reg3, reg3, 2; \ - add reg3, reg1, reg3; \ - 1:; \ - li reg2, 1; \ - sw reg2, 0(reg1); \ - addi reg1, reg1, 4; \ - blt reg1, reg3, 1b; \ - 2:; \ - wfi; \ - csrr reg2, mip; \ - andi reg2, reg2, 0x8; \ - beqz reg2, 2b; \ - la reg1, __base_clint; \ - csrr reg2, mhartid; \ - slli reg2, reg2, 2; \ - add reg2, reg2, reg1; \ - sw zero, 0(reg2); \ - la reg3, __base_regs; \ - lw reg3, 76(reg3); /* regs.NUM_INT_HARTS */ \ - slli reg3, reg3, 2; \ - add reg3, reg1, reg3; \ - 3:; \ - lw reg2, 0(reg1); \ - bnez reg2, 3b; \ - addi reg1, reg1, 4; \ - blt reg1, reg3, 3b +// Get the uncached atomic semaphore with the given ID. Returns NULL if the ID is out of range. +smp_sema_t smp_sema_get(int sid); + +// Wait for uncached atomic semaphore to reach a given value. 
+void smp_sema_wait(smp_sema_t sema, int value, uint64_t spin_period); + +// Shared barrier for all SMP cores. Uses a special reserved semaphore. +void smp_barrier(uint64_t spin_period); diff --git a/sw/include/util.h b/sw/include/util.h index 97463df40..5f10aa3a5 100644 --- a/sw/include/util.h +++ b/sw/include/util.h @@ -33,6 +33,10 @@ static inline void wfi() { asm volatile("wfi" ::: "memory"); } +static inline void nop() { + asm volatile("nop" ::: "memory"); +} + // Enables or disables M-mode timer interrupts. static inline void set_mtie(int enable) { if (enable) @@ -41,6 +45,29 @@ static inline void set_mtie(int enable) { asm volatile("csrc mie, %0" ::"r"(128) : "memory"); } +// Enables or disables M-mode software interrupts. +static inline void set_msie(int enable) { + if (enable) + asm volatile("csrs mie, %0" ::"r"(8) : "memory"); + else + asm volatile("csrc mie, %0" ::"r"(8) : "memory"); +} + +// Enables or disables M-mode software interrupts pending bit. +static inline void set_msip(int enable) { + if (enable) + asm volatile("csrs mip, %0" ::"r"(8) : "memory"); + else + asm volatile("csrc mip, %0" ::"r"(8) : "memory"); +} + +// Get M-mode software interrupts pending bit. +static inline uint64_t get_msip() { + uint64_t msip; + asm volatile("csrr %0, mip" : "=r"(msip)::"memory"); + return (msip & 0x8) >> 3; +} + // Enables or disables M-mode global interrupts. 
static inline void set_mie(int enable) { if (enable) @@ -49,6 +76,13 @@ static inline void set_mie(int enable) { asm volatile("csrci mstatus, 8" ::: "memory"); } +// Get hart id +static inline uint64_t get_mhartid() { + uint64_t mhartid; + asm volatile("csrr %0, mhartid" : "=r"(mhartid)::"memory"); + return mhartid; +} + // Get cycle count since reset static inline uint64_t get_mcycle() { uint64_t mcycle; diff --git a/sw/lib/crt0.S b/sw/lib/crt0.S index 5b68781c4..719aff299 100644 --- a/sw/lib/crt0.S +++ b/sw/lib/crt0.S @@ -5,6 +5,8 @@ // Nicole Narr // Christopher Reinwardt // Paul Scheffler +// Emanuele Parisi +// Enrico Zelioli .section .text._start @@ -15,28 +17,29 @@ _start: // Globally disable Machine and Supervisor interrupts csrrc x0, mstatus, 10 - // Park SMP harts - csrr t0, mhartid - beqz t0, 2f -1: - wfi - j 1b -2: - // Init stack and global pointer iff linked as nonzero - mv t1, sp - la t0, __stack_pointer$ - beqz t0, 1f - mv sp, t0 -1: .option push +_init_gp: + // Init global pointer iff linked as nonzero + .option push .option norelax la t0, __global_pointer$ - beqz t0, 1f + beqz t0, _init_sp mv gp, t0 -1: .option pop - + .option pop + +_init_sp: + // Init stack pointer iff linked as nonzero + mv t0, sp + la t1, __stack_pointer$ + beqz t1, _init_context + la t2, __stack_size$ + csrr t3, mhartid + mul t3, t3, t2 + sub sp, t1, t3 + +_init_context: // Store existing stack, global, return pointers on new stack addi sp, sp, -24 - sd t1, 0(sp) + sd t0, 0(sp) sd gp, 8(sp) sd ra, 16(sp) @@ -44,31 +47,6 @@ _start: la t0, _trap_handler_wrap csrrw x0, mtvec, t0 - // Zero the .bss section - la t0, __bss_start // t0 = bss start address - la t1, __bss_end // t1 = bss end address - sub t2, t1, t0 // t2 = #bytes to zero - li a0, 0 - -_zero_bss_loop: - addi t4, t2, -32 - blez t2, _fp_init // t2 <= 0? => No bss to zero - blt t4, x0, _zero_bss_rem // t4 < 0? 
=> Less than 4 words left - sd a0, 0(t0) - sd a0, 8(t0) - sd a0, 16(t0) - sd a0, 24(t0) - addi t2, t2, -32 - addi t0, t0, 32 - bgt t2, x0, _zero_bss_loop // Still more to go - j _fp_init - -_zero_bss_rem: - sb a0, 0(t0) - addi t2, t2, -1 - addi t0, t0, 1 - bgt t2, x0, _zero_bss_rem - _fp_init: // Set FS state to "Initial", enabling FP instructions li t1, 1 @@ -112,6 +90,40 @@ _fp_init: // Set FS state to "Clean" csrrc x0, mstatus, t1 +// Pause all harts except for hart 0 until a IPI is received. +// On wake-up every core resumes execution from the beginning of main(). +_smp_pause: + // Pause harts with hart ID != 0 + csrr t0, mhartid + bnez t0, _wait_for_ipi + +_zero_bss_init: + // Zero the .bss section + la t0, __bss_start // t0 = bss start address + la t1, __bss_end // t1 = bss end address + sub t2, t1, t0 // t2 = #bytes to zero + li a0, 0 + +_zero_bss_loop: + addi t4, t2, -32 + blez t2, _entry // t2 <= 0? => No bss to zero + blt t4, x0, _zero_bss_rem // t4 < 0? => Less than 4 words left + sd a0, 0(t0) + sd a0, 8(t0) + sd a0, 16(t0) + sd a0, 24(t0) + addi t2, t2, -32 + addi t0, t0, 32 + bgt t2, x0, _zero_bss_loop // Still more to go + j _entry + +_zero_bss_rem: + sb a0, 0(t0) + addi t2, t2, -1 + addi t0, t0, 1 + bgt t2, x0, _zero_bss_rem + +_entry: // Full fence, then jump to main fence call main @@ -132,6 +144,25 @@ _exit: // Hand over to whatever called us, passing return ret +_wait_for_ipi: + csrs mie, 0x8 // Enable M-mode software interrupts +1: + wfi + csrr t0, mip + andi t0, t0, 0x8 + beqz t0, 1b + + // Received IPI -> clear MIP and CLINT IPI register + csrc mip, 0x8 + la t0, __base_clint + csrr t1, mhartid + slli t1, t1, 2 + add t1, t1, t0 + sw zero, 0(t1) // *(CLINT_BASE + hart_id * 4) = 0 + + // Resume execution of non-smp harts at beginning of main + j _entry + // This wraps the C trap handler to save the (integer-only) caller-save // registers and perform a proper machine-mode exception return. 
.align 4 diff --git a/sw/lib/gpt.c b/sw/lib/gpt.c index f149ba87a..eae07cc4c 100644 --- a/sw/lib/gpt.c +++ b/sw/lib/gpt.c @@ -86,5 +86,5 @@ int gpt_boot_part_else_raw(gpt_read_t read, void *priv, void *code_buf, uint64_t *reg32(&__base_regs, CHESHIRE_SCRATCH_1_REG_OFFSET) = (uintptr_t)priv; *reg32(&__base_regs, CHESHIRE_SCRATCH_3_REG_OFFSET) = (uintptr_t)gprw(0); // Invoke code - return boot_next_stage((void *)code_buf); + return invoke((void *)code_buf); } diff --git a/sw/lib/smp.c b/sw/lib/smp.c new file mode 100644 index 000000000..d5193ce35 --- /dev/null +++ b/sw/lib/smp.c @@ -0,0 +1,61 @@ +// Copyright 2022 ETH Zurich and University of Bologna. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 +// +// Emanuele Parisi +// Enrico Zelioli +// Paul Scheffler + +#include "smp.h" + +#include "util.h" +#include "regs/cheshire.h" +#include "params.h" + +// Unpark specified harts and send them to a given destination. +// Note that these harts are *not* set up and `dst` should set them up accordingly. +// This is a runtime function and should not be used in general BMPs. +void _smp_unpark(void* dst) { + // Only hart 0 should unpark harts + if (get_mhartid()) return; + // Set destination address. + *reg32(&__base_regs, CHESHIRE_SCRATCH_5_REG_OFFSET) = (uintptr_t)(dst) >> 32; + *reg32(&__base_regs, CHESHIRE_SCRATCH_4_REG_OFFSET) = (uintptr_t)(dst); + // Set SMP resume enable + *reg32(&__base_regs, CHESHIRE_SCRATCH_6_REG_OFFSET) = 1; + // Wake up harts + uint32_t num_int_harts = *reg32(&__base_regs, CHESHIRE_NUM_INT_HARTS_REG_OFFSET); + for (uint32_t i = 1; i < num_int_harts; i++) { + // Set interrupt + *reg32(&__base_clint, i << 2) = 1; + // Wait for interrupt pending to be cleared + while (*reg32(&__base_clint, i << 2)) {} + } + // Unset SMP resume enable. 
+ *reg32(&__base_regs, CHESHIRE_SCRATCH_6_REG_OFFSET) = 0; +} + +// Park all active nonzero harts entering this function by sending them to the boot ROM. +// This is a runtime function and should not be used in general BMPs. +void _smp_park() { + if (get_mhartid() == 0) return; + invoke((void *)(uintptr_t)&__base_bootrom); +} + +smp_sema_t smp_sema_get(int sid) { + // We can only allocate as many semaphores as we have platform scratch registers. + if (sid < 0 || sid >= CHESHIRE_SCRATCH_MULTIREG_COUNT - 8) return NULL; + return &reg32(&__base_regs, CHESHIRE_SCRATCH_8_REG_OFFSET)[sid]; +} + +void smp_sema_wait(smp_sema_t sema, int value, uint64_t spin_period) { + while (*sema != value) { for (uint64_t i = 0; i < spin_period; ++i) nop(); } +} + +void smp_barrier(uint64_t spin_period) { + uint32_t num_int_harts = *reg32(&__base_regs, CHESHIRE_NUM_INT_HARTS_REG_OFFSET); + volatile uint32_t *sema = reg32(&__base_regs, CHESHIRE_SCRATCH_7_REG_OFFSET); + __atomic_fetch_add(sema, 1, __ATOMIC_RELAXED); + smp_sema_wait(sema, num_int_harts, spin_period); + *sema = 0; +} diff --git a/sw/link/common.ldh b/sw/link/common.ldh index d55a52498..7eb308c3e 100644 --- a/sw/link/common.ldh +++ b/sw/link/common.ldh @@ -29,6 +29,10 @@ SECTIONS { __global_pointer$ = ADDR(.misc) + SIZEOF(.misc) / 2; __stack_pointer$ = 0; + /* Stack size */ + /* Use a default stack size of 4KiB */ + __stack_size$ = 0x1000; + /* Further addresses */ __base_dma = 0x01000000; __base_bootrom = 0x02000000; diff --git a/sw/link/rom.ld b/sw/link/rom.ld index 29550ae2c..f386645c5 100644 --- a/sw/link/rom.ld +++ b/sw/link/rom.ld @@ -9,6 +9,8 @@ INCLUDE common.ldh SECTIONS { + __stack_pointer$ = ORIGIN(spm) + LENGTH(spm) - 8; + /* Put all LOAD sections in one contiguous output section */ .misc : { *(.text._start) diff --git a/sw/link/spm.ld b/sw/link/spm.ld index bedd94734..ff96285cc 100644 --- a/sw/link/spm.ld +++ b/sw/link/spm.ld @@ -9,6 +9,8 @@ INCLUDE common.ldh SECTIONS { + __stack_pointer$ = ORIGIN(spm) + 
LENGTH(spm) - 8; + .text : { *(.text._start) *(.text) diff --git a/sw/tests/smp_hello.c b/sw/tests/smp_hello.c new file mode 100644 index 000000000..2203b4a11 --- /dev/null +++ b/sw/tests/smp_hello.c @@ -0,0 +1,42 @@ +// Copyright 2023 ETH Zurich and University of Bologna. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 +// +// Nicole Narr +// Christopher Reinwardt +// Emanuele Parisi +// Enrico Zelioli +// Paul Scheffler +// +// Simple SMP Hello World. + +int smain(int hid, int hnum) { + // Define a reasonable semaphore spin period to reduce contention. + const uint64_t SP = 20; + + // Get and check SMP semaphore. + smp_sema_t sema = smp_sema_get(0); + CHECK_ASSERT(-1, sema != NULL) + + // Only hart 0 initializes UART and semaphore. + // A barrier ensures that all harts wait for initialization. + if (hid == 0) { + uint32_t rtc_freq = *reg32(&__base_regs, CHESHIRE_RTC_FREQ_REG_OFFSET); + uint64_t reset_freq = clint_get_core_freq(rtc_freq, 2500); + uart_init(&__base_uart, reset_freq, __BOOT_BAUDRATE); + *sema = 0; + } + smp_barrier(SP); + + // Let each hart print sequentially using a atomically incremented semaphore. + // Finally, use the semaphore to wait until the last hart has printed. + smp_sema_wait(sema, hid, SP); + printf("Hi from hart %d/%d\r\n", hid, hnum); + uart_write_flush(&__base_uart); + __atomic_fetch_add(sema, 1, __ATOMIC_RELAXED); + smp_sema_wait(sema, hnum, SP); + + // Double-check that our semaphore is now equal to the internal hart count. + // Only hart 0's return code is checked. 
+ return (*sema != hnum); +} diff --git a/target/sim/src/tb_cheshire_pkg.sv b/target/sim/src/tb_cheshire_pkg.sv index 8197b2dfa..7b5f882db 100644 --- a/target/sim/src/tb_cheshire_pkg.sv +++ b/target/sim/src/tb_cheshire_pkg.sv @@ -23,14 +23,22 @@ package tb_cheshire_pkg; return ret; endfunction + // A dedicated dual-core config + function automatic cheshire_cfg_t gen_cheshire_dualcore_cfg(); + cheshire_cfg_t ret = DefaultCfg; + ret.NumCores = 2; + return ret; + endfunction + // Number of Cheshire configurations - localparam int unsigned NumCheshireConfigs = 32'd3; + localparam int unsigned NumCheshireConfigs = 32'd4; // Assemble a configuration array indexed by a numeric parameter localparam cheshire_cfg_t [NumCheshireConfigs-1:0] TbCheshireConfigs = { - gen_cheshire_clic_cfg(), // 2: CLIC-enabled configuration - gen_cheshire_rt_cfg(), // 1: RT-enabled configuration - DefaultCfg // 0: Default configuration + gen_cheshire_dualcore_cfg(), // 3: Dual-core configuration + gen_cheshire_clic_cfg(), // 2: CLIC-enabled configuration + gen_cheshire_rt_cfg(), // 1: RT-enabled configuration + DefaultCfg // 0: Default configuration }; endpackage diff --git a/util/gen_bootrom.py b/util/gen_bootrom.py index 7d841f374..0f2f988fa 100755 --- a/util/gen_bootrom.py +++ b/util/gen_bootrom.py @@ -21,28 +21,28 @@ # Parse arguments. parser = argparse.ArgumentParser(description="Generate thestral_bootrom.sv") -parser.add_argument("BINARY", - help="Binary image for which to create a bootrom") parser.add_argument( - "--sv-module", - "-m", + "BINARY", + help="Binary image for which to create a bootrom" +) +parser.add_argument( + "--sv-module", "-m", metavar="BINARY", - help= - "Combinatorial SystemVerilog module with `reg_interface`. Name of the SystemVerilog module" + help="Combinatorial SystemVerilog module with `reg_interface`. Name of the SystemVerilog module" ) # Parse arguments. 
parser.add_argument( - "-p", - "--pad", + "-p", "--pad", action="store", default=0, type=int, - help= - "Pad to next power of two (if the value is not a power of two it is rounded appropriately)" + help="Pad to next power of two (if the value is not a power of two it is rounded appropriately)" +) +parser.add_argument( + "--arm-rom", + action="store_true", + help="Generate an Arm ROM code file." ) -parser.add_argument("--arm-rom", - action="store_true", - help="Generate am Arm ROM code file.") args = parser.parse_args() # Read the bootrom binary. @@ -127,7 +127,7 @@ def format_binary(binary): always_comb begin data_o = '0; unique case (word) - {words}; + {words}; default: data_o = '0; endcase end diff --git a/util/gen_pliccfg.py b/util/gen_pliccfg.py new file mode 100755 index 000000000..4552b5c21 --- /dev/null +++ b/util/gen_pliccfg.py @@ -0,0 +1,69 @@ +#!/usr/bin/env python3 +# +# Copyright 2022 ETH Zurich and University of Bologna. +# Licensed under the Apache License, Version 2.0, see LICENSE for details. +# SPDX-License-Identifier: Apache-2.0 +# +# Nicole Narr +# Christopher Reinwardt +# +# Fabian Schuiki +# Florian Zaruba +# Stefan Mach +# Thomas Benz +# Paul Scheffler +# Wolfgang Roenninger +# Gianna Paulin +# Tim Fischer +# Enrico Zelioli + +import os +import argparse + +# Parse arguments. +parser = argparse.ArgumentParser(description="Generate rv_plic.cfg.hjson") +parser.add_argument( + "num_cores", + type=int, + help="Number of attached cores." +) +parser.add_argument( + "num_srcs", + type=int, + help="Number of interrupt sources." +) +parser.add_argument( + "prio_width", + type=int, + help="Bit width of interrupt priority." +) +args = parser.parse_args() + +# We need *two targets* per hart: M and S modes +num_targets = 2 * args.num_cores + +# Emit the code. +print(""" +// Copyright 2022 ETH Zurich and University of Bologna. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. 
+// SPDX-License-Identifier: Apache-2.0 +// +// Paul Scheffler +// Enrico Zelioli +// AUTOMATICALLY GENERATED by {script}; edit the script instead. + +{{ + instance_name: \"rv_plic\", + param_values: {{ + src: {num_srcs}, + target: {num_targets}, + prio: {prio_width}, + nonstd_regs: 0 + }}, +}} + """.strip().format( + script=os.path.basename(__file__), + num_srcs=args.num_srcs, + num_targets=num_targets, + prio_width=args.prio_width +))