diff --git a/CHANGELOG.md b/CHANGELOG.md index 6f1fc9094..22db89141 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -29,6 +29,7 @@ mimpid = 0x01040312 -> Version 01.04.03.12 -> v1.4.3.12 | Date | Version | Comment | Ticket | |:----:|:-------:|:--------|:------:| +| 04.01.2025 | 1.10.8.8 | :sparkles: add inter-core communication (ICC) for the SMP dual-core setup | [#1142](https://github.com/stnolting/neorv32/pull/1142) | | 03.01.2025 | 1.10.8.7 | :warning: :sparkles: replace `Zalrsc` ISA extensions (reservation-set operations) by `Zaamo` ISA extension (atomic read-modify-write operations) | [#1141](https://github.com/stnolting/neorv32/pull/1141) | | 01.01.2025 | 1.10.8.6 | :sparkles: :test_tube: add smp dual-core option | [#1135](https://github.com/stnolting/neorv32/pull/1135) | | 29.12.2024 | 1.10.8.5 | :test_tube: add multi-hart support to debug module | [#1132](https://github.com/stnolting/neorv32/pull/1132) | diff --git a/README.md b/README.md index 7bcb79f35..046312da5 100644 --- a/README.md +++ b/README.md @@ -96,7 +96,8 @@ setup according to your needs. Note that all of the following SoC modules are en **CPU Core** * [![RISCV-ARCHID](https://img.shields.io/badge/RISC--V%20Architecture%20ID-19-000000.svg?longCache=true&style=flat-square&logo=riscv&colorA=273274&colorB=fbb517)](https://github.com/riscv/riscv-isa-manual/blob/master/marchid.md) -* RISC-V 32-bit little-endian single- or SMP-dual-core pipelined/multi-cycle modified Harvard architecture +* RISC-V 32-bit little-endian pipelined/multi-cycle modified Harvard architecture +* Single-core or SMP dual-core configuration (including low-latency inter-core communication) * configurable [instruction sets and extensions](https://stnolting.github.io/neorv32/#_instruction_sets_and_extensions): \ `RV32` diff --git a/docs/datasheet/cpu.adoc b/docs/datasheet/cpu.adoc index ded6effe6..780c7fafd 100644 --- a/docs/datasheet/cpu.adoc +++ b/docs/datasheet/cpu.adoc @@ -62,25 +62,33 @@ type of all signals is _std_ulogic_ or _std_ulogic_vector_, respectively. The "D direction as seen from the CPU. .NEORV32 CPU Signal List -[cols="<3,^3,^1,<5"] +[cols="^2,^2,^1,<8"] [options="header", grid="rows"] |======================= | Signal | Width/Type | Dir | Description 4+^| **Global Signals** -| `clk_i` | 1 | in | Global clock line, all registers triggering on rising edge. -| `rstn_i` | 1 | in | Global reset, low-active. +| `clk_i` | 1 | in | Global clock line, all registers triggering on rising edge. +| `rstn_i` | 1 | in | Global reset, low-active. 4+^| **Interrupts (<<_traps_exceptions_and_interrupts>>)** -| `msi_i` | 1 | in | RISC-V machine software interrupt. -| `mei_i` | 1 | in | RISC-V machine external interrupt. -| `mti_i` | 1 | in | RISC-V machine timer interrupt. -| `firq_i` | 16 | in | Custom fast interrupt request signals. -| `dbi_i` | 1 | in | Request CPU to halt and enter debug mode (RISC-V <<_on_chip_debugger_ocd>>). +| `msi_i` | 1 | in | RISC-V machine software interrupt. +| `mei_i` | 1 | in | RISC-V machine external interrupt. +| `mti_i` | 1 | in | RISC-V machine timer interrupt. +| `firq_i` | 16 | in | Custom fast interrupt request signals. +| `dbi_i` | 1 | in | Request CPU to halt and enter debug mode (RISC-V <<_on_chip_debugger_ocd>>). 4+^| **Instruction <<_bus_interface>>** -| `ibus_req_o` | `bus_req_t` | out | Instruction fetch bus request. -| `ibus_rsp_i` | `bus_rsp_t` | in | Instruction fetch bus response. +| `ibus_req_o` | `bus_req_t` | out | Instruction fetch bus request. +| `ibus_rsp_i` | `bus_rsp_t` | in | Instruction fetch bus response. 4+^| **Data <<_bus_interface>>** -| `dbus_req_o` | `bus_req_t` | out | Data access (load/store) bus request. -| `dbus_rsp_i` | `bus_rsp_t` | in | Data access (load/store) bus response. +| `dbus_req_o` | `bus_req_t` | out | Data access (load/store) bus request. +| `dbus_rsp_i` | `bus_rsp_t` | in | Data access (load/store) bus response. +4+^| **<<_inter_core_communication_icc>> TX links** +| `icc_tx_rdy_o` | 2 | out | Data available for cores `0..1`. +| `icc_tx_ack_i` | 2 | in | Read-enable from cores `0..1`. +| `icc_tx_dat_o` | 2*32 | out | Data for cores `0..1`. +4+^| **<<_inter_core_communication_icc>> RX links** +| `icc_rx_rdy_i` | 2 | in | Data available from cores `0..1`. +| `icc_rx_ack_o` | 2 | out | Read-enable for cores `0..1`. +| `icc_rx_dat_i` | 2*32 | in | Data from cores `0..1`. |======================= .Bus Interface Protocol @@ -109,8 +117,9 @@ The generic type "suv(x:y)" represents a `std_ulogic_vector(x downto y)`. [options="header",grid="rows"] |======================= | Name | Type | Description -| `HART_ID` | natural | Value for the <<_mhartid>> CSR. -| `VENDOR_ID` | suv(31:0) | Value for the <<_mvendorid>> CSR. +| `HART_ID` | natural | ID of the core (for <<_mhartid>> CSR). +| `NUM_HARTS` | natural | Total number of cores in the system. +| `VENDOR_ID` | suv(31:0) | Vendor identification (for <<_mvendorid>> CSR). | `BOOT_ADDR` | suv(31:0) | CPU reset address. See section <<_address_space>>. | `DEBUG_PARK_ADDR` | suv(31:0) | "Park loop" entry address for the <<_on_chip_debugger_ocd>>, has to be 4-byte aligned. | `DEBUG_EXC_ADDR` | suv(31:0) | "Exception" entry address for the <<_on_chip_debugger_ocd>>, has to be 4-byte aligned. diff --git a/docs/datasheet/cpu_csr.adoc b/docs/datasheet/cpu_csr.adoc index de64b19e6..48f137b4e 100644 --- a/docs/datasheet/cpu_csr.adoc +++ b/docs/datasheet/cpu_csr.adoc @@ -57,8 +57,6 @@ to check if the targeted bits can actually be modified. | 0x7b0 | <<_dcsr>> | - | DRW | Debug control and status register | 0x7b1 | <<_dpc>> | - | DRW | Debug program counter | 0x7b2 | <<_dscratch0>> | - | DRW | Debug scratch register 0 -5+^| **<<_custom_functions_unit_cfu_csrs>>** -| 0x800 .. 0x803 | <<_cfureg, `cfureg0`>> .. <<_cfureg, `cfureg3`>> | `CSR_CFUCREG0` .. `CSR_CFUCREG3` | URW | Custom CFU registers 0 to 3 5+^| **<<_machine_counter_and_timer_csrs>>** | 0xb00 | <<_mcycleh, `mcycle`>> | `CSR_MCYCLE` | MRW | Machine cycle counter low word | 0xb02 | <<_minstreth, `minstret`>> | `CSR_MINSTRET` | MRW | Machine instruction-retired counter low word @@ -79,7 +77,11 @@ to check if the targeted bits can actually be modified. | 0xf14 | <<_mhartid>> | `CSR_MHARTID` | MRO | Machine hardware thread ID | 0xf15 | <<_mconfigptr>> | `CSR_MCONFIGPTR` | MRO | Machine configuration pointer register 5+^| **<<_neorv32_specific_csrs>>** -| 0xfc0 | <<_mxisa>> | `CSR_MXISA` | MRO | NEORV32-specific "eXtended" machine CPU ISA and extensions +| 0xbc0 | <<_mxiccrxd>> | `CSR_MXICCRXD` | MRW | ICC RX data +| 0xbc1 | <<_mxicctxd>> | `CSR_MXICCTXD` | MRW | ICC TX data +| 0xbc2 .. 0xbc3 | <<_mxiccsr, `mxiccsr0`>> .. <<_mxiccsr, `mxiccsr0`>> | `CSR_MXICCSR0` .. `CSR_MXICCSR3` | MRW | ICC control and status +| 0x800 .. 0x803 | <<_cfureg, `cfureg0`>> .. <<_cfureg, `cfureg3`>> | `CSR_CFUCREG0` .. `CSR_CFUCREG3` | URW | Custom CFU registers 0 to 3 +| 0xfc0 | <<_mxisa>> | `CSR_MXISA` | MRO | Extended machine CPU ISA and extensions |======================= @@ -595,28 +597,6 @@ implementation of the according modes. |======================= -<<< -// #################################################################################################################### -:sectnums: -==== Custom Functions Unit (CFU) CSRs - -[discrete] -===== **`cfureg`** - -[cols="<1,<8"] -[frame="topbot",grid="none"] -|======================= -| Name | Custom (user-defined) CFU CSRs -| Address | `0x800` (`cfureg0`) -| | `0x801` (`cfureg1`) -| | `0x802` (`cfureg2`) -| | `0x803` (`cfureg3`) -| Reset value | `0x00000000` -| ISA | `Zicsr` & `Zxcfu` -| Description | User-defined CSRs to be used within the <<_custom_functions_unit_cfu>>. -|======================= - - <<< // #################################################################################################################### :sectnums: @@ -929,12 +909,92 @@ core's hart ID is unique starting at 0 for the first core. :sectnums: ==== NEORV32-Specific CSRs +.RISC-V-Compliant Mapping [NOTE] -All NEORV32-specific CSRs are mapped to addresses that are explicitly reserved for custom **Machine-Mode, read-only** CSRs -(assured by the RISC-V privileged specifications). Hence, these CSRs can only be accessed when in machine-mode. Any access -outside of machine-mode will raise an illegal instruction exception. +All NEORV32-specific CSRs are mapped to addresses that are explicitly reserved for +custom/implementation-specific use (assured by the RISC-V privileged specifications). + + +[discrete] +===== **`cfureg`** + +[cols="<1,<8"] +[frame="topbot",grid="none"] +|======================= +| Name | Custom (user-defined) CFU CSRs +| Address | `0x800` (`cfureg0`) +| | `0x801` (`cfureg1`) +| | `0x802` (`cfureg2`) +| | `0x803` (`cfureg3`) +| Reset value | `0x00000000` +| ISA | `Zicsr` & `Zxcfu` +| Description | User-defined CSRs to be used within the <<_custom_functions_unit_cfu>>. +|======================= + + +{empty} + +[discrete] +===== **`mxiccrxd`** + +[cols="<1,<8"] +[frame="topbot",grid="none"] +|======================= +| Name | <<_inter_core_communication_icc>> RX data +| Address | `0xbc0` +| Reset value | `0x00000000` +| ISA | `Zicsr` & `X` +| Description | RX data from selected link. Buffered by a 4-entries-deep and 32-bit wide FIFO. +This CSR is hardwired to all-zero if there is just a single CPU core in the system. +|======================= + + +{empty} + +[discrete] +===== **`mxicctxd`** + +[cols="<1,<8"] +[frame="topbot",grid="none"] +|======================= +| Name | <<_inter_core_communication_icc>> TX data +| Address | `0xbc1` +| Reset value | `0x00000000` +| ISA | `Zicsr` & `X` +| Description | TX data for selected link. Buffered by a 4-entries-deep and 32-bit wide FIFO. +This CSR is hardwired to all-zero if there is just a single CPU core in the system. +|======================= +{empty} + +[discrete] +===== **`mxiccsr`** +[cols="<1,<8"] +[frame="topbot",grid="none"] +|======================= +| Name | <<_inter_core_communication_icc>> control and status +| Address | `0xbc2` (`mxiccsr0`) +| | `0xbc3` (`mxiccsr1`) +| Reset value | `0x40000000` +| ISA | `Zicsr` & `X` +| Description | Link selection and status. Note that `mxiccsr1` is just a mirrored copy of `mxiccsr0`. +This CSR is hardwired to all-zero if there is just a single CPU core in the system. +|======================= + +.`mxiccsr` CSR Bits +[cols="^1,^2,^1,<5"] +[options="header",grid="rows"] +|======================= +| Bit | Name [C] | R/W | Description +| 1:0 | `CSR_MXICCSR_LINK_MSB : CSR_MXICCSR_LINK_LSB` | r/w | Link select. The value in this memory corresponds +to the ID of the core to which a connection is to be established via a link. The ICC data registers <<_mxiccrxd>> +and <<_mxicctxd>> will only access the queue FIFOs of the selected link. Note that only bit 0 is writable. Bit 1 +is hardwaired to zero. +| 29:2 | - | r/- | Reserved; hardwired to zero. +| 30 | `CSR_MXICCSR_TX_FREE` | r/- | Set if there is free space for TX data for the selected link. +| 31 | `CSR_MXICCSR_RX_AVAIL` | r/- | Set if RX data from the selected link is available. +|======================= + + +{empty} + [discrete] ===== **`mxisa`** @@ -946,7 +1006,7 @@ outside of machine-mode will raise an illegal instruction exception. | Reset value | `DEFINED` | ISA | `Zicsr` & `X` | Description | The `mxisa` CSRs is a NEORV32-specific read-only CSR that helps machine-mode software to -discover ISA sub-extensions and CPU configuration options +discover additional ISA (sub-)extensions and CPU configuration options. |======================= .`mxisa` CSR Bits @@ -985,5 +1045,5 @@ discover ISA sub-extensions and CPU configuration options | 28 | `CSR_MXISA_RFHWRST` | r/- | full hardware reset of register file available when set (`CPU_RF_HW_RST_EN`), see <<_cpu_tuning_options>> | 29 | `CSR_MXISA_FASTMUL` | r/- | fast multiplication available when set (`CPU_FAST_MUL_EN`), see <<_cpu_tuning_options>> | 30 | `CSR_MXISA_FASTSHIFT` | r/- | fast shifts available when set (`CPU_FAST_SHIFT_EN`), see <<_cpu_tuning_options>> -| 31 | `CSR_MXISA_IS_SIM` | r/- | set if CPU is being **simulated** (⚠️ not guaranteed) +| 31 | `CSR_MXISA_IS_SIM` | r/- | set if CPU is being **simulated** |======================= diff --git a/docs/datasheet/cpu_dual_core.adoc b/docs/datasheet/cpu_dual_core.adoc index 1e6acec4a..a70e0ed0f 100644 --- a/docs/datasheet/cpu_dual_core.adoc +++ b/docs/datasheet/cpu_dual_core.adoc @@ -1,9 +1,9 @@ :sectnums: === Dual-Core Configuration -.Hardware Requirements -[IMPORTANT] -The SMP dual-core configuration requires the <<_core_local_interruptor_clint>> to be implemented. +.Dual-Core Example +[TIP] +A simple dual-core example program can be found in `sw/example/demo_dual_core`. Optionally, the CPU core can be implemented as **symmetric multiprocessing (SMP) dual-core** system. This dual-core configuration is enabled by the `DUAL_CORE_EN` <<_processor_top_entity_generics, top generic>>. @@ -14,10 +14,10 @@ of both core complexes are further switched into a single system bus using a rou image::smp_system.png[align=center] -Both CPU cores are fully identical and use the same configuration provided by the according -<<_processor_top_entity_generics, top generics>>. However, each core can be identified by the according -"hart ID" that can be retrieved from the <<_mhartid>> CSR. CPU core 0 (the _primary_ core) has `mhartid = 0` -while core 1 (the _secondary_ core) has `mhartid = 1`. +Both CPU cores are fully identical and use the same ISA, tuning and cache configurations provided by the +according <<_processor_top_entity_generics, top generics>>. However, each core can be identified by the +according "hart ID" that can be retrieved from the <<_mhartid>> CSR. CPU core 0 (the _primary_ core) has +`mhartid = 0` while core 1 (the _secondary_ core) has `mhartid = 1`. The following table summarizes the most important aspects when using the dual-core configuration. @@ -41,32 +41,63 @@ while the top of stack of core 1 has to be explicitly defined by core 0 (see <<_ cores share the same heap, `.data` and `.bss` sections. | **Constructors and destructors** | Constructors and destructors are executed on core 0 only. (see ) +| **Core communication** | See section <<_inter_core_communication_icc>>. | **Bootloader** | Only core 0 will boot and execute the bootloader while core 1 is held in standby. -| **Booting** | See next section <<_dual_core_boot>>. +| **Booting** | See section <<_dual_core_boot>>. |======================= -.Dual-Core Example + +==== Inter-Core Communication (ICC) + +Both cores can communicate with each other via a direct point-to-point connection based on FIFO-like message +queues. These direct communication links are faster (in terms of latency) compared to a memory-mapped or +shared-memory communication. Additionally, communication using these links is guaranteed to be atomic. + +The inter-core communication (ICC) module is implemented as dedicated hardware module within each CPU core +(VHDL file `rtl/core/neorv32_cpu_icc.vhd`). This module is automatically included if the dual-core option +is enabled. Each core provides a 32-bit wide and 4 entries deep FIFO for sending data to the other core. +Hence, there are two FIFOs: one for sending data from core 0 to core 1 and another one for sending data the +opposite way. + +The ICC communication links are accessed via NEORV32-specific CSRs. Hence, those FIFOs are accessible only +by the CPU core itself and cannot be accessed by the DMA or any other CPU core. In total, three CSRs are +provided to handle communications: + +The <<_mxiccsr>> is used to select the core with which to communicate. In the dual-core configuration core 1 +can only select core 0 and vice versa. The core selection in this register allows access to the according +message FIFOs via the two other CSRs. Additionally, the CSR provides status flags (TX FIFO data available; +RX FIFO free space) related to the selected communication link. + +The <<_mxiccrxd>> and <<_mxicctxd>> CSRs are used for the actual data read and write operations. Writing data +to <<<<_mxicctxd>>> will send to the message queue of the core selected by <<_mxiccsr>>. Conversely, reading +data from <<_mxiccrxd>> will return data received from the core selected by <<_mxiccsr>>. + +The ICC FIFOs do not provide any interrupt capabilities. Software is expected to use the machine-software +interrupt of the receiving core (provided by the <<_core_local_interruptor_clint>>) to inform it about +available messages. + +.ICC Software API [TIP] -A simple dual-core example setup / test program can be found in `sw/example/demo_dual_core`. +The NEORV32 software framework provides API wrappers to abstract inter-core communication: +`sw/lib/include/noevr32_smp.h` ==== Dual-Core Boot -After reset both cores start booting. However, core 1 will always (regardless of the boot configuration) enter -sleep mode inside the default <<_start_up_code_crt0>> that is linked with any compiled application. The primary -core (core 0) will continue booting executing either the <<_bootloader>> or the pre-installed image in the -internal instruction memory (depending on the <<_boot_configuration>>). +After reset, both cores start booting. However, core 1 will - regardless of the <<_boot_configuration>> - always +enter <<_sleep_mode>> right inside the default <<_start_up_code_crt0>> that is linked with any compiled +application. The primary core (core 0) will continue booting, executing either the <<_bootloader>> or the +pre-installed image from the internal instruction memory (depending on the boot configuration). -To boot-up core 1 the primary core has to use a special library function provided by the NEORV32 runtime -environment (RTE): +To boot-up core 1, the primary core has to use a special library function provided by the NEORV32 software framework: .CPU Core 1 launch function prototype (note that this function can only be executed on core 0) [source,c] ---- -int neorv32_rte_smp_launch(void (*entry_point)(void), uint8_t* stack_memory, size_t stack_size_bytes); +int neorv32_smp_launch(int hart_id, void (*entry_point)(void), uint8_t* stack_memory, size_t stack_size_bytes); ---- -When executed, core 0 will populate a configuration structure in main memory that contain the entry point +When executed, core 0 use the <<_inter_core_communication_icc>> to send launch data that includes the entry point for core 1 (via `entry_point`) and the actual stack configuration (via `stack_memory` and `stack_size_bytes`). .Core 1 Stack Memory @@ -78,5 +109,5 @@ boundary.´ After that, the primary core triggers the _machine software interrupt_ of core 1 using the <<_core_local_interruptor_clint>>. Core 1 wakes up from sleep mode, consumes the configuration structure and -finally starts executing at the provided entry point. When `neorv32_rte_smp_launch()` returns (with no error +finally starts executing at the provided entry point. When `neorv32_smp_launch()` returns (with no error code) the secondary core is online and running. diff --git a/docs/datasheet/overview.adoc b/docs/datasheet/overview.adoc index b17477bbb..c58f69a3b 100644 --- a/docs/datasheet/overview.adoc +++ b/docs/datasheet/overview.adoc @@ -194,6 +194,7 @@ neorv32_top.vhd - NEORV32 PROCESSOR/SOC TOP ENTITY ││└neorv32_cpu_cp_shifter.vhd - Bit-shift co-processor (base ISA) │├neorv32_cpu_control.vhd - CPU control, exception system and CSRs ││└neorv32_cpu_decompressor.vhd - Compressed instructions decoder (C ext.) +│├neorv32_cpu_icc.vhd - Inter-core communication unit │├neorv32_cpu_lsu.vhd - Load/store unit │├neorv32_cpu_pmp.vhd - Physical memory protection unit (Smpmp ext.) │└neorv32_cpu_regfile.vhd - Data register file diff --git a/docs/datasheet/software.adoc b/docs/datasheet/software.adoc index ad58c249a..8efccb943 100644 --- a/docs/datasheet/software.adoc +++ b/docs/datasheet/software.adoc @@ -80,6 +80,7 @@ The NEORV32 HAL consists of the following files. | `neorv32_rte.c` | `neorv32_rte.h` | <<_neorv32_runtime_environment>> | `neorv32_sdi.c` | `neorv32_sdi.h` | <<_serial_data_interface_controller_sdi>> HAL | `neorv32_slink.c` | `neorv32_slink.h` | <<_stream_link_interface_slink>> HAL +| `neorv32_smp.c` | `neorv32_smp.h` | HAL for the SMP <<_dual_core_configuration>> | `neorv32_spi.c` | `neorv32_spi.h` | <<_serial_peripheral_interface_controller_spi>> HAL | `neorv32_sysinfo.c` | `neorv32_sysinfo.h` | <<_system_configuration_information_memory_sysinfo>> HAL | `neorv32_trng.c` | `neorv32_trng.h` | <<_true_random_number_generator_trng>> HAL diff --git a/docs/figures/smp_system.png b/docs/figures/smp_system.png index a55182cd3..2c91c0636 100644 Binary files a/docs/figures/smp_system.png and b/docs/figures/smp_system.png differ diff --git a/rtl/core/neorv32_application_image.vhd b/rtl/core/neorv32_application_image.vhd index df8c1862a..74b03df67 100644 --- a/rtl/core/neorv32_application_image.vhd +++ b/rtl/core/neorv32_application_image.vhd @@ -1,7 +1,7 @@ -- The NEORV32 RISC-V Processor - github.com/stnolting/neorv32 -- Auto-generated memory initialization image (for internal IMEM) -- Source: demo_blink_led/build/main.bin --- Built: 02.01.2025 14:38:10 +-- Built: 04.01.2025 22:17:35 library ieee; use ieee.std_logic_1164.all; @@ -11,7 +11,7 @@ use neorv32.neorv32_package.all; package neorv32_application_image is -constant application_init_size_c : natural := 1260; -- bytes +constant application_init_size_c : natural := 1248; -- bytes constant application_init_image_c : mem32_t := ( x"f14020f3", x"80002217", @@ -23,11 +23,11 @@ x"000022b7", x"80028293", x"30029073", x"00000317", -x"1b030313", +x"1a430313", x"30531073", x"30401073", x"00000397", -x"4b838393", +x"4ac38393", x"80000417", x"fc440413", x"80000497", @@ -37,7 +37,7 @@ x"fb450513", x"80000597", x"fac58593", x"00000617", -x"1b860613", +x"1ac60613", x"00000693", x"00000713", x"00000793", @@ -57,32 +57,31 @@ x"00000e13", x"00000e93", x"00000f13", x"00000f93", -x"06008463", +x"06008263", x"00000797", x"01878793", x"30579073", x"30446073", x"30046073", -x"1040006f", +x"0f80006f", +x"fff40737", +x"00209793", +x"00f70733", +x"00072023", x"34202773", x"800007b7", x"00378793", -x"00f70463", -x"30200073", -x"0ff0000f", -x"fff446b7", -x"0086a683", -x"1337d737", -x"afe70713", -x"0006a783", -x"00f70463", +x"02f71463", +x"bc201073", +x"bc0026f3", +x"ffab4737", +x"32170713", +x"00d71a63", +x"bc171073", +x"bc002173", +x"bc002673", +x"0580006f", x"30200073", -x"0086a103", -x"00c6a603", -x"0006a023", -x"fff40737", -x"00072223", -x"0540006f", x"00838e63", x"00945c63", x"0003a783", @@ -95,9 +94,9 @@ x"00052023", x"00450513", x"ff5ff06f", x"00000417", -x"3a840413", +x"3a040413", x"00000497", -x"3a048493", +x"39848493", x"00945a63", x"00042083", x"000080e7", @@ -106,8 +105,6 @@ x"ff1ff06f", x"0ff0000f", x"0000100f", x"30029073", -x"34201073", -x"34101073", x"00000513", x"00000593", x"000600e7", diff --git a/rtl/core/neorv32_bootloader_image.vhd b/rtl/core/neorv32_bootloader_image.vhd index 165e72af4..6b5b52502 100644 --- a/rtl/core/neorv32_bootloader_image.vhd +++ b/rtl/core/neorv32_bootloader_image.vhd @@ -1,7 +1,7 @@ -- The NEORV32 RISC-V Processor - github.com/stnolting/neorv32 -- Auto-generated memory initialization image (for internal BOOTROM) -- Source: bootloader/build/main.bin --- Built: 02.01.2025 14:38:30 +-- Built: 04.01.2025 22:18:00 library ieee; use ieee.std_logic_1164.all; @@ -11,7 +11,7 @@ use neorv32.neorv32_package.all; package neorv32_bootloader_image is -constant bootloader_init_size_c : natural := 4072; -- bytes +constant bootloader_init_size_c : natural := 4060; -- bytes constant bootloader_init_image_c : mem32_t := ( x"f14020f3", x"80200217", @@ -23,11 +23,11 @@ x"000022b7", x"80028293", x"30029073", x"00000317", -x"12030313", +x"11430313", x"30531073", x"30401073", x"00001397", -x"fb438393", +x"fa838393", x"80200417", x"fc440413", x"80200497", @@ -37,36 +37,35 @@ x"fb450513", x"80200597", x"fb458593", x"00000617", -x"12860613", +x"11c60613", x"00000693", x"00000713", x"00000793", -x"06008463", +x"06008263", x"00000797", x"01878793", x"30579073", x"30446073", x"30046073", -x"0b40006f", +x"0a80006f", +x"fff40737", +x"00209793", +x"00f70733", +x"00072023", x"34202773", x"800007b7", x"00378793", -x"00f70463", -x"30200073", -x"0ff0000f", -x"fff446b7", -x"0086a683", -x"1337d737", -x"afe70713", -x"0006a783", -x"00f70463", +x"02f71463", +x"bc201073", +x"bc0026f3", +x"ffab4737", +x"32170713", +x"00d71a63", +x"bc171073", +x"bc002173", +x"bc002673", +x"0340006f", x"30200073", -x"0086a103", -x"00c6a603", -x"0006a023", -x"fff40737", -x"00072223", -x"0300006f", x"00838e63", x"00945c63", x"0003a783", @@ -81,8 +80,6 @@ x"ff5ff06f", x"0ff0000f", x"0000100f", x"30029073", -x"34201073", -x"34101073", x"00000513", x"00000593", x"000600e7", @@ -119,7 +116,7 @@ x"ffe017b7", x"00112823", x"00812623", x"00912423", -x"a5c78793", +x"a5078793", x"30579073", x"fffe07b7", x"0087a783", @@ -203,54 +200,54 @@ x"30479073", x"00800793", x"3007a073", x"ffe01537", -x"dd850513", +x"dcc50513", x"6b4000ef", x"f1302573", x"648000ef", x"ffe01537", -x"e1050513", +x"e0450513", x"6a0000ef", x"fffe0437", x"00042503", x"630000ef", x"ffe01537", -x"e1850513", +x"e0c50513", x"688000ef", x"30102573", x"61c000ef", x"ffe01537", -x"e2050513", +x"e1450513", x"674000ef", x"fc002573", x"608000ef", x"ffe01537", -x"e2850513", +x"e1c50513", x"660000ef", x"00842503", x"00100493", x"5f0000ef", x"ffe01537", -x"e3050513", +x"e2450513", x"648000ef", x"00444503", x"00a49533", x"ffc57513", x"5d4000ef", x"ffe01537", -x"e3850513", +x"e2c50513", x"62c000ef", x"00544783", x"00f49533", x"ffc57513", x"5b8000ef", x"ffe014b7", -x"dd448513", +x"dc848513", x"610000ef", x"00842783", x"00f79713", x"06075063", x"ffe01537", -x"e4050513", +x"e3450513", x"5f8000ef", x"2e0000ef", x"00042703", @@ -270,13 +267,13 @@ x"00f69613", x"0a065463", x"ffe01537", x"00472783", -x"e6c50513", +x"e6050513", x"5a8000ef", x"ffe017b7", -x"e7878513", +x"e6c78513", x"59c000ef", x"ffe01537", -x"ef850513", +x"eec50513", x"590000ef", x"fff507b7", x"0007a703", @@ -286,14 +283,14 @@ x"0047a403", x"0ff47413", x"00040513", x"4f4000ef", -x"dd448513", +x"dc848513", x"568000ef", x"f9b40413", x"0ff47413", x"01300793", x"2287e863", x"ffe017b7", -x"f7478793", +x"f6878793", x"00241413", x"00f40433", x"00042783", @@ -317,7 +314,7 @@ x"00b41463", x"f2f564e3", x"00100513", x"6f8000ef", -x"dd448513", +x"dc848513", x"4ec000ef", x"00000513", x"031000ef", @@ -330,20 +327,20 @@ x"800007b7", x"0047a403", x"00041863", x"ffe01537", -x"f0050513", +x"ef450513", x"f1dff06f", x"ffe01537", -x"f1c50513", +x"f1050513", x"4ac000ef", x"00040513", x"440000ef", x"ffe01537", -x"f2450513", +x"f1850513", x"498000ef", x"00400537", x"42c000ef", x"ffe01537", -x"f3c50513", +x"f3050513", x"484000ef", x"fff507b7", x"0007a703", @@ -361,7 +358,7 @@ x"00050663", x"00300513", x"498000ef", x"ffe01537", -x"f4850513", +x"f3c50513", x"43c000ef", x"01045793", x"00178793", @@ -399,7 +396,7 @@ x"00850513", x"40e005b3", x"2a8000ef", x"ffe01537", -x"dbc50513", +x"db050513", x"e09ff06f", x"00f12223", x"1ec000ef", @@ -425,14 +422,14 @@ x"800007b7", x"0047a783", x"e60790e3", x"ffe01537", -x"f5850513", +x"f4c50513", x"da1ff06f", x"fffe07b7", x"0087a783", x"2007f793", x"00079863", x"ffe01537", -x"f6850513", +x"f5c50513", x"d85ff06f", x"00100513", x"e35ff06f", @@ -621,7 +618,7 @@ x"01c00493", x"00945733", x"ffe017b7", x"00f77713", -x"fc478793", +x"fb878793", x"00e787b3", x"0007c503", x"ffc48493", @@ -657,13 +654,13 @@ x"ff810113", x"00812023", x"00050413", x"ffe01537", -x"d6450513", +x"d5850513", x"00112223", x"f99ff0ef", x"00241793", x"ffe01537", x"008787b3", -x"fd450513", +x"fc850513", x"00f50533", x"f81ff0ef", x"00800793", @@ -744,7 +741,7 @@ x"0087a783", x"00e79713", x"04075263", x"ffe01537", -x"d6c50513", +x"d6050513", x"e41ff0ef", x"00048513", x"dd5ff0ef", @@ -757,7 +754,7 @@ x"da5ff0ef", x"34302573", x"db5ff0ef", x"ffe01537", -x"dd450513", +x"dc850513", x"e0dff0ef", x"00440413", x"34141073", @@ -772,7 +769,7 @@ x"00a12023", x"00f4a023", x"02051863", x"ffe01537", -x"d7850513", +x"d6c50513", x"dd1ff0ef", x"00012503", x"004005b7", @@ -783,12 +780,12 @@ x"04f50863", x"00000513", x"0380006f", x"ffe01537", -x"d9850513", +x"d8c50513", x"da5ff0ef", x"00400537", x"d39ff0ef", x"ffe01537", -x"db450513", +x"da850513", x"d91ff0ef", x"fffe07b7", x"0087a783", @@ -820,7 +817,7 @@ x"00d787b3", x"00200513", x"fa0792e3", x"ffe01537", -x"dbc50513", +x"db050513", x"d11ff0ef", x"800007b7", x"0087a223", @@ -854,12 +851,12 @@ x"40a00533", x"e0400437", x"00a47433", x"ffe01537", -x"dc050513", +x"db450513", x"c89ff0ef", x"00040513", x"c1dff0ef", x"ffe01537", -x"dd050513", +x"dc450513", x"c75ff0ef", x"975ff0ef", x"00050863", @@ -909,7 +906,7 @@ x"0a3e3e20", x"444c420a", x"4a203a56", x"20206e61", -x"30322032", +x"30322034", x"480a3532", x"203a5657", x"00000020", @@ -1002,26 +999,26 @@ x"00002e65", x"61766e49", x"2064696c", x"00444d43", -x"ffe00660", -x"ffe00688", -x"ffe00688", -x"ffe0040c", -x"ffe00688", -x"ffe00688", -x"ffe00688", -x"ffe00658", -x"ffe00688", -x"ffe00688", -x"ffe00688", -x"ffe00688", -x"ffe00688", -x"ffe004d0", -x"ffe004e4", -x"ffe00688", +x"ffe00654", +x"ffe0067c", +x"ffe0067c", +x"ffe00400", +x"ffe0067c", +x"ffe0067c", +x"ffe0067c", +x"ffe0064c", +x"ffe0067c", +x"ffe0067c", +x"ffe0067c", +x"ffe0067c", +x"ffe0067c", +x"ffe004c4", x"ffe004d8", -x"ffe00688", -x"ffe00688", -x"ffe00678", +x"ffe0067c", +x"ffe004cc", +x"ffe0067c", +x"ffe0067c", +x"ffe0066c", x"33323130", x"37363534", x"62613938", diff --git a/rtl/core/neorv32_bus.vhd b/rtl/core/neorv32_bus.vhd index 3a8b70a1d..d3f3c82d1 100644 --- a/rtl/core/neorv32_bus.vhd +++ b/rtl/core/neorv32_bus.vhd @@ -211,7 +211,7 @@ end neorv32_bus_switch_rtl; -- -------------------------------------------------------------------------------- -- -- The NEORV32 RISC-V Processor - https://github.com/stnolting/neorv32 -- -- Copyright (c) NEORV32 contributors. -- --- Copyright (c) 2020 - 2024 Stephan Nolting. All rights reserved. -- +-- Copyright (c) 2020 - 2025 Stephan Nolting. All rights reserved. -- -- Licensed under the BSD-3-Clause license, see LICENSE for details. -- -- SPDX-License-Identifier: BSD-3-Clause -- -- ================================================================================ -- @@ -301,7 +301,7 @@ end neorv32_bus_reg_rtl; -- -------------------------------------------------------------------------------- -- -- The NEORV32 RISC-V Processor - https://github.com/stnolting/neorv32 -- -- Copyright (c) NEORV32 contributors. -- --- Copyright (c) 2020 - 2024 Stephan Nolting. All rights reserved. -- +-- Copyright (c) 2020 - 2025 Stephan Nolting. All rights reserved. -- -- Licensed under the BSD-3-Clause license, see LICENSE for details. -- -- SPDX-License-Identifier: BSD-3-Clause -- -- ================================================================================ -- @@ -487,7 +487,7 @@ end neorv32_bus_gateway_rtl; -- -------------------------------------------------------------------------------- -- -- The NEORV32 RISC-V Processor - https://github.com/stnolting/neorv32 -- -- Copyright (c) NEORV32 contributors. -- --- Copyright (c) 2020 - 2024 Stephan Nolting. All rights reserved. -- +-- Copyright (c) 2020 - 2025 Stephan Nolting. All rights reserved. -- -- Licensed under the BSD-3-Clause license, see LICENSE for details. -- -- SPDX-License-Identifier: BSD-3-Clause -- -- ================================================================================ -- @@ -747,7 +747,7 @@ end neorv32_bus_io_switch_rtl; -- -------------------------------------------------------------------------------- -- -- The NEORV32 RISC-V Processor - https://github.com/stnolting/neorv32 -- -- Copyright (c) NEORV32 contributors. -- --- Copyright (c) 2020 - 2024 Stephan Nolting. All rights reserved. -- +-- Copyright (c) 2020 - 2025 Stephan Nolting. All rights reserved. -- -- Licensed under the BSD-3-Clause license, see LICENSE for details. -- -- SPDX-License-Identifier: BSD-3-Clause -- -- ================================================================================ -- @@ -863,7 +863,7 @@ begin sys_req_o.rw <= '1' when (arbiter.state = S_WRITE) or (arbiter.state = S_WRITE_WAIT) else core_req_i.rw; sys_req_o.src <= core_req_i.src; sys_req_o.priv <= core_req_i.priv; - sys_req_o.amo <= core_req_i.amo; + sys_req_o.amo <= core_req_i.amo; -- set during the entire read-modify-write operation sys_req_o.amoop <= (others => '0'); -- the specific AMO type should not matter after this point sys_req_o.fence <= core_req_i.fence; sys_req_o.sleep <= core_req_i.sleep; diff --git a/rtl/core/neorv32_cpu.vhd b/rtl/core/neorv32_cpu.vhd index 37c216a0d..60fbe5127 100644 --- a/rtl/core/neorv32_cpu.vhd +++ b/rtl/core/neorv32_cpu.vhd @@ -22,7 +22,8 @@ use neorv32.neorv32_package.all; entity neorv32_cpu is generic ( -- General -- - HART_ID : natural; -- hardware thread ID + HART_ID : natural range 0 to 3; -- hardware thread ID + NUM_HARTS : natural range 1 to 4; -- total number of harts in the system, has to be a power of 2 VENDOR_ID : std_ulogic_vector(31 downto 0); -- vendor's JEDEC ID BOOT_ADDR : std_ulogic_vector(31 downto 0); -- cpu boot address DEBUG_PARK_ADDR : std_ulogic_vector(31 downto 0); -- cpu debug mode parking loop entry address @@ -69,20 +70,28 @@ entity neorv32_cpu is ); port ( -- global control -- - clk_i : in std_ulogic; -- switchable global clock, rising edge - rstn_i : in std_ulogic; -- global reset, low-active, async + clk_i : in std_ulogic; -- switchable global clock, rising edge + rstn_i : in std_ulogic; -- global reset, low-active, async -- interrupts -- - msi_i : in std_ulogic; -- risc-v machine software interrupt - mei_i : in std_ulogic; -- risc-v machine external interrupt - mti_i : in std_ulogic; -- risc-v machine timer interrupt - firq_i : in std_ulogic_vector(15 downto 0); -- custom fast interrupts - dbi_i : in std_ulogic; -- risc-v debug halt request interrupt + msi_i : in std_ulogic; -- risc-v machine software interrupt + mei_i : in std_ulogic; -- risc-v machine external interrupt + mti_i : in std_ulogic; -- risc-v machine timer interrupt + firq_i : in std_ulogic_vector(15 downto 0); -- custom fast interrupts + dbi_i : in std_ulogic; -- risc-v debug halt request interrupt -- instruction bus interface -- - ibus_req_o : out bus_req_t; -- request bus - ibus_rsp_i : in bus_rsp_t; -- response bus + ibus_req_o : out bus_req_t; -- request bus + ibus_rsp_i : in bus_rsp_t; -- response bus -- data bus interface -- - dbus_req_o : out bus_req_t; -- request bus - dbus_rsp_i : in bus_rsp_t -- response bus + dbus_req_o : out bus_req_t; -- request bus + dbus_rsp_i : in bus_rsp_t; -- response bus + -- ICC TX links -- + icc_tx_rdy_o : out std_ulogic_vector(NUM_HARTS-1 downto 0); -- data available + icc_tx_ack_i : in std_ulogic_vector(NUM_HARTS-1 downto 0); -- read-enable + icc_tx_dat_o : out std_ulogic_vector((NUM_HARTS*XLEN)-1 downto 0); -- data word + -- ICC RX links -- + icc_rx_rdy_i : in std_ulogic_vector(NUM_HARTS-1 downto 0); -- data available + icc_rx_ack_o : out std_ulogic_vector(NUM_HARTS-1 downto 0); -- read-enable + icc_rx_dat_i : in std_ulogic_vector((NUM_HARTS*XLEN)-1 downto 0) -- data word ); end neorv32_cpu; @@ -98,12 +107,14 @@ architecture neorv32_cpu_rtl of neorv32_cpu is RISCV_ISA_Zksh and RISCV_ISA_Zksed; -- Zks: ShangMi suite -- external CSR interface -- + signal xcsr_re : std_ulogic; signal xcsr_we : std_ulogic; signal xcsr_addr : std_ulogic_vector(11 downto 0); signal xcsr_wdata : std_ulogic_vector(XLEN-1 downto 0); signal xcsr_rdata_pmp : std_ulogic_vector(XLEN-1 downto 0); signal xcsr_rdata_alu : std_ulogic_vector(XLEN-1 downto 0); signal xcsr_rdata_res : std_ulogic_vector(XLEN-1 downto 0); + signal xcsr_rdata_icc : std_ulogic_vector(XLEN-1 downto 0); -- local signals -- signal clk_gated : std_ulogic; -- switchable clock (clock gating) @@ -128,7 +139,7 @@ architecture neorv32_cpu_rtl of neorv32_cpu is begin - -- Sanity Checks -------------------------------------------------------------------------- + -- Configuration Info and Sanity Checks --------------------------------------------------- -- ------------------------------------------------------------------------------------------- -- CPU ISA configuration (in alphabetical order - not in canonical order!) -- assert false report "[NEORV32] CPU ISA: rv32" & @@ -177,6 +188,10 @@ begin -- simulation notifier -- assert not is_simulation_c report "[NEORV32] Assuming this is a simulation." severity warning; + -- ID checks -- + assert is_power_of_two_f(NUM_HARTS) report "[NEORV32] NUM_HARTS has to be a power of two." severity error; + assert (HART_ID < NUM_HARTS) report "[NEORV32] HART_ID out of range." severity error; + -- Clock Gating --------------------------------------------------------------------------- -- ------------------------------------------------------------------------------------------- @@ -197,7 +212,7 @@ begin end generate; - -- Control Unit --------------------------------------------------------------------------- + -- Control Unit (CTRL) -------------------------------------------------------------------- -- ------------------------------------------------------------------------------------------- neorv32_cpu_control_inst: entity neorv32.neorv32_cpu_control generic map ( @@ -269,7 +284,7 @@ begin csr_rdata_o => csr_rdata, -- CSR read data -- external CSR interface -- xcsr_we_o => xcsr_we, -- global write enable - xcsr_re_o => open, -- global read enable + xcsr_re_o => xcsr_re, -- global read enable xcsr_addr_o => xcsr_addr, -- address xcsr_wdata_o => xcsr_wdata, -- write data xcsr_rdata_i => xcsr_rdata_res, -- read data @@ -287,10 +302,10 @@ begin irq_machine <= mti_i & mei_i & msi_i; -- external CSR read-back -- - xcsr_rdata_res <= xcsr_rdata_pmp or xcsr_rdata_alu; + xcsr_rdata_res <= xcsr_rdata_alu or xcsr_rdata_pmp or xcsr_rdata_icc; - -- Register File -------------------------------------------------------------------------- + -- Register File (RF) --------------------------------------------------------------------- -- ------------------------------------------------------------------------------------------- neorv32_cpu_regfile_inst: entity neorv32.neorv32_cpu_regfile generic map ( @@ -314,7 +329,7 @@ begin rf_wdata <= alu_res or lsu_rdata or csr_rdata or pc_ret; - -- ALU (Arithmetic/Logic Unit) and ALU Co-Processors -------------------------------------- + -- Arithmetic/Logic Unit (ALU) and ALU Co-Processors -------------------------------------- -- ------------------------------------------------------------------------------------------- neorv32_cpu_alu_inst: entity neorv32.neorv32_cpu_alu generic map ( @@ -364,7 +379,7 @@ begin ); - -- Load/Store Unit ------------------------------------------------------------------------ + -- Load/Store Unit (LSU) ------------------------------------------------------------------ -- ------------------------------------------------------------------------------------------- neorv32_cpu_lsu_inst: entity neorv32.neorv32_cpu_lsu generic map ( @@ -389,9 +404,9 @@ begin ); - -- Physical Memory Protection ------------------------------------------------------------- + -- Physical Memory Protection (PMP) ------------------------------------------------------- -- ------------------------------------------------------------------------------------------- - pmp_inst_true: + pmp_enabled: if RISCV_ISA_Smpmp generate neorv32_cpu_pmp_inst: entity neorv32.neorv32_cpu_pmp generic map ( @@ -418,11 +433,47 @@ begin ); end generate; - pmp_inst_false: + pmp_disabled: if not RISCV_ISA_Smpmp generate xcsr_rdata_pmp <= (others => '0'); pmp_fault <= '0'; end generate; + -- Inter-Core Communication (ICC) --------------------------------------------------------- + -- ------------------------------------------------------------------------------------------- + icc_enabled: + if NUM_HARTS > 1 generate + neorv32_cpu_icc_inst: entity neorv32.neorv32_cpu_icc + generic map ( + HART_ID => HART_ID, -- ID of this core + NUM_HARTS => NUM_HARTS -- number of cores, has to be a power of two + ) + port map ( + -- global control -- + clk_i => clk_i, -- global clock, rising edge + rstn_i => rstn_i, -- global reset, low-active, async + -- CSR interface -- + csr_we_i => xcsr_we, -- global write enable + csr_re_i => xcsr_re, -- global read enable + csr_addr_i => xcsr_addr, -- address + csr_wdata_i => xcsr_wdata, -- write data + csr_rdata_o => xcsr_rdata_icc, -- read data + -- ICC TX links -- + icc_tx_rdy_o => icc_tx_rdy_o, -- data available + icc_tx_ack_i => icc_tx_ack_i, -- read-enable + icc_tx_dat_o => icc_tx_dat_o, -- data word + -- ICC RX links -- + icc_rx_rdy_i => icc_rx_rdy_i, -- data available + icc_rx_ack_o => icc_rx_ack_o, -- read-enable + icc_rx_dat_i => icc_rx_dat_i -- data word + ); + end generate; + + icc_disabled: + if NUM_HARTS = 1 generate + xcsr_rdata_icc <= (others => '0'); + end generate; + + end neorv32_cpu_rtl; diff --git a/rtl/core/neorv32_cpu_control.vhd b/rtl/core/neorv32_cpu_control.vhd index 24058d89c..0f182e612 100644 --- a/rtl/core/neorv32_cpu_control.vhd +++ b/rtl/core/neorv32_cpu_control.vhd @@ -29,7 +29,7 @@ use neorv32.neorv32_package.all; entity neorv32_cpu_control is generic ( -- General -- - HART_ID : natural; -- hardware thread ID + HART_ID : natural range 0 to 3; -- hardware thread ID VENDOR_ID : std_ulogic_vector(31 downto 0); -- vendor's JEDEC ID BOOT_ADDR : std_ulogic_vector(31 downto 0); -- cpu boot address DEBUG_PARK_ADDR : std_ulogic_vector(31 downto 0); -- cpu debug-mode parking loop entry address, 4-byte aligned @@ -923,9 +923,10 @@ begin csr_valid(2) <= bool_to_ulogic_f(RISCV_ISA_Zfinx); -- available if FPU implemented -- machine trap setup/handling, environment/information registers, etc. -- - when csr_mstatus_c | csr_mstatush_c | csr_misa_c | csr_mie_c | csr_mtvec_c | csr_mscratch_c | - csr_mepc_c | csr_mcause_c | csr_mip_c | csr_mtval_c | csr_mtinst_c | csr_mcountinhibit_c | - csr_mvendorid_c | csr_marchid_c | csr_mimpid_c | csr_mhartid_c | csr_mconfigptr_c | csr_mxisa_c => + when csr_mstatus_c | csr_mstatush_c | csr_misa_c | csr_mie_c | csr_mtvec_c | csr_mscratch_c | + csr_mepc_c | csr_mcause_c | csr_mip_c | csr_mtval_c | csr_mtinst_c | csr_mcountinhibit_c | + csr_mvendorid_c | csr_marchid_c | csr_mimpid_c | csr_mhartid_c | csr_mconfigptr_c | csr_mxisa_c | + csr_mxiccrxd_c | csr_mxicctxd_c | csr_mxiccsr0_c | csr_mxiccsr1_c => csr_valid(2) <= '1'; -- always implemented -- machine-controlled user-mode CSRs -- @@ -1658,6 +1659,12 @@ begin csr.rdata <= xcsr_rdata_i; -- implemented externally end if; + -- -------------------------------------------------------------------- + -- inter-core communication + -- -------------------------------------------------------------------- + when csr_mxiccrxd_c | csr_mxicctxd_c | csr_mxiccsr0_c | csr_mxiccsr1_c => + csr.rdata <= xcsr_rdata_i; -- implemented externally + -- -------------------------------------------------------------------- -- machine trap setup -- -------------------------------------------------------------------- diff --git a/rtl/core/neorv32_cpu_icc.vhd b/rtl/core/neorv32_cpu_icc.vhd new file mode 100644 index 000000000..dfac5deb4 --- /dev/null +++ b/rtl/core/neorv32_cpu_icc.vhd @@ -0,0 +1,139 @@ +-- ================================================================================ -- +-- NEORV32 CPU - Inter-Core Communication Unit (ICC) -- +-- -------------------------------------------------------------------------------- -- +-- The NEORV32 RISC-V Processor - https://github.com/stnolting/neorv32 -- +-- Copyright (c) NEORV32 contributors. -- +-- Copyright (c) 2020 - 2025 Stephan Nolting. All rights reserved. -- +-- Licensed under the BSD-3-Clause license, see LICENSE for details. -- +-- SPDX-License-Identifier: BSD-3-Clause -- +-- ================================================================================ -- + +library ieee; +use ieee.std_logic_1164.all; +use ieee.numeric_std.all; + +library neorv32; +use neorv32.neorv32_package.all; + +entity neorv32_cpu_icc is + generic ( + HART_ID : natural range 0 to 3; -- ID of this core + NUM_HARTS : natural range 1 to 4 -- number of cores, has to be a power of two + ); + port ( + -- global control -- + clk_i : in std_ulogic; -- global clock, rising edge + rstn_i : in std_ulogic; -- global reset, low-active, async + -- CSR interface -- + csr_we_i : in std_ulogic; -- global write enable + csr_re_i : in std_ulogic; -- global read enable + csr_addr_i : in std_ulogic_vector(11 downto 0); -- address + csr_wdata_i : in std_ulogic_vector(XLEN-1 downto 0); -- write data + csr_rdata_o : out std_ulogic_vector(XLEN-1 downto 0); -- read data + -- ICC TX links -- + icc_tx_rdy_o : out std_ulogic_vector(NUM_HARTS-1 downto 0); -- data available + icc_tx_ack_i : in std_ulogic_vector(NUM_HARTS-1 downto 0); -- read-enable + icc_tx_dat_o : out std_ulogic_vector((NUM_HARTS*XLEN)-1 downto 0); -- data word + -- ICC RX links -- + icc_rx_rdy_i : in std_ulogic_vector(NUM_HARTS-1 downto 0); -- data available + icc_rx_ack_o : out std_ulogic_vector(NUM_HARTS-1 downto 0); -- read-enable + icc_rx_dat_i : in std_ulogic_vector((NUM_HARTS*XLEN)-1 downto 0) -- data word + ); +end neorv32_cpu_icc; + +architecture neorv32_cpu_icc_rtl of neorv32_cpu_icc is + + -- link select -- + constant id_width_c : natural := index_size_f(NUM_HARTS); + signal link_id : std_ulogic_vector(id_width_c-1 downto 0); + + -- link control -- + signal link_sel, tx_fifo_we, tx_fifo_free : std_ulogic_vector(NUM_HARTS-1 downto 0); + + -- incoming data as array -- + type rx_data_t is array (0 to NUM_HARTS-1) of std_ulogic_vector(XLEN-1 downto 0); + signal rx_data : rx_data_t; + +begin + + -- CSR Access ----------------------------------------------------------------------------- + -- ------------------------------------------------------------------------------------------- + csr_write: process(rstn_i, clk_i) + begin + if (rstn_i = '0') then + link_id <= (others => '0'); + elsif rising_edge(clk_i) then + if (csr_we_i = '1') and (csr_addr_i(11 downto 1) = csr_mxiccsr0_c(11 downto 1)) then + link_id <= csr_wdata_i(id_width_c-1 downto 0); + end if; + end if; + end process csr_write; + + csr_read: process(csr_addr_i, link_id, icc_rx_rdy_i, tx_fifo_free, rx_data) + begin + csr_rdata_o <= (others => '0'); -- default + if (csr_addr_i(11 downto 2) = csr_mxiccrxd_c(11 downto 2)) then -- ICC CSRs base address + if (csr_addr_i(1) = '0') then -- data register(s) + csr_rdata_o <= rx_data(to_integer(unsigned(link_id))); + else -- control and status register(s) + csr_rdata_o(XLEN-1) <= icc_rx_rdy_i(to_integer(unsigned(link_id))); + csr_rdata_o(XLEN-2) <= tx_fifo_free(to_integer(unsigned(link_id))); + csr_rdata_o(id_width_c-1 downto 0) <= link_id; + end if; + end if; + end process csr_read; + + + -- Communication Links -------------------------------------------------------------------- + -- ------------------------------------------------------------------------------------------- + link_gen: + for i in 0 to NUM_HARTS-1 generate + + -- TX FIFOs for outgoing links -- + queue_gen: + if i /= HART_ID generate + queue_inst: entity neorv32.neorv32_fifo + generic map ( + FIFO_DEPTH => 4, -- yes, this is fixed + FIFO_WIDTH => XLEN, + FIFO_RSYNC => true, + FIFO_SAFE => true, + FULL_RESET => true + ) + port map ( + -- control -- + clk_i => clk_i, + rstn_i => rstn_i, + clear_i => '0', + half_o => open, + -- write port -- + wdata_i => csr_wdata_i, + we_i => tx_fifo_we(i), + free_o => tx_fifo_free(i), + -- read port -- + re_i => icc_tx_ack_i(i), + rdata_o => icc_tx_dat_o(i*XLEN+(XLEN-1) downto i*XLEN), + avail_o => icc_tx_rdy_o(i) + ); + end generate; + + -- no FIFO/link for *this* core -- + queue_terminate: + if i = HART_ID generate + tx_fifo_free(i) <= '0'; + icc_tx_dat_o(i*XLEN+(XLEN-1) downto i*XLEN) <= (others => '0'); + icc_tx_rdy_o(i) <= '0'; + end generate; + + -- reorganize incoming links as 2d-array -- + rx_data(i) <= icc_rx_dat_i(i*XLEN+(XLEN-1) downto i*XLEN); + + -- link control -- + link_sel(i) <= '1' when (unsigned(link_id) = to_unsigned(i, id_width_c)) else '0'; + icc_rx_ack_o(i) <= '1' when (csr_re_i = '1') and (csr_addr_i = csr_mxiccrxd_c) and (link_sel(i) = '1') else '0'; + tx_fifo_we(i) <= '1' when (csr_we_i = '1') and (csr_addr_i = csr_mxicctxd_c) and (link_sel(i) = '1') else '0'; + + end generate; + + +end neorv32_cpu_icc_rtl; diff --git a/rtl/core/neorv32_package.vhd b/rtl/core/neorv32_package.vhd index f957a7820..3f254eef0 100644 --- a/rtl/core/neorv32_package.vhd +++ b/rtl/core/neorv32_package.vhd @@ -29,7 +29,7 @@ package neorv32_package is -- Architecture Constants ----------------------------------------------------------------- -- ------------------------------------------------------------------------------------------- - constant hw_version_c : std_ulogic_vector(31 downto 0) := x"01100807"; -- hardware version + constant hw_version_c : std_ulogic_vector(31 downto 0) := x"01100808"; -- hardware version constant archid_c : natural := 19; -- official RISC-V architecture ID constant XLEN : natural := 32; -- native data path width @@ -436,7 +436,7 @@ package neorv32_package is constant csr_dcsr_c : std_ulogic_vector(11 downto 0) := x"7b0"; constant csr_dpc_c : std_ulogic_vector(11 downto 0) := x"7b1"; constant csr_dscratch0_c : std_ulogic_vector(11 downto 0) := x"7b2"; - -- NEORV32-specific user registers -- + -- NEORV32-specific read/write user registers -- constant csr_cfureg0_c : std_ulogic_vector(11 downto 0) := x"800"; constant csr_cfureg1_c : std_ulogic_vector(11 downto 0) := x"801"; constant csr_cfureg2_c : std_ulogic_vector(11 downto 0) := x"802"; @@ -475,8 +475,11 @@ package neorv32_package is constant csr_mhpmcounter13h_c : std_ulogic_vector(11 downto 0) := x"b8d"; constant csr_mhpmcounter14h_c : std_ulogic_vector(11 downto 0) := x"b8e"; constant csr_mhpmcounter15h_c : std_ulogic_vector(11 downto 0) := x"b8f"; - -- NEORV32-specific read-write machine registers -- ---constant csr_mxstatus_c : std_ulogic_vector(11 downto 0) := x"bc0"; -- to-be-implemented + -- NEORV32-specific read/write machine registers -- + constant csr_mxiccrxd_c : std_ulogic_vector(11 downto 0) := x"bc0"; + constant csr_mxicctxd_c : std_ulogic_vector(11 downto 0) := x"bc1"; + constant csr_mxiccsr0_c : std_ulogic_vector(11 downto 0) := x"bc2"; + constant csr_mxiccsr1_c : std_ulogic_vector(11 downto 0) := x"bc3"; -- user counters/timers -- constant csr_cycle_c : std_ulogic_vector(11 downto 0) := x"c00"; --constant csr_time_c : std_ulogic_vector(11 downto 0) := x"c01"; @@ -493,7 +496,6 @@ package neorv32_package is constant csr_mconfigptr_c : std_ulogic_vector(11 downto 0) := x"f15"; -- NEORV32-specific read-only machine registers -- constant csr_mxisa_c : std_ulogic_vector(11 downto 0) := x"fc0"; ---constant csr_mxisah_c : std_ulogic_vector(11 downto 0) := x"fc1"; -- to-be-implemented -- ********************************************************************************************************** -- CPU Control diff --git a/rtl/core/neorv32_top.vhd b/rtl/core/neorv32_top.vhd index c53bf89ae..55a1c55a5 100644 --- a/rtl/core/neorv32_top.vhd +++ b/rtl/core/neorv32_top.vhd @@ -310,6 +310,14 @@ architecture neorv32_top_rtl of neorv32_top is signal dci_ndmrstn : std_ulogic; signal dci_haltreq : std_ulogic_vector(num_cores_c-1 downto 0); + -- CPU ICC links -- + type icc_rdy_t is array (0 to num_cores_c-1) of std_ulogic_vector(num_cores_c-1 downto 0); + type icc_ack_t is array (0 to num_cores_c-1) of std_ulogic_vector(num_cores_c-1 downto 0); + type icc_dat_t is array (0 to num_cores_c-1) of std_ulogic_vector(num_cores_c*32-1 downto 0); + signal icc_tx_rdy, icc_rx_rdy : icc_rdy_t; + signal icc_tx_ack, icc_rx_ack : icc_ack_t; + signal icc_tx_dat, icc_rx_dat : icc_dat_t; + -- bus: CPU core(s) + L1 caches -- type multicore_req_t is array (0 to num_cores_c-1) of bus_req_t; type multicore_rsp_t is array (0 to num_cores_c-1) of bus_rsp_t; @@ -499,6 +507,7 @@ begin generic map ( -- General -- HART_ID => i, + NUM_HARTS => num_cores_c, VENDOR_ID => vendorid_c, BOOT_ADDR => cpu_boot_addr_c, DEBUG_PARK_ADDR => dm_park_entry_c, @@ -545,26 +554,54 @@ begin ) port map ( -- global control -- - clk_i => clk_i, - rstn_i => rstn_sys, + clk_i => clk_i, + rstn_i => rstn_sys, -- interrupts -- - msi_i => msw_irq(i), - mei_i => mext_irq_i, - mti_i => mtime_irq(i), - firq_i => cpu_firq, - dbi_i => dci_haltreq(i), + msi_i => msw_irq(i), + mei_i => mext_irq_i, + mti_i => mtime_irq(i), + firq_i => cpu_firq, + dbi_i => dci_haltreq(i), -- instruction bus interface -- - ibus_req_o => cpu_i_req(i), - ibus_rsp_i => cpu_i_rsp(i), + ibus_req_o => cpu_i_req(i), + ibus_rsp_i => cpu_i_rsp(i), -- data bus interface -- - dbus_req_o => cpu_d_req(i), - dbus_rsp_i => cpu_d_rsp(i) + dbus_req_o => cpu_d_req(i), + dbus_rsp_i => cpu_d_rsp(i), + -- ICC TX links -- + icc_tx_rdy_o => icc_tx_rdy(i), + icc_tx_ack_i => icc_tx_ack(i), + icc_tx_dat_o => icc_tx_dat(i), + -- ICC RX links -- + icc_rx_rdy_i => icc_rx_rdy(i), + icc_rx_ack_o => icc_rx_ack(i), + icc_rx_dat_i => icc_rx_dat(i) ); + -- inter-core communication (ICC) links (connect every core with every other) -- + icc_gen: + for j in 0 to num_cores_c-1 generate + + icc_gen_terminate: -- do not connect a core's link to itself + if i = j generate + icc_rx_rdy(i)(j) <= '0'; + icc_tx_ack(i)(j) <= '0'; + icc_rx_dat(i)(j*32+31 downto j*32) <= (others => '0'); + end generate; + + ip_gen_connect: + if i /= j generate + icc_rx_rdy(i)(j) <= icc_tx_rdy(j)(i); + icc_tx_ack(i)(j) <= icc_rx_ack(j)(i); + icc_rx_dat(i)(j*32+31 downto j*32) <= icc_tx_dat(j)(i*32+31 downto i*32); + end generate; + + end generate; + -- CPU L1 Instruction Cache (I-Cache) ----------------------------------------------------- -- ------------------------------------------------------------------------------------------- - neorv32_icache_inst_true: + neorv32_icache_enabled: if ICACHE_EN generate neorv32_icache_inst: entity neorv32.neorv32_cache generic map ( @@ -584,7 +621,7 @@ begin ); end generate; - neorv32_icache_inst_false: + neorv32_icache_disabled: if not ICACHE_EN generate icache_req(i) <= cpu_i_req(i); cpu_i_rsp(i) <= icache_rsp(i); @@ -593,7 +630,7 @@ begin -- CPU L1 Data Cache (D-Cache) ------------------------------------------------------------ -- ------------------------------------------------------------------------------------------- - neorv32_dcache_inst_true: + neorv32_dcache_enabled: if DCACHE_EN generate neorv32_dcache_inst: entity neorv32.neorv32_cache generic map ( @@ -613,7 +650,7 @@ begin ); end generate; - neorv32_dcache_inst_false: + neorv32_dcache_disabled: if not DCACHE_EN generate dcache_req(i) <= cpu_d_req(i); cpu_d_rsp(i) <= dcache_rsp(i); @@ -677,7 +714,7 @@ begin -- Direct Memory Access Controller (DMA) Complex -- ************************************************************************************************************************** - neorv32_dma_complex_true: + neorv32_dma_complex_enabled: if IO_DMA_EN generate -- DMA Controller ------------------------------------------------------------------------- @@ -715,9 +752,9 @@ begin x_rsp_i => main_rsp ); - end generate; -- /neorv32_dma_complex_true + end generate; -- /neorv32_dma_complex_enabled - neorv32_dma_complex_false: + neorv32_dma_complex_disabled: if not IO_DMA_EN generate iodev_rsp(IODEV_DMA) <= rsp_terminate_c; main_req <= complex_req; @@ -730,7 +767,7 @@ begin -- Read-Modify-Write Controller for Atomic Memory Operations -- ************************************************************************************************************************** - neorv32_bus_amo_ctrl_true: + neorv32_bus_amo_ctrl_enabled: if RISCV_ISA_Zaamo generate neorv32_bus_amo_ctrl_inst: entity neorv32.neorv32_bus_amo_ctrl port map ( @@ -743,7 +780,7 @@ begin ); end generate; - neorv32_bus_amo_ctrl_false: + neorv32_bus_amo_ctrl_disabled: if not RISCV_ISA_Zaamo generate main2_req <= main_req; main_rsp <= main2_rsp; @@ -811,7 +848,7 @@ begin -- Processor-Internal Instruction Memory (IMEM) ------------------------------------------- -- ------------------------------------------------------------------------------------------- - neorv32_int_imem_inst_true: + neorv32_int_imem_enabled: if MEM_INT_IMEM_EN generate neorv32_int_imem_inst: entity neorv32.neorv32_imem generic map ( @@ -826,7 +863,7 @@ begin ); end generate; - neorv32_int_imem_inst_false: + neorv32_int_imem_disabled: if not MEM_INT_IMEM_EN generate imem_rsp <= rsp_terminate_c; end generate; @@ -834,7 +871,7 @@ begin -- Processor-Internal Data Memory (DMEM) -------------------------------------------------- -- ------------------------------------------------------------------------------------------- - neorv32_int_dmem_inst_true: + neorv32_int_dmem_enabled: if MEM_INT_DMEM_EN generate neorv32_int_dmem_inst: entity neorv32.neorv32_dmem generic map ( @@ -848,7 +885,7 @@ begin ); end generate; - neorv32_int_dmem_inst_false: + neorv32_int_dmem_disabled: if not MEM_INT_DMEM_EN generate dmem_rsp <= rsp_terminate_c; end generate; @@ -856,7 +893,7 @@ begin -- Execute In-Place Module (XIP) ---------------------------------------------------------- -- ------------------------------------------------------------------------------------------- - neorv32_xip_inst_true: + neorv32_xip_enabled: if XIP_EN generate -- XIP interface -- @@ -880,7 +917,7 @@ begin ); -- XIP cache (XIP-CACHE) -- - neorv32_xipcache_inst_true: + neorv32_xipcache_enabled: if XIP_CACHE_EN generate neorv32_xcache_inst: entity neorv32.neorv32_cache generic map ( @@ -900,15 +937,15 @@ begin ); end generate; - neorv32_xipcache_inst_false: + neorv32_xipcache_disabled: if not XIP_CACHE_EN generate xipcache_req <= xip_req; xip_rsp <= xipcache_rsp; end generate; - end generate; -- /neorv32_xip_inst_true + end generate; -- /neorv32_xip_enabled - neorv32_xip_inst_false: + neorv32_xip_disabled: if not XIP_EN generate iodev_rsp(IODEV_XIP) <= rsp_terminate_c; xip_rsp <= rsp_terminate_c; @@ -921,7 +958,7 @@ begin -- External Bus Interface (XBUS) ---------------------------------------------------------- -- ------------------------------------------------------------------------------------------- - neorv32_xbus_inst_true: + neorv32_xbus_enabled: if XBUS_EN generate -- external bus gateway (XBUS) -- @@ -949,7 +986,7 @@ begin ); -- external bus cache (X-CACHE) -- - neorv32_xcache_inst_true: + neorv32_xcache_enabled: if XBUS_CACHE_EN generate neorv32_xcache_inst: entity neorv32.neorv32_cache generic map ( @@ -969,15 +1006,15 @@ begin ); end generate; - neorv32_xcache_inst_false: + neorv32_xcache_disabled: if not XBUS_CACHE_EN generate xcache_req <= xbus_req; xbus_rsp <= xcache_rsp; end generate; - end generate; -- /neorv32_xbus_inst_true + end generate; -- /neorv32_xbus_enabled - neorv32_xbus_inst_false: + neorv32_xbus_disabled: if not XBUS_EN generate xbus_rsp <= rsp_terminate_c; xbus_adr_o <= (others => '0'); @@ -1081,7 +1118,7 @@ begin -- Processor-Internal Bootloader ROM (BOOTROM) -------------------------------------------- -- ------------------------------------------------------------------------------------------- - neorv32_boot_rom_inst_true: + neorv32_boot_rom_enabled: if bootrom_en_c generate neorv32_boot_rom_inst: entity neorv32.neorv32_boot_rom port map ( @@ -1092,7 +1129,7 @@ begin ); end generate; - neorv32_boot_rom_inst_false: + neorv32_boot_rom_disabled: if not bootrom_en_c generate iodev_rsp(IODEV_BOOTROM) <= rsp_terminate_c; end generate; @@ -1100,7 +1137,7 @@ begin -- Custom Functions Subsystem (CFS) ------------------------------------------------------- -- ------------------------------------------------------------------------------------------- - neorv32_cfs_inst_true: + neorv32_cfs_enabled: if IO_CFS_EN generate neorv32_cfs_inst: entity neorv32.neorv32_cfs generic map ( @@ -1121,7 +1158,7 @@ begin ); end generate; - neorv32_cfs_inst_false: + neorv32_cfs_disabled: if not IO_CFS_EN generate iodev_rsp(IODEV_CFS) <= rsp_terminate_c; clk_gen_en(CG_CFS) <= '0'; @@ -1132,7 +1169,7 @@ begin -- Serial Data Interface (SDI) ------------------------------------------------------------ -- ------------------------------------------------------------------------------------------- - neorv32_sdi_inst_true: + neorv32_sdi_enabled: if IO_SDI_EN generate neorv32_sdi_inst: entity neorv32.neorv32_sdi generic map ( @@ -1151,7 +1188,7 @@ begin ); end generate; - neorv32_sdi_inst_false: + neorv32_sdi_disabled: if not IO_SDI_EN generate iodev_rsp(IODEV_SDI) <= rsp_terminate_c; sdi_dat_o <= '0'; @@ -1161,7 +1198,7 @@ begin -- General Purpose Input/Output Port (GPIO) ----------------------------------------------- -- ------------------------------------------------------------------------------------------- - neorv32_gpio_inst_true: + neorv32_gpio_enabled: if io_gpio_en_c generate neorv32_gpio_inst: entity neorv32.neorv32_gpio generic map ( @@ -1177,7 +1214,7 @@ begin ); end generate; - neorv32_gpio_inst_false: + neorv32_gpio_disabled: if not io_gpio_en_c generate iodev_rsp(IODEV_GPIO) <= rsp_terminate_c; gpio_o <= (others => '0'); @@ -1186,7 +1223,7 @@ begin -- Watch Dog Timer (WDT) ------------------------------------------------------------------ -- ------------------------------------------------------------------------------------------- - neorv32_wdt_inst_true: + neorv32_wdt_enabled: if IO_WDT_EN generate neorv32_wdt_inst: entity neorv32.neorv32_wdt port map ( @@ -1202,7 +1239,7 @@ begin ); end generate; - neorv32_wdt_inst_false: + neorv32_wdt_disabled: if not IO_WDT_EN generate iodev_rsp(IODEV_WDT) <= rsp_terminate_c; clk_gen_en(CG_WDT) <= '0'; @@ -1212,7 +1249,7 @@ begin -- Core Local Interruptor (CLINT) --------------------------------------------------------- -- ------------------------------------------------------------------------------------------- - neorv32_clint_inst_true: + neorv32_clint_enabled: if IO_CLINT_EN generate neorv32_clint_inst: entity neorv32.neorv32_clint generic map ( @@ -1229,7 +1266,7 @@ begin ); end generate; - neorv32_clint_inst_false: + neorv32_clint_disabled: if not IO_CLINT_EN generate iodev_rsp(IODEV_CLINT) <= rsp_terminate_c; mtime_time_o <= (others => '0'); @@ -1240,7 +1277,7 @@ begin -- Primary Universal Asynchronous Receiver/Transmitter (UART0) ---------------------------- -- ------------------------------------------------------------------------------------------- - neorv32_uart0_inst_true: + neorv32_uart0_enabled: if IO_UART0_EN generate neorv32_uart0_inst: entity neorv32.neorv32_uart generic map ( @@ -1265,7 +1302,7 @@ begin ); end generate; - neorv32_uart0_inst_false: + neorv32_uart0_disabled: if not IO_UART0_EN generate iodev_rsp(IODEV_UART0) <= rsp_terminate_c; uart0_txd_o <= '0'; @@ -1278,7 +1315,7 @@ begin -- Secondary Universal Asynchronous Receiver/Transmitter (UART1) -------------------------- -- ------------------------------------------------------------------------------------------- - neorv32_uart1_inst_true: + neorv32_uart1_enabled: if IO_UART1_EN generate neorv32_uart1_inst: entity neorv32.neorv32_uart generic map ( @@ -1303,7 +1340,7 @@ begin ); end generate; - neorv32_uart1_inst_false: + neorv32_uart1_disabled: if not IO_UART1_EN generate iodev_rsp(IODEV_UART1) <= rsp_terminate_c; uart1_txd_o <= '0'; @@ -1316,7 +1353,7 @@ begin -- Serial Peripheral Interface (SPI) ------------------------------------------------------ -- ------------------------------------------------------------------------------------------- - neorv32_spi_inst_true: + neorv32_spi_enabled: if IO_SPI_EN generate neorv32_spi_inst: entity neorv32.neorv32_spi generic map ( @@ -1337,7 +1374,7 @@ begin ); end generate; - neorv32_spi_inst_false: + neorv32_spi_disabled: if not IO_SPI_EN generate iodev_rsp(IODEV_SPI) <= rsp_terminate_c; spi_clk_o <= '0'; @@ -1350,7 +1387,7 @@ begin -- Two-Wire Interface (TWI) --------------------------------------------------------------- -- ------------------------------------------------------------------------------------------- - neorv32_twi_inst_true: + neorv32_twi_enabled: if IO_TWI_EN generate neorv32_twi_inst: entity neorv32.neorv32_twi generic map ( @@ -1371,7 +1408,7 @@ begin ); end generate; - neorv32_twi_inst_false: + neorv32_twi_disabled: if not IO_TWI_EN generate iodev_rsp(IODEV_TWI) <= rsp_terminate_c; twi_sda_o <= '1'; @@ -1383,7 +1420,7 @@ begin -- Two-Wire Device (TWD) ------------------------------------------------------------------ -- ------------------------------------------------------------------------------------------- - neorv32_twd_inst_true: + neorv32_twd_enabled: if IO_TWD_EN generate neorv32_twd_inst: entity neorv32.neorv32_twd generic map ( @@ -1404,7 +1441,7 @@ begin ); end generate; - neorv32_twd_inst_false: + neorv32_twd_disabled: if not IO_TWD_EN generate iodev_rsp(IODEV_TWD) <= rsp_terminate_c; twd_sda_o <= '1'; @@ -1416,7 +1453,7 @@ begin -- Pulse-Width Modulation Controller (PWM) ------------------------------------------------ -- ------------------------------------------------------------------------------------------- - neorv32_pwm_inst_true: + neorv32_pwm_enabled: if io_pwm_en_c generate neorv32_pwm_inst: entity neorv32.neorv32_pwm generic map ( @@ -1433,7 +1470,7 @@ begin ); end generate; - neorv32_pwm_inst_false: + neorv32_pwm_disabled: if not io_pwm_en_c generate iodev_rsp(IODEV_PWM) <= rsp_terminate_c; clk_gen_en(CG_PWM) <= '0'; @@ -1443,7 +1480,7 @@ begin -- True Random Number Generator (TRNG) ---------------------------------------------------- -- ------------------------------------------------------------------------------------------- - neorv32_trng_inst_true: + neorv32_trng_enabled: if IO_TRNG_EN generate neorv32_trng_inst: entity neorv32.neorv32_trng generic map ( @@ -1457,7 +1494,7 @@ begin ); end generate; - neorv32_trng_inst_false: + neorv32_trng_disabled: if not IO_TRNG_EN generate iodev_rsp(IODEV_TRNG) <= rsp_terminate_c; end generate; @@ -1465,7 +1502,7 @@ begin -- Smart LED (WS2811/WS2812) Interface (NEOLED) ------------------------------------------- -- ------------------------------------------------------------------------------------------- - neorv32_neoled_inst_true: + neorv32_neoled_enabled: if IO_NEOLED_EN generate neorv32_neoled_inst: entity neorv32.neorv32_neoled generic map ( @@ -1483,7 +1520,7 @@ begin ); end generate; - neorv32_neoled_inst_false: + neorv32_neoled_disabled: if not IO_NEOLED_EN generate iodev_rsp(IODEV_NEOLED) <= rsp_terminate_c; clk_gen_en(CG_NEOLED) <= '0'; @@ -1494,7 +1531,7 @@ begin -- External Interrupt Controller (XIRQ) --------------------------------------------------- -- ------------------------------------------------------------------------------------------- - neorv32_xirq_inst_true: + neorv32_xirq_enabled: if io_xirq_en_c generate neorv32_xirq_inst: entity neorv32.neorv32_xirq generic map ( @@ -1510,7 +1547,7 @@ begin ); end generate; - neorv32_xirq_inst_false: + neorv32_xirq_disabled: if not io_xirq_en_c generate iodev_rsp(IODEV_XIRQ) <= rsp_terminate_c; firq(FIRQ_XIRQ) <= '0'; @@ -1519,7 +1556,7 @@ begin -- General Purpose Timer (GPTMR) ---------------------------------------------------------- -- ------------------------------------------------------------------------------------------- - neorv32_gptmr_inst_true: + neorv32_gptmr_enabled: if IO_GPTMR_EN generate neorv32_gptmr_inst: entity neorv32.neorv32_gptmr port map ( @@ -1533,7 +1570,7 @@ begin ); end generate; - neorv32_gptmr_inst_false: + neorv32_gptmr_disabled: if not IO_GPTMR_EN generate iodev_rsp(IODEV_GPTMR) <= rsp_terminate_c; clk_gen_en(CG_GPTMR) <= '0'; @@ -1543,7 +1580,7 @@ begin -- 1-Wire Interface Controller (ONEWIRE) -------------------------------------------------- -- ------------------------------------------------------------------------------------------- - neorv32_onewire_inst_true: + neorv32_onewire_enabled: if IO_ONEWIRE_EN generate neorv32_onewire_inst: entity neorv32.neorv32_onewire generic map ( @@ -1562,7 +1599,7 @@ begin ); end generate; - neorv32_onewire_inst_false: + neorv32_onewire_disabled: if not IO_ONEWIRE_EN generate iodev_rsp(IODEV_ONEWIRE) <= rsp_terminate_c; onewire_o <= '1'; @@ -1573,7 +1610,7 @@ begin -- Stream Link Interface (SLINK) ---------------------------------------------------------- -- ------------------------------------------------------------------------------------------- - neorv32_slink_inst_true: + neorv32_slink_enabled: if IO_SLINK_EN generate neorv32_slink_inst: entity neorv32.neorv32_slink generic map ( @@ -1600,7 +1637,7 @@ begin ); end generate; - neorv32_slink_inst_false: + neorv32_slink_disabled: if not IO_SLINK_EN generate iodev_rsp(IODEV_SLINK) <= rsp_terminate_c; firq(FIRQ_SLINK_RX) <= '0'; @@ -1615,7 +1652,7 @@ begin -- Cyclic Redundancy Check Unit (CRC) ----------------------------------------------------- -- ------------------------------------------------------------------------------------------- - neorv32_crc_inst_true: + neorv32_crc_enabled: if IO_CRC_EN generate neorv32_crc_inst: entity neorv32.neorv32_crc port map ( @@ -1626,7 +1663,7 @@ begin ); end generate; - neorv32_crc_inst_false: + neorv32_crc_disabled: if not IO_CRC_EN generate iodev_rsp(IODEV_CRC) <= rsp_terminate_c; end generate; @@ -1634,7 +1671,7 @@ begin -- System Configuration Information Memory (SYSINFO) -------------------------------------- -- ------------------------------------------------------------------------------------------- - neorv32_sysinfo_inst_true: + neorv32_sysinfo_enabled: if io_sysinfo_en_c generate neorv32_sysinfo_inst: entity neorv32.neorv32_sysinfo generic map ( @@ -1691,7 +1728,7 @@ begin ); end generate; - neorv32_sysinfo_inst_false: + neorv32_sysinfo_disabled: if not io_sysinfo_en_c generate iodev_rsp(IODEV_SYSINFO) <= rsp_terminate_c; end generate; @@ -1704,7 +1741,7 @@ begin -- On-Chip Debugger Complex -- ************************************************************************************************************************** - neorv32_ocd_inst_true: + neorv32_ocd_enabled: if OCD_EN generate -- On-Chip Debugger - Debug Transport Module (DTM) ---------------------------------------- @@ -1744,9 +1781,9 @@ begin halt_req_o => dci_haltreq ); - end generate; + end generate; -- /neorv32_ocd_enabled - neorv32_debug_ocd_inst_false: + neorv32_debug_ocd_disabled: if not OCD_EN generate iodev_rsp(IODEV_OCD) <= rsp_terminate_c; jtag_tdo_o <= jtag_tdi_i; -- JTAG pass-through diff --git a/rtl/file_list_cpu.f b/rtl/file_list_cpu.f index 2df0a0b67..60e4609de 100644 --- a/rtl/file_list_cpu.f +++ b/rtl/file_list_cpu.f @@ -14,4 +14,5 @@ NEORV32_RTL_PATH_PLACEHOLDER/core/neorv32_cpu_alu.vhd NEORV32_RTL_PATH_PLACEHOLDER/core/neorv32_cpu_lsu.vhd NEORV32_RTL_PATH_PLACEHOLDER/core/neorv32_cpu_pmp.vhd +NEORV32_RTL_PATH_PLACEHOLDER/core/neorv32_cpu_icc.vhd NEORV32_RTL_PATH_PLACEHOLDER/core/neorv32_cpu.vhd diff --git a/rtl/file_list_soc.f b/rtl/file_list_soc.f index b495e38f7..76da9d04d 100644 --- a/rtl/file_list_soc.f +++ b/rtl/file_list_soc.f @@ -15,6 +15,7 @@ NEORV32_RTL_PATH_PLACEHOLDER/core/neorv32_cpu_alu.vhd NEORV32_RTL_PATH_PLACEHOLDER/core/neorv32_cpu_lsu.vhd NEORV32_RTL_PATH_PLACEHOLDER/core/neorv32_cpu_pmp.vhd +NEORV32_RTL_PATH_PLACEHOLDER/core/neorv32_cpu_icc.vhd NEORV32_RTL_PATH_PLACEHOLDER/core/neorv32_cpu.vhd NEORV32_RTL_PATH_PLACEHOLDER/core/neorv32_bus.vhd NEORV32_RTL_PATH_PLACEHOLDER/core/neorv32_cache.vhd diff --git a/rtl/system_integration/neorv32_vivado_ip.tcl b/rtl/system_integration/neorv32_vivado_ip.tcl index ac3894257..1d5b6e9c1 100644 --- a/rtl/system_integration/neorv32_vivado_ip.tcl +++ b/rtl/system_integration/neorv32_vivado_ip.tcl @@ -161,6 +161,14 @@ proc setup_ip_gui {} { { CLOCK_FREQUENCY {Clock Frequency (Hz)} {Frequency of the clk input signal in Hz} } } + set group [add_group $page {Core Complex}] + add_params $group { + { DUAL_CORE_EN {Number of CPU cores} {} } + } + set_property widget {comboBox} [ipgui::get_guiparamspec -name "DUAL_CORE_EN" -component [ipx::current_core] ] + set_property value_validation_type pairs [ipx::get_user_parameters DUAL_CORE_EN -of_objects [ipx::current_core]] + set_property value_validation_pairs {{Single-core} false {SMP dual-core} true} [ipx::get_user_parameters DUAL_CORE_EN -of_objects [ipx::current_core]] + set group [add_group $page {Boot Configuration}] add_params $group { { BOOT_MODE_SELECT {Boot mode select} {Processor boot configuration} } diff --git a/rtl/system_integration/neorv32_vivado_ip.vhd b/rtl/system_integration/neorv32_vivado_ip.vhd index a246ef951..f82177e18 100644 --- a/rtl/system_integration/neorv32_vivado_ip.vhd +++ b/rtl/system_integration/neorv32_vivado_ip.vhd @@ -26,116 +26,118 @@ entity neorv32_vivado_ip is -- Configuration Generics -- ------------------------------------------------------------ -- Clocking -- - CLOCK_FREQUENCY : natural := 100_000_000; + CLOCK_FREQUENCY : natural := 100_000_000; + -- Dual-Core Configuration -- + DUAL_CORE_EN : boolean := false; -- Identification -- - JEDEC_ID : std_logic_vector(10 downto 0) := "00000000000"; + JEDEC_ID : std_logic_vector(10 downto 0) := "00000000000"; -- Boot Configuration -- - BOOT_MODE_SELECT : natural range 0 to 2 := 0; + BOOT_MODE_SELECT : natural range 0 to 2 := 0; BOOT_ADDR_CUSTOM : std_ulogic_vector(31 downto 0) := x"00000000"; -- On-Chip Debugger (OCD) -- - OCD_EN : boolean := false; - OCD_AUTHENTICATION : boolean := false; + OCD_EN : boolean := false; + OCD_AUTHENTICATION : boolean := false; -- RISC-V CPU Extensions -- - RISCV_ISA_C : boolean := false; - RISCV_ISA_E : boolean := false; - RISCV_ISA_M : boolean := false; - RISCV_ISA_U : boolean := false; - RISCV_ISA_Zaamo : boolean := false; - RISCV_ISA_Zba : boolean := false; - RISCV_ISA_Zbb : boolean := false; - RISCV_ISA_Zbkb : boolean := false; - RISCV_ISA_Zbkc : boolean := false; - RISCV_ISA_Zbkx : boolean := false; - RISCV_ISA_Zbs : boolean := false; - RISCV_ISA_Zfinx : boolean := false; - RISCV_ISA_Zicntr : boolean := false; - RISCV_ISA_Zicond : boolean := false; - RISCV_ISA_Zihpm : boolean := false; - RISCV_ISA_Zmmul : boolean := false; - RISCV_ISA_Zknd : boolean := false; - RISCV_ISA_Zkne : boolean := false; - RISCV_ISA_Zknh : boolean := false; - RISCV_ISA_Zksed : boolean := false; - RISCV_ISA_Zksh : boolean := false; - RISCV_ISA_Zxcfu : boolean := false; + RISCV_ISA_C : boolean := false; + RISCV_ISA_E : boolean := false; + RISCV_ISA_M : boolean := false; + RISCV_ISA_U : boolean := false; + RISCV_ISA_Zaamo : boolean := false; + RISCV_ISA_Zba : boolean := false; + RISCV_ISA_Zbb : boolean := false; + RISCV_ISA_Zbkb : boolean := false; + RISCV_ISA_Zbkc : boolean := false; + RISCV_ISA_Zbkx : boolean := false; + RISCV_ISA_Zbs : boolean := false; + RISCV_ISA_Zfinx : boolean := false; + RISCV_ISA_Zicntr : boolean := false; + RISCV_ISA_Zicond : boolean := false; + RISCV_ISA_Zihpm : boolean := false; + RISCV_ISA_Zmmul : boolean := false; + RISCV_ISA_Zknd : boolean := false; + RISCV_ISA_Zkne : boolean := false; + RISCV_ISA_Zknh : boolean := false; + RISCV_ISA_Zksed : boolean := false; + RISCV_ISA_Zksh : boolean := false; + RISCV_ISA_Zxcfu : boolean := false; -- Tuning Options -- - CPU_FAST_MUL_EN : boolean := false; - CPU_FAST_SHIFT_EN : boolean := false; - CPU_RF_HW_RST_EN : boolean := false; + CPU_FAST_MUL_EN : boolean := false; + CPU_FAST_SHIFT_EN : boolean := false; + CPU_RF_HW_RST_EN : boolean := false; -- Physical Memory Protection (PMP) -- - PMP_NUM_REGIONS : natural range 0 to 16 := 0; - PMP_MIN_GRANULARITY : natural := 4; - PMP_TOR_MODE_EN : boolean := false; - PMP_NAP_MODE_EN : boolean := false; + PMP_NUM_REGIONS : natural range 0 to 16 := 0; + PMP_MIN_GRANULARITY : natural := 4; + PMP_TOR_MODE_EN : boolean := false; + PMP_NAP_MODE_EN : boolean := false; -- Hardware Performance Monitors (HPM) -- - HPM_NUM_CNTS : natural range 0 to 13 := 0; - HPM_CNT_WIDTH : natural range 0 to 64 := 40; + HPM_NUM_CNTS : natural range 0 to 13 := 0; + HPM_CNT_WIDTH : natural range 0 to 64 := 40; -- Internal Instruction memory -- - MEM_INT_IMEM_EN : boolean := false; - MEM_INT_IMEM_SIZE : natural := 16384; + MEM_INT_IMEM_EN : boolean := false; + MEM_INT_IMEM_SIZE : natural := 16384; -- Internal Data memory -- - MEM_INT_DMEM_EN : boolean := false; - MEM_INT_DMEM_SIZE : natural := 8192; + MEM_INT_DMEM_EN : boolean := false; + MEM_INT_DMEM_SIZE : natural := 8192; -- Internal Cache memory -- - ICACHE_EN : boolean := false; - ICACHE_NUM_BLOCKS : natural range 1 to 256 := 4; - ICACHE_BLOCK_SIZE : natural range 4 to 2**16 := 64; + ICACHE_EN : boolean := false; + ICACHE_NUM_BLOCKS : natural range 1 to 256 := 4; + ICACHE_BLOCK_SIZE : natural range 4 to 2**16 := 64; -- Internal Data Cache (dCACHE) -- - DCACHE_EN : boolean := false; - DCACHE_NUM_BLOCKS : natural range 1 to 256 := 4; - DCACHE_BLOCK_SIZE : natural range 4 to 2**16 := 64; + DCACHE_EN : boolean := false; + DCACHE_NUM_BLOCKS : natural range 1 to 256 := 4; + DCACHE_BLOCK_SIZE : natural range 4 to 2**16 := 64; -- External Bus Interface -- - XBUS_EN : boolean := true; - XBUS_TIMEOUT : natural range 8 to 65536 := 64; - XBUS_REGSTAGE_EN : boolean := false; - XBUS_CACHE_EN : boolean := false; - XBUS_CACHE_NUM_BLOCKS : natural range 1 to 256 := 8; - XBUS_CACHE_BLOCK_SIZE : natural range 1 to 2**16 := 256; + XBUS_EN : boolean := true; + XBUS_TIMEOUT : natural range 8 to 65536 := 64; + XBUS_REGSTAGE_EN : boolean := false; + XBUS_CACHE_EN : boolean := false; + XBUS_CACHE_NUM_BLOCKS : natural range 1 to 256 := 8; + XBUS_CACHE_BLOCK_SIZE : natural range 1 to 2**16 := 256; -- Execute in-place module (XIP) -- - XIP_EN : boolean := false; - XIP_CACHE_EN : boolean := false; - XIP_CACHE_NUM_BLOCKS : natural range 1 to 256 := 8; - XIP_CACHE_BLOCK_SIZE : natural range 1 to 2**16 := 256; + XIP_EN : boolean := false; + XIP_CACHE_EN : boolean := false; + XIP_CACHE_NUM_BLOCKS : natural range 1 to 256 := 8; + XIP_CACHE_BLOCK_SIZE : natural range 1 to 2**16 := 256; -- External Interrupts Controller (XIRQ) -- - XIRQ_EN : boolean := false; - XIRQ_NUM_CH : natural range 1 to 32 := 1; -- variable-sized ports must be at least 0 downto 0; #974 + XIRQ_EN : boolean := false; + XIRQ_NUM_CH : natural range 1 to 32 := 1; -- variable-sized ports must be at least 0 downto 0; #974 -- Processor peripherals -- - IO_GPIO_EN : boolean := false; - IO_GPIO_IN_NUM : natural range 1 to 64 := 1; -- variable-sized ports must be at least 0 downto 0; #974 - IO_GPIO_OUT_NUM : natural range 1 to 64 := 1; - IO_CLINT_EN : boolean := false; - IO_UART0_EN : boolean := false; - IO_UART0_RX_FIFO : natural range 1 to 2**15 := 1; - IO_UART0_TX_FIFO : natural range 1 to 2**15 := 1; - IO_UART1_EN : boolean := false; - IO_UART1_RX_FIFO : natural range 1 to 2**15 := 1; - IO_UART1_TX_FIFO : natural range 1 to 2**15 := 1; - IO_SPI_EN : boolean := false; - IO_SPI_FIFO : natural range 1 to 2**15 := 1; - IO_SDI_EN : boolean := false; - IO_SDI_FIFO : natural range 1 to 2**15 := 1; - IO_TWI_EN : boolean := false; - IO_TWI_FIFO : natural range 1 to 2**15 := 1; - IO_TWD_EN : boolean := false; - IO_TWD_FIFO : natural range 1 to 2**15 := 1; - IO_PWM_EN : boolean := false; - IO_PWM_NUM_CH : natural range 1 to 16 := 1; -- variable-sized ports must be at least 0 downto 0; #974 - IO_WDT_EN : boolean := false; - IO_TRNG_EN : boolean := false; - IO_TRNG_FIFO : natural range 1 to 2**15 := 1; - IO_CFS_EN : boolean := false; - IO_CFS_CONFIG : std_logic_vector(31 downto 0) := x"00000000"; - IO_CFS_IN_SIZE : natural range 1 to 4096 := 32; -- variable-sized ports must be at least 0 downto 0; #974 - IO_CFS_OUT_SIZE : natural range 1 to 4096 := 32; -- variable-sized ports must be at least 0 downto 0; #974 - IO_NEOLED_EN : boolean := false; - IO_NEOLED_TX_FIFO : natural range 1 to 2**15 := 1; - IO_GPTMR_EN : boolean := false; - IO_ONEWIRE_EN : boolean := false; - IO_DMA_EN : boolean := false; + IO_GPIO_EN : boolean := false; + IO_GPIO_IN_NUM : natural range 1 to 64 := 1; -- variable-sized ports must be at least 0 downto 0; #974 + IO_GPIO_OUT_NUM : natural range 1 to 64 := 1; + IO_CLINT_EN : boolean := false; + IO_UART0_EN : boolean := false; + IO_UART0_RX_FIFO : natural range 1 to 2**15 := 1; + IO_UART0_TX_FIFO : natural range 1 to 2**15 := 1; + IO_UART1_EN : boolean := false; + IO_UART1_RX_FIFO : natural range 1 to 2**15 := 1; + IO_UART1_TX_FIFO : natural range 1 to 2**15 := 1; + IO_SPI_EN : boolean := false; + IO_SPI_FIFO : natural range 1 to 2**15 := 1; + IO_SDI_EN : boolean := false; + IO_SDI_FIFO : natural range 1 to 2**15 := 1; + IO_TWI_EN : boolean := false; + IO_TWI_FIFO : natural range 1 to 2**15 := 1; + IO_TWD_EN : boolean := false; + IO_TWD_FIFO : natural range 1 to 2**15 := 1; + IO_PWM_EN : boolean := false; + IO_PWM_NUM_CH : natural range 1 to 16 := 1; -- variable-sized ports must be at least 0 downto 0; #974 + IO_WDT_EN : boolean := false; + IO_TRNG_EN : boolean := false; + IO_TRNG_FIFO : natural range 1 to 2**15 := 1; + IO_CFS_EN : boolean := false; + IO_CFS_CONFIG : std_logic_vector(31 downto 0) := x"00000000"; + IO_CFS_IN_SIZE : natural range 1 to 4096 := 32; -- variable-sized ports must be at least 0 downto 0; #974 + IO_CFS_OUT_SIZE : natural range 1 to 4096 := 32; -- variable-sized ports must be at least 0 downto 0; #974 + IO_NEOLED_EN : boolean := false; + IO_NEOLED_TX_FIFO : natural range 1 to 2**15 := 1; + IO_GPTMR_EN : boolean := false; + IO_ONEWIRE_EN : boolean := false; + IO_DMA_EN : boolean := false; IO_SLINK_EN : boolean := false; - IO_SLINK_RX_FIFO : natural range 1 to 2**15 := 1; - IO_SLINK_TX_FIFO : natural range 1 to 2**15 := 1; - IO_CRC_EN : boolean := false + IO_SLINK_RX_FIFO : natural range 1 to 2**15 := 1; + IO_SLINK_TX_FIFO : natural range 1 to 2**15 := 1; + IO_CRC_EN : boolean := false ); port ( -- ------------------------------------------------------------ @@ -353,6 +355,8 @@ begin generic map ( -- Clocking -- CLOCK_FREQUENCY => CLOCK_FREQUENCY, + -- Dual-Core Configuration -- + DUAL_CORE_EN => DUAL_CORE_EN, -- Identification -- JEDEC_ID => std_ulogic_vector(JEDEC_ID), -- Boot Configuration -- diff --git a/sw/common/crt0.S b/sw/common/crt0.S index 82586d08b..440338a33 100644 --- a/sw/common/crt0.S +++ b/sw/common/crt0.S @@ -72,49 +72,45 @@ __crt0_entry: // ************************************************************************************************ -// Dual-core setup - wait for configuration if we are not core 0. +// SMP multi-core setup - wait for configuration if we are not core 0. // ************************************************************************************************ -#ifndef DISABLE_DUALCORE -__crt0_dualcore_check: - beqz x1, __crt0_dualcore_primary // proceed with normal boot-up if we are core 0 +#ifndef DISABLE_MULTICORE +__crt0_multicore_check: + beqz x1, __crt0_multicore_primary // proceed with normal boot-up if we are core 0 // setup machine software interrupt - la x15, __crt0_dualcore_mswi - csrw mtvec, x15 // install interrupt handler - csrsi mie, 1 << 3 // enable software interrupt source - csrsi mstatus, 1 << 3 // enable machine-level interrupts - j __crt0_sleep // wait for interrupt in sleep mode + la x15, __crt0_multicore_wakeup + csrw mtvec, x15 // install interrupt handler + csrsi mie, 1 << 3 // enable software interrupt source + csrsi mstatus, 1 << 3 // enable machine-level interrupts + j __crt0_sleep // wait for interrupt in sleep mode // machine software interrupt handler -__crt0_dualcore_mswi: +__crt0_multicore_wakeup: + li x14, 0xfff40000 // CLINT.MSWI base address + slli x15, x1, 2 // offset = hart_id * 4 + add x14, x14, x15 + sw zero, 0(x14) // CLINT.MSWI[hart_id] + csrr x14, mcause - li x15, 0x80000003 // is machine software interrupt? - beq x14, x15, __crt0_dualcore_init // start initialization - mret // go back to sleep if incorrect trap - - // get configuration struct address -__crt0_dualcore_init: - fence // reload data cache - li x13, 0xfff44000 // CLINT.MTIMCEMP base address - lw x13, 8(x13) // CLINT.MTIMCEMP[1].low = address of configuration struct - li x14, 0x1337cafe // expected magic word - lw x15, 0(x13) // __neorv32_rte_smp_startup.magic_word - beq x14, x15, __crt0_dualcore_valid - mret // go back to sleep if invalid configuration - -__crt0_dualcore_valid: - lw x2, 8(x13) // sp = __neorv32_rte_smp_startup.stack_upper - lw x12, 12(x13) // __neorv32_rte_smp_startup.entry_point - sw x0, 0(x13) // invalidate configuration struct - - // acknowledge startup by clearing software interrupt - li x14, 0xfff40000 // CLINT.MSWI base address - sw zero, 4(x14) // CLINT.MSWI[1] - - // start main function - j __crt0_main_entry - -__crt0_dualcore_primary: + li x15, 0x80000003 // is machine software interrupt? + bne x14, x15, __crt0_multicore_exit // go back to sleep if not + + // get launch configuration from core 0 + csrw 0xbc2, zero // ICC.SR: link select = 0 + csrr x13, 0xbc0 // ICC.RX: signature + li x14, 0xffab4321 // expected signature + bne x14, x13, __crt0_multicore_exit // abort if incorrect signature + + csrw 0xbc1, x14 // ICC.TX: acknowledge start + csrr x2, 0xbc0 // ICC.RX: stack_top -> sp + csrr x12, 0xbc0 // ICC.RX: entry_point + j __crt0_main_entry // start main function + +__crt0_multicore_exit: + mret // go back to sleep + +__crt0_multicore_primary: #endif @@ -171,11 +167,7 @@ __crt0_main_entry: fence // reload instruction cache fence.i // reload instruction cache - // re-initialize trap CSRs - csrw mstatus, x5 - csrw mcause, zero - csrw mepc, zero - + csrw mstatus, x5 // re-initialize addi x10, zero, 0 // x10 = a0 = argc = 0 addi x11, zero, 0 // x11 = a1 = argv = 0 jalr x1, x12 // call actual main function; put return address in ra diff --git a/sw/example/demo_dual_core/main.c b/sw/example/demo_dual_core/main.c index 934d7a464..2336da3da 100644 --- a/sw/example/demo_dual_core/main.c +++ b/sw/example/demo_dual_core/main.c @@ -86,11 +86,12 @@ int main(void) { neorv32_uart0_printf("Launching core1...\n"); // Launch execution of core 1. Arguments: - // 1st:: "main_core1" is the entry point for the core and we provide a total of 2kB of stack for it. - // 2nd:: Pointer to the core's stack memory array. - // 3rd:: Size of the core's stack memory array. + // 1st: Hart ID of the core that we want to launch. + // 2nd: "main_core1" is the entry point for the core and we provide a total of 2kB of stack for it. + // 3rd: Pointer to the core's stack memory array. + // 4th: Size of the core's stack memory array. - int smp_launch_rc = neorv32_rte_smp_launch(main_core1, (uint8_t*)core1_stack, sizeof(core1_stack)); + int smp_launch_rc = neorv32_smp_launch(1, main_core1, (uint8_t*)core1_stack, sizeof(core1_stack)); // Here we are using a statically allocated array as stack memory. Alternatively, malloc // could be used (it is recommend to align the stack memory on a 16-byte boundary): diff --git a/sw/example/demo_dual_core/run_check.sh b/sw/example/demo_dual_core/run_check.sh deleted file mode 100644 index 5f6a5c2a4..000000000 --- a/sw/example/demo_dual_core/run_check.sh +++ /dev/null @@ -1 +0,0 @@ -make USER_FLAGS+="-DUART0_SIM_MODE -DUART1_SIM_MODE" clean_all asm install sim diff --git a/sw/example/processor_check/main.c b/sw/example/processor_check/main.c index 5c7bf0295..cc3bc4c11 100644 --- a/sw/example/processor_check/main.c +++ b/sw/example/processor_check/main.c @@ -571,10 +571,10 @@ int main() { neorv32_cpu_csr_clr(CSR_MSTATUS, 1 << CSR_MSTATUS_MIE); tmp_a = trap_cnt; // current number of traps - // try executing some illegal instructions asm volatile (".word 0x58007053"); // unsupported fsqrt.s x0, x0 - asm volatile (".word 0x0e00202f"); // unsupported amoswap.w x0, x0, (x0) + asm volatile (".word 0x0e00302f"); // unsupported amoswap.D x0, x0, (x0) + asm volatile (".word 0x1000202f"); // unsupported lr.w x0, (x0) asm volatile (".word 0x34004073"); // illegal CSR access funct3 (using mscratch) asm volatile (".word 0x30200077"); // mret with illegal opcode asm volatile (".word 0x3020007f"); // mret with illegal opcode @@ -603,7 +603,7 @@ int main() { invalid_instr = 0x08812681; // mtinst: pre-decompressed; clear bit 1 if compressed instruction } else { // C extension disabled - tmp_a += 15; + tmp_a += 16; invalid_instr = 0xfe002fe3; } @@ -2146,22 +2146,17 @@ int main() { neorv32_cpu_csr_write(CSR_MCAUSE, mcause_never_c); PRINT_STANDARD("[%i] Dual-core ", cnt_test); - if ((neorv32_cpu_csr_read(CSR_MHARTID) == 0) && // we need to be core 0 - (NEORV32_SYSINFO->MISC[SYSINFO_MISC_HART] > 1) && // we need at least two cores + if ((NEORV32_SYSINFO->MISC[SYSINFO_MISC_HART] > 1) && // we need at least two cores (neorv32_clint_available() != 0)) { // we need the CLINT cnt_test++; // enable machine software interrupt neorv32_cpu_csr_write(CSR_MIE, 1 << CSR_MIE_MSIE); - // wait some time for the IRQ to arrive the CPU - asm volatile ("nop"); - asm volatile ("nop"); - - // launch core1 - tmp_a = (uint32_t)neorv32_rte_smp_launch(core1_main, (uint8_t*)core1_stack, sizeof(core1_stack)); + // launch core 1 + tmp_a = (uint32_t)neorv32_smp_launch(1, core1_main, (uint8_t*)core1_stack, sizeof(core1_stack)); - // wait for software interrupt in sleep mode + // wait for software interrupt (issued by core 1) in sleep mode neorv32_cpu_sleep(); // disable interrupts and clear software interrupt diff --git a/sw/lib/include/neorv32.h b/sw/lib/include/neorv32.h index 888a9db68..24b8bcc57 100644 --- a/sw/lib/include/neorv32.h +++ b/sw/lib/include/neorv32.h @@ -1,7 +1,7 @@ // ================================================================================ // // The NEORV32 RISC-V Processor - https://github.com/stnolting/neorv32 // // Copyright (c) NEORV32 contributors. // -// Copyright (c) 2020 - 2024 Stephan Nolting. All rights reserved. // +// Copyright (c) 2020 - 2025 Stephan Nolting. All rights reserved. // // Licensed under the BSD-3-Clause license, see LICENSE for details. // // SPDX-License-Identifier: BSD-3-Clause // // ================================================================================ // @@ -252,9 +252,6 @@ typedef union { // helper functions #include "neorv32_aux.h" -// legacy compatibility layer -#include "neorv32_legacy.h" - // CPU core #include "neorv32_cpu.h" #include "neorv32_cpu_csr.h" @@ -262,6 +259,7 @@ typedef union { // NEORV32 runtime environment #include "neorv32_rte.h" +#include "neorv32_smp.h" // IO/peripheral devices #include "neorv32_cfs.h" diff --git a/sw/lib/include/neorv32_cpu_csr.h b/sw/lib/include/neorv32_cpu_csr.h index de650d4a7..d6432c31e 100644 --- a/sw/lib/include/neorv32_cpu_csr.h +++ b/sw/lib/include/neorv32_cpu_csr.h @@ -1,7 +1,7 @@ // ================================================================================ // // The NEORV32 RISC-V Processor - https://github.com/stnolting/neorv32 // // Copyright (c) NEORV32 contributors. // -// Copyright (c) 2020 - 2024 Stephan Nolting. All rights reserved. // +// Copyright (c) 2020 - 2025 Stephan Nolting. All rights reserved. // // Licensed under the BSD-3-Clause license, see LICENSE for details. // // SPDX-License-Identifier: BSD-3-Clause // // ================================================================================ // @@ -137,6 +137,12 @@ enum NEORV32_CSR_enum { CSR_MHPMCOUNTER14H = 0xb8e, /**< 0xb8e - mhpmcounter14h: Machine hardware performance monitor 14 counter high word */ CSR_MHPMCOUNTER15H = 0xb8f, /**< 0xb8f - mhpmcounter15h: Machine hardware performance monitor 15 counter high word */ + /* inter-core communication */ + CSR_MXICCRXD = 0xbc0, /**< 0xbc0 - mxiccrxd: Machine ICC link RX data */ + CSR_MXICCTXD = 0xbc1, /**< 0xbc1 - mxicctxd: Machine ICC link TX data */ + CSR_MXICCSR0 = 0xbc2, /**< 0xbc1 - mxiccsr0: Machine ICC link status register 0 (#NEORV32_CSR_MXICCSR_enum) */ + CSR_MXICCSR1 = 0xbc3, /**< 0xbc1 - mxiccsr1: Machine ICC link status register 1 (#NEORV32_CSR_MXICCSR_enum) */ + /* user counters and timers */ CSR_CYCLE = 0xc00, /**< 0xc00 - cycle: User cycle counter low word */ CSR_INSTRET = 0xc02, /**< 0xc02 - instret: User instructions-retired counter low word */ @@ -150,7 +156,7 @@ enum NEORV32_CSR_enum { CSR_MIMPID = 0xf13, /**< 0xf13 - mimpid: Machine implementation ID */ CSR_MHARTID = 0xf14, /**< 0xf14 - mhartid: Machine hardware thread ID */ CSR_MCONFIGPTR = 0xf15, /**< 0xf15 - mconfigptr: Machine configuration pointer register */ - CSR_MXISA = 0xfc0 /**< 0xfc0 - mxisa: Machine extended ISA and extensions (#NEORV32_CSR_XISA_enum) */ + CSR_MXISA = 0xfc0 /**< 0xfc0 - mxisa: Machine extended ISA and extensions (#NEORV32_CSR_MXISA_enum) */ }; @@ -302,7 +308,7 @@ enum NEORV32_CSR_MISA_enum { /**********************************************************************//** * CPU mxisa CSR (r/-): Machine extended instruction set extensions (NEORV32-specific) **************************************************************************/ -enum NEORV32_CSR_XISA_enum { +enum NEORV32_CSR_MXISA_enum { // ISA (sub-)extensions CSR_MXISA_ZICSR = 0, /**< CPU mxisa CSR (0): privileged architecture (r/-)*/ CSR_MXISA_ZIFENCEI = 1, /**< CPU mxisa CSR (1): instruction stream sync (r/-)*/ @@ -340,6 +346,18 @@ enum NEORV32_CSR_XISA_enum { }; +/**********************************************************************//** + * CPU mxiccsr CSR (r/w): Inter-core communication control and status (NEORV32-specific) + **************************************************************************/ +enum NEORV32_CSR_MXICCSR_enum { + CSR_MXICCSR_LINK_LSB = 0, /**< CPU mxiccsr CSR (0): link/hart select LSB (r/w)*/ + CSR_MXICCSR_LINK_MSB = 1, /**< CPU mxiccsr CSR (1): link/hart select MSB (r/w)*/ + + CSR_MXICCSR_TX_FREE = 30, /**< CPU mxiccsr CSR (30): Free space in selected link's TX FIFO (r/-)*/ + CSR_MXICCSR_RX_AVAIL = 31 /**< CPU mxiccsr CSR (31): Data available in selected link's RX FIFO (r/-)*/ +}; + + /**********************************************************************//** * CPU mhpmevent hardware performance monitor events **************************************************************************/ diff --git a/sw/lib/include/neorv32_legacy.h b/sw/lib/include/neorv32_legacy.h deleted file mode 100644 index db5a0e1aa..000000000 --- a/sw/lib/include/neorv32_legacy.h +++ /dev/null @@ -1,42 +0,0 @@ -// ================================================================================ // -// The NEORV32 RISC-V Processor - https://github.com/stnolting/neorv32 // -// Copyright (c) NEORV32 contributors. // -// Copyright (c) 2020 - 2024 Stephan Nolting. All rights reserved. // -// Licensed under the BSD-3-Clause license, see LICENSE for details. // -// SPDX-License-Identifier: BSD-3-Clause // -// ================================================================================ // - -/** - * @file neorv32_legacy.h - * @brief Legacy compatibility layer. - * @warning Deprecated! Do not use for new designs! - * @see https://stnolting.github.io/neorv32/sw/files.html - */ - -#ifndef neorv32_legacy_h -#define neorv32_legacy_h - -#include - - -/**********************************************************************//** - * @name GPIO aliases - **************************************************************************/ -/**@{*/ -#define INPUT_LO INPUT[0] -#define INPUT_HI INPUT[1] -#define OUTPUT_LO OUTPUT[0] -#define OUTPUT_HI OUTPUT[1] -/**@}*/ - - -/**********************************************************************//** - * @name Atomic LR/SC instructions - **************************************************************************/ -/**@{*/ -#define neorv32_cpu_load_reservate_word(addr, wdata) neorv32_cpu_amolr(addr, wdata) -#define neorv32_cpu_store_conditional_word(addr, wdata) neorv32_cpu_amosc(addr, wdata) -/**@}*/ - - -#endif // neorv32_legacy_h diff --git a/sw/lib/include/neorv32_rte.h b/sw/lib/include/neorv32_rte.h index 31ef18db3..50473d9df 100644 --- a/sw/lib/include/neorv32_rte.h +++ b/sw/lib/include/neorv32_rte.h @@ -67,7 +67,6 @@ int neorv32_rte_handler_uninstall(int id); void neorv32_rte_debug_handler(void); uint32_t neorv32_rte_context_get(int x); void neorv32_rte_context_put(int x, uint32_t data); -int neorv32_rte_smp_launch(void (*entry_point)(void), uint8_t* stack_memory, size_t stack_size_bytes); /**@}*/ #endif // neorv32_rte_h diff --git a/sw/lib/include/neorv32_smp.h b/sw/lib/include/neorv32_smp.h new file mode 100644 index 000000000..1ffd6d0db --- /dev/null +++ b/sw/lib/include/neorv32_smp.h @@ -0,0 +1,79 @@ +// ================================================================================ // +// The NEORV32 RISC-V Processor - https://github.com/stnolting/neorv32 // +// Copyright (c) NEORV32 contributors. // +// Copyright (c) 2020 - 2025 Stephan Nolting. All rights reserved. // +// Licensed under the BSD-3-Clause license, see LICENSE for details. // +// SPDX-License-Identifier: BSD-3-Clause // +// ================================================================================ // + +/** + * @file neorv32_smp.h + * @brief SMP HW driver header file. + */ + +#ifndef neorv32_smp_h +#define neorv32_smp_h + + +/**********************************************************************//** + * @name Prototypes + **************************************************************************/ +/**@{*/ +int neorv32_smp_launch(int hart_id, void (*entry_point)(void), uint8_t* stack_memory, size_t stack_size_bytes); +/**@}*/ + + +/**********************************************************************//** + * Get data from core via ICC link. + * Check link status before #neorv32_smp_icc_avail(). + * + * @param[in] hart_sel Source core. + * @return Data word (32-bit) received from selected core. + **************************************************************************/ +inline uint32_t __attribute__ ((always_inline)) neorv32_smp_icc_get(int hart_sel) { + + neorv32_cpu_csr_write(CSR_MXICCSR0, (uint32_t)hart_sel); + return neorv32_cpu_csr_read(CSR_MXICCRXD); +} + + +/**********************************************************************//** + * Send data to core via ICC link. + * Check link status before #neorv32_smp_icc_free(). + * + * @param[in] hart_sel Destination core. + * @param[in] data Data word (32-bit) to be send to selected core. + **************************************************************************/ +inline void __attribute__ ((always_inline)) neorv32_smp_icc_put(int hart_sel, uint32_t data) { + + neorv32_cpu_csr_write(CSR_MXICCSR0, (uint32_t)hart_sel); + neorv32_cpu_csr_write(CSR_MXICCTXD, data); +} + + +/**********************************************************************//** + * Check if ICC link data is available. + * + * @param[in] hart_sel Source core. + * @return 0 = no data available, nonzero = data available. + **************************************************************************/ +inline int __attribute__ ((always_inline)) neorv32_smp_icc_avail(int hart_sel) { + + neorv32_cpu_csr_write(CSR_MXICCSR0, (uint32_t)hart_sel); + return neorv32_cpu_csr_read(CSR_MXICCSR0) & (1 << CSR_MXICCSR_RX_AVAIL); +} + + +/**********************************************************************//** + * Check if free space in ICC link. + * + * @param[in] hart_sel Destination core. + * @return 0 = no free space available, nonzero = free space available. + **************************************************************************/ +inline int __attribute__ ((always_inline)) neorv32_smp_icc_free(int hart_sel) { + + neorv32_cpu_csr_write(CSR_MXICCSR0, (uint32_t)hart_sel); + return neorv32_cpu_csr_read(CSR_MXICCSR0) & (1 << CSR_MXICCSR_TX_FREE); +} + +#endif // neorv32_smp_h diff --git a/sw/lib/source/neorv32_rte.c b/sw/lib/source/neorv32_rte.c index 88be7d4c9..1b280078f 100644 --- a/sw/lib/source/neorv32_rte.c +++ b/sw/lib/source/neorv32_rte.c @@ -15,28 +15,20 @@ #include -// // ------------------------------------------------------------------------------------------------ +// ------------------------------------------------------------------------------------------------ // RTE private variables and functions -// // ------------------------------------------------------------------------------------------------ +// ------------------------------------------------------------------------------------------------ // the private trap vector look-up table for each CPU core static uint32_t __neorv32_rte_vector_lut[2][NEORV32_RTE_NUM_TRAPS]; -// SMP startup configuration -static volatile struct __attribute__((packed,aligned(4))) { - uint32_t magic_word; // to check for valid configuration - uint32_t stack_lower; // stack begin address (lowest valid address); 16-byte aligned! - uint32_t stack_upper; // stack end address (highest valid address); 16-byte aligned! - uint32_t entry_point; // main function entry address -} __neorv32_rte_smp_startup; - // private helper function static void __neorv32_rte_print_hex_word(uint32_t num); -// // ------------------------------------------------------------------------------------------------ +// ------------------------------------------------------------------------------------------------ // RTE core functions -// // ------------------------------------------------------------------------------------------------ +// ------------------------------------------------------------------------------------------------ /**********************************************************************//** * NEORV32 runtime environment (RTE): @@ -414,62 +406,6 @@ void neorv32_rte_debug_handler(void) { } -// ------------------------------------------------------------------------------------------------ -// Multi-core functions -// ------------------------------------------------------------------------------------------------ - -/**********************************************************************//** - * NEORV32 runtime environment (RTE): - * Configure and start secondary CPU (core 1). - * - * @warning This function can be called from core 0 only. - * - * @param[in] entry_point Core 1 main function (must be of type "void entry_point(void)"). - * @param[in] stack_memory Pointer to beginning of core 1 stack memory array. Should be at least 512 bytes. - * @param[in] stack_size_bytes Core 1 stack size in bytes. - * @return 0 if launching succeeded. -1 if hardware configuration error. -2 if core is not responding. - **************************************************************************/ -int neorv32_rte_smp_launch(void (*entry_point)(void), uint8_t* stack_memory, size_t stack_size_bytes) { - - // sanity checks - if ((neorv32_cpu_csr_read(CSR_MHARTID) != 0) || // not execute on core 0 - (neorv32_clint_available() == 0) || // CLINT not available - (NEORV32_SYSINFO->MISC[SYSINFO_MISC_HART] == 1)) { // there is only one CPU core - return -1; - } - - // align end of stack to 16-bytes according to the RISC-V ABI (#1021) - uint32_t stack_top = ((uint32_t)stack_memory + (uint32_t)(stack_size_bytes-1)) & 0xfffffff0u; - - // setup launch-configuration struct - __neorv32_rte_smp_startup.magic_word = 0x1337cafeu; - __neorv32_rte_smp_startup.stack_lower = (uint32_t)stack_memory; - __neorv32_rte_smp_startup.stack_upper = stack_top; - __neorv32_rte_smp_startup.entry_point = (uint32_t)entry_point; - - // flush data cache (containing configuration struct) to main memory - asm volatile ("fence"); - - // use CLINT.MTIMECMP[1].low_word to pass the address of the configuration struct - NEORV32_CLINT->MTIMECMP[1].uint32[0] = (uint32_t)&__neorv32_rte_smp_startup; - - // start core 1 by triggering its software interrupt - neorv32_clint_msi_set(1); - - // wait for core 1 to clear its software interrupt - int cnt = 0; - while (1) { - if (neorv32_clint_msi_get(1) == 0) { - return 0; // success! - } - if (cnt > 10000) { - return -2; // timeout; core did not respond - } - cnt++; - } -} - - // ------------------------------------------------------------------------------------------------ // Private helper functions // ------------------------------------------------------------------------------------------------ diff --git a/sw/lib/source/neorv32_smp.c b/sw/lib/source/neorv32_smp.c new file mode 100644 index 000000000..882a46d00 --- /dev/null +++ b/sw/lib/source/neorv32_smp.c @@ -0,0 +1,70 @@ +// ================================================================================ // +// The NEORV32 RISC-V Processor - https://github.com/stnolting/neorv32 // +// Copyright (c) NEORV32 contributors. // +// Copyright (c) 2020 - 2025 Stephan Nolting. All rights reserved. // +// Licensed under the BSD-3-Clause license, see LICENSE for details. // +// SPDX-License-Identifier: BSD-3-Clause // +// ================================================================================ // + +/** + * @file neorv32_smp.c + * @brief SMP HW driver source file. + */ + +#include + + +/**********************************************************************//** + * Configure and start SMP core. + * + * @warning This function can be executed on core 0 only. + * + * @param[in] hart_id Hart/core select. + * @param[in] entry_point Core's main function (must be of type "void entry_point(void)"). + * @param[in] stack_memory Pointer to beginning of core's stack memory array. Should be at least 512 bytes. + * @param[in] stack_size_bytes Core's stack size in bytes. + * @return 0 if launching succeeded. -1 if invalid hart ID or CLINT not available. -2 if core is not responding. + **************************************************************************/ +int neorv32_smp_launch(int hart_id, void (*entry_point)(void), uint8_t* stack_memory, size_t stack_size_bytes) { + + const uint32_t magic_number = 0xffab4321u; + int num_cores = (int)NEORV32_SYSINFO->MISC[SYSINFO_MISC_HART]; + + // sanity checks + if ((neorv32_cpu_csr_read(CSR_MHARTID) != 0) || // this can be executed on core 0 only + (hart_id == 0) || // we cannot launch core 0 + (hart_id > (num_cores-1)) || // selected core not available + (neorv32_clint_available() == 0)) { // we need the CLINT + return -1; + } + + // drain input queue from selected core + while (neorv32_smp_icc_avail(hart_id)) { + neorv32_smp_icc_get(hart_id); + } + + // align end of stack to 16-bytes according to the RISC-V ABI (#1021) + uint32_t stack_top = ((uint32_t)stack_memory + (uint32_t)(stack_size_bytes-1)) & 0xfffffff0u; + + // send launch configuration + neorv32_smp_icc_put(hart_id, magic_number); // identifies valid configuration + neorv32_smp_icc_put(hart_id, stack_top); // top of core's stack + neorv32_smp_icc_put(hart_id, (uint32_t)entry_point); // entry point + + // start core by triggering its software interrupt + neorv32_clint_msi_set(hart_id); + + // wait for start acknowledge + int cnt = 0; + while (1) { + if (neorv32_smp_icc_avail(hart_id)) { + if (neorv32_smp_icc_get(hart_id) == magic_number) { + return 0; + } + } + if (cnt > 1000) { + return -2; // timeout; core did not respond + } + cnt++; + } +}