diff options
author | Clifford Wolf <clifford@clifford.at> | 2016-02-03 16:21:53 +0100 |
---|---|---|
committer | Clifford Wolf <clifford@clifford.at> | 2016-02-03 16:21:53 +0100 |
commit | d7894ca41a9f6936b3b88e6fb232fd8b5e092a31 (patch) | |
tree | be9785b0f63d975bb30abb2c408ae2ce65a89286 | |
parent | 9fa0890bd1de6f280b75cb7d777590dc666a710a (diff) | |
parent | b1a24f4f89fcd5e4790dee961fd6a35b75650b73 (diff) | |
download | picorv32-d7894ca41a9f6936b3b88e6fb232fd8b5e092a31.tar.gz picorv32-d7894ca41a9f6936b3b88e6fb232fd8b5e092a31.zip |
Merge branch 'master' into compressed
Conflicts:
picorv32.v
-rw-r--r-- | README.md | 24 | ||||
-rw-r--r-- | firmware/start.S | 83 | ||||
-rw-r--r-- | picorv32.v | 3 |
3 files changed, 97 insertions, 13 deletions
@@ -27,14 +27,14 @@ PicoRV32 is free and open hardware licensed under the [ISC license](http://en.wi Features and Typical Applications --------------------------------- -- Small (~1000 LUTs in a 7-Series Xilinx FPGA) -- High fMAX (~250 MHz on 7-Series Xilinx FPGAs) +- Small (750-1700 LUTs in 7-Series Xilinx Architecture) +- High f<sub>max</sub> (250-450 MHz on 7-Series Xilinx FPGAs) - Selectable native memory interface or AXI4-Lite master - Optional IRQ support (using a simple custom ISA) - Optional Co-Processor Interface This CPU is meant to be used as auxiliary processor in FPGA designs and ASICs. Due -to its high fMAX it can be integrated in most existing designs without crossing +to its high f<sub>max</sub> it can be integrated in most existing designs without crossing clock domains. When operated on a lower frequency, it will have a lot of timing slack and thus can be added to a design without compromising timing closure. @@ -251,16 +251,16 @@ The start address of the interrupt handler. Cycles per Instruction Performance ---------------------------------- -*A short reminder: This core is optimized for size, not performance.* +*A short reminder: This core is optimized for size and f<sub>max</sub>, not performance.* Unless stated otherwise, the following numbers apply to a PicoRV32 with ENABLE_REGS_DUALPORT active and connected to a memory that can accommodate requests within one clock cycle. -The average Cycles per Instruction (CPI) is 4 to 5, depending on the mix of -instructions in the code. The CPI numbers for the individual instructions -can be found in the table below. The column "CPI (SP)" contains the -CPI numbers for a core built without ENABLE_REGS_DUALPORT. +The average Cycles per Instruction (CPI) is approximately 4, depending on the mix of +instructions in the code. The CPI numbers for the individual instructions can +be found in the table below. The column "CPI (SP)" contains the CPI numbers for +a core built without ENABLE_REGS_DUALPORT. 
| Instruction | CPI | CPI (SP) | | ---------------------| ----:| --------:| @@ -277,9 +277,9 @@ CPI numbers for a core built without ENABLE_REGS_DUALPORT. When `ENABLE_MUL` is activated, then a `MUL` instruction will execute in 40 cycles and a `MULH[SU|U]` instruction will execute in 72 cycles. -Dhrystone benchmark results: 0.311 DMIPS/MHz (547 Dhrystones/Second/MHz) +Dhrystone benchmark results: 0.327 DMIPS/MHz (575 Dhrystones/Second/MHz) -For the Dhrystone benchmark the average CPI is 4.144. +For the Dhrystone benchmark the average CPI is 3.945. PicoRV32 Native Memory Interface @@ -531,7 +531,7 @@ pure RV32I target, and install it in `/opt/riscv32i`: git clone https://github.com/riscv/riscv-gnu-toolchain riscv-gnu-toolchain-rv32i cd riscv-gnu-toolchain-rv32i - git checkout 4bcd4f5 + git checkout 06c957a mkdir build; cd build ../configure --with-xlen=32 --with-arch=I --prefix=/opt/riscv32i @@ -541,7 +541,7 @@ The commands will all be named using the prefix `riscv32-unknown-elf-`, which makes it easy to install them side-by-side with the regular riscv-tools, which are using the name prefix `riscv64-unknown-elf-` by default. -*Note: This instructions are for git rev 4bcd4f5 (2015-12-14) of riscv-gnu-toolchain.* +*Note: These instructions are for git rev 06c957a (2016-01-20) of riscv-gnu-toolchain.* Evaluation: Timing and Utilization on Xilinx 7-Series FPGAs diff --git a/firmware/start.S b/firmware/start.S index 34058aa..a5547b8 100644 --- a/firmware/start.S +++ b/firmware/start.S @@ -15,6 +15,11 @@ # undef ENABLE_RVTST #endif +// Only save registers in IRQ wrapper that are to be saved by the caller in +// the RISC-V ABI, with the exception of the stack pointer. The IRQ handler +// will save the rest if necessary. I.e. skip x3, x4, x8, x9, and x18-x27. 
+#undef ENABLE_FASTIRQ + #include "custom_ops.S" .section .text @@ -58,6 +63,23 @@ irq_vec: getq x2, q3 sw x2, 2*4(x1) +#ifdef ENABLE_FASTIRQ + sw x5, 5*4(x1) + sw x6, 6*4(x1) + sw x7, 7*4(x1) + sw x10, 10*4(x1) + sw x11, 11*4(x1) + sw x12, 12*4(x1) + sw x13, 13*4(x1) + sw x14, 14*4(x1) + sw x15, 15*4(x1) + sw x16, 16*4(x1) + sw x17, 17*4(x1) + sw x28, 28*4(x1) + sw x29, 29*4(x1) + sw x30, 30*4(x1) + sw x31, 31*4(x1) +#else sw x3, 3*4(x1) sw x4, 4*4(x1) sw x5, 5*4(x1) @@ -87,9 +109,30 @@ irq_vec: sw x29, 29*4(x1) sw x30, 30*4(x1) sw x31, 31*4(x1) +#endif #else // ENABLE_QREGS +#ifdef ENABLE_FASTIRQ + sw gp, 0*4+0x200(zero) + sw x1, 1*4+0x200(zero) + sw x2, 2*4+0x200(zero) + sw x5, 5*4+0x200(zero) + sw x6, 6*4+0x200(zero) + sw x7, 7*4+0x200(zero) + sw x10, 10*4+0x200(zero) + sw x11, 11*4+0x200(zero) + sw x12, 12*4+0x200(zero) + sw x13, 13*4+0x200(zero) + sw x14, 14*4+0x200(zero) + sw x15, 15*4+0x200(zero) + sw x16, 16*4+0x200(zero) + sw x17, 17*4+0x200(zero) + sw x28, 28*4+0x200(zero) + sw x29, 29*4+0x200(zero) + sw x30, 30*4+0x200(zero) + sw x31, 31*4+0x200(zero) +#else sw gp, 0*4+0x200(zero) sw x1, 1*4+0x200(zero) sw x2, 2*4+0x200(zero) @@ -122,6 +165,7 @@ irq_vec: sw x29, 29*4+0x200(zero) sw x30, 30*4+0x200(zero) sw x31, 31*4+0x200(zero) +#endif #endif // ENABLE_QREGS @@ -160,6 +204,23 @@ irq_vec: lw x2, 2*4(x1) setq q2, x2 +#ifdef ENABLE_FASTIRQ + lw x5, 5*4(x1) + lw x6, 6*4(x1) + lw x7, 7*4(x1) + lw x10, 10*4(x1) + lw x11, 11*4(x1) + lw x12, 12*4(x1) + lw x13, 13*4(x1) + lw x14, 14*4(x1) + lw x15, 15*4(x1) + lw x16, 16*4(x1) + lw x17, 17*4(x1) + lw x28, 28*4(x1) + lw x29, 29*4(x1) + lw x30, 30*4(x1) + lw x31, 31*4(x1) +#else lw x3, 3*4(x1) lw x4, 4*4(x1) lw x5, 5*4(x1) @@ -189,6 +250,7 @@ irq_vec: lw x29, 29*4(x1) lw x30, 30*4(x1) lw x31, 31*4(x1) +#endif getq x1, q1 getq x2, q2 @@ -201,6 +263,26 @@ irq_vec: sbreak 1: +#ifdef ENABLE_FASTIRQ + lw gp, 0*4+0x200(zero) + lw x1, 1*4+0x200(zero) + lw x2, 2*4+0x200(zero) + lw x5, 5*4+0x200(zero) + lw x6, 
6*4+0x200(zero) + lw x7, 7*4+0x200(zero) + lw x10, 10*4+0x200(zero) + lw x11, 11*4+0x200(zero) + lw x12, 12*4+0x200(zero) + lw x13, 13*4+0x200(zero) + lw x14, 14*4+0x200(zero) + lw x15, 15*4+0x200(zero) + lw x16, 16*4+0x200(zero) + lw x17, 17*4+0x200(zero) + lw x28, 28*4+0x200(zero) + lw x29, 29*4+0x200(zero) + lw x30, 30*4+0x200(zero) + lw x31, 31*4+0x200(zero) +#else lw gp, 0*4+0x200(zero) lw x1, 1*4+0x200(zero) lw x2, 2*4+0x200(zero) @@ -233,6 +315,7 @@ irq_vec: lw x29, 29*4+0x200(zero) lw x30, 30*4+0x200(zero) lw x31, 31*4+0x200(zero) +#endif #endif // ENABLE_QREGS @@ -353,7 +353,7 @@ module picorv32 #( 0: begin mem_addr <= mem_la_addr; mem_wdata <= mem_la_wdata; - mem_wstrb <= mem_la_wstrb; + mem_wstrb <= mem_la_wstrb & {4{mem_la_write}}; if (mem_do_prefetch || mem_do_rinst) begin current_insn_addr <= next_pc; end @@ -945,6 +945,7 @@ module picorv32 #( latched_is_lh <= 0; latched_is_lb <= 0; pcpi_valid <= 0; + pcpi_timeout <= 0; irq_active <= 0; irq_mask <= ~0; next_irq_pending = 0; |