aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--README.md24
-rw-r--r--dhrystone/testbench.v1
-rw-r--r--picorv32.v26
-rw-r--r--scripts/vivado/synth_area_top.v1
4 files changed, 39 insertions, 13 deletions
diff --git a/README.md b/README.md
index 58ba80c..79cb879 100644
--- a/README.md
+++ b/README.md
@@ -3,7 +3,7 @@ PicoRV32 - A Size-Optimized RISC-V CPU
======================================
PicoRV32 is a CPU core that implements the [RISC-V RV32IMC Instruction Set](http://riscv.org/).
-It can be configured to be a RV32E, RV32I, RV32IC, RV32IM, or RV32IMC core, and optionally
+It can be configured as an RV32E, RV32I, RV32IC, RV32IM, or RV32IMC core, and optionally
contains a built-in interrupt controller.
Tools (gcc, binutils, etc..) can be obtained via the [RISC-V Website](http://riscv.org/download.html#tab_tools).
@@ -29,7 +29,7 @@ PicoRV32 is free and open hardware licensed under the [ISC license](http://en.wi
Features and Typical Applications
---------------------------------
-- Small (750-1700 LUTs in 7-Series Xilinx Architecture)
+- Small (750-2000 LUTs in 7-Series Xilinx Architecture)
- High f<sub>max</sub> (250-450 MHz on 7-Series Xilinx FPGAs)
- Selectable native memory interface or AXI4-Lite master
- Optional IRQ support (using a simple custom ISA)
@@ -170,6 +170,12 @@ of 4 bits and then shift in units of 1 bit. This speeds up shift operations,
but adds additional hardware. Set this parameter to 0 to disable the two-stage
shift to further reduce the size of the core.
+#### BARREL_SHIFTER (default = 0)
+
+By default shift operations are performed by successively shifting by a
+small amount (see `TWO_STAGE_SHIFT` above). With this option set, a barrel
+shifter is used instead.
+
#### TWO_CYCLE_COMPARE (default = 0)
This relaxes the longest data path a bit by adding an additional FF stage
@@ -294,9 +300,15 @@ in 40 cycles and a `MULH[SU|U]` instruction will execute in 72 cycles.
When `ENABLE_DIV` is activated, then a `DIV[U]/REM[U]` instruction will
execute in 40 cycles.
-Dhrystone benchmark results: 0.391 DMIPS/MHz (688 Dhrystones/Second/MHz)
+When `BARREL_SHIFTER` is activated, a shift operation takes as long as
+any other ALU operation.
+
+The following Dhrystone benchmark results are for a core with the
+`ENABLE_MUL`, `ENABLE_DIV`, and `BARREL_SHIFTER` options enabled.
+
+Dhrystone benchmark results: 0.399 DMIPS/MHz (702 Dhrystones/Second/MHz)
-For the Dhrystone benchmark the average CPI is 4.110.
+For the Dhrystone benchmark the average CPI is 4.030.
PicoRV32 Native Memory Interface
@@ -586,7 +598,7 @@ once in advance.
Evaluation: Timing and Utilization on Xilinx 7-Series FPGAs
-----------------------------------------------------------
-The following evaluations have been performed with Vivado 2015.1.
+The following evaluations have been performed with Vivado 2015.4.
#### Timing on Xilinx 7-Series FPGAs
@@ -622,7 +634,7 @@ for the following three cores:
- **PicoRV32 (regular):** The `picorv32` module in its default configuration.
- **PicoRV32 (large):** The `picorv32` module with enabled PCPI, IRQ, MUL,
- DIV, and COMPRESSED_ISA features.
+ DIV, BARREL_SHIFTER, and COMPRESSED_ISA features.
See `make area` in [scripts/vivado/](scripts/vivado/).
diff --git a/dhrystone/testbench.v b/dhrystone/testbench.v
index 6251472..6a03ca0 100644
--- a/dhrystone/testbench.v
+++ b/dhrystone/testbench.v
@@ -27,6 +27,7 @@ module testbench;
wire [3:0] mem_la_wstrb;
picorv32 #(
+ .BARREL_SHIFTER(1),
.ENABLE_MUL(1),
.ENABLE_DIV(1)
) uut (
diff --git a/picorv32.v b/picorv32.v
index 4f70a74..9a3410f 100644
--- a/picorv32.v
+++ b/picorv32.v
@@ -43,6 +43,7 @@ module picorv32 #(
parameter [ 0:0] ENABLE_REGS_DUALPORT = 1,
parameter [ 0:0] LATCHED_MEM_RDATA = 0,
parameter [ 0:0] TWO_STAGE_SHIFT = 1,
+ parameter [ 0:0] BARREL_SHIFTER = 0,
parameter [ 0:0] TWO_CYCLE_COMPARE = 0,
parameter [ 0:0] TWO_CYCLE_ALU = 0,
parameter [ 0:0] COMPRESSED_ISA = 0,
@@ -889,6 +890,7 @@ module picorv32 #(
reg alu_wait, alu_wait_2;
reg [31:0] alu_add_sub;
+ reg [31:0] alu_shl, alu_shr;
reg alu_eq, alu_ltu, alu_lts;
generate if (TWO_CYCLE_ALU) begin
@@ -897,6 +899,8 @@ module picorv32 #(
alu_eq <= reg_op1 == reg_op2;
alu_lts <= $signed(reg_op1) < $signed(reg_op2);
alu_ltu <= reg_op1 < reg_op2;
+ alu_shl <= reg_op1 << reg_op2[4:0];
+ alu_shr <= $signed({instr_sra || instr_srai ? reg_op1[31] : 1'b0, reg_op1}) >>> reg_op2[4:0];
end
end else begin
always @* begin
@@ -904,6 +908,8 @@ module picorv32 #(
alu_eq = reg_op1 == reg_op2;
alu_lts = $signed(reg_op1) < $signed(reg_op2);
alu_ltu = reg_op1 < reg_op2;
+ alu_shl = reg_op1 << reg_op2[4:0];
+ alu_shr = $signed({instr_sra || instr_srai ? reg_op1[31] : 1'b0, reg_op1}) >>> reg_op2[4:0];
end
end endgenerate
@@ -938,6 +944,10 @@ module picorv32 #(
alu_out = reg_op1 | reg_op2;
instr_andi || instr_and:
alu_out = reg_op1 & reg_op2;
+ BARREL_SHIFTER && (instr_sll || instr_slli):
+ alu_out = alu_shl;
+ BARREL_SHIFTER && (instr_srl || instr_srli || instr_sra || instr_srai):
+ alu_out = alu_shr;
endcase
end
@@ -1208,16 +1218,16 @@ module picorv32 #(
cpu_state <= cpu_state_ldmem;
mem_do_rinst <= 1;
end
- is_slli_srli_srai: begin
+ is_slli_srli_srai && !BARREL_SHIFTER: begin
`debug($display("LD_RS1: %2d 0x%08x", decoded_rs1, decoded_rs1 ? cpuregs[decoded_rs1] : 0);)
reg_op1 <= decoded_rs1 ? cpuregs[decoded_rs1] : 0;
reg_sh <= decoded_rs2;
cpu_state <= cpu_state_shift;
end
- is_jalr_addi_slti_sltiu_xori_ori_andi: begin
+ is_jalr_addi_slti_sltiu_xori_ori_andi, is_slli_srli_srai && BARREL_SHIFTER: begin
`debug($display("LD_RS1: %2d 0x%08x", decoded_rs1, decoded_rs1 ? cpuregs[decoded_rs1] : 0);)
reg_op1 <= decoded_rs1 ? cpuregs[decoded_rs1] : 0;
- reg_op2 <= decoded_imm;
+ reg_op2 <= is_slli_srli_srai && BARREL_SHIFTER ? decoded_rs2 : decoded_imm;
if (TWO_CYCLE_ALU)
alu_wait <= 1;
else
@@ -1237,7 +1247,7 @@ module picorv32 #(
cpu_state <= cpu_state_stmem;
mem_do_rinst <= 1;
end
- is_sll_srl_sra: begin
+ is_sll_srl_sra && !BARREL_SHIFTER: begin
cpu_state <= cpu_state_shift;
end
default: begin
@@ -1284,7 +1294,7 @@ module picorv32 #(
cpu_state <= cpu_state_stmem;
mem_do_rinst <= 1;
end
- is_sll_srl_sra: begin
+ is_sll_srl_sra && !BARREL_SHIFTER: begin
cpu_state <= cpu_state_shift;
end
default: begin
@@ -1299,8 +1309,6 @@ module picorv32 #(
end
cpu_state_exec: begin
- latched_store <= TWO_CYCLE_COMPARE ? alu_out_0_q : alu_out_0;
- latched_branch <= TWO_CYCLE_COMPARE ? alu_out_0_q : alu_out_0;
reg_out <= reg_pc + decoded_imm;
if ((TWO_CYCLE_ALU || TWO_CYCLE_COMPARE) && (alu_wait || alu_wait_2)) begin
mem_do_rinst <= mem_do_prefetch && !alu_wait_2;
@@ -1308,6 +1316,8 @@ module picorv32 #(
end else
if (is_beq_bne_blt_bge_bltu_bgeu) begin
latched_rd <= 0;
+ latched_store <= TWO_CYCLE_COMPARE ? alu_out_0_q : alu_out_0;
+ latched_branch <= TWO_CYCLE_COMPARE ? alu_out_0_q : alu_out_0;
if (mem_done)
cpu_state <= cpu_state_fetch;
if (TWO_CYCLE_COMPARE ? alu_out_0_q : alu_out_0) begin
@@ -1704,6 +1714,7 @@ module picorv32_axi #(
parameter [ 0:0] ENABLE_REGS_16_31 = 1,
parameter [ 0:0] ENABLE_REGS_DUALPORT = 1,
parameter [ 0:0] TWO_STAGE_SHIFT = 1,
+ parameter [ 0:0] BARREL_SHIFTER = 0,
parameter [ 0:0] TWO_CYCLE_COMPARE = 0,
parameter [ 0:0] TWO_CYCLE_ALU = 0,
parameter [ 0:0] COMPRESSED_ISA = 0,
@@ -1803,6 +1814,7 @@ module picorv32_axi #(
.ENABLE_REGS_16_31 (ENABLE_REGS_16_31 ),
.ENABLE_REGS_DUALPORT(ENABLE_REGS_DUALPORT),
.TWO_STAGE_SHIFT (TWO_STAGE_SHIFT ),
+ .BARREL_SHIFTER (BARREL_SHIFTER ),
.TWO_CYCLE_COMPARE (TWO_CYCLE_COMPARE ),
.TWO_CYCLE_ALU (TWO_CYCLE_ALU ),
.COMPRESSED_ISA (COMPRESSED_ISA ),
diff --git a/scripts/vivado/synth_area_top.v b/scripts/vivado/synth_area_top.v
index c2eddeb..6298a86 100644
--- a/scripts/vivado/synth_area_top.v
+++ b/scripts/vivado/synth_area_top.v
@@ -105,6 +105,7 @@ module top_large (
);
picorv32 #(
.COMPRESSED_ISA(1),
+ .BARREL_SHIFTER(1),
.ENABLE_PCPI(1),
.ENABLE_MUL(1),
.ENABLE_IRQ(1)