aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Makefile2
-rw-r--r--README.md6
-rw-r--r--dhrystone/Makefile2
-rw-r--r--dhrystone/testbench.v2
-rw-r--r--picorv32.v124
-rw-r--r--scripts/smtbmc/.gitignore2
-rw-r--r--scripts/smtbmc/mulcmp.sh12
-rw-r--r--scripts/smtbmc/mulcmp.v87
-rw-r--r--scripts/smtbmc/notrap_validop.sh6
-rw-r--r--scripts/smtbmc/tracecmp.sh2
10 files changed, 199 insertions, 46 deletions
diff --git a/Makefile b/Makefile
index eb3aa8a..fb81b59 100644
--- a/Makefile
+++ b/Makefile
@@ -30,7 +30,7 @@ check-yices: check.smt2
check.smt2: picorv32.v
yosys -v2 -p 'read_verilog -formal picorv32.v' \
-p 'prep -top picorv32 -nordff' \
- -p 'write_smt2 -bv -mem -wires check.smt2'
+ -p 'write_smt2 -wires check.smt2'
test_sp: testbench_sp.vvp firmware/firmware.hex
vvp -N testbench_sp.vvp
diff --git a/README.md b/README.md
index 79c12be..bc3fe9b 100644
--- a/README.md
+++ b/README.md
@@ -345,11 +345,11 @@ When `BARREL_SHIFTER` is activated, a shift operation takes as long as
any other ALU operation.
The following dhrystone benchmark results are for a core with enabled
-`ENABLE_MUL`, `ENABLE_DIV`, and `BARREL_SHIFTER` options.
+`ENABLE_FAST_MUL`, `ENABLE_DIV`, and `BARREL_SHIFTER` options.
-Dhrystone benchmark results: 0.505 DMIPS/MHz (888 Dhrystones/Second/MHz)
+Dhrystone benchmark results: 0.521 DMIPS/MHz (916 Dhrystones/Second/MHz)
-For the Dhrystone benchmark the average CPI is 4.208.
+For the Dhrystone benchmark the average CPI is 4.081.
PicoRV32 Native Memory Interface
diff --git a/dhrystone/Makefile b/dhrystone/Makefile
index 3609a97..9cc191f 100644
--- a/dhrystone/Makefile
+++ b/dhrystone/Makefile
@@ -29,7 +29,7 @@ timing.vvp: testbench.v ../picorv32.v
chmod -x timing.vvp
dhry.hex: dhry.elf
- riscv32-unknown-elf-objcopy -O verilog $< $@
+ $(TOOLCHAIN_PREFIX)objcopy -O verilog $< $@
ifeq ($(USE_MYSTDLIB),1)
dhry.elf: $(OBJS) sections.lds
diff --git a/dhrystone/testbench.v b/dhrystone/testbench.v
index 2d985d6..7744b0d 100644
--- a/dhrystone/testbench.v
+++ b/dhrystone/testbench.v
@@ -28,7 +28,7 @@ module testbench;
picorv32 #(
.BARREL_SHIFTER(1),
- .ENABLE_MUL(1),
+ .ENABLE_FAST_MUL(1),
.ENABLE_DIV(1),
.PROGADDR_RESET('h10000),
.STACKADDR('h10000)
diff --git a/picorv32.v b/picorv32.v
index d4fb656..88443b2 100644
--- a/picorv32.v
+++ b/picorv32.v
@@ -1163,6 +1163,56 @@ module picorv32 #(
clear_prefetched_high_word = COMPRESSED_ISA;
end
+ reg cpuregs_write;
+ reg [31:0] cpuregs_wrdata;
+ reg [31:0] cpuregs_rs1;
+ reg [31:0] cpuregs_rs2;
+ reg [regindex_bits-1:0] decoded_rs;
+
+ always @* begin
+ cpuregs_write = 0;
+ cpuregs_wrdata = 'bx;
+
+ if (cpu_state == cpu_state_fetch) begin
+ (* parallel_case *)
+ case (1'b1)
+ latched_branch: begin
+ cpuregs_wrdata = reg_pc + (latched_compr ? 2 : 4);
+ cpuregs_write = 1;
+ end
+ latched_store && !latched_branch: begin
+ cpuregs_wrdata = latched_stalu ? alu_out_q : reg_out;
+ cpuregs_write = 1;
+ end
+ ENABLE_IRQ && irq_state[0]: begin
+ cpuregs_wrdata = reg_next_pc | latched_compr;
+ cpuregs_write = 1;
+ end
+ ENABLE_IRQ && irq_state[1]: begin
+ cpuregs_wrdata = irq_pending & ~irq_mask;
+ cpuregs_write = 1;
+ end
+ endcase
+ end
+ end
+
+ always @(posedge clk) begin
+ if (cpuregs_write)
+ cpuregs[latched_rd] <= cpuregs_wrdata;
+ end
+
+ always @* begin
+ decoded_rs = 'bx;
+ if (ENABLE_REGS_DUALPORT) begin
+ cpuregs_rs1 = decoded_rs1 ? cpuregs[decoded_rs1] : 0;
+ cpuregs_rs2 = decoded_rs2 ? cpuregs[decoded_rs2] : 0;
+ end else begin
+ decoded_rs = (cpu_state == cpu_state_ld_rs2) ? decoded_rs2 : decoded_rs1;
+ cpuregs_rs1 = decoded_rs ? cpuregs[decoded_rs] : 0;
+ cpuregs_rs2 = cpuregs_rs1;
+ end
+ end
+
assign launch_next_insn = cpu_state == cpu_state_fetch && decoder_trigger && (!ENABLE_IRQ || irq_delay || irq_active || !(irq_pending & ~irq_mask));
always @(posedge clk) begin
@@ -1264,21 +1314,17 @@ module picorv32 #(
latched_branch: begin
current_pc = latched_store ? (latched_stalu ? alu_out_q : reg_out) : reg_next_pc;
`debug($display("ST_RD: %2d 0x%08x, BRANCH 0x%08x", latched_rd, reg_pc + (latched_compr ? 2 : 4), current_pc);)
- cpuregs[latched_rd] <= reg_pc + (latched_compr ? 2 : 4);
end
latched_store && !latched_branch: begin
`debug($display("ST_RD: %2d 0x%08x", latched_rd, latched_stalu ? alu_out_q : reg_out);)
- cpuregs[latched_rd] <= latched_stalu ? alu_out_q : reg_out;
end
ENABLE_IRQ && irq_state[0]: begin
- cpuregs[latched_rd] <= current_pc | latched_compr;
current_pc = PROGADDR_IRQ;
irq_active <= 1;
mem_do_rinst <= 1;
end
ENABLE_IRQ && irq_state[1]: begin
eoi <= irq_pending & ~irq_mask;
- cpuregs[latched_rd] <= irq_pending & ~irq_mask;
next_irq_pending = next_irq_pending & irq_mask;
end
endcase
@@ -1353,13 +1399,13 @@ module picorv32 #(
case (1'b1)
(CATCH_ILLINSN || WITH_PCPI) && instr_trap: begin
if (WITH_PCPI) begin
- `debug($display("LD_RS1: %2d 0x%08x", decoded_rs1, decoded_rs1 ? cpuregs[decoded_rs1] : 0);)
- reg_op1 <= decoded_rs1 ? cpuregs[decoded_rs1] : 0;
+ `debug($display("LD_RS1: %2d 0x%08x", decoded_rs1, cpuregs_rs1);)
+ reg_op1 <= cpuregs_rs1;
if (ENABLE_REGS_DUALPORT) begin
pcpi_valid <= 1;
- `debug($display("LD_RS2: %2d 0x%08x", decoded_rs2, decoded_rs2 ? cpuregs[decoded_rs2] : 0);)
- reg_sh <= decoded_rs2 ? cpuregs[decoded_rs2] : 0;
- reg_op2 <= decoded_rs2 ? cpuregs[decoded_rs2] : 0;
+ `debug($display("LD_RS2: %2d 0x%08x", decoded_rs2, cpuregs_rs2);)
+ reg_sh <= cpuregs_rs2;
+ reg_op2 <= cpuregs_rs2;
if (pcpi_int_ready) begin
mem_do_rinst <= 1;
pcpi_valid <= 0;
@@ -1413,14 +1459,14 @@ module picorv32 #(
cpu_state <= cpu_state_exec;
end
ENABLE_IRQ && ENABLE_IRQ_QREGS && instr_getq: begin
- `debug($display("LD_RS1: %2d 0x%08x", decoded_rs1, decoded_rs1 ? cpuregs[decoded_rs1] : 0);)
- reg_out <= decoded_rs1 ? cpuregs[decoded_rs1] : 0;
+ `debug($display("LD_RS1: %2d 0x%08x", decoded_rs1, cpuregs_rs1);)
+ reg_out <= cpuregs_rs1;
latched_store <= 1;
cpu_state <= cpu_state_fetch;
end
ENABLE_IRQ && ENABLE_IRQ_QREGS && instr_setq: begin
- `debug($display("LD_RS1: %2d 0x%08x", decoded_rs1, decoded_rs1 ? cpuregs[decoded_rs1] : 0);)
- reg_out <= decoded_rs1 ? cpuregs[decoded_rs1] : 0;
+ `debug($display("LD_RS1: %2d 0x%08x", decoded_rs1, cpuregs_rs1);)
+ reg_out <= cpuregs_rs1;
latched_rd <= latched_rd | irqregs_offset;
latched_store <= 1;
cpu_state <= cpu_state_fetch;
@@ -1430,39 +1476,39 @@ module picorv32 #(
irq_active <= 0;
latched_branch <= 1;
latched_store <= 1;
- `debug($display("LD_RS1: %2d 0x%08x", decoded_rs1, decoded_rs1 ? cpuregs[decoded_rs1] : 0);)
- reg_out <= decoded_rs1 ? cpuregs[decoded_rs1] : 0;
+ `debug($display("LD_RS1: %2d 0x%08x", decoded_rs1, cpuregs_rs1);)
+ reg_out <= cpuregs_rs1;
cpu_state <= cpu_state_fetch;
end
ENABLE_IRQ && instr_maskirq: begin
latched_store <= 1;
reg_out <= irq_mask;
- `debug($display("LD_RS1: %2d 0x%08x", decoded_rs1, decoded_rs1 ? cpuregs[decoded_rs1] : 0);)
- irq_mask <= (decoded_rs1 ? cpuregs[decoded_rs1] : 0) | MASKED_IRQ;
+ `debug($display("LD_RS1: %2d 0x%08x", decoded_rs1, cpuregs_rs1);)
+ irq_mask <= cpuregs_rs1 | MASKED_IRQ;
cpu_state <= cpu_state_fetch;
end
ENABLE_IRQ && ENABLE_IRQ_TIMER && instr_timer: begin
latched_store <= 1;
reg_out <= timer;
- `debug($display("LD_RS1: %2d 0x%08x", decoded_rs1, decoded_rs1 ? cpuregs[decoded_rs1] : 0);)
- timer <= decoded_rs1 ? cpuregs[decoded_rs1] : 0;
+ `debug($display("LD_RS1: %2d 0x%08x", decoded_rs1, cpuregs_rs1);)
+ timer <= cpuregs_rs1;
cpu_state <= cpu_state_fetch;
end
is_lb_lh_lw_lbu_lhu: begin
- `debug($display("LD_RS1: %2d 0x%08x", decoded_rs1, decoded_rs1 ? cpuregs[decoded_rs1] : 0);)
- reg_op1 <= decoded_rs1 ? cpuregs[decoded_rs1] : 0;
+ `debug($display("LD_RS1: %2d 0x%08x", decoded_rs1, cpuregs_rs1);)
+ reg_op1 <= cpuregs_rs1;
cpu_state <= cpu_state_ldmem;
mem_do_rinst <= 1;
end
is_slli_srli_srai && !BARREL_SHIFTER: begin
- `debug($display("LD_RS1: %2d 0x%08x", decoded_rs1, decoded_rs1 ? cpuregs[decoded_rs1] : 0);)
- reg_op1 <= decoded_rs1 ? cpuregs[decoded_rs1] : 0;
+ `debug($display("LD_RS1: %2d 0x%08x", decoded_rs1, cpuregs_rs1);)
+ reg_op1 <= cpuregs_rs1;
reg_sh <= decoded_rs2;
cpu_state <= cpu_state_shift;
end
is_jalr_addi_slti_sltiu_xori_ori_andi, is_slli_srli_srai && BARREL_SHIFTER: begin
- `debug($display("LD_RS1: %2d 0x%08x", decoded_rs1, decoded_rs1 ? cpuregs[decoded_rs1] : 0);)
- reg_op1 <= decoded_rs1 ? cpuregs[decoded_rs1] : 0;
+ `debug($display("LD_RS1: %2d 0x%08x", decoded_rs1, cpuregs_rs1);)
+ reg_op1 <= cpuregs_rs1;
reg_op2 <= is_slli_srli_srai && BARREL_SHIFTER ? decoded_rs2 : decoded_imm;
if (TWO_CYCLE_ALU)
alu_wait <= 1;
@@ -1471,12 +1517,12 @@ module picorv32 #(
cpu_state <= cpu_state_exec;
end
default: begin
- `debug($display("LD_RS1: %2d 0x%08x", decoded_rs1, decoded_rs1 ? cpuregs[decoded_rs1] : 0);)
- reg_op1 <= decoded_rs1 ? cpuregs[decoded_rs1] : 0;
+ `debug($display("LD_RS1: %2d 0x%08x", decoded_rs1, cpuregs_rs1);)
+ reg_op1 <= cpuregs_rs1;
if (ENABLE_REGS_DUALPORT) begin
- `debug($display("LD_RS2: %2d 0x%08x", decoded_rs2, decoded_rs2 ? cpuregs[decoded_rs2] : 0);)
- reg_sh <= decoded_rs2 ? cpuregs[decoded_rs2] : 0;
- reg_op2 <= decoded_rs2 ? cpuregs[decoded_rs2] : 0;
+ `debug($display("LD_RS2: %2d 0x%08x", decoded_rs2, cpuregs_rs2);)
+ reg_sh <= cpuregs_rs2;
+ reg_op2 <= cpuregs_rs2;
(* parallel_case *)
case (1'b1)
is_sb_sh_sw: begin
@@ -1502,9 +1548,9 @@ module picorv32 #(
end
cpu_state_ld_rs2: begin
- `debug($display("LD_RS2: %2d 0x%08x", decoded_rs2, decoded_rs2 ? cpuregs[decoded_rs2] : 0);)
- reg_sh <= decoded_rs2 ? cpuregs[decoded_rs2] : 0;
- reg_op2 <= decoded_rs2 ? cpuregs[decoded_rs2] : 0;
+ `debug($display("LD_RS2: %2d 0x%08x", decoded_rs2, cpuregs_rs2);)
+ reg_sh <= cpuregs_rs2;
+ reg_op2 <= cpuregs_rs2;
(* parallel_case *)
case (1'b1)
@@ -1862,7 +1908,7 @@ module picorv32_pcpi_mul #(
always @(posedge clk) begin
pcpi_wr <= 0;
pcpi_ready <= 0;
- if (mul_finish) begin
+ if (mul_finish && resetn) begin
pcpi_wr <= 1;
pcpi_ready <= 1;
pcpi_rd <= instr_any_mulh ? rd >> 32 : rd;
@@ -1889,7 +1935,8 @@ module picorv32_pcpi_fast_mul (
wire instr_rs2_signed = |{instr_mulh};
reg active1, active2, shift_out;
- reg [63:0] rs1, rs2, rd;
+ reg [32:0] rs1, rs2;
+ reg [63:0] rd;
always @* begin
instr_mul = 0;
@@ -1908,7 +1955,7 @@ module picorv32_pcpi_fast_mul (
end
always @(posedge clk) begin
- rd <= rs1 * rs2;
+ rd <= $signed(rs1) * $signed(rs2);
end
always @(posedge clk) begin
@@ -1928,6 +1975,11 @@ module picorv32_pcpi_fast_mul (
end
active2 <= active1;
shift_out <= instr_any_mulh;
+
+ if (!resetn) begin
+ active1 <= 0;
+ active2 <= 0;
+ end
end
assign pcpi_wr = active2;
diff --git a/scripts/smtbmc/.gitignore b/scripts/smtbmc/.gitignore
index 54541e1..66e0f4c 100644
--- a/scripts/smtbmc/.gitignore
+++ b/scripts/smtbmc/.gitignore
@@ -2,5 +2,7 @@ tracecmp.smt2
tracecmp.yslog
notrap_validop.smt2
notrap_validop.yslog
+mulcmp.smt2
+mulcmp.yslog
output.vcd
output.smtc
diff --git a/scripts/smtbmc/mulcmp.sh b/scripts/smtbmc/mulcmp.sh
new file mode 100644
index 0000000..586d72a
--- /dev/null
+++ b/scripts/smtbmc/mulcmp.sh
@@ -0,0 +1,12 @@
+#!/bin/bash
+
+set -ex
+
+yosys -ql mulcmp.yslog \
+ -p 'read_verilog -formal -norestrict -assume-asserts ../../picorv32.v' \
+ -p 'read_verilog -formal mulcmp.v' \
+ -p 'prep -top testbench -nordff' \
+ -p 'write_smt2 -wires mulcmp.smt2'
+
+yosys-smtbmc -s yices -t 100 --dump-vcd output.vcd --dump-smtc output.smtc mulcmp.smt2
+
diff --git a/scripts/smtbmc/mulcmp.v b/scripts/smtbmc/mulcmp.v
new file mode 100644
index 0000000..20c47f3
--- /dev/null
+++ b/scripts/smtbmc/mulcmp.v
@@ -0,0 +1,87 @@
+module testbench(input clk, mem_ready_0, mem_ready_1);
+
+ reg resetn = 0;
+
+ always @(posedge clk)
+ resetn <= 1;
+
+ reg pcpi_valid_0 = 1;
+ reg pcpi_valid_1 = 1;
+
+ wire [31:0] pcpi_insn = $anyconst;
+ wire [31:0] pcpi_rs1 = $anyconst;
+ wire [31:0] pcpi_rs2 = $anyconst;
+
+ wire pcpi_wr_0;
+ wire [31:0] pcpi_rd_0;
+ wire pcpi_wait_0;
+ wire pcpi_ready_0;
+
+ wire pcpi_wr_1;
+ wire [31:0] pcpi_rd_1;
+ wire pcpi_wait_1;
+ wire pcpi_ready_1;
+
+ reg pcpi_wr_ref;
+ reg [31:0] pcpi_rd_ref;
+ reg pcpi_ready_ref = 0;
+
+ picorv32_pcpi_mul mul_0 (
+ .clk (clk ),
+ .resetn (resetn ),
+ .pcpi_valid(pcpi_valid_0),
+ .pcpi_insn (pcpi_insn ),
+ .pcpi_rs1 (pcpi_rs1 ),
+ .pcpi_rs2 (pcpi_rs2 ),
+ .pcpi_wr (pcpi_wr_0 ),
+ .pcpi_rd (pcpi_rd_0 ),
+ .pcpi_wait (pcpi_wait_0 ),
+ .pcpi_ready(pcpi_ready_0),
+
+ );
+
+ picorv32_pcpi_fast_mul mul_1 (
+ .clk (clk ),
+ .resetn (resetn ),
+ .pcpi_valid(pcpi_valid_1),
+ .pcpi_insn (pcpi_insn ),
+ .pcpi_rs1 (pcpi_rs1 ),
+ .pcpi_rs2 (pcpi_rs2 ),
+ .pcpi_wr (pcpi_wr_1 ),
+ .pcpi_rd (pcpi_rd_1 ),
+ .pcpi_wait (pcpi_wait_1 ),
+ .pcpi_ready(pcpi_ready_1),
+
+ );
+
+ always @(posedge clk) begin
+ if (resetn) begin
+ if (pcpi_ready_0 && pcpi_ready_1) begin
+ assert(pcpi_wr_0 == pcpi_wr_1);
+ assert(pcpi_rd_0 == pcpi_rd_1);
+ end
+
+ if (pcpi_ready_0) begin
+ pcpi_valid_0 <= 0;
+ pcpi_wr_ref <= pcpi_wr_0;
+ pcpi_rd_ref <= pcpi_rd_0;
+ pcpi_ready_ref <= 1;
+ if (pcpi_ready_ref) begin
+ assert(pcpi_wr_0 == pcpi_wr_ref);
+ assert(pcpi_rd_0 == pcpi_rd_ref);
+ end
+ end
+
+ if (pcpi_ready_1) begin
+ pcpi_valid_1 <= 0;
+ pcpi_wr_ref <= pcpi_wr_1;
+ pcpi_rd_ref <= pcpi_rd_1;
+ pcpi_ready_ref <= 1;
+ if (pcpi_ready_ref) begin
+ assert(pcpi_wr_1 == pcpi_wr_ref);
+ assert(pcpi_rd_1 == pcpi_rd_ref);
+ end
+ end
+ end
+ end
+endmodule
diff --git a/scripts/smtbmc/notrap_validop.sh b/scripts/smtbmc/notrap_validop.sh
index 10a7ca5..95e0f92 100644
--- a/scripts/smtbmc/notrap_validop.sh
+++ b/scripts/smtbmc/notrap_validop.sh
@@ -6,8 +6,8 @@ yosys -ql notrap_validop.yslog \
-p 'read_verilog -formal -norestrict -assume-asserts ../../picorv32.v' \
-p 'read_verilog -formal notrap_validop.v' \
-p 'prep -top testbench -nordff' \
- -p 'write_smt2 -mem -bv -wires notrap_validop.smt2'
+ -p 'write_smt2 -wires notrap_validop.smt2'
-#yosys-smtbmc -s yices -t 50 --dump-vcd output.vcd --dump-smtc output.smtc notrap_validop.smt2
-yosys-smtbmc -s yices -i -t 27 --dump-vcd output.vcd --dump-smtc output.smtc notrap_validop.smt2
+yosys-smtbmc -s yices -t 30 --dump-vcd output.vcd --dump-smtc output.smtc notrap_validop.smt2
+yosys-smtbmc -s yices -i -t 30 --dump-vcd output.vcd --dump-smtc output.smtc notrap_validop.smt2
diff --git a/scripts/smtbmc/tracecmp.sh b/scripts/smtbmc/tracecmp.sh
index 6f471bd..449af52 100644
--- a/scripts/smtbmc/tracecmp.sh
+++ b/scripts/smtbmc/tracecmp.sh
@@ -6,7 +6,7 @@ yosys -ql tracecmp.yslog \
-p 'read_verilog -formal -norestrict -assume-asserts ../../picorv32.v' \
-p 'read_verilog -formal tracecmp.v' \
-p 'prep -top testbench -nordff' \
- -p 'write_smt2 -mem -bv -wires tracecmp.smt2'
+ -p 'write_smt2 -wires tracecmp.smt2'
yosys-smtbmc -s yices --smtc tracecmp.smtc --dump-vcd output.vcd --dump-smtc output.smtc tracecmp.smt2