diff options
-rw-r--r-- | Makefile | 2 | ||||
-rw-r--r-- | README.md | 6 | ||||
-rw-r--r-- | dhrystone/Makefile | 2 | ||||
-rw-r--r-- | dhrystone/testbench.v | 2 | ||||
-rw-r--r-- | picorv32.v | 124 | ||||
-rw-r--r-- | scripts/smtbmc/.gitignore | 2 | ||||
-rw-r--r-- | scripts/smtbmc/mulcmp.sh | 12 | ||||
-rw-r--r-- | scripts/smtbmc/mulcmp.v | 87 | ||||
-rw-r--r-- | scripts/smtbmc/notrap_validop.sh | 6 | ||||
-rw-r--r-- | scripts/smtbmc/tracecmp.sh | 2 |
10 files changed, 199 insertions, 46 deletions
@@ -30,7 +30,7 @@ check-yices: check.smt2 check.smt2: picorv32.v yosys -v2 -p 'read_verilog -formal picorv32.v' \ -p 'prep -top picorv32 -nordff' \ - -p 'write_smt2 -bv -mem -wires check.smt2' + -p 'write_smt2 -wires check.smt2' test_sp: testbench_sp.vvp firmware/firmware.hex vvp -N testbench_sp.vvp @@ -345,11 +345,11 @@ When `BARREL_SHIFTER` is activated, a shift operation takes as long as any other ALU operation. The following dhrystone benchmark results are for a core with enabled -`ENABLE_MUL`, `ENABLE_DIV`, and `BARREL_SHIFTER` options. +`ENABLE_FAST_MUL`, `ENABLE_DIV`, and `BARREL_SHIFTER` options. -Dhrystone benchmark results: 0.505 DMIPS/MHz (888 Dhrystones/Second/MHz) +Dhrystone benchmark results: 0.521 DMIPS/MHz (916 Dhrystones/Second/MHz) -For the Dhrystone benchmark the average CPI is 4.208. +For the Dhrystone benchmark the average CPI is 4.081. PicoRV32 Native Memory Interface diff --git a/dhrystone/Makefile b/dhrystone/Makefile index 3609a97..9cc191f 100644 --- a/dhrystone/Makefile +++ b/dhrystone/Makefile @@ -29,7 +29,7 @@ timing.vvp: testbench.v ../picorv32.v chmod -x timing.vvp dhry.hex: dhry.elf - riscv32-unknown-elf-objcopy -O verilog $< $@ + $(TOOLCHAIN_PREFIX)objcopy -O verilog $< $@ ifeq ($(USE_MYSTDLIB),1) dhry.elf: $(OBJS) sections.lds diff --git a/dhrystone/testbench.v b/dhrystone/testbench.v index 2d985d6..7744b0d 100644 --- a/dhrystone/testbench.v +++ b/dhrystone/testbench.v @@ -28,7 +28,7 @@ module testbench; picorv32 #( .BARREL_SHIFTER(1), - .ENABLE_MUL(1), + .ENABLE_FAST_MUL(1), .ENABLE_DIV(1), .PROGADDR_RESET('h10000), .STACKADDR('h10000) @@ -1163,6 +1163,56 @@ module picorv32 #( clear_prefetched_high_word = COMPRESSED_ISA; end + reg cpuregs_write; + reg [31:0] cpuregs_wrdata; + reg [31:0] cpuregs_rs1; + reg [31:0] cpuregs_rs2; + reg [regindex_bits-1:0] decoded_rs; + + always @* begin + cpuregs_write = 0; + cpuregs_wrdata = 'bx; + + if (cpu_state == cpu_state_fetch) begin + (* parallel_case *) + case (1'b1) + latched_branch: begin + cpuregs_wrdata = reg_pc + (latched_compr ? 2 : 4); + cpuregs_write = 1; + end + latched_store && !latched_branch: begin + cpuregs_wrdata = latched_stalu ? alu_out_q : reg_out; + cpuregs_write = 1; + end + ENABLE_IRQ && irq_state[0]: begin + cpuregs_wrdata = reg_next_pc | latched_compr; + cpuregs_write = 1; + end + ENABLE_IRQ && irq_state[1]: begin + cpuregs_wrdata = irq_pending & ~irq_mask; + cpuregs_write = 1; + end + endcase + end + end + + always @(posedge clk) begin + if (cpuregs_write) + cpuregs[latched_rd] <= cpuregs_wrdata; + end + + always @* begin + decoded_rs = 'bx; + if (ENABLE_REGS_DUALPORT) begin + cpuregs_rs1 = decoded_rs1 ? cpuregs[decoded_rs1] : 0; + cpuregs_rs2 = decoded_rs2 ? cpuregs[decoded_rs2] : 0; + end else begin + decoded_rs = (cpu_state == cpu_state_ld_rs2) ? decoded_rs2 : decoded_rs1; + cpuregs_rs1 = decoded_rs ? cpuregs[decoded_rs] : 0; + cpuregs_rs2 = cpuregs_rs1; + end + end + assign launch_next_insn = cpu_state == cpu_state_fetch && decoder_trigger && (!ENABLE_IRQ || irq_delay || irq_active || !(irq_pending & ~irq_mask)); always @(posedge clk) begin @@ -1264,21 +1314,17 @@ module picorv32 #( latched_branch: begin current_pc = latched_store ? (latched_stalu ? alu_out_q : reg_out) : reg_next_pc; `debug($display("ST_RD: %2d 0x%08x, BRANCH 0x%08x", latched_rd, reg_pc + (latched_compr ? 2 : 4), current_pc);) - cpuregs[latched_rd] <= reg_pc + (latched_compr ? 2 : 4); end latched_store && !latched_branch: begin `debug($display("ST_RD: %2d 0x%08x", latched_rd, latched_stalu ? alu_out_q : reg_out);) - cpuregs[latched_rd] <= latched_stalu ? alu_out_q : reg_out; end ENABLE_IRQ && irq_state[0]: begin - cpuregs[latched_rd] <= current_pc | latched_compr; current_pc = PROGADDR_IRQ; irq_active <= 1; mem_do_rinst <= 1; end ENABLE_IRQ && irq_state[1]: begin eoi <= irq_pending & ~irq_mask; - cpuregs[latched_rd] <= irq_pending & ~irq_mask; next_irq_pending = next_irq_pending & irq_mask; end endcase @@ -1353,13 +1399,13 @@ module picorv32 #( case (1'b1) (CATCH_ILLINSN || WITH_PCPI) && instr_trap: begin if (WITH_PCPI) begin - `debug($display("LD_RS1: %2d 0x%08x", decoded_rs1, decoded_rs1 ? cpuregs[decoded_rs1] : 0);) - reg_op1 <= decoded_rs1 ? cpuregs[decoded_rs1] : 0; + `debug($display("LD_RS1: %2d 0x%08x", decoded_rs1, cpuregs_rs1);) + reg_op1 <= cpuregs_rs1; if (ENABLE_REGS_DUALPORT) begin pcpi_valid <= 1; - `debug($display("LD_RS2: %2d 0x%08x", decoded_rs2, decoded_rs2 ? cpuregs[decoded_rs2] : 0);) - reg_sh <= decoded_rs2 ? cpuregs[decoded_rs2] : 0; - reg_op2 <= decoded_rs2 ? cpuregs[decoded_rs2] : 0; + `debug($display("LD_RS2: %2d 0x%08x", decoded_rs2, cpuregs_rs2);) + reg_sh <= cpuregs_rs2; + reg_op2 <= cpuregs_rs2; if (pcpi_int_ready) begin mem_do_rinst <= 1; pcpi_valid <= 0; @@ -1413,14 +1459,14 @@ module picorv32 #( cpu_state <= cpu_state_exec; end ENABLE_IRQ && ENABLE_IRQ_QREGS && instr_getq: begin - `debug($display("LD_RS1: %2d 0x%08x", decoded_rs1, decoded_rs1 ? cpuregs[decoded_rs1] : 0);) - reg_out <= decoded_rs1 ? cpuregs[decoded_rs1] : 0; + `debug($display("LD_RS1: %2d 0x%08x", decoded_rs1, cpuregs_rs1);) + reg_out <= cpuregs_rs1; latched_store <= 1; cpu_state <= cpu_state_fetch; end ENABLE_IRQ && ENABLE_IRQ_QREGS && instr_setq: begin - `debug($display("LD_RS1: %2d 0x%08x", decoded_rs1, decoded_rs1 ? cpuregs[decoded_rs1] : 0);) - reg_out <= decoded_rs1 ? cpuregs[decoded_rs1] : 0; + `debug($display("LD_RS1: %2d 0x%08x", decoded_rs1, cpuregs_rs1);) + reg_out <= cpuregs_rs1; latched_rd <= latched_rd | irqregs_offset; latched_store <= 1; cpu_state <= cpu_state_fetch; @@ -1430,39 +1476,39 @@ module picorv32 #( irq_active <= 0; latched_branch <= 1; latched_store <= 1; - `debug($display("LD_RS1: %2d 0x%08x", decoded_rs1, decoded_rs1 ? cpuregs[decoded_rs1] : 0);) - reg_out <= decoded_rs1 ? cpuregs[decoded_rs1] : 0; + `debug($display("LD_RS1: %2d 0x%08x", decoded_rs1, cpuregs_rs1);) + reg_out <= cpuregs_rs1; cpu_state <= cpu_state_fetch; end ENABLE_IRQ && instr_maskirq: begin latched_store <= 1; reg_out <= irq_mask; - `debug($display("LD_RS1: %2d 0x%08x", decoded_rs1, decoded_rs1 ? cpuregs[decoded_rs1] : 0);) - irq_mask <= (decoded_rs1 ? cpuregs[decoded_rs1] : 0) | MASKED_IRQ; + `debug($display("LD_RS1: %2d 0x%08x", decoded_rs1, cpuregs_rs1);) + irq_mask <= cpuregs_rs1 | MASKED_IRQ; cpu_state <= cpu_state_fetch; end ENABLE_IRQ && ENABLE_IRQ_TIMER && instr_timer: begin latched_store <= 1; reg_out <= timer; - `debug($display("LD_RS1: %2d 0x%08x", decoded_rs1, decoded_rs1 ? cpuregs[decoded_rs1] : 0);) - timer <= decoded_rs1 ? cpuregs[decoded_rs1] : 0; + `debug($display("LD_RS1: %2d 0x%08x", decoded_rs1, cpuregs_rs1);) + timer <= cpuregs_rs1; cpu_state <= cpu_state_fetch; end is_lb_lh_lw_lbu_lhu: begin - `debug($display("LD_RS1: %2d 0x%08x", decoded_rs1, decoded_rs1 ? cpuregs[decoded_rs1] : 0);) - reg_op1 <= decoded_rs1 ? cpuregs[decoded_rs1] : 0; + `debug($display("LD_RS1: %2d 0x%08x", decoded_rs1, cpuregs_rs1);) + reg_op1 <= cpuregs_rs1; cpu_state <= cpu_state_ldmem; mem_do_rinst <= 1; end is_slli_srli_srai && !BARREL_SHIFTER: begin - `debug($display("LD_RS1: %2d 0x%08x", decoded_rs1, decoded_rs1 ? cpuregs[decoded_rs1] : 0);) - reg_op1 <= decoded_rs1 ? cpuregs[decoded_rs1] : 0; + `debug($display("LD_RS1: %2d 0x%08x", decoded_rs1, cpuregs_rs1);) + reg_op1 <= cpuregs_rs1; reg_sh <= decoded_rs2; cpu_state <= cpu_state_shift; end is_jalr_addi_slti_sltiu_xori_ori_andi, is_slli_srli_srai && BARREL_SHIFTER: begin - `debug($display("LD_RS1: %2d 0x%08x", decoded_rs1, decoded_rs1 ? cpuregs[decoded_rs1] : 0);) - reg_op1 <= decoded_rs1 ? cpuregs[decoded_rs1] : 0; + `debug($display("LD_RS1: %2d 0x%08x", decoded_rs1, cpuregs_rs1);) + reg_op1 <= cpuregs_rs1; reg_op2 <= is_slli_srli_srai && BARREL_SHIFTER ? decoded_rs2 : decoded_imm; if (TWO_CYCLE_ALU) alu_wait <= 1; @@ -1471,12 +1517,12 @@ module picorv32 #( cpu_state <= cpu_state_exec; end default: begin - `debug($display("LD_RS1: %2d 0x%08x", decoded_rs1, decoded_rs1 ? cpuregs[decoded_rs1] : 0);) - reg_op1 <= decoded_rs1 ? cpuregs[decoded_rs1] : 0; + `debug($display("LD_RS1: %2d 0x%08x", decoded_rs1, cpuregs_rs1);) + reg_op1 <= cpuregs_rs1; if (ENABLE_REGS_DUALPORT) begin - `debug($display("LD_RS2: %2d 0x%08x", decoded_rs2, decoded_rs2 ? cpuregs[decoded_rs2] : 0);) - reg_sh <= decoded_rs2 ? cpuregs[decoded_rs2] : 0; - reg_op2 <= decoded_rs2 ? cpuregs[decoded_rs2] : 0; + `debug($display("LD_RS2: %2d 0x%08x", decoded_rs2, cpuregs_rs2);) + reg_sh <= cpuregs_rs2; + reg_op2 <= cpuregs_rs2; (* parallel_case *) case (1'b1) is_sb_sh_sw: begin @@ -1502,9 +1548,9 @@ module picorv32 #( end cpu_state_ld_rs2: begin - `debug($display("LD_RS2: %2d 0x%08x", decoded_rs2, decoded_rs2 ? cpuregs[decoded_rs2] : 0);) - reg_sh <= decoded_rs2 ? cpuregs[decoded_rs2] : 0; - reg_op2 <= decoded_rs2 ? cpuregs[decoded_rs2] : 0; + `debug($display("LD_RS2: %2d 0x%08x", decoded_rs2, cpuregs_rs2);) + reg_sh <= cpuregs_rs2; + reg_op2 <= cpuregs_rs2; (* parallel_case *) case (1'b1) @@ -1862,7 +1908,7 @@ module picorv32_pcpi_mul #( always @(posedge clk) begin pcpi_wr <= 0; pcpi_ready <= 0; - if (mul_finish) begin + if (mul_finish && resetn) begin pcpi_wr <= 1; pcpi_ready <= 1; pcpi_rd <= instr_any_mulh ? rd >> 32 : rd; @@ -1889,7 +1935,8 @@ module picorv32_pcpi_fast_mul ( wire instr_rs2_signed = |{instr_mulh}; reg active1, active2, shift_out; - reg [63:0] rs1, rs2, rd; + reg [32:0] rs1, rs2; + reg [63:0] rd; always @* begin instr_mul = 0; @@ -1908,7 +1955,7 @@ module picorv32_pcpi_fast_mul ( end always @(posedge clk) begin - rd <= rs1 * rs2; + rd <= $signed(rs1) * $signed(rs2); end always @(posedge clk) begin @@ -1928,6 +1975,11 @@ module picorv32_pcpi_fast_mul ( end active2 <= active1; shift_out <= instr_any_mulh; + + if (!resetn) begin + active1 <= 0; + active2 <= 0; + end end assign pcpi_wr = active2; diff --git a/scripts/smtbmc/.gitignore b/scripts/smtbmc/.gitignore index 54541e1..66e0f4c 100644 --- a/scripts/smtbmc/.gitignore +++ b/scripts/smtbmc/.gitignore @@ -2,5 +2,7 @@ tracecmp.smt2 tracecmp.yslog notrap_validop.smt2 notrap_validop.yslog +mulcmp.smt2 +mulcmp.yslog output.vcd output.smtc diff --git a/scripts/smtbmc/mulcmp.sh b/scripts/smtbmc/mulcmp.sh new file mode 100644 index 0000000..586d72a --- /dev/null +++ b/scripts/smtbmc/mulcmp.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +set -ex + +yosys -ql mulcmp.yslog \ + -p 'read_verilog -formal -norestrict -assume-asserts ../../picorv32.v' \ + -p 'read_verilog -formal mulcmp.v' \ + -p 'prep -top testbench -nordff' \ + -p 'write_smt2 -wires mulcmp.smt2' + +yosys-smtbmc -s yices -t 100 --dump-vcd output.vcd --dump-smtc output.smtc mulcmp.smt2 + diff --git a/scripts/smtbmc/mulcmp.v b/scripts/smtbmc/mulcmp.v new file mode 100644 index 0000000..20c47f3 --- /dev/null +++ b/scripts/smtbmc/mulcmp.v @@ -0,0 +1,87 @@ +module testbench(input clk, mem_ready_0, mem_ready_1); + + reg resetn = 0; + + always @(posedge clk) + resetn <= 1; + + reg pcpi_valid_0 = 1; + reg pcpi_valid_1 = 1; + + wire [31:0] pcpi_insn = $anyconst; + wire [31:0] pcpi_rs1 = $anyconst; + wire [31:0] pcpi_rs2 = $anyconst; + + wire pcpi_wr_0; + wire [31:0] pcpi_rd_0; + wire pcpi_wait_0; + wire pcpi_ready_0; + + wire pcpi_wr_1; + wire [31:0] pcpi_rd_1; + wire pcpi_wait_1; + wire pcpi_ready_1; + + reg pcpi_wr_ref; + reg [31:0] pcpi_rd_ref; + reg pcpi_ready_ref = 0; + + picorv32_pcpi_mul mul_0 ( + .clk (clk ), + .resetn (resetn ), + .pcpi_valid(pcpi_valid_0), + .pcpi_insn (pcpi_insn ), + .pcpi_rs1 (pcpi_rs1 ), + .pcpi_rs2 (pcpi_rs2 ), + .pcpi_wr (pcpi_wr_0 ), + .pcpi_rd (pcpi_rd_0 ), + .pcpi_wait (pcpi_wait_0 ), + .pcpi_ready(pcpi_ready_0), + + ); + + picorv32_pcpi_fast_mul mul_1 ( + .clk (clk ), + .resetn (resetn ), + .pcpi_valid(pcpi_valid_1), + .pcpi_insn (pcpi_insn ), + .pcpi_rs1 (pcpi_rs1 ), + .pcpi_rs2 (pcpi_rs2 ), + .pcpi_wr (pcpi_wr_1 ), + .pcpi_rd (pcpi_rd_1 ), + .pcpi_wait (pcpi_wait_1 ), + .pcpi_ready(pcpi_ready_1), + + ); + + always @(posedge clk) begin + if (resetn) begin + if (pcpi_ready_0 && pcpi_ready_1) begin + assert(pcpi_wr_0 == pcpi_wr_1); + assert(pcpi_rd_0 == pcpi_rd_1); + end + + if (pcpi_ready_0) begin + pcpi_valid_0 <= 0; + pcpi_wr_ref <= pcpi_wr_0; + pcpi_rd_ref <= pcpi_rd_0; + pcpi_ready_ref <= 1; + if (pcpi_ready_ref) begin + assert(pcpi_wr_0 == pcpi_wr_ref); + assert(pcpi_rd_0 == pcpi_rd_ref); + end + end + + if (pcpi_ready_1) begin + pcpi_valid_1 <= 0; + pcpi_wr_ref <= pcpi_wr_1; + pcpi_rd_ref <= pcpi_rd_1; + pcpi_ready_ref <= 1; + if (pcpi_ready_ref) begin + assert(pcpi_wr_1 == pcpi_wr_ref); + assert(pcpi_rd_1 == pcpi_rd_ref); + end + end + end + end +endmodule diff --git a/scripts/smtbmc/notrap_validop.sh b/scripts/smtbmc/notrap_validop.sh index 10a7ca5..95e0f92 100644 --- a/scripts/smtbmc/notrap_validop.sh +++ b/scripts/smtbmc/notrap_validop.sh @@ -6,8 +6,8 @@ yosys -ql notrap_validop.yslog \ -p 'read_verilog -formal -norestrict -assume-asserts ../../picorv32.v' \ -p 'read_verilog -formal notrap_validop.v' \ -p 'prep -top testbench -nordff' \ - -p 'write_smt2 -mem -bv -wires notrap_validop.smt2' + -p 'write_smt2 -wires notrap_validop.smt2' -#yosys-smtbmc -s yices -t 50 --dump-vcd output.vcd --dump-smtc output.smtc notrap_validop.smt2 -yosys-smtbmc -s yices -i -t 27 --dump-vcd output.vcd --dump-smtc output.smtc notrap_validop.smt2 +yosys-smtbmc -s yices -t 30 --dump-vcd output.vcd --dump-smtc output.smtc notrap_validop.smt2 +yosys-smtbmc -s yices -i -t 30 --dump-vcd output.vcd --dump-smtc output.smtc notrap_validop.smt2 diff --git a/scripts/smtbmc/tracecmp.sh b/scripts/smtbmc/tracecmp.sh index 6f471bd..449af52 100644 --- a/scripts/smtbmc/tracecmp.sh +++ b/scripts/smtbmc/tracecmp.sh @@ -6,7 +6,7 @@ yosys -ql tracecmp.yslog \ -p 'read_verilog -formal -norestrict -assume-asserts ../../picorv32.v' \ -p 'read_verilog -formal tracecmp.v' \ -p 'prep -top testbench -nordff' \ - -p 'write_smt2 -mem -bv -wires tracecmp.smt2' + -p 'write_smt2 -wires tracecmp.smt2' yosys-smtbmc -s yices --smtc tracecmp.smtc --dump-vcd output.vcd --dump-smtc output.smtc tracecmp.smt2 |