aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorClifford Wolf <clifford@clifford.at>2015-07-08 20:17:03 +0200
committerClifford Wolf <clifford@clifford.at>2015-07-08 20:17:03 +0200
commitdd30b57ea6d799b0e2112f1fb130f1bc6df5f86e (patch)
treedd299807cdc26bd224bd9827a53b2b0bce161d8a
parenta97a7159876cea113b2c115498bd8b5d8b5e1be7 (diff)
downloadpicorv32-dd30b57ea6d799b0e2112f1fb130f1bc6df5f86e.tar.gz
picorv32-dd30b57ea6d799b0e2112f1fb130f1bc6df5f86e.zip
Added TWO_CYCLE_ALU parameter
-rw-r--r--README.md12
-rw-r--r--picorv32.v176
2 files changed, 136 insertions, 52 deletions
diff --git a/README.md b/README.md
index af9bcf7..98ea96d 100644
--- a/README.md
+++ b/README.md
@@ -171,6 +171,18 @@ This relaxes the longest data path a bit by adding an additional FF stage
at the cost of adding an additional clock cycle delay to the conditional
branch instructions.
+*Note: Enabling this parameter will be most effective when retiming (aka
+"register balancing") is enabled in the synthesis flow.*
+
+#### TWO_CYCLE_ALU (default = 0)
+
+This adds an additional FF stage in the ALU data path, improving timing
+at the cost of an additional clock cycle for all instructions that use
+the ALU.
+
+*Note: Enabling this parameter will be most effective when retiming (aka
+"register balancing") is enabled in the synthesis flow.*
+
#### CATCH_MISALIGN (default = 1)
Set this to 0 to disable the circuitry for catching misaligned memory
diff --git a/picorv32.v b/picorv32.v
index b73e06b..cf39b29 100644
--- a/picorv32.v
+++ b/picorv32.v
@@ -38,6 +38,7 @@ module picorv32 #(
parameter [ 0:0] LATCHED_MEM_RDATA = 0,
parameter [ 0:0] TWO_STAGE_SHIFT = 1,
parameter [ 0:0] TWO_CYCLE_COMPARE = 0,
+ parameter [ 0:0] TWO_CYCLE_ALU = 0,
parameter [ 0:0] CATCH_MISALIGN = 1,
parameter [ 0:0] CATCH_ILLINSN = 1,
parameter [ 0:0] ENABLE_PCPI = 0,
@@ -530,46 +531,104 @@ module picorv32 #(
reg [31:0] next_irq_pending;
reg do_waitirq;
- reg [31:0] alu_out, reg_alu_out;
- reg alu_out_0, reg_alu_out_0;
- reg alu_wait;
+ reg [31:0] alu_out, alu_out_q;
+ reg alu_out_0, alu_out_0_q;
+ reg alu_wait, alu_wait_2;
always @* begin
- alu_out_0 = 'bx;
- (* parallel_case, full_case *)
- case (1'b1)
- instr_beq:
- alu_out_0 = reg_op1 == reg_op2;
- instr_bne:
- alu_out_0 = reg_op1 != reg_op2;
- instr_bge:
- alu_out_0 = $signed(reg_op1) >= $signed(reg_op2);
- instr_bgeu:
- alu_out_0 = reg_op1 >= reg_op2;
- is_slti_blt_slt:
- alu_out_0 = $signed(reg_op1) < $signed(reg_op2);
- is_sltiu_bltu_sltu:
- alu_out_0 = reg_op1 < reg_op2;
- endcase
-
- alu_out = 'bx;
- (* parallel_case, full_case *)
- case (1'b1)
- is_lui_auipc_jal_jalr_addi_add:
- alu_out = reg_op1 + reg_op2;
- instr_sub:
- alu_out = reg_op1 - reg_op2;
- is_compare:
- alu_out = alu_out_0;
- instr_xori || instr_xor:
- alu_out = reg_op1 ^ reg_op2;
- instr_ori || instr_or:
- alu_out = reg_op1 | reg_op2;
- instr_andi || instr_and:
- alu_out = reg_op1 & reg_op2;
- endcase
end
+ generate if (TWO_CYCLE_ALU) begin:two_cycle_alu
+ reg [31:0] alu_add_sub;
+ reg [31:0] alu_xor_or_and;
+ reg alu_eq, alu_ltu, alu_lts;
+
+ always @(posedge clk) begin
+ alu_add_sub <= instr_sub ? reg_op1 - reg_op2 : reg_op1 + reg_op2;
+
+ alu_xor_or_and = 'bx;
+ (* parallel_case, full_case *)
+ case (1'b1)
+ instr_xori || instr_xor:
+ alu_xor_or_and = reg_op1 ^ reg_op2;
+ instr_ori || instr_or:
+ alu_xor_or_and = reg_op1 | reg_op2;
+ instr_andi || instr_and:
+ alu_xor_or_and = reg_op1 & reg_op2;
+ endcase
+
+ alu_eq <= reg_op1 == reg_op2;
+ alu_lts <= $signed(reg_op1) < $signed(reg_op2);
+ alu_ltu <= reg_op1 < reg_op2;
+ end
+
+ always @* begin
+ alu_out_0 = 'bx;
+ (* parallel_case, full_case *)
+ case (1'b1)
+ instr_beq:
+ alu_out_0 = alu_eq;
+ instr_bne:
+ alu_out_0 = !alu_eq;
+ instr_bge:
+ alu_out_0 = !alu_lts;
+ instr_bgeu:
+ alu_out_0 = !alu_ltu;
+ is_slti_blt_slt:
+ alu_out_0 = alu_lts;
+ is_sltiu_bltu_sltu:
+ alu_out_0 = alu_ltu;
+ endcase
+
+ alu_out = 'bx;
+ (* parallel_case, full_case *)
+ case (1'b1)
+ is_lui_auipc_jal_jalr_addi_add || instr_sub:
+ alu_out = alu_add_sub;
+ is_compare:
+ alu_out = alu_out_0;
+ |{instr_xori, instr_xor, instr_ori, instr_or, instr_andi, instr_and}:
+ alu_out = alu_xor_or_and;
+ endcase
+ end
+ end else begin:one_cycle_alu
+ always @* begin
+ alu_out_0 = 'bx;
+ (* parallel_case, full_case *)
+ case (1'b1)
+ instr_beq:
+ alu_out_0 = reg_op1 == reg_op2;
+ instr_bne:
+ alu_out_0 = reg_op1 != reg_op2;
+ instr_bge:
+ alu_out_0 = $signed(reg_op1) >= $signed(reg_op2);
+ instr_bgeu:
+ alu_out_0 = reg_op1 >= reg_op2;
+ is_slti_blt_slt:
+ alu_out_0 = $signed(reg_op1) < $signed(reg_op2);
+ is_sltiu_bltu_sltu:
+ alu_out_0 = reg_op1 < reg_op2;
+ endcase
+
+ alu_out = 'bx;
+ (* parallel_case, full_case *)
+ case (1'b1)
+ is_lui_auipc_jal_jalr_addi_add:
+ alu_out = reg_op1 + reg_op2;
+ instr_sub:
+ alu_out = reg_op1 - reg_op2;
+ is_compare:
+ alu_out = alu_out_0;
+ instr_xori || instr_xor:
+ alu_out = reg_op1 ^ reg_op2;
+ instr_ori || instr_or:
+ alu_out = reg_op1 | reg_op2;
+ instr_andi || instr_and:
+ alu_out = reg_op1 & reg_op2;
+ endcase
+ end
+ end endgenerate
+
always @(posedge clk) begin
trap <= 0;
reg_sh <= 'bx;
@@ -578,9 +637,11 @@ module picorv32 #(
set_mem_do_rdata = 0;
set_mem_do_wdata = 0;
- reg_alu_out <= alu_out;
- reg_alu_out_0 <= alu_out_0;
+ alu_out_0_q <= alu_out_0;
+ alu_out_q <= alu_out;
+
alu_wait <= 0;
+ alu_wait_2 <= 0;
if (WITH_PCPI && CATCH_ILLINSN) begin
if (resetn && pcpi_valid && !pcpi_int_wait) begin
@@ -646,13 +707,13 @@ module picorv32 #(
(* parallel_case *)
case (1'b1)
latched_branch: begin
- current_pc = latched_store ? (latched_stalu ? reg_alu_out : reg_out) : reg_next_pc;
+ current_pc = latched_store ? (latched_stalu ? alu_out_q : reg_out) : reg_next_pc;
`debug($display("ST_RD: %2d 0x%08x, BRANCH 0x%08x", latched_rd, reg_pc + 4, current_pc);)
cpuregs[latched_rd] <= reg_pc + 4;
end
latched_store && !latched_branch: begin
- `debug($display("ST_RD: %2d 0x%08x", latched_rd, latched_stalu ? reg_alu_out : reg_out);)
- cpuregs[latched_rd] <= latched_stalu ? reg_alu_out : reg_out;
+ `debug($display("ST_RD: %2d 0x%08x", latched_rd, latched_stalu ? alu_out_q : reg_out);)
+ cpuregs[latched_rd] <= latched_stalu ? alu_out_q : reg_out;
end
ENABLE_IRQ && irq_state[0]: begin
cpuregs[latched_rd] <= current_pc;
@@ -775,7 +836,10 @@ module picorv32 #(
is_lui_auipc_jal: begin
reg_op1 <= instr_lui ? 0 : reg_pc;
reg_op2 <= decoded_imm;
- mem_do_rinst <= mem_do_prefetch;
+ if (TWO_CYCLE_ALU)
+ alu_wait <= 1;
+ else
+ mem_do_rinst <= mem_do_prefetch;
cpu_state <= cpu_state_exec;
end
ENABLE_IRQ && ENABLE_IRQ_QREGS && instr_getq: begin
@@ -830,7 +894,10 @@ module picorv32 #(
`debug($display("LD_RS1: %2d 0x%08x", decoded_rs1, decoded_rs1 ? cpuregs[decoded_rs1] : 0);)
reg_op1 <= decoded_rs1 ? cpuregs[decoded_rs1] : 0;
reg_op2 <= decoded_imm;
- mem_do_rinst <= mem_do_prefetch;
+ if (TWO_CYCLE_ALU)
+ alu_wait <= 1;
+ else
+ mem_do_rinst <= mem_do_prefetch;
cpu_state <= cpu_state_exec;
end
default: begin
@@ -850,9 +917,10 @@ module picorv32 #(
cpu_state <= cpu_state_shift;
end
default: begin
- if (TWO_CYCLE_COMPARE && is_beq_bne_blt_bge_bltu_bgeu)
+ if (TWO_CYCLE_ALU || (TWO_CYCLE_COMPARE && is_beq_bne_blt_bge_bltu_bgeu)) begin
+ alu_wait_2 <= TWO_CYCLE_ALU && (TWO_CYCLE_COMPARE && is_beq_bne_blt_bge_bltu_bgeu);
alu_wait <= 1;
- else
+ end else
mem_do_rinst <= mem_do_prefetch;
cpu_state <= cpu_state_exec;
end
@@ -896,9 +964,10 @@ module picorv32 #(
cpu_state <= cpu_state_shift;
end
default: begin
- if (TWO_CYCLE_COMPARE && is_beq_bne_blt_bge_bltu_bgeu)
+ if (TWO_CYCLE_ALU || (TWO_CYCLE_COMPARE && is_beq_bne_blt_bge_bltu_bgeu)) begin
+ alu_wait_2 <= TWO_CYCLE_ALU && (TWO_CYCLE_COMPARE && is_beq_bne_blt_bge_bltu_bgeu);
alu_wait <= 1;
- else
+ end else
mem_do_rinst <= mem_do_prefetch;
cpu_state <= cpu_state_exec;
end
@@ -906,17 +975,18 @@ module picorv32 #(
end
cpu_state_exec: begin
- latched_store <= TWO_CYCLE_COMPARE ? reg_alu_out_0 : alu_out_0;
- latched_branch <= TWO_CYCLE_COMPARE ? reg_alu_out_0 : alu_out_0;
+ latched_store <= TWO_CYCLE_COMPARE ? alu_out_0_q : alu_out_0;
+ latched_branch <= TWO_CYCLE_COMPARE ? alu_out_0_q : alu_out_0;
reg_out <= reg_pc + decoded_imm;
- if (TWO_CYCLE_COMPARE && alu_wait) begin
- mem_do_rinst <= mem_do_prefetch;
+ if ((TWO_CYCLE_ALU || TWO_CYCLE_COMPARE) && (alu_wait || alu_wait_2)) begin
+ mem_do_rinst <= mem_do_prefetch && !alu_wait_2;
+ alu_wait <= alu_wait_2;
end else
if (is_beq_bne_blt_bge_bltu_bgeu) begin
latched_rd <= 0;
if (mem_done)
cpu_state <= cpu_state_fetch;
- if (TWO_CYCLE_COMPARE ? reg_alu_out_0 : alu_out_0) begin
+ if (TWO_CYCLE_COMPARE ? alu_out_0_q : alu_out_0) begin
decoder_trigger <= 0;
set_mem_do_rinst = 1;
end
@@ -1187,6 +1257,7 @@ module picorv32_axi #(
parameter [ 0:0] ENABLE_REGS_DUALPORT = 1,
parameter [ 0:0] TWO_STAGE_SHIFT = 1,
parameter [ 0:0] TWO_CYCLE_COMPARE = 0,
+ parameter [ 0:0] TWO_CYCLE_ALU = 0,
parameter [ 0:0] CATCH_MISALIGN = 1,
parameter [ 0:0] CATCH_ILLINSN = 1,
parameter [ 0:0] ENABLE_PCPI = 0,
@@ -1283,6 +1354,7 @@ module picorv32_axi #(
.ENABLE_REGS_DUALPORT(ENABLE_REGS_DUALPORT),
.TWO_STAGE_SHIFT (TWO_STAGE_SHIFT ),
.TWO_CYCLE_COMPARE (TWO_CYCLE_COMPARE ),
+ .TWO_CYCLE_ALU (TWO_CYCLE_ALU ),
.CATCH_MISALIGN (CATCH_MISALIGN ),
.CATCH_ILLINSN (CATCH_ILLINSN ),
.ENABLE_PCPI (ENABLE_PCPI ),