summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Alejandro Soto <alejandro@34project.org> 2022-10-02 09:49:48 -0600
committer Alejandro Soto <alejandro@34project.org> 2022-10-02 09:49:48 -0600
commit e97d445908f39a3a1a215a824f52b283147e6195 (patch)
tree 74baa4d5ee535c12dbbe400f8e6c2fda342b8e37
parent 5e2f6be247018699d71d32887010830ba45b8349 (diff)
Major shifter-ALU redesign
The shifter unit now works in parallel with the ALU and is no longer part of it. Instructions that use the shifter as input to the ALU will now take an additional cycle, unless the control unit can detect a "trivial shift" situation where the shifter's output will be the same as its input. This change improves Fmax substantially.
Diffstat (limited to '')
-rw-r--r-- conspiracion.qsf 2
-rw-r--r-- rtl/core/alu/alu.sv 32
-rw-r--r-- rtl/core/alu/shifter.sv 42
-rw-r--r-- rtl/core/arm810.sv 47
-rw-r--r-- rtl/core/cycles.sv 158
-rw-r--r-- rtl/core/decode/data.sv 12
-rw-r--r-- rtl/core/decode/decode.sv 24
-rw-r--r-- rtl/core/psr.sv 5
-rw-r--r-- rtl/core/shifter.sv 42
-rw-r--r-- rtl/core/uarch.sv 15
10 files changed, 210 insertions, 169 deletions
diff --git a/conspiracion.qsf b/conspiracion.qsf
index c81ea0a..e8371d3 100644
--- a/conspiracion.qsf
+++ b/conspiracion.qsf
@@ -226,6 +226,7 @@ set_global_assignment -name SYSTEMVERILOG_FILE rtl/core/alu/xor.sv
set_global_assignment -name SYSTEMVERILOG_FILE rtl/core/alu/add.sv
set_global_assignment -name SYSTEMVERILOG_FILE rtl/core/alu/alu.sv
set_global_assignment -name SYSTEMVERILOG_FILE rtl/core/psr.sv
+set_global_assignment -name SYSTEMVERILOG_FILE rtl/core/shifter.sv
set_global_assignment -name SYSTEMVERILOG_FILE rtl/core/regs/file.sv
set_global_assignment -name SYSTEMVERILOG_FILE rtl/core/regs/regs.sv
set_global_assignment -name SYSTEMVERILOG_FILE rtl/core/regs/map.sv
@@ -240,4 +241,5 @@ set_global_assignment -name QIP_FILE platform/synthesis/platform.qip
set_global_assignment -name SDC_FILE conspiracion.sdc
+
set_instance_assignment -name PARTITION_HIERARCHY root_partition -to | -section_id Top
\ No newline at end of file
diff --git a/rtl/core/alu/alu.sv b/rtl/core/alu/alu.sv
index 6ded727..d999164 100644
--- a/rtl/core/alu/alu.sv
+++ b/rtl/core/alu/alu.sv
@@ -3,10 +3,9 @@
module core_alu
#(parameter W=16)
(
- input alu_control ctrl,
+ input alu_op op,
input logic[W - 1:0] a,
- base,
- input logic[7:0] shift,
+ b,
input logic c_in,
output logic[W - 1:0] q,
@@ -14,22 +13,13 @@ module core_alu
output logic v_valid
);
- logic c, v, swap, sub, and_not, c_shifter, c_add, v_add;
- logic[W - 1:0] b, swap_a, swap_b, not_b, c_in_add, q_add, q_and, q_orr, q_xor;
+ logic c, v, swap, sub, and_not, c_add, v_add;
+ logic[W - 1:0] swap_a, swap_b, not_b, c_in_add, q_add, q_and, q_orr, q_xor;
assign swap_a = swap ? b : a;
assign swap_b = swap ? a : b;
assign not_b = ~b;
- core_alu_shifter #(.W(W)) shifter
- (
- .base(base),
- .shift(shift),
- .b(b),
- .c(c_shifter),
- .*
- );
-
core_alu_add #(.W(W)) op_add
(
.a(swap_a),
@@ -61,7 +51,7 @@ module core_alu
);
always_comb begin
- unique case(ctrl.op)
+ unique case(op)
`ALU_ADD, `ALU_ADC, `ALU_CMN, `ALU_CMP, `ALU_SUB, `ALU_SBC:
swap = 0;
@@ -72,7 +62,7 @@ module core_alu
swap = 1'bx;
endcase
- unique case(ctrl.op)
+ unique case(op)
`ALU_ADD, `ALU_CMN, `ALU_ADC:
sub = 0;
@@ -83,7 +73,7 @@ module core_alu
sub = 1'bx;
endcase
- unique case(ctrl.op)
+ unique case(op)
`ALU_ADD, `ALU_CMN, `ALU_CMP, `ALU_SUB, `ALU_RSB:
c_in_add = 0;
@@ -97,7 +87,7 @@ module core_alu
c_in_add = {W{1'bx}};
endcase
- unique case(ctrl.op)
+ unique case(op)
`ALU_AND, `ALU_TST:
and_not = 0;
@@ -108,7 +98,7 @@ module core_alu
and_not = 1'bx;
endcase
- unique case(ctrl.op)
+ unique case(op)
`ALU_SUB, `ALU_RSB, `ALU_ADD, `ALU_ADC, `ALU_SBC, `ALU_RSC, `ALU_CMP, `ALU_CMN:
q = q_add;
@@ -128,9 +118,9 @@ module core_alu
q = not_b;
endcase
- unique case(ctrl.op)
+ unique case(op)
`ALU_AND, `ALU_EOR, `ALU_TST, `ALU_TEQ, `ALU_ORR, `ALU_MOV, `ALU_BIC, `ALU_MVN: begin
- c = c_shifter;
+ c = c_in;
v = 1'bx;
v_valid = 0;
end
diff --git a/rtl/core/alu/shifter.sv b/rtl/core/alu/shifter.sv
deleted file mode 100644
index 1f99e99..0000000
--- a/rtl/core/alu/shifter.sv
+++ /dev/null
@@ -1,42 +0,0 @@
-`include "core/uarch.sv"
-
-module core_alu_shifter
-#(parameter W=16)
-(
- input alu_control ctrl,
- input logic[W - 1:0] base,
- input logic[7:0] shift,
- input logic c_in,
-
- output logic[W - 1:0] b,
- output logic c
-);
-
- localparam LOG = $clog2(W);
-
- logic[W - 1:0] b_no_c, b_shl, b_shr, b_ror;
- logic[W:0] sign_mask;
- logic c_shl, c_shr, c_ror;
-
- assign sign_mask = {(W + 1){ctrl.sign_extend & base[W - 1]}};
- assign {c_shl, b_shl} = {c_in, base} << shift;
- assign {b_shr, c_shr} = {base, c_in} >> shift | ~(sign_mask >> shift);
-
- logic ror_cycle;
- logic[LOG - 1:0] ror_shift;
- logic[2 * W:0] ror_out;
-
- assign ror_shift = shift[LOG - 1:0];
- assign ror_cycle = |shift[7:LOG] & ~|ror_shift;
- assign ror_out = {base, base, c_in} >> {ror_cycle, ror_shift};
- assign {b_ror, c_ror} = ror_out[W:0];
-
- always_comb
- if(ctrl.ror)
- {c, b} = {c_ror, b_ror};
- else if(ctrl.shr)
- {c, b} = {c_shr, b_shr[W - 1] | (ctrl.put_carry & c_in), b_shr[W - 2:0]};
- else
- {c, b} = {c_shl, b_shl};
-
-endmodule
diff --git a/rtl/core/arm810.sv b/rtl/core/arm810.sv
index 8ea6ed3..a8e197b 100644
--- a/rtl/core/arm810.sv
+++ b/rtl/core/arm810.sv
@@ -14,7 +14,7 @@ module arm810
logic stall, prefetch_flush;
word insn;
- ptr fetch_insn_pc, pc, pc_visible;
+ ptr fetch_insn_pc;
core_fetch #(.PREFETCH_ORDER(2)) fetch
(
@@ -31,7 +31,7 @@ module arm810
logic dec_execute, dec_undefined, dec_writeback, dec_branch, dec_update_flags;
ptr dec_branch_offset;
- alu_decode dec_alu;
+ data_decode dec_data;
core_decode decode
(
@@ -41,26 +41,28 @@ module arm810
.branch(dec_branch),
.update_flags(dec_update_flags),
.branch_offset(dec_branch_offset),
- .alu(dec_alu),
+ .data_ctrl(dec_data),
.*
);
reg_num rd, ra, rb;
- logic explicit_branch, writeback, update_flags;
- ptr branch_target;
+ logic explicit_branch, writeback, update_flags, c_in;
+ ptr branch_target, pc_visible;
psr_mode reg_mode;
- alu_control alu_ctrl;
- word alu_base;
- logic[7:0] alu_shift;
+ alu_op alu_ctrl;
+ shifter_control shifter_ctrl;
+ word alu_b, wr_value;
+ logic[7:0] shifter_shift;
core_cycles cycles
(
.branch(explicit_branch),
.alu(alu_ctrl),
+ .shifter(shifter_ctrl),
.*
);
- psr_flags flags;
+ psr_flags flags, next_flags;
core_psr psr
(
@@ -68,7 +70,7 @@ module arm810
);
logic wr_pc;
- word wr_value, rd_value_a, rd_value_b;
+ word rd_value_a, rd_value_b;
core_regs regs
(
@@ -84,17 +86,30 @@ module arm810
psr_flags alu_flags;
logic alu_v_valid;
+ word q_alu;
core_alu #(.W(32)) alu
(
- .ctrl(alu_ctrl),
+ .op(alu_ctrl),
.a(rd_value_a),
- .base(alu_base),
- .shift(alu_shift),
- .c_in(flags.c),
- .q(wr_value),
+ .b(alu_b),
+ .q(q_alu),
.nzcv(alu_flags),
- .v_valid(alu_v_valid)
+ .v_valid(alu_v_valid),
+ .*
+ );
+
+ word q_shifter;
+ logic c_shifter;
+
+ core_shifter #(.W(32)) shifter
+ (
+ .ctrl(shifter_ctrl),
+ .base(alu_b),
+ .shift(shifter_shift),
+ .c_in(flags.c),
+ .q(q_shifter),
+ .c(c_shifter)
);
endmodule
diff --git a/rtl/core/cycles.sv b/rtl/core/cycles.sv
index 8945bc2..c32bff0 100644
--- a/rtl/core/cycles.sv
+++ b/rtl/core/cycles.sv
@@ -2,77 +2,102 @@
module core_cycles
(
- input logic clk,
- dec_execute,
- dec_branch,
- dec_writeback,
- dec_update_flags,
- input ptr dec_branch_offset,
- input alu_decode dec_alu,
- input ptr fetch_insn_pc,
- input word rd_value_b,
-
- output logic stall,
- branch,
- writeback,
- update_flags,
- output reg_num rd,
- ra,
- rb,
- output ptr branch_target,
- pc,
- pc_visible,
- output psr_mode reg_mode,
- output alu_control alu,
- output word alu_base,
- output logic[7:0] alu_shift
+ input logic clk,
+ dec_execute,
+ dec_branch,
+ dec_writeback,
+ dec_update_flags,
+ input ptr dec_branch_offset,
+ input data_decode dec_data,
+ input ptr fetch_insn_pc,
+ input psr_flags next_flags,
+ input word rd_value_b,
+ q_alu,
+ q_shifter,
+ input logic c_shifter,
+
+ output logic stall,
+ branch,
+ writeback,
+ update_flags,
+ c_in,
+ output reg_num rd,
+ ra,
+ rb,
+ output ptr branch_target,
+ pc_visible,
+ output psr_mode reg_mode,
+ output alu_op alu,
+ output word alu_b,
+ wr_value,
+ output shifter_control shifter,
+ output logic[7:0] shifter_shift
);
enum
{
EXECUTE,
- RD_SHIFT
+ RD_INDIRECT_SHIFT,
+ WITH_SHIFT
} cycle, next_cycle;
- logic final_writeback, data_snd_is_imm, data_snd_shift_by_reg;
+ logic bubble, final_writeback, data_snd_is_imm, data_snd_shift_by_reg, trivial_shift;
logic[5:0] data_shift_imm;
logic[7:0] data_imm;
- logic bubble;
word saved_base;
- reg_num r_shift;
+ reg_num r_shift, final_rd;
+ ptr pc;
assign stall = (next_cycle != EXECUTE) | bubble;
assign pc_visible = pc + 2;
assign reg_mode = `MODE_SVC; //TODO
always_comb begin
+ unique case(cycle)
+ RD_INDIRECT_SHIFT: shifter_shift = rd_value_b[7:0];
+ default: shifter_shift = {2'b00, data_shift_imm};
+ endcase
+
+ trivial_shift = 1;
+ if(final_writeback & (shifter.shl | shifter.shr | shifter.ror))
+ trivial_shift = shifter_shift == 0;
+
next_cycle = EXECUTE;
- if((cycle == EXECUTE) & data_snd_shift_by_reg)
- next_cycle = RD_SHIFT;
+
+ unique case(cycle)
+ EXECUTE:
+ if(data_snd_shift_by_reg)
+ next_cycle = RD_INDIRECT_SHIFT;
+ else if(~trivial_shift)
+ next_cycle = WITH_SHIFT;
+
+ RD_INDIRECT_SHIFT:
+ if(~trivial_shift)
+ next_cycle = WITH_SHIFT;
+
+ default: ;
+ endcase
if(bubble)
next_cycle = EXECUTE;
unique case(cycle)
- RD_SHIFT:
- alu_base = saved_base;
-
- default:
+ EXECUTE:
if(data_snd_is_imm)
- alu_base = {{24{1'b0}}, data_imm};
+ alu_b = {{24{1'b0}}, data_imm};
else
- alu_base = rd_value_b;
- endcase
+ alu_b = rd_value_b;
- unique case(cycle)
- RD_SHIFT: alu_shift = rd_value_b[7:0];
- default: alu_shift = {2'b00, data_shift_imm};
+ RD_INDIRECT_SHIFT, WITH_SHIFT:
+ alu_b = saved_base;
endcase
end
always_ff @(posedge clk) begin
cycle <= next_cycle;
bubble <= 0;
+ writeback <= 0;
+ wr_value <= q_alu;
unique case(next_cycle)
EXECUTE: begin
@@ -84,40 +109,47 @@ module core_cycles
if(dec_execute & ~bubble) begin
bubble <=
(dec_update_flags & update_flags)
- | (final_writeback & ((rd == dec_alu.rn) | (rd == dec_alu.r_snd)));
+ | (final_writeback & ((rd == dec_data.rn) | (rd == dec_data.r_snd)));
branch <= dec_branch;
- final_writeback <= dec_writeback;
update_flags <= dec_update_flags;
branch_target <= pc_visible + dec_branch_offset;
- data_snd_is_imm <= dec_alu.snd_is_imm;
- data_snd_shift_by_reg <= dec_alu.snd_shift_by_reg;
- data_imm <= dec_alu.imm;
- data_shift_imm <= dec_alu.shift_imm;
-
- alu.op <= dec_alu.op;
- alu.shl <= dec_alu.shl;
- alu.shr <= dec_alu.shr;
- alu.ror <= dec_alu.ror;
- alu.put_carry <= dec_alu.put_carry;
- alu.sign_extend <= dec_alu.sign_extend;
-
- rd <= dec_alu.rd;
- ra <= dec_alu.rn;
- rb <= dec_alu.r_snd;
- r_shift <= dec_alu.r_shift;
+ data_snd_is_imm <= dec_data.snd_is_imm;
+ data_snd_shift_by_reg <= dec_data.snd_shift_by_reg;
+ data_imm <= dec_data.imm;
+ data_shift_imm <= dec_data.shift_imm;
+
+ alu <= dec_data.op;
+ shifter.shl <= dec_data.shl;
+ shifter.shr <= dec_data.shr;
+ shifter.ror <= dec_data.ror;
+ shifter.put_carry <= dec_data.put_carry;
+ shifter.sign_extend <= dec_data.sign_extend;
+
+ ra <= dec_data.rn;
+ rb <= dec_data.r_snd;
+ r_shift <= dec_data.r_shift;
+ c_in <= next_flags.c;
+
+ final_rd <= dec_data.rd;
+ final_writeback <= dec_writeback;
end
writeback <= final_writeback;
+ rd <= final_rd;
pc <= fetch_insn_pc;
end
- RD_SHIFT: begin
+ RD_INDIRECT_SHIFT: begin
rb <= r_shift;
data_snd_shift_by_reg <= 0;
saved_base <= rd_value_b;
- writeback <= 0;
+ end
+
+ WITH_SHIFT: begin
+ c_in <= c_shifter;
+ saved_base <= q_shifter;
end
endcase
end
@@ -127,10 +159,14 @@ module core_cycles
bubble = 0;
pc = 0;
+ c_in = 0;
branch = 1;
writeback = 0;
- data_snd_shift_by_reg = 0;
branch_target = 30'd0;
+ data_snd_shift_by_reg = 0;
+
+ final_rd = 0;
+ final_writeback = 0;
end
endmodule
diff --git a/rtl/core/decode/data.sv b/rtl/core/decode/data.sv
index 4dc51a4..a649440 100644
--- a/rtl/core/decode/data.sv
+++ b/rtl/core/decode/data.sv
@@ -3,13 +3,13 @@
module core_decode_data
(
- input word insn,
+ input word insn,
- output alu_decode decode,
- output logic writeback,
- update_flags,
- restore_spsr,
- undefined
+ output data_decode decode,
+ output logic writeback,
+ update_flags,
+ restore_spsr,
+ undefined
);
alu_op op;
diff --git a/rtl/core/decode/decode.sv b/rtl/core/decode/decode.sv
index af80135..f7d7be8 100644
--- a/rtl/core/decode/decode.sv
+++ b/rtl/core/decode/decode.sv
@@ -6,13 +6,13 @@ module core_decode
input word insn,
input psr_flags flags,
- output logic execute,
- undefined,
- writeback,
- update_flags,
- branch,
- output ptr branch_offset,
- output alu_decode alu
+ output logic execute,
+ undefined,
+ writeback,
+ update_flags,
+ branch,
+ output ptr branch_offset,
+ output data_decode data_ctrl
);
logic cond_undefined;
@@ -38,11 +38,11 @@ module core_decode
logic restore_spsr;
logic data_writeback, data_update_flags, data_undefined;
- alu_decode data_alu;
+ data_decode data;
core_decode_data group_data
(
- .decode(data_alu),
+ .decode(data),
.writeback(data_writeback),
.update_flags(data_update_flags),
.undefined(data_undefined),
@@ -55,20 +55,20 @@ module core_decode
branch = 0;
writeback = 0;
update_flags = 0;
- alu = {($bits(alu)){1'bx}};
+ data_ctrl = {($bits(data_ctrl)){1'bx}};
priority casez(insn `FIELD_OP)
`GROUP_B: begin
branch = 1;
if(branch_link) begin
- alu.rd = `R14;
+ data_ctrl.rd = `R14;
writeback = 1;
//TODO: Valor de LR
end
end
`GROUP_ALU: begin
- alu = data_alu;
+ data_ctrl = data;
writeback = data_writeback;
update_flags = data_update_flags;
undefined = undefined | data_undefined;
diff --git a/rtl/core/psr.sv b/rtl/core/psr.sv
index bc682c1..2c0d48f 100644
--- a/rtl/core/psr.sv
+++ b/rtl/core/psr.sv
@@ -7,11 +7,10 @@ module core_psr
alu_v_valid,
input psr_flags alu_flags,
- output psr_flags flags
+ output psr_flags flags,
+ next_flags
);
- psr_flags next_flags;
-
always_comb begin
next_flags = flags;
diff --git a/rtl/core/shifter.sv b/rtl/core/shifter.sv
new file mode 100644
index 0000000..994e76c
--- /dev/null
+++ b/rtl/core/shifter.sv
@@ -0,0 +1,42 @@
+`include "core/uarch.sv"
+// Combinational shifter: q is base shifted or rotated by `shift`, c is the resulting carry (c_in when shift is 0).
+module core_shifter
+#(parameter W=16)
+(
+ input shifter_control ctrl, // ror is checked first, then shr; otherwise the left-shift result is used
+ input logic[W - 1:0] base, // value to shift
+ input logic[7:0] shift, // shift amount
+ input logic c_in, // carry in; also ORed into q's MSB on the shr path when ctrl.put_carry is set
+
+ output logic[W - 1:0] q,
+ output logic c
+);
+
+ localparam LOG = $clog2(W);
+
+ logic[W - 1:0] q_shl, q_shr, q_ror;
+ logic[W:0] sign_mask;
+ logic c_shl, c_shr, c_ror;
+ // sign_mask is all ones only when ctrl.sign_extend is set and base is negative; it fills just the vacated upper bits.
+ assign sign_mask = {(W + 1){ctrl.sign_extend & base[W - 1]}};
+ assign {c_shl, q_shl} = {c_in, base} << shift;
+ assign {q_shr, c_shr} = {base, c_in} >> shift | (sign_mask & ~(sign_mask >> shift)); // '&' keeps the fill at 0 when sign_mask is 0
+
+ logic ror_cycle;
+ logic[LOG - 1:0] ror_shift;
+ logic[2 * W:0] ror_out;
+ // Rotate by the low LOG bits of shift; ror_cycle adds one more bit of shift amount when shift >= W and those low bits are 0.
+ assign ror_shift = shift[LOG - 1:0];
+ assign ror_cycle = |shift[7:LOG] & ~|ror_shift;
+ assign ror_out = {base, base, c_in} >> {ror_cycle, ror_shift};
+ assign {q_ror, c_ror} = ror_out[W:0];
+ // Result select: rotate has priority over shift right; shift left is the fallback.
+ always_comb
+ if(ctrl.ror)
+ {c, q} = {c_ror, q_ror};
+ else if(ctrl.shr)
+ {c, q} = {c_shr, q_shr[W - 1] | (ctrl.put_carry & c_in), q_shr[W - 2:0]};
+ else
+ {c, q} = {c_shl, q_shl};
+
+endmodule
diff --git a/rtl/core/uarch.sv b/rtl/core/uarch.sv
index c382cba..0c5bc84 100644
--- a/rtl/core/uarch.sv
+++ b/rtl/core/uarch.sv
@@ -72,16 +72,15 @@ typedef struct packed
sign_extend;
logic[7:0] imm;
logic[5:0] shift_imm;
-} alu_decode;
+} data_decode;
typedef struct packed
{
- alu_op op;
- logic shl,
- shr,
- ror,
- put_carry,
- sign_extend;
-} alu_control;
+ logic shl,
+ shr,
+ ror,
+ put_carry,
+ sign_extend;
+} shifter_control;
`endif