summaryrefslogtreecommitdiff
path: root/rtl
diff options
context:
space:
mode:
authorAlejandro Soto <alejandro@34project.org>2022-11-08 15:59:12 -0600
committerAlejandro Soto <alejandro@34project.org>2022-11-08 15:59:12 -0600
commit13b112dcb8f67778c6a140cb5ce8f1ab21aa6fb9 (patch)
tree1c33528b0559a7b51f67a3c0df92202713e1a53b /rtl
parentf6929f9a4703e3eee9d7bd9752de055729cdd498 (diff)
Improve ALU performance
Diffstat (limited to '')
-rw-r--r--rtl/core/alu/add.sv37
-rw-r--r--rtl/core/alu/alu.sv124
-rw-r--r--rtl/core/decode/isa.sv6
3 files changed, 99 insertions, 68 deletions
diff --git a/rtl/core/alu/add.sv b/rtl/core/alu/add.sv
index 12bd237..a15a6b6 100644
--- a/rtl/core/alu/add.sv
+++ b/rtl/core/alu/add.sv
@@ -1,20 +1,45 @@
module core_alu_add
-#(parameter W=16)
+#
+(
+ parameter W=16,
+ parameter SUB=0
+)
(
input logic[W - 1:0] a,
b,
- c_in,
+ input logic c_in,
output logic[W - 1:0] q,
output logic c,
v
);
- logic sgn_a, sgn_b, sgn_q;
+ logic sgn_a, sgn_b, sgn_q, maybe_v;
+ logic[W:0] out;
+
+ /* Quartus infiere dos sumadores si c_in se extiende con ceros
+ * para complacer a Verilator, lo cual es malo para Fmax.
+ */
+`ifdef VERILATOR
+ logic[W:0] ext_carry;
+ assign ext_carry = {{W{1'b0}}, c_in};
+`else
+ logic ext_carry;
+ assign ext_carry = c_in;
+`endif
+
+ assign v = maybe_v & (sgn_a ^ sgn_q);
+ assign {c, q} = out;
assign {sgn_a, sgn_b, sgn_q} = {a[W - 1], b[W - 1], q[W - 1]};
- //TODO: No sirve el carry
- assign {c, q} = {1'b0, a} + {1'b0, b} + {1'b0, c_in};
- assign v = (sgn_a ~^ sgn_b) & (sgn_a ^ sgn_q);
+ generate
+ if(SUB) begin
+ assign out = {1'b1, a} - {1'b0, b} - ext_carry;
+ assign maybe_v = sgn_a ^ sgn_b;
+ end else begin
+ assign out = {1'b0, a} + {1'b0, b} + ext_carry;
+ assign maybe_v = sgn_a ~^ sgn_b;
+ end
+ endgenerate
endmodule
diff --git a/rtl/core/alu/alu.sv b/rtl/core/alu/alu.sv
index d999164..1e19338 100644
--- a/rtl/core/alu/alu.sv
+++ b/rtl/core/alu/alu.sv
@@ -1,4 +1,5 @@
`include "core/uarch.sv"
+`include "core/decode/isa.sv"
module core_alu
#(parameter W=16)
@@ -13,31 +14,53 @@ module core_alu
output logic v_valid
);
- logic c, v, swap, sub, and_not, c_add, v_add;
- logic[W - 1:0] swap_a, swap_b, not_b, c_in_add, q_add, q_and, q_orr, q_xor;
+ logic c, v, c_add, c_sub, c_rsb, v_add, v_sub, v_rsb;
+ logic[W - 1:0] not_b, q_add, q_sub, q_rsb, q_and, q_bic, q_orr, q_xor;
- assign swap_a = swap ? b : a;
- assign swap_b = swap ? a : b;
assign not_b = ~b;
- core_alu_add #(.W(W)) op_add
+ core_alu_add #(.W(W), .SUB(0)) op_add
(
- .a(swap_a),
- .b(sub ? -swap_b : swap_b),
- .c_in(c_in_add),
.q(q_add),
.c(c_add),
.v(v_add),
+ .c_in(c_in && !op `FIELD_ALUOP_ADD_CMN && op `FIELD_ALUOP_ADD_NOTCMN_ADC),
+ .*
+ );
+
+ core_alu_add #(.W(W), .SUB(1)) op_sub
+ (
+ .q(q_sub),
+ .c(c_sub),
+ .v(v_sub),
+ .c_in(c_in && op `FIELD_ALUOP_SUB_SBC),
+ .*
+ );
+
+ core_alu_add #(.W(W), .SUB(1)) op_rsb
+ (
+ .a(b),
+ .b(a),
+ .q(q_rsb),
+ .c(c_rsb),
+ .v(v_rsb),
+ .c_in(c_in && op `FIELD_ALUOP_RSB_RSC),
.*
);
core_alu_and #(.W(W)) op_and
(
- .b(and_not ? not_b : b),
.q(q_and),
.*
);
+ core_alu_and #(.W(W)) op_bic
+ (
+ .b(not_b),
+ .q(q_bic),
+ .*
+ );
+
core_alu_orr #(.W(W)) op_orr
(
.q(q_orr),
@@ -52,58 +75,20 @@ module core_alu
always_comb begin
unique case(op)
- `ALU_ADD, `ALU_ADC, `ALU_CMN, `ALU_CMP, `ALU_SUB, `ALU_SBC:
- swap = 0;
-
- `ALU_RSB, `ALU_RSC:
- swap = 1;
-
- default:
- swap = 1'bx;
- endcase
-
- unique case(op)
- `ALU_ADD, `ALU_CMN, `ALU_ADC:
- sub = 0;
-
- `ALU_SUB, `ALU_CMP, `ALU_SBC, `ALU_RSB, `ALU_RSC:
- sub = 1;
-
- default:
- sub = 1'bx;
- endcase
-
- unique case(op)
- `ALU_ADD, `ALU_CMN, `ALU_CMP, `ALU_SUB, `ALU_RSB:
- c_in_add = 0;
-
- `ALU_ADC:
- c_in_add = {{(W - 1){1'b0}}, c_in};
+ `ALU_ADD, `ALU_ADC, `ALU_CMN:
+ q = q_add;
- `ALU_SBC, `ALU_RSC:
- c_in_add = {{(W - 1){~c_in}}, ~c_in};
+ `ALU_SUB, `ALU_SBC, `ALU_CMP:
+ q = q_sub;
- default:
- c_in_add = {W{1'bx}};
- endcase
+ `ALU_RSB, `ALU_RSC:
+ q = q_rsb;
- unique case(op)
`ALU_AND, `ALU_TST:
- and_not = 0;
+ q = q_and;
`ALU_BIC:
- and_not = 1;
-
- default:
- and_not = 1'bx;
- endcase
-
- unique case(op)
- `ALU_SUB, `ALU_RSB, `ALU_ADD, `ALU_ADC, `ALU_SBC, `ALU_RSC, `ALU_CMP, `ALU_CMN:
- q = q_add;
-
- `ALU_AND, `ALU_TST, `ALU_BIC:
- q = q_and;
+ q = q_bic;
`ALU_EOR, `ALU_TEQ:
q = q_xor;
@@ -118,21 +103,36 @@ module core_alu
q = not_b;
endcase
+ v = 1'bx;
unique case(op)
- `ALU_AND, `ALU_EOR, `ALU_TST, `ALU_TEQ, `ALU_ORR, `ALU_MOV, `ALU_BIC, `ALU_MVN: begin
+ `ALU_AND, `ALU_EOR, `ALU_TST, `ALU_TEQ, `ALU_ORR, `ALU_MOV, `ALU_BIC, `ALU_MVN:
c = c_in;
- v = 1'bx;
- v_valid = 0;
- end
- `ALU_SUB, `ALU_RSB, `ALU_ADD, `ALU_ADC, `ALU_SBC, `ALU_RSC, `ALU_CMP, `ALU_CMN: begin
+ `ALU_ADD, `ALU_ADC, `ALU_CMN: begin
c = c_add;
v = v_add;
- v_valid = 1;
end
+
+ `ALU_SUB, `ALU_SBC, `ALU_CMP: begin
+ c = c_sub;
+ v = v_sub;
+ end
+
+ `ALU_RSB, `ALU_RSC: begin
+ c = c_rsb;
+ v = v_rsb;
+ end
+ endcase
+
+ unique case(op)
+ `ALU_AND, `ALU_EOR, `ALU_TST, `ALU_TEQ, `ALU_ORR, `ALU_MOV, `ALU_BIC, `ALU_MVN:
+ v_valid = 0;
+
+ `ALU_SUB, `ALU_RSB, `ALU_ADD, `ALU_ADC, `ALU_SBC, `ALU_RSC, `ALU_CMP, `ALU_CMN:
+ v_valid = 1;
endcase
end
- assign nzcv = {q[W - 1], ~|q, c, v};
+ assign nzcv = {q[W - 1], q == 0, c, v};
endmodule
diff --git a/rtl/core/decode/isa.sv b/rtl/core/decode/isa.sv
index 4bd2dea..3e3a6d1 100644
--- a/rtl/core/decode/isa.sv
+++ b/rtl/core/decode/isa.sv
@@ -21,6 +21,12 @@
`define COND_AL 4'b1110
`define COND_UD 4'b1111 // Indefinido antes de ARMv5
+// Necesario para evitar caminos combinacionales largos en ALU
+`define FIELD_ALUOP_SUB_SBC [2]
+`define FIELD_ALUOP_RSB_RSC [2]
+`define FIELD_ALUOP_ADD_CMN [1]
+`define FIELD_ALUOP_ADD_NOTCMN_ADC [0]
+
// Segundo operando de varios grupos de instrucciones
`define FIELD_SND_ROR8 [11:8]