Starting to optimize multiplier

This commit is contained in:
David Harris 2021-10-11 11:06:07 -07:00
parent 76e186693c
commit ab6a796690
2 changed files with 34 additions and 15 deletions

View File

@ -27,9 +27,11 @@
module mul (
// Execute Stage interface
input logic clk, reset,
input logic StallM, FlushM,
input logic [`XLEN-1:0] SrcAE, SrcBE,
input logic [2:0] Funct3E,
output logic [`XLEN*2-1:0] ProdE
output logic [`XLEN*2-1:0] ProdM
);
// Number systems
@ -46,16 +48,25 @@ module mul (
// Signed * Unsigned = P' + ( PA - PB)*2^(XLEN-1) - PP*2^(2XLEN-2)
// Unsigned * Unsigned = P' + ( PA + PB)*2^(XLEN-1) + PP*2^(2XLEN-2)
logic [`XLEN*2-1:0] PP1, PP2, PP3, PP4;
logic [`XLEN*2-1:0] PP0E, PP1E, PP2E, PP3E, PP4E;
logic [`XLEN*2-1:0] PP0M, PP1M, PP2M, PP3M, PP4M;
logic [`XLEN*2-1:0] Pprime;
logic [`XLEN-2:0] PA, PB;
logic PP;
logic MULH, MULHSU, MULHU;
logic [`XLEN-1:0] Aprime, Bprime;
//////////////////////////////
// Execute Stage: Compute partial products
//////////////////////////////
// portions of product
assign Pprime = {1'b0, SrcAE[`XLEN-2:0]} * {1'b0, SrcBE[`XLEN-2:0]};
//assign Pprime = {1'b0, SrcAE[`XLEN-2:0]} * {1'b0, SrcBE[`XLEN-2:0]};
// *** assumes unsigned multiplication
assign Aprime = {1'b0, SrcAE[`XLEN-2:0]};
assign Bprime = {1'b0, SrcBE[`XLEN-2:0]};
DW02_multp #(`XLEN, `XLEN, 2*`XLEN) bigmul(.a(Aprime), .b(Bprime), .tc(1'b0), .out0(PP0E), .out1(PP1E));
// DW02_multp #((`XLEN-1), (`XLEN-1), 2*(`XLEN-1)) multp_dw( .a(SrcAE[`XLEN-2:0]), .b(SrcBE[`XLEN-2:0]), .tc(1'b0), .out0(Pprime0), .out1(Pprime1) );
assign PA = {(`XLEN-1){SrcAE[`XLEN-1]}} & SrcBE[`XLEN-2:0];
assign PB = {(`XLEN-1){SrcBE[`XLEN-1]}} & SrcAE[`XLEN-2:0];
@ -67,16 +78,25 @@ module mul (
// assign MULHU = (Funct3E == 2'b11); // signal unused
// Handle signs
assign PP1 = Pprime; // same for all flavors
assign PP2 = {2'b00, (MULH | MULHSU) ? ~PA : PA, {(`XLEN-1){1'b0}}};
assign PP3 = {2'b00, (MULH) ? ~PB : PB, {(`XLEN-1){1'b0}}};
// assign PP0E = 0;
// assign PP1E = Pprime; // same for all flavors
assign PP2E = {2'b00, (MULH | MULHSU) ? ~PA : PA, {(`XLEN-1){1'b0}}};
assign PP3E = {2'b00, (MULH) ? ~PB : PB, {(`XLEN-1){1'b0}}};
always_comb
if (MULH) PP4 = {1'b1, PP, {(`XLEN-3){1'b0}}, 1'b1, {(`XLEN){1'b0}}};
else if (MULHSU) PP4 = {1'b1, ~PP, {(`XLEN-2){1'b0}}, 1'b1, {(`XLEN-1){1'b0}}};
else PP4 = {1'b0, PP, {(`XLEN*2-2){1'b0}}};
if (MULH) PP4E = {1'b1, PP, {(`XLEN-3){1'b0}}, 1'b1, {(`XLEN){1'b0}}};
else if (MULHSU) PP4E = {1'b1, ~PP, {(`XLEN-2){1'b0}}, 1'b1, {(`XLEN-1){1'b0}}};
else PP4E = {1'b0, PP, {(`XLEN*2-2){1'b0}}};
// ***Put register before this final addition
assign ProdE = PP1 + PP2 + PP3 + PP4; //SrcAE * SrcBE;
// assign ProdE = Pprime0 + Pprime1 + PP2 + PP3 + PP4; //SrcAE * SrcBE;
//////////////////////////////
// Memory Stage: Sum partial proudcts
//////////////////////////////
flopenrc #(`XLEN*2) PP0Reg(clk, reset, FlushM, ~StallM, PP0E, PP0M);
flopenrc #(`XLEN*2) PP1Reg(clk, reset, FlushM, ~StallM, PP1E, PP1M);
flopenrc #(`XLEN*2) PP2Reg(clk, reset, FlushM, ~StallM, PP2E, PP2M);
flopenrc #(`XLEN*2) PP3Reg(clk, reset, FlushM, ~StallM, PP3E, PP3M);
flopenrc #(`XLEN*2) PP4Reg(clk, reset, FlushM, ~StallM, PP4E, PP4M);
assign ProdM = PP0M + PP1M + PP2M + PP3M + PP4M; //SrcAE * SrcBE;
endmodule

View File

@ -43,10 +43,10 @@ module muldiv (
generate
if (`M_SUPPORTED) begin
logic [`XLEN-1:0] MulDivResultE, MulDivResultM;
logic [`XLEN-1:0] MulDivResultM;
logic [`XLEN-1:0] PrelimResultM;
logic [`XLEN-1:0] QuotM, RemM;
logic [`XLEN*2-1:0] ProdE, ProdM;
logic [`XLEN*2-1:0] ProdM;
logic DivE;
logic DivSignedE;
@ -54,7 +54,6 @@ module muldiv (
// Multiplier
mul mul(.*);
flopenrc #(`XLEN*2) ProdMReg(clk, reset, FlushM, ~StallM, ProdE, ProdM);
// Divide
// Start a divide when a new division instruction is received and the divider isn't already busy or finishing