removed warnings and took a mux out of the critical path

This commit is contained in:
Katherine Parry 2022-07-12 18:32:17 -07:00
parent 452b017f9a
commit e05b2a07d2
12 changed files with 38 additions and 38 deletions

View File

@ -32,7 +32,7 @@
`define DESIGN_COMPILER 0 `define DESIGN_COMPILER 0
// RV32 or RV64: XLEN = 32 or 64 // RV32 or RV64: XLEN = 32 or 64
`define XLEN 32 `define XLEN 64
// IEEE 754 compliance // IEEE 754 compliance
`define IEEE754 0 `define IEEE754 0

View File

@ -101,14 +101,15 @@
`define CORRSHIFTSZ ((`DIVLEN+`NF+3) > (3*`NF+8) ? (`DIVLEN+`NF+3) : (3*`NF+6)) `define CORRSHIFTSZ ((`DIVLEN+`NF+3) > (3*`NF+8) ? (`DIVLEN+`NF+3) : (3*`NF+6))
// division constants // division constants
`define RADIX 4 `define RADIX 32'h4
`define DIVCOPIES 4 `define DIVCOPIES 32'h4
`define DIVLEN ((`NF < `XLEN) ? (`XLEN) : (`NF)) `define DIVLEN ((`NF < `XLEN) ? (`XLEN) : (`NF))
`define DIVRESLEN ((`NF>`XLEN) ? `DIVLEN+2 : `DIVLEN) `define DIVRESLEN ((`NF>`XLEN) ? `DIVLEN+2 : `DIVLEN)
`define LOGR ((`RADIX==2) ? 1 : 2) `define LOGR ((`RADIX==2) ? 32'h1 : 32'h2)
`define FPDUR $ceil($itor(`DIVRESLEN)/$itor(`LOGR*`DIVCOPIES)) // FPDUR = ceil(DIVRESLEN/(LOGR*DIVCOPIES))
`define DURLEN ($clog2($rtoi(`FPDUR)+1)) `define FPDUR ((`DIVRESLEN+(`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES))
`define QLEN ($rtoi(`FPDUR)*`LOGR*`DIVCOPIES) `define DURLEN ($clog2(`FPDUR+1))
`define QLEN (`FPDUR*`LOGR*`DIVCOPIES)
`define USE_SRAM 0 `define USE_SRAM 0

View File

@ -24,9 +24,6 @@ add wave -group {Divide} -noupdate /testbenchfp/srtradix4/*
add wave -group {Divide} -group inter0 -noupdate /testbenchfp/srtradix4/genblk1[0]/divinteration/* add wave -group {Divide} -group inter0 -noupdate /testbenchfp/srtradix4/genblk1[0]/divinteration/*
add wave -group {Divide} -group inter0 -noupdate /testbenchfp/srtradix4/genblk1[0]/divinteration/qsel4/* add wave -group {Divide} -group inter0 -noupdate /testbenchfp/srtradix4/genblk1[0]/divinteration/qsel4/*
add wave -group {Divide} -group inter0 -noupdate /testbenchfp/srtradix4/genblk1[0]/divinteration/otfc4/* add wave -group {Divide} -group inter0 -noupdate /testbenchfp/srtradix4/genblk1[0]/divinteration/otfc4/*
add wave -group {Divide} -group inter1 -noupdate /testbenchfp/srtradix4/genblk1[1]/divinteration/*
add wave -group {Divide} -group inter2 -noupdate /testbenchfp/srtradix4/genblk1[2]/divinteration/*
add wave -group {Divide} -group inter3 -noupdate /testbenchfp/srtradix4/genblk1[3]/divinteration/*
add wave -group {Divide} -noupdate /testbenchfp/srtpreproc/* add wave -group {Divide} -noupdate /testbenchfp/srtpreproc/*
add wave -group {Divide} -noupdate /testbenchfp/srtradix4/expcalc/* add wave -group {Divide} -noupdate /testbenchfp/srtradix4/expcalc/*
add wave -group {Divide} -noupdate /testbenchfp/srtfsm/* add wave -group {Divide} -noupdate /testbenchfp/srtfsm/*

View File

@ -52,7 +52,7 @@ module divsqrt(
// output logic [`XLEN-1:0] RemM, // output logic [`XLEN-1:0] RemM,
); );
logic [`DIVLEN+3:0] WSN, WCN; logic [`DIVLEN+3:0] NextWSN, NextWCN;
logic [`DIVLEN+3:0] WS, WC; logic [`DIVLEN+3:0] WS, WC;
logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt; logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt;
logic [`DIVLEN-1:0] X; logic [`DIVLEN-1:0] X;
@ -61,8 +61,8 @@ module divsqrt(
srtpreproc srtpreproc(.XManE, .Dur, .YManE,.X,.Dpreproc, .XZeroCnt, .YZeroCnt); srtpreproc srtpreproc(.XManE, .Dur, .YManE,.X,.Dpreproc, .XZeroCnt, .YZeroCnt);
srtfsm srtfsm(.reset, .WSN, .WCN, .WS, .WC, .Dur, .DivBusy, .clk, .DivStart(DivStartE),.StallE, .StallM, .DivDone, .XZeroE, .YZeroE, .DivStickyE(DivStickyM), .XNaNE, .YNaNE, srtfsm srtfsm(.reset, .NextWSN, .NextWCN, .WS, .WC, .Dur, .DivBusy, .clk, .DivStart(DivStartE),.StallE, .StallM, .DivDone, .XZeroE, .YZeroE, .DivStickyE(DivStickyM), .XNaNE, .YNaNE,
.XInfE, .YInfE, .DivNegStickyE(DivNegStickyM), .EarlyTermShiftE(EarlyTermShiftM)); .XInfE, .YInfE, .DivNegStickyE(DivNegStickyM), .EarlyTermShiftE(EarlyTermShiftM));
srtradix4 srtradix4(.clk, .FmtE, .X,.Dpreproc, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .WSN, .WCN, .DivStart(DivStartE), .XExpE, .YExpE, .XZeroE, .YZeroE, srtradix4 srtradix4(.clk, .FmtE, .X,.Dpreproc, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN, .DivStart(DivStartE), .XExpE, .YExpE, .XZeroE, .YZeroE,
.DivBusy, .Quot(QuotM), .Rem(), .DivCalcExpM); .DivBusy, .Quot(QuotM), .Rem(), .DivCalcExpM);
endmodule endmodule

View File

@ -53,7 +53,8 @@ module fmashiftcalc(
assign FmaSZero = ~(|FmaSm); assign FmaSZero = ~(|FmaSm);
// calculate the sum's exponent // calculate the sum's exponent
assign NormSumExp = FmaKillProd ? {2'b0, Ze[`NE-1:1], Ze[0]&~ZDenorm} : FmaPe + -{{`NE+2-$unsigned($clog2(3*`NF+7)){1'b0}}, FmaNCnt} - 1 + (`NE+2)'(`NF+4); // ProdExp - NormCnt - 1 + NF+4 = ProdExp + ~NormCnt + 1 - 1 + NF+4 = ProdExp + ~NormCnt + NF+4
assign NormSumExp = FmaKillProd ? {2'b0, Ze[`NE-1:1], Ze[0]&~ZDenorm} : FmaPe + {{`NE+2-$unsigned($clog2(3*`NF+7)){1'b1}}, ~FmaNCnt} + (`NE+2)'(`NF+4);
//convert the sum's exponent into the proper percision //convert the sum's exponent into the proper percision
if (`FPSIZES == 1) begin if (`FPSIZES == 1) begin

View File

@ -29,7 +29,7 @@
`include "wally-config.vh" `include "wally-config.vh"
module postprocess( module postprocess (
// general signals // general signals
input logic Xs, Ys, // input signs input logic Xs, Ys, // input signs
input logic [`NE-1:0] Ze, // input exponents input logic [`NE-1:0] Ze, // input exponents

View File

@ -41,7 +41,7 @@ module srtradix4(
input logic [`DIVLEN-1:0] Dpreproc, input logic [`DIVLEN-1:0] Dpreproc,
input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt, input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt,
output logic [`QLEN-1:0] Quot, output logic [`QLEN-1:0] Quot,
output logic [`DIVLEN+3:0] WSN, WCN, output logic [`DIVLEN+3:0] NextWSN, NextWCN,
output logic [`DIVLEN+3:0] FirstWS, FirstWC, output logic [`DIVLEN+3:0] FirstWS, FirstWC,
output logic [`NE+1:0] DivCalcExpM, output logic [`NE+1:0] DivCalcExpM,
output logic [`XLEN-1:0] Rem output logic [`XLEN-1:0] Rem
@ -58,11 +58,12 @@ module srtradix4(
logic [`QLEN-1:0] QNext[`DIVCOPIES-1:0]; logic [`QLEN-1:0] QNext[`DIVCOPIES-1:0];
logic [`QLEN-1:0] QMNext[`DIVCOPIES-1:0]; logic [`QLEN-1:0] QMNext[`DIVCOPIES-1:0];
/* verilator lint_on UNOPTFLAT */ /* verilator lint_on UNOPTFLAT */
logic [`DIVLEN+3:0] WSN, WCN;
logic [`DIVLEN+3:0] D, DBar, D2, DBar2; logic [`DIVLEN+3:0] D, DBar, D2, DBar2;
logic [`NE+1:0] DivCalcExp; logic [`NE+1:0] DivCalcExp;
logic [$clog2(`XLEN+1)-1:0] intExp; logic [$clog2(`XLEN+1)-1:0] intExp;
logic intSign; logic intSign;
logic [`QLEN-1:0] QMux, QMMux; logic [`QLEN-1:0] QMMux;
// Top Muxes and Registers // Top Muxes and Registers
// When start is asserted, the inputs are loaded into the divider. // When start is asserted, the inputs are loaded into the divider.
@ -72,9 +73,11 @@ module srtradix4(
// - otherwise load WSA into the flipflop // - otherwise load WSA into the flipflop
// - the assumed one is added to D since it's always normalized (and X/0 is a special case handeled by result selection) // - the assumed one is added to D since it's always normalized (and X/0 is a special case handeled by result selection)
// - XZeroE is used as the assumed one to avoid creating a sticky bit - all other numbers are normalized // - XZeroE is used as the assumed one to avoid creating a sticky bit - all other numbers are normalized
mux2 #(`DIVLEN+4) wsmux({WSA[`DIVCOPIES-1][`DIVLEN+1:0], 2'b0}, {3'b000, ~XZeroE, X}, DivStart, WSN); assign NextWSN = {WSA[`DIVCOPIES-1][`DIVLEN+1:0], 2'b0};
assign NextWCN = {WCA[`DIVCOPIES-1][`DIVLEN+1:0], 2'b0};
mux2 #(`DIVLEN+4) wsmux(NextWSN, {3'b000, ~XZeroE, X}, DivStart, WSN);
flop #(`DIVLEN+4) wsflop(clk, WSN, WS[0]); flop #(`DIVLEN+4) wsflop(clk, WSN, WS[0]);
mux2 #(`DIVLEN+4) wcmux({WCA[`DIVCOPIES-1][`DIVLEN+1:0], 2'b0}, {`DIVLEN+4{1'b0}}, DivStart, WCN); mux2 #(`DIVLEN+4) wcmux(NextWCN, {`DIVLEN+4{1'b0}}, DivStart, WCN);
flop #(`DIVLEN+4) wcflop(clk, WCN, WC[0]); flop #(`DIVLEN+4) wcflop(clk, WCN, WC[0]);
flopen #(`DIVLEN+4) dflop(clk, DivStart, {4'b0001, Dpreproc}, D); flopen #(`DIVLEN+4) dflop(clk, DivStart, {4'b0001, Dpreproc}, D);
flopen #(`NE+2) expflop(clk, DivStart, DivCalcExp, DivCalcExpM); flopen #(`NE+2) expflop(clk, DivStart, DivCalcExp, DivCalcExpM);
@ -88,10 +91,10 @@ module srtradix4(
genvar i; genvar i;
generate generate
for(i=0; i<`DIVCOPIES; i++) begin for(i=0; $unsigned(i)<`DIVCOPIES; i++) begin
divinteration divinteration(.clk, .DivStart, .DivBusy, .D, .DBar, .D2, .DBar2, divinteration divinteration(.clk, .DivStart, .DivBusy, .D, .DBar, .D2, .DBar2,
.WS(WS[i]), .WC(WC[i]), .WSA(WSA[i]), .WCA(WCA[i]), .Q(Q[i]), .QM(QM[i]), .QNext(QNext[i]), .QMNext(QMNext[i])); .WS(WS[i]), .WC(WC[i]), .WSA(WSA[i]), .WCA(WCA[i]), .Q(Q[i]), .QM(QM[i]), .QNext(QNext[i]), .QMNext(QMNext[i]));
if(i<3) begin if(i<(`DIVCOPIES-1)) begin
assign WS[i+1] = {WSA[i][`DIVLEN+1:0], 2'b0}; assign WS[i+1] = {WSA[i][`DIVLEN+1:0], 2'b0};
assign WC[i+1] = {WCA[i][`DIVLEN+1:0], 2'b0}; assign WC[i+1] = {WCA[i][`DIVLEN+1:0], 2'b0};
assign Q[i+1] = QNext[i]; assign Q[i+1] = QNext[i];
@ -101,9 +104,8 @@ module srtradix4(
endgenerate endgenerate
// if starting a new divison set Q to 0 and QM to -1 // if starting a new divison set Q to 0 and QM to -1
mux2 #(`QLEN) Qmux(QNext[`DIVCOPIES-1], {`QLEN{1'b0}}, DivStart, QMux);
mux2 #(`QLEN) QMmux(QMNext[`DIVCOPIES-1], {`QLEN{1'b1}}, DivStart, QMMux); mux2 #(`QLEN) QMmux(QMNext[`DIVCOPIES-1], {`QLEN{1'b1}}, DivStart, QMMux);
flopen #(`QLEN) Qreg(clk, DivBusy|DivStart, QMux, Q[0]); // *** have to connect Quot directly to M stage flopenr #(`QLEN) Qreg(clk, DivStart, DivBusy, QNext[`DIVCOPIES-1], Q[0]);
flop #(`QLEN) QMreg(clk, QMMux, QM[0]); flop #(`QLEN) QMreg(clk, QMMux, QM[0]);
assign Quot = Q[0]; assign Quot = Q[0];
@ -181,7 +183,7 @@ module qsel4 (
logic [3:0] QSel4[1023:0]; logic [3:0] QSel4[1023:0];
initial begin always_comb begin
integer d, w, i, w2; integer d, w, i, w2;
for(d=0; d<8; d++) for(d=0; d<8; d++)
for(w=0; w<128; w++)begin for(w=0; w<128; w++)begin
@ -270,9 +272,9 @@ module otfc4 (
// else if q = -2 Q = {QM, 10} QM = {QM, 01} // else if q = -2 Q = {QM, 10} QM = {QM, 01}
// *** how does the 0 concatination numbers work? // *** how does the 0 concatination numbers work?
assign QR = Q[`QLEN-3:0];
assign QMR = QM[`QLEN-3:0]; // Shifted Q and QM
always_comb begin always_comb begin
QR = Q[`QLEN-3:0];
QMR = QM[`QLEN-3:0]; // Shift Q and QM
if (q[3]) begin // +2 if (q[3]) begin // +2
QNext = {QR, 2'b10}; QNext = {QR, 2'b10};
QMNext = {QR, 2'b01}; QMNext = {QR, 2'b01};
@ -352,5 +354,5 @@ module expcalc(
endcase endcase
end end
// correct exponent for denormalized input's normalization shifts // correct exponent for denormalized input's normalization shifts
assign DivCalcExp = ({2'b0, XExpE} - {{`NE+1-$clog2(`NF+2){1'b0}}, XZeroCnt} - {2'b0, YExpE} + {{`NE+1-$clog2(`NF+2){1'b0}}, YZeroCnt} + {3'b0, Bias})&{`NE+2{~XZeroE}}; assign DivCalcExp = ({2'b0, XExpE} - {{`NE+1-$unsigned($clog2(`NF+2)){1'b0}}, XZeroCnt} - {2'b0, YExpE} + {{`NE+1-$unsigned($clog2(`NF+2)){1'b0}}, YZeroCnt} + {3'b0, Bias})&{`NE+2{~XZeroE}};
endmodule endmodule

View File

@ -33,7 +33,7 @@
module srtfsm( module srtfsm(
input logic clk, input logic clk,
input logic reset, input logic reset,
input logic [`DIVLEN+3:0] WSN, WCN, WS, WC, input logic [`DIVLEN+3:0] NextWSN, NextWCN, WS, WC,
input logic XInfE, YInfE, input logic XInfE, YInfE,
input logic XZeroE, YZeroE, input logic XZeroE, YZeroE,
input logic XNaNE, YNaNE, input logic XNaNE, YNaNE,
@ -58,8 +58,8 @@ module srtfsm(
//flopen #($clog2(`DIVLEN/2+3)) durflop(clk, DivStart, CalcDur, Dur); //flopen #($clog2(`DIVLEN/2+3)) durflop(clk, DivStart, CalcDur, Dur);
assign DivBusy = (state == BUSY); assign DivBusy = (state == BUSY);
assign WZero = ((WSN^WCN)=={WSN[`DIVLEN+2:0]|WCN[`DIVLEN+2:0], 1'b0}); assign WZero = ((NextWSN^NextWCN)=={NextWSN[`DIVLEN+2:0]|NextWCN[`DIVLEN+2:0], 1'b0});
assign DivStickyE = ~WZero; assign DivStickyE = |W;
assign DivDone = (state == DONE); assign DivDone = (state == DONE);
assign W = WC+WS; assign W = WC+WS;
assign DivNegStickyE = W[`DIVLEN+3]; //*** is there a better way to do this??? assign DivNegStickyE = W[`DIVLEN+3]; //*** is there a better way to do this???

View File

@ -63,8 +63,7 @@ module srtpreproc (
assign X = PreprocX; assign X = PreprocX;
assign Dpreproc = PreprocY; assign Dpreproc = PreprocY;
assign Dur = (`DURLEN)'(`FPDUR);
assign Dur = (`DURLEN)'($rtoi(`FPDUR));
// assign intExp = zeroCntB - zeroCntA + 1; // assign intExp = zeroCntB - zeroCntA + 1;
// assign intSign = Signed & (SrcA[`XLEN - 1] ^ SrcB[`XLEN - 1]); // assign intSign = Signed & (SrcA[`XLEN - 1] ^ SrcB[`XLEN - 1]);

View File

@ -34,7 +34,7 @@ module lzc #(parameter WIDTH = 1) (
/* verilator lint_off CMPCONST */ /* verilator lint_off CMPCONST */
/* verilator lint_off WIDTH */ /* verilator lint_off WIDTH */
int i; logic [31:0] i;
always_comb begin always_comb begin
i = 0; i = 0;
while (~num[WIDTH-1-i] & (i < WIDTH)) i = i+1; // search for leading one while (~num[WIDTH-1-i] & (i < WIDTH)) i = i+1; // search for leading one

View File

@ -87,8 +87,8 @@ module testbenchfp;
logic reset = 1'b0; logic reset = 1'b0;
logic [`DIVLEN-1:0] DivX; logic [`DIVLEN-1:0] DivX;
logic [`DIVLEN-1:0] Dpreproc; logic [`DIVLEN-1:0] Dpreproc;
logic [`DIVLEN+3:0] WSN, WS; logic [`DIVLEN+3:0] NextWSN, WS;
logic [`DIVLEN+3:0] WCN, WC; logic [`DIVLEN+3:0] NextWCN, WC;
logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt; logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt;
logic [`DURLEN-1:0] Dur; logic [`DURLEN-1:0] Dur;
@ -696,9 +696,9 @@ module testbenchfp;
.XManE(XMan), .YManE(YMan), .XZeroE(XZero), .YZeroE(YZero), .CmpIntResE(CmpRes), .XManE(XMan), .YManE(YMan), .XZeroE(XZero), .YZeroE(YZero), .CmpIntResE(CmpRes),
.XNaNE(XNaN), .YNaNE(YNaN), .XSNaNE(XSNaN), .YSNaNE(YSNaN), .FSrcXE(X), .FSrcYE(Y), .CmpNVE(CmpFlg[4]), .CmpFpResE(FpCmpRes)); .XNaNE(XNaN), .YNaNE(YNaN), .XSNaNE(XSNaN), .YSNaNE(YSNaN), .FSrcXE(X), .FSrcYE(Y), .CmpNVE(CmpFlg[4]), .CmpFpResE(FpCmpRes));
srtpreproc srtpreproc(.XManE(XMan), .Dur, .YManE(YMan),.X(DivX),.Dpreproc, .XZeroCnt, .YZeroCnt); srtpreproc srtpreproc(.XManE(XMan), .Dur, .YManE(YMan),.X(DivX),.Dpreproc, .XZeroCnt, .YZeroCnt);
srtfsm srtfsm(.reset, .WSN, .WCN, .WS, .WC, .Dur, .DivBusy, .DivDone, .clk, .DivStart, .StallM(1'b0), .StallE(1'b0), .XZeroE(XZero), .YZeroE(YZero), .DivStickyE(DivSticky), .XNaNE(XNaN), .YNaNE(YNaN), srtfsm srtfsm(.reset, .NextWSN, .NextWCN, .WS, .WC, .Dur, .DivBusy, .DivDone, .clk, .DivStart, .StallM(1'b0), .StallE(1'b0), .XZeroE(XZero), .YZeroE(YZero), .DivStickyE(DivSticky), .XNaNE(XNaN), .YNaNE(YNaN),
.XInfE(XInf), .YInfE(YInf), .DivNegStickyE(DivNegSticky), .EarlyTermShiftE(EarlyTermShift)); .XInfE(XInf), .YInfE(YInf), .DivNegStickyE(DivNegSticky), .EarlyTermShiftE(EarlyTermShift));
srtradix4 srtradix4(.clk, .FmtE(ModFmt), .X(DivX),.Dpreproc, .DivBusy, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .WSN, .WCN, .DivStart, .XExpE(XExp), .YExpE(YExp), .XZeroE(XZero), .YZeroE(YZero), srtradix4 srtradix4(.clk, .FmtE(ModFmt), .X(DivX),.Dpreproc, .DivBusy, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN, .DivStart, .XExpE(XExp), .YExpE(YExp), .XZeroE(XZero), .YZeroE(YZero),
.Quot, .Rem(), .DivCalcExpM(DivCalcExp)); .Quot, .Rem(), .DivCalcExpM(DivCalcExp));
assign CmpFlg[3:0] = 0; assign CmpFlg[3:0] = 0;

View File

@ -347,7 +347,7 @@ redirect -append $filename { report_timing -capacitance -transition_time -nets -
redirect -append $filename { echo "\n\n\n//// Critical paths through fma2 ////\n\n\n" } redirect -append $filename { echo "\n\n\n//// Critical paths through fma2 ////\n\n\n" }
redirect -append $filename { report_timing -capacitance -transition_time -nets -through {postprocess/*} -nworst 1 } redirect -append $filename { report_timing -capacitance -transition_time -nets -through {postprocess/*} -nworst 1 }
redirect -append $filename { echo "\n\n\n//// Critical paths through fpdiv ////\n\n\n" } redirect -append $filename { echo "\n\n\n//// Critical paths through fpdiv ////\n\n\n" }
redirect -append $filename { report_timing -capacitance -transition_time -nets -through {fdivsqrt/*} -nworst 1 } redirect -append $filename { report_timing -capacitance -transition_time -nets -through {divsqrt/*} -nworst 1 }
redirect -append $filename { echo "\n\n\n//// Critical paths through fcvt ////\n\n\n" } redirect -append $filename { echo "\n\n\n//// Critical paths through fcvt ////\n\n\n" }
redirect -append $filename { report_timing -capacitance -transition_time -nets -through {fcvt/*} -nworst 1 } redirect -append $filename { report_timing -capacitance -transition_time -nets -through {fcvt/*} -nworst 1 }