This commit is contained in:
cturek 2022-07-13 17:36:56 +00:00
commit e9ce71ca20
13 changed files with 51 additions and 49 deletions

View File

@ -32,7 +32,7 @@
`define DESIGN_COMPILER 0
// RV32 or RV64: XLEN = 32 or 64
`define XLEN 32
`define XLEN 64
// IEEE 754 compliance
`define IEEE754 0

View File

@ -101,14 +101,15 @@
`define CORRSHIFTSZ ((`DIVLEN+`NF+3) > (3*`NF+8) ? (`DIVLEN+`NF+3) : (3*`NF+6))
// division constants
`define RADIX 4
`define DIVCOPIES 4
`define RADIX 32'h4
`define DIVCOPIES 32'h4
`define DIVLEN ((`NF < `XLEN) ? (`XLEN) : (`NF))
`define DIVRESLEN ((`NF>`XLEN) ? `DIVLEN+2 : `DIVLEN)
`define LOGR ((`RADIX==2) ? 1 : 2)
`define FPDUR $ceil($itor(`DIVRESLEN)/$itor(`LOGR*`DIVCOPIES))
`define DURLEN ($clog2($rtoi(`FPDUR)+1))
`define QLEN ($rtoi(`FPDUR)*`LOGR*`DIVCOPIES)
`define LOGR ((`RADIX==2) ? 32'h1 : 32'h2)
// FPDUR = ceil(DIVRESLEN/(LOGR*DIVCOPIES))
`define FPDUR ((`DIVRESLEN+(`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES))
`define DURLEN ($clog2(`FPDUR+1))
`define QLEN (`FPDUR*`LOGR*`DIVCOPIES)
`define USE_SRAM 0

View File

@ -24,9 +24,6 @@ add wave -group {Divide} -noupdate /testbenchfp/srtradix4/*
add wave -group {Divide} -group inter0 -noupdate /testbenchfp/srtradix4/genblk1[0]/divinteration/*
add wave -group {Divide} -group inter0 -noupdate /testbenchfp/srtradix4/genblk1[0]/divinteration/qsel4/*
add wave -group {Divide} -group inter0 -noupdate /testbenchfp/srtradix4/genblk1[0]/divinteration/otfc4/*
add wave -group {Divide} -group inter1 -noupdate /testbenchfp/srtradix4/genblk1[1]/divinteration/*
add wave -group {Divide} -group inter2 -noupdate /testbenchfp/srtradix4/genblk1[2]/divinteration/*
add wave -group {Divide} -group inter3 -noupdate /testbenchfp/srtradix4/genblk1[3]/divinteration/*
add wave -group {Divide} -noupdate /testbenchfp/srtpreproc/*
add wave -group {Divide} -noupdate /testbenchfp/srtradix4/expcalc/*
add wave -group {Divide} -noupdate /testbenchfp/srtfsm/*

View File

@ -52,7 +52,7 @@ module divsqrt(
// output logic [`XLEN-1:0] RemM,
);
logic [`DIVLEN+3:0] WSN, WCN;
logic [`DIVLEN+3:0] NextWSN, NextWCN;
logic [`DIVLEN+3:0] WS, WC;
logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt;
logic [`DIVLEN-1:0] X;
@ -61,8 +61,8 @@ module divsqrt(
srtpreproc srtpreproc(.XManE, .Dur, .YManE,.X,.Dpreproc, .XZeroCnt, .YZeroCnt);
srtfsm srtfsm(.reset, .WSN, .WCN, .WS, .WC, .Dur, .DivBusy, .clk, .DivStart(DivStartE),.StallE, .StallM, .DivDone, .XZeroE, .YZeroE, .DivStickyE(DivStickyM), .XNaNE, .YNaNE,
srtfsm srtfsm(.reset, .NextWSN, .NextWCN, .WS, .WC, .Dur, .DivBusy, .clk, .DivStart(DivStartE),.StallE, .StallM, .DivDone, .XZeroE, .YZeroE, .DivStickyE(DivStickyM), .XNaNE, .YNaNE,
.XInfE, .YInfE, .DivNegStickyE(DivNegStickyM), .EarlyTermShiftE(EarlyTermShiftM));
srtradix4 srtradix4(.clk, .FmtE, .X,.Dpreproc, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .WSN, .WCN, .DivStart(DivStartE), .XExpE, .YExpE, .XZeroE, .YZeroE,
srtradix4 srtradix4(.clk, .FmtE, .X,.Dpreproc, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN, .DivStart(DivStartE), .XExpE, .YExpE, .XZeroE, .YZeroE,
.DivBusy, .Quot(QuotM), .Rem(), .DivCalcExpM);
endmodule

View File

@ -68,7 +68,8 @@ module fcvt (
logic Signed; // is the opperation with a signed integer?
logic Int64; // is the integer 64 bits?
logic IntToFp; // is the opperation an int->fp conversion?
logic [`LOGCVTLEN-1:0] LeadingZeros; // output from the LZC
logic [`CVTLEN:0] LzcInFull; // input to the Leading Zero Counter (priority encoder)
logic [`LOGCVTLEN-1:0] LeadingZeros; // output from the LZC
// seperate OpCtrl for code readability
@ -102,10 +103,11 @@ module fcvt (
// choose the input to the leading zero counter i.e. priority encoder
// int -> fp : | positive integer | 00000... (if needed) |
// fp -> fp : | fraction | 00000... (if needed) |
assign LzcIn = IntToFp ? {TrimInt, {`CVTLEN-`XLEN{1'b0}}} :
{Xm[`NF-1:0], {`CVTLEN-`NF{1'b0}}};
assign LzcInFull = IntToFp ? {1'b0, TrimInt, {`CVTLEN-`XLEN{1'b0}}} :
{Xm, {`CVTLEN-`NF{1'b0}}};
assign LzcIn = LzcInFull[`CVTLEN-1:0];
lzc #(`CVTLEN) lzc (.num(LzcIn), .ZeroCnt(LeadingZeros));
lzc #(`CVTLEN+1) lzc (.num(LzcInFull), .ZeroCnt(LeadingZeros));
///////////////////////////////////////////////////////////////////////////
// shifter
@ -119,13 +121,13 @@ module fcvt (
// denormalized/undeflowed result fp -> fp:
// - shift left by NF-1+CalcExp - to shift till the biased expoenent is 0
// ??? -> fp:
// - shift left by LeadingZeros+1 - to shift till the result is normalized
// - shift left by LeadingZeros - to shift till the result is normalized
// - only shift fp -> fp if the intital value is denormalized
// - this is a problem because the input to the lzc was the fraction rather than the mantissa
// - rather have a few and-gates than an extra bit in the priority encoder??? *** is this true?
assign ShiftAmt = ToInt ? Ce[`LOGCVTLEN-1:0]&{`LOGCVTLEN{~Ce[`NE]}} :
ResDenormUf&~IntToFp ? (`LOGCVTLEN)'(`NF-1)+Ce[`LOGCVTLEN-1:0] :
(LeadingZeros+1)&{`LOGCVTLEN{XDenorm|IntToFp}};
(LeadingZeros)&{`LOGCVTLEN{XDenorm|IntToFp}};
///////////////////////////////////////////////////////////////////////////
// exp calculations
@ -197,14 +199,14 @@ module fcvt (
// | 0's | Mantissa | 0's if nessisary |
// | keep |
//
// - if the input is denormalized then we dont shift... so the "- (LeadingZeros+1)" is just leftovers from other options
// int -> fp : largest bias + XLEN - Largest bias + new bias - 1 - LeadingZeros = XLEN + NewBias - 1 - LeadingZeros
// - if the input is denormalized then we dont shift... so the "- LeadingZeros" is just leftovers from other options
// int -> fp : largest bias + XLEN - Largest bias + new bias - LeadingZeros = XLEN + NewBias - LeadingZeros
// Process:
// - shifted right by XLEN (XLEN)
// - shift left to normilize (-1-LeadingZeros)
// - shift left to normilize (-LeadingZeros)
// - newBias to make the biased exponent
// oldexp - biasold +newbias - (LeadingZeros+1)&(XDenorm|IntToFp)
assign Ce = {1'b0, OldExp} - (`NE+1)'(`BIAS) + {2'b0, NewBias} - {{`NE{1'b0}}, XDenorm|IntToFp} - {{`NE-`LOGCVTLEN+1{1'b0}}, (LeadingZeros&{`LOGCVTLEN{XDenorm|IntToFp}})};
// oldexp - biasold +newbias - LeadingZeros&(XDenorm|IntToFp)
assign Ce = {1'b0, OldExp} - (`NE+1)'(`BIAS) + {2'b0, NewBias} - {{`NE-`LOGCVTLEN+1{1'b0}}, (LeadingZeros&{`LOGCVTLEN{XDenorm|IntToFp}})};
// find if the result is dnormal or underflows
// - if Calculated expoenent is 0 or negitive (and the input/result is not exactaly 0)
// - can't underflow an integer to Fp conversion

View File

@ -53,7 +53,8 @@ module fmashiftcalc(
assign FmaSZero = ~(|FmaSm);
// calculate the sum's exponent
assign NormSumExp = FmaKillProd ? {2'b0, Ze[`NE-1:1], Ze[0]&~ZDenorm} : FmaPe + -{{`NE+2-$unsigned($clog2(3*`NF+7)){1'b0}}, FmaNCnt} - 1 + (`NE+2)'(`NF+4);
// ProdExp - NormCnt - 1 + NF+4 = ProdExp + ~NormCnt + 1 - 1 + NF+4 = ProdExp + ~NormCnt + NF+4
assign NormSumExp = FmaKillProd ? {2'b0, Ze[`NE-1:1], Ze[0]&~ZDenorm} : FmaPe + {{`NE+2-$unsigned($clog2(3*`NF+7)){1'b1}}, ~FmaNCnt} + (`NE+2)'(`NF+4);
//convert the sum's exponent into the proper percision
if (`FPSIZES == 1) begin

View File

@ -29,7 +29,7 @@
`include "wally-config.vh"
module postprocess(
module postprocess (
// general signals
input logic Xs, Ys, // input signs
input logic [`NE-1:0] Ze, // input exponents

View File

@ -41,7 +41,7 @@ module srtradix4(
input logic [`DIVLEN-1:0] Dpreproc,
input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt,
output logic [`QLEN-1:0] Quot,
output logic [`DIVLEN+3:0] WSN, WCN,
output logic [`DIVLEN+3:0] NextWSN, NextWCN,
output logic [`DIVLEN+3:0] FirstWS, FirstWC,
output logic [`NE+1:0] DivCalcExpM,
output logic [`XLEN-1:0] Rem
@ -58,11 +58,12 @@ module srtradix4(
logic [`QLEN-1:0] QNext[`DIVCOPIES-1:0];
logic [`QLEN-1:0] QMNext[`DIVCOPIES-1:0];
/* verilator lint_on UNOPTFLAT */
logic [`DIVLEN+3:0] WSN, WCN;
logic [`DIVLEN+3:0] D, DBar, D2, DBar2;
logic [`NE+1:0] DivCalcExp;
logic [$clog2(`XLEN+1)-1:0] intExp;
logic intSign;
logic [`QLEN-1:0] QMux, QMMux;
logic [`QLEN-1:0] QMMux;
// Top Muxes and Registers
// When start is asserted, the inputs are loaded into the divider.
@ -72,9 +73,11 @@ module srtradix4(
// - otherwise load WSA into the flipflop
// - the assumed one is added to D since it's always normalized (and X/0 is a special case handeled by result selection)
// - XZeroE is used as the assumed one to avoid creating a sticky bit - all other numbers are normalized
mux2 #(`DIVLEN+4) wsmux({WSA[`DIVCOPIES-1][`DIVLEN+1:0], 2'b0}, {3'b000, ~XZeroE, X}, DivStart, WSN);
assign NextWSN = {WSA[`DIVCOPIES-1][`DIVLEN+1:0], 2'b0};
assign NextWCN = {WCA[`DIVCOPIES-1][`DIVLEN+1:0], 2'b0};
mux2 #(`DIVLEN+4) wsmux(NextWSN, {3'b000, ~XZeroE, X}, DivStart, WSN);
flop #(`DIVLEN+4) wsflop(clk, WSN, WS[0]);
mux2 #(`DIVLEN+4) wcmux({WCA[`DIVCOPIES-1][`DIVLEN+1:0], 2'b0}, {`DIVLEN+4{1'b0}}, DivStart, WCN);
mux2 #(`DIVLEN+4) wcmux(NextWCN, {`DIVLEN+4{1'b0}}, DivStart, WCN);
flop #(`DIVLEN+4) wcflop(clk, WCN, WC[0]);
flopen #(`DIVLEN+4) dflop(clk, DivStart, {4'b0001, Dpreproc}, D);
flopen #(`NE+2) expflop(clk, DivStart, DivCalcExp, DivCalcExpM);
@ -88,10 +91,10 @@ module srtradix4(
genvar i;
generate
for(i=0; i<`DIVCOPIES; i++) begin
for(i=0; $unsigned(i)<`DIVCOPIES; i++) begin
divinteration divinteration(.clk, .DivStart, .DivBusy, .D, .DBar, .D2, .DBar2,
.WS(WS[i]), .WC(WC[i]), .WSA(WSA[i]), .WCA(WCA[i]), .Q(Q[i]), .QM(QM[i]), .QNext(QNext[i]), .QMNext(QMNext[i]));
if(i<3) begin
if(i<(`DIVCOPIES-1)) begin
assign WS[i+1] = {WSA[i][`DIVLEN+1:0], 2'b0};
assign WC[i+1] = {WCA[i][`DIVLEN+1:0], 2'b0};
assign Q[i+1] = QNext[i];
@ -101,9 +104,8 @@ module srtradix4(
endgenerate
// if starting a new divison set Q to 0 and QM to -1
mux2 #(`QLEN) Qmux(QNext[`DIVCOPIES-1], {`QLEN{1'b0}}, DivStart, QMux);
mux2 #(`QLEN) QMmux(QMNext[`DIVCOPIES-1], {`QLEN{1'b1}}, DivStart, QMMux);
flopen #(`QLEN) Qreg(clk, DivBusy|DivStart, QMux, Q[0]); // *** have to connect Quot directly to M stage
flopenr #(`QLEN) Qreg(clk, DivStart, DivBusy, QNext[`DIVCOPIES-1], Q[0]);
flop #(`QLEN) QMreg(clk, QMMux, QM[0]);
assign Quot = Q[0];
@ -181,7 +183,7 @@ module qsel4 (
logic [3:0] QSel4[1023:0];
initial begin
always_comb begin
integer d, w, i, w2;
for(d=0; d<8; d++)
for(w=0; w<128; w++)begin
@ -270,9 +272,9 @@ module otfc4 (
// else if q = -2 Q = {QM, 10} QM = {QM, 01}
// *** how does the 0 concatination numbers work?
assign QR = Q[`QLEN-3:0];
assign QMR = QM[`QLEN-3:0]; // Shifted Q and QM
always_comb begin
QR = Q[`QLEN-3:0];
QMR = QM[`QLEN-3:0]; // Shift Q and QM
if (q[3]) begin // +2
QNext = {QR, 2'b10};
QMNext = {QR, 2'b01};
@ -352,5 +354,5 @@ module expcalc(
endcase
end
// correct exponent for denormalized input's normalization shifts
assign DivCalcExp = ({2'b0, XExpE} - {{`NE+1-$clog2(`NF+2){1'b0}}, XZeroCnt} - {2'b0, YExpE} + {{`NE+1-$clog2(`NF+2){1'b0}}, YZeroCnt} + {3'b0, Bias})&{`NE+2{~XZeroE}};
assign DivCalcExp = ({2'b0, XExpE} - {{`NE+1-$unsigned($clog2(`NF+2)){1'b0}}, XZeroCnt} - {2'b0, YExpE} + {{`NE+1-$unsigned($clog2(`NF+2)){1'b0}}, YZeroCnt} + {3'b0, Bias})&{`NE+2{~XZeroE}};
endmodule

View File

@ -33,7 +33,7 @@
module srtfsm(
input logic clk,
input logic reset,
input logic [`DIVLEN+3:0] WSN, WCN, WS, WC,
input logic [`DIVLEN+3:0] NextWSN, NextWCN, WS, WC,
input logic XInfE, YInfE,
input logic XZeroE, YZeroE,
input logic XNaNE, YNaNE,
@ -58,8 +58,8 @@ module srtfsm(
//flopen #($clog2(`DIVLEN/2+3)) durflop(clk, DivStart, CalcDur, Dur);
assign DivBusy = (state == BUSY);
assign WZero = ((WSN^WCN)=={WSN[`DIVLEN+2:0]|WCN[`DIVLEN+2:0], 1'b0});
assign DivStickyE = ~WZero;
assign WZero = ((NextWSN^NextWCN)=={NextWSN[`DIVLEN+2:0]|NextWCN[`DIVLEN+2:0], 1'b0});
assign DivStickyE = |W;
assign DivDone = (state == DONE);
assign W = WC+WS;
assign DivNegStickyE = W[`DIVLEN+3]; //*** is there a better way to do this???

View File

@ -63,8 +63,7 @@ module srtpreproc (
assign X = PreprocX;
assign Dpreproc = PreprocY;
assign Dur = (`DURLEN)'($rtoi(`FPDUR));
assign Dur = (`DURLEN)'(`FPDUR);
// assign intExp = zeroCntB - zeroCntA + 1;
// assign intSign = Signed & (SrcA[`XLEN - 1] ^ SrcB[`XLEN - 1]);

View File

@ -34,7 +34,7 @@ module lzc #(parameter WIDTH = 1) (
/* verilator lint_off CMPCONST */
/* verilator lint_off WIDTH */
int i;
logic [31:0] i;
always_comb begin
i = 0;
while (~num[WIDTH-1-i] & (i < WIDTH)) i = i+1; // search for leading one

View File

@ -87,8 +87,8 @@ module testbenchfp;
logic reset = 1'b0;
logic [`DIVLEN-1:0] DivX;
logic [`DIVLEN-1:0] Dpreproc;
logic [`DIVLEN+3:0] WSN, WS;
logic [`DIVLEN+3:0] WCN, WC;
logic [`DIVLEN+3:0] NextWSN, WS;
logic [`DIVLEN+3:0] NextWCN, WC;
logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt;
logic [`DURLEN-1:0] Dur;
@ -696,9 +696,9 @@ module testbenchfp;
.XManE(XMan), .YManE(YMan), .XZeroE(XZero), .YZeroE(YZero), .CmpIntResE(CmpRes),
.XNaNE(XNaN), .YNaNE(YNaN), .XSNaNE(XSNaN), .YSNaNE(YSNaN), .FSrcXE(X), .FSrcYE(Y), .CmpNVE(CmpFlg[4]), .CmpFpResE(FpCmpRes));
srtpreproc srtpreproc(.XManE(XMan), .Dur, .YManE(YMan),.X(DivX),.Dpreproc, .XZeroCnt, .YZeroCnt);
srtfsm srtfsm(.reset, .WSN, .WCN, .WS, .WC, .Dur, .DivBusy, .DivDone, .clk, .DivStart, .StallM(1'b0), .StallE(1'b0), .XZeroE(XZero), .YZeroE(YZero), .DivStickyE(DivSticky), .XNaNE(XNaN), .YNaNE(YNaN),
srtfsm srtfsm(.reset, .NextWSN, .NextWCN, .WS, .WC, .Dur, .DivBusy, .DivDone, .clk, .DivStart, .StallM(1'b0), .StallE(1'b0), .XZeroE(XZero), .YZeroE(YZero), .DivStickyE(DivSticky), .XNaNE(XNaN), .YNaNE(YNaN),
.XInfE(XInf), .YInfE(YInf), .DivNegStickyE(DivNegSticky), .EarlyTermShiftE(EarlyTermShift));
srtradix4 srtradix4(.clk, .FmtE(ModFmt), .X(DivX),.Dpreproc, .DivBusy, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .WSN, .WCN, .DivStart, .XExpE(XExp), .YExpE(YExp), .XZeroE(XZero), .YZeroE(YZero),
srtradix4 srtradix4(.clk, .FmtE(ModFmt), .X(DivX),.Dpreproc, .DivBusy, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN, .DivStart, .XExpE(XExp), .YExpE(YExp), .XZeroE(XZero), .YZeroE(YZero),
.Quot, .Rem(), .DivCalcExpM(DivCalcExp));
assign CmpFlg[3:0] = 0;

View File

@ -347,7 +347,7 @@ redirect -append $filename { report_timing -capacitance -transition_time -nets -
redirect -append $filename { echo "\n\n\n//// Critical paths through fma2 ////\n\n\n" }
redirect -append $filename { report_timing -capacitance -transition_time -nets -through {postprocess/*} -nworst 1 }
redirect -append $filename { echo "\n\n\n//// Critical paths through fpdiv ////\n\n\n" }
redirect -append $filename { report_timing -capacitance -transition_time -nets -through {fdivsqrt/*} -nworst 1 }
redirect -append $filename { report_timing -capacitance -transition_time -nets -through {divsqrt/*} -nworst 1 }
redirect -append $filename { echo "\n\n\n//// Critical paths through fcvt ////\n\n\n" }
redirect -append $filename { report_timing -capacitance -transition_time -nets -through {fcvt/*} -nworst 1 }