radix-4 early termination working for special cases - not working completely

This commit is contained in:
Katherine Parry 2022-06-27 20:43:55 +00:00
parent adaee899bb
commit a5fb60eb1a
7 changed files with 114 additions and 52 deletions

View File

@ -22,7 +22,7 @@ add wave -group {Divide} -noupdate /testbenchfp/srtradix4/*
add wave -group {Divide} -noupdate /testbenchfp/srtradix4/qsel4/*
add wave -group {Divide} -noupdate /testbenchfp/srtradix4/otfc4/*
add wave -group {Divide} -noupdate /testbenchfp/srtradix4/preproc/*
add wave -group {Divide} -noupdate /testbenchfp/srtradix4/divcounter/*
add wave -group {Divide} -noupdate /testbenchfp/srtradix4/earlytermination/*
add wave -group {Divide} -noupdate /testbenchfp/srtradix4/expcalc/*
add wave -group {Testbench} -noupdate /testbenchfp/*
add wave -group {Testbench} -noupdate /testbenchfp/readvectors/*

View File

@ -3,6 +3,8 @@
module divshiftcalc(
input logic [`DIVLEN+2:0] Quot,
input logic [`NE+1:0] DivCalcExpM,
input logic [`FMTBITS-1:0] FmtM,
input logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2M,
output logic [$clog2(`NORMSHIFTSZ)-1:0] DivShiftAmt,
output logic [`NORMSHIFTSZ-1:0] DivShiftIn,
output logic [`NE+1:0] CorrDivExp
@ -10,30 +12,81 @@ module divshiftcalc(
logic ResDenorm;
logic [`NE+1:0] DenormShift;
logic [`NE+1:0] NormShift;
logic [`NE+1:0] Nf, NfPlus1;
// is the result denromalized
// if the exponent is 1 then the result needs to be normalized then the result is denormalizes
assign ResDenorm = DivCalcExpM[`NE+1]|(~|DivCalcExpM[`NE+1:1]&~(DivCalcExpM[0]&Quot[`DIVLEN+2]));
// select the proper fraction lengnth
if (`FPSIZES == 1) begin
assign Nf = (`NE+2)'(`NF);
assign NfPlus1 = (`NE+2)'(`NF+1);
end else if (`FPSIZES == 2) begin
assign Nf = FmtM ? (`NE+2)'(`NF) : (`NE+2)'(`NF1);
assign NfPlus1 = FmtM ? (`NE+2)'(`NF+1) : (`NE+2)'(`NF1+1);
end else if (`FPSIZES == 3) begin
always_comb
case (FmtM)
`FMT: begin
Nf = (`NE+2)'(`NF);
NfPlus1 = (`NE+2)'(`NF+1);
end
`FMT1: begin
Nf = (`NE+2)'(`NF1);
NfPlus1 = (`NE+2)'(`NF1+1);
end
`FMT2: begin
Nf = (`NE+2)'(`NF2);
NfPlus1 = (`NE+2)'(`NF2+1);
end
default: begin
Nf = 1'bx;
NfPlus1 = 1'bx;
end
endcase
end else if (`FPSIZES == 4) begin
always_comb
case (FmtM)
2'h3: begin
Nf = (`NE+2)'(`Q_NF);
NfPlus1 = (`NE+2)'(`Q_NF+1);
end
2'h1: begin
Nf = (`NE+2)'(`D_NF);
NfPlus1 = (`NE+2)'(`D_NF+1);
end
2'h0: begin
Nf = (`NE+2)'(`S_NF);
NfPlus1 = (`NE+2)'(`S_NF+1);
end
2'h2: begin
Nf = (`NE+2)'(`H_NF);
NfPlus1 = (`NE+2)'(`H_NF+1);
end
endcase
end
// if the result is denormalized
// 00000000x.xxxxxx... Exp = DivCalcExp
// .00000000xxxxxxx... >> NF+1 Exp = DivCalcExp+NF+1
// .000xxxxxxxxxxxx... << DivCalcExp+NF+1 Exp = 0
// .0000xxxxxxxxxxx... >> 1 Exp = 1
// Left shift amount = DivCalcExp+NF+1-1
assign DenormShift = (`NE+2)'(`NF)+DivCalcExpM;
// if the result is denormalized
assign DenormShift = Nf+DivCalcExpM;
// if the result is normalized
// 00000000x.xxxxxx... Exp = DivCalcExp
// .00000000xxxxxxx... >> NF+1 Exp = DivCalcExp+NF+1
// 00000000x.xxxxxx... << NF+1 Exp = DivCalcExp
// 00000000xx.xxxxx... << 1? Exp = DivCalcExp-1
// Left shift amount = NF+1 plus 1 if normalization required
assign NormShift = (`NE+2)'(`NF+1) + {(`NE+1)'(0), ~Quot[`DIVLEN+2]};
assign NormShift = NfPlus1 + {(`NE+1)'(0), ~Quot[`DIVLEN+2]};
// if the shift amount is negitive then dont shift (keep sticky bit)
assign DivShiftAmt = ResDenorm ? DenormShift[$clog2(`NORMSHIFTSZ)-1:0]&{$clog2(`NORMSHIFTSZ){~DenormShift[`NE+1]}} : NormShift[$clog2(`NORMSHIFTSZ)-1:0];
assign DivShiftAmt = (ResDenorm ? DenormShift[$clog2(`NORMSHIFTSZ)-1:0]&{$clog2(`NORMSHIFTSZ){~DenormShift[`NE+1]}} : NormShift[$clog2(`NORMSHIFTSZ)-1:0])+{{$clog2(`NORMSHIFTSZ)-$clog2(`DIVLEN/2+3)-1{1'b0}}, EarlyTermShiftDiv2M, 1'b0};
// *** may be able to reduce shifter size
assign DivShiftIn = {{`NF{1'b0}}, Quot[`DIVLEN+2:0], {`NORMSHIFTSZ-`DIVLEN-3-`NF{1'b0}}};
// the quotent is in the range [.5,2)
// the quotent is in the range [.5,2) if there is no early termination
// if the quotent < 1 and not denormal then subtract 1 to account for the normalization shift
assign CorrDivExp = (ResDenorm&~DenormShift[`NE+1]) ? (`NE+2)'(0) : DivCalcExpM - {(`NE+1)'(0), ~Quot[`DIVLEN+2]};

View File

@ -127,6 +127,7 @@ module fpu (
logic [`NE+1:0] DivCalcExpM;
logic DivNegStickyM;
logic DivStickyM;
logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2M;
// result and flag signals
logic [63:0] FDivResM, FDivResW; // divide/squareroot result
@ -357,7 +358,7 @@ module fpu (
assign FpLoadM = FResSelM[1];
postprocess postprocess(.XSgnM, .YSgnM, .ZExpM, .XManM, .YManM, .ZManM, .FrmM, .FmtM, .ProdExpM,
postprocess postprocess(.XSgnM, .YSgnM, .ZExpM, .XManM, .YManM, .ZManM, .FrmM, .FmtM, .ProdExpM, .EarlyTermShiftDiv2M,
.AddendStickyM, .KillProdM, .XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, .Quot,
.ZInfM, .XNaNM, .YNaNM, .ZNaNM, .XSNaNM, .YSNaNM, .ZSNaNM, .SumM, .DivCalcExpM,
.NegSumM, .InvZM, .ZDenormM, .ZSgnEffM, .PSgnM, .FOpCtrlM, .FmaNormCntM, .DivNegStickyM,

View File

@ -49,6 +49,7 @@ module postprocess(
input logic ZSgnEffM, // the modified Z sign - depends on instruction
input logic PSgnM, // the product's sign
input logic [2:0] FOpCtrlM, // choose which opperation (look below for values)
input logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2M,
input logic [$clog2(3*`NF+7)-1:0] FmaNormCntM, // the normalization shift count
input logic [`NE:0] CvtCalcExpM, // the calculated expoent
input logic [`NE+1:0] DivCalcExpM, // the calculated expoent
@ -143,7 +144,7 @@ module postprocess(
.XZeroM, .IntToFp, .OutFmt, .CvtResUf, .CvtShiftIn);
fmashiftcalc fmashiftcalc(.SumM, .ZExpM, .ProdExpM, .FmaNormCntM, .FmtM, .KillProdM, .ConvNormSumExp,
.ZDenormM, .SumZero, .PreResultDenorm, .FmaShiftAmt, .FmaShiftIn);
divshiftcalc divshiftcalc(.Quot, .DivCalcExpM, .CorrDivExp, .DivShiftAmt, .DivShiftIn);
divshiftcalc divshiftcalc(.FmtM, .Quot, .DivCalcExpM, .EarlyTermShiftDiv2M, .CorrDivExp, .DivShiftAmt, .DivShiftIn);
always_comb
case(PostProcSelM)

View File

@ -211,7 +211,7 @@ module resultselect(
// - do so if the res underflows, is zero (the exp doesnt calculate correctly). or the integer input is 0
// - dont set to zero if fp input is zero but not using the fp input
// - dont set to zero if int input is zero but not using the int input
assign KillRes = CvtOp ? (CvtResUf|(XZeroM&~IntToFp)|(IntZeroM&IntToFp)) : FullResExp[`NE+1] | (YInfM&DivOp&~XInfM);//Underflow & ~ResDenorm & (ResExp!=1);
assign KillRes = CvtOp ? (CvtResUf|(XZeroM&~IntToFp)|(IntZeroM&IntToFp)) : FullResExp[`NE+1] | (((YInfM&~XInfM)|XZeroM)&DivOp);//Underflow & ~ResDenorm & (ResExp!=1);
assign SelOfRes = Overflow|DivByZero|(InfIn&~(YInfM&DivOp));
// output infinity with result sign if divide by zero
if(`IEEE754) begin

View File

@ -36,11 +36,14 @@ module srtradix4 (
input logic [`NE-1:0] XExpE, YExpE,
input logic [`NF:0] XManE, YManE,
input logic [`XLEN-1:0] SrcA, SrcB,
input logic XZeroE,
input logic XInfE, YInfE,
input logic XZeroE, YZeroE,
input logic XNaNE, YNaNE,
input logic W64, // 32-bit ints on XLEN=64
input logic Signed, // Interpret integers as signed 2's complement
input logic Int, // Choose integer inputs
input logic Sqrt, // perform square root, not divide
output logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2E,
output logic DivDone,
output logic DivStickyE,
output logic DivNegStickyE,
@ -49,10 +52,9 @@ module srtradix4 (
output logic [`NE+1:0] DivCalcExpE
);
// logic qp, qz, qm; // quotient is +1, 0, or -1
logic [3:0] q;
logic [`NE+1:0] DivCalcExp;
logic [`DIVLEN:0] X;
logic [`DIVLEN-1:0] X;
logic [`DIVLEN-1:0] Dpreproc;
logic [`DIVLEN+3:0] WS, WSA, WSN;
logic [`DIVLEN+3:0] WC, WCA, WCN;
@ -68,13 +70,11 @@ module srtradix4 (
// When start is asserted, the inputs are loaded into the divider.
// Otherwise, the divisor is retained and the partial remainder
// is fed back for the next iteration.
// - assumed one is added here since all numbers are normlaized
// *** wait what about zero? is that specal case? can the divider handle it?
// - when the start signal is asserted X and 0 are loaded into WS and WC
// - otherwise load WSA into the flipflop
// *** what does N and A stand for?
// *** change shift amount for radix4
mux2 #(`DIVLEN+4) wsmux({WSA[`DIVLEN+1:0], 2'b0}, {3'b000, X}, DivStart, WSN);
// - the assumed one is added to D since it's always normalized (and X/0 is a special case handeled by result selection)
// - XZeroE is used as the assumed one to avoid creating a sticky bit - all other numbers are normalized
mux2 #(`DIVLEN+4) wsmux({WSA[`DIVLEN+1:0], 2'b0}, {3'b000, ~XZeroE, X}, DivStart, WSN);
flop #(`DIVLEN+4) wsflop(clk, WSN, WS);
mux2 #(`DIVLEN+4) wcmux({WCA[`DIVLEN+1:0], 2'b0}, {`DIVLEN+4{1'b0}}, DivStart, WCN);
flop #(`DIVLEN+4) wcflop(clk, WCN, WC);
@ -117,12 +117,11 @@ module srtradix4 (
//*** change for radix 4
otfc4 otfc4(.clk, .DivStart, .q, .Quot);
assign DivStickyE = (WS+WC) != 0; //replace with early termination
assign DivNegStickyE = $signed(WS+WC) < 0; //replace with early termination
expcalc expcalc(.XExpE, .YExpE, .XZeroE, .XZeroCnt, .YZeroCnt, .DivCalcExp);
divcounter divcounter(clk, DivStart, DivDone);
earlytermination earlytermination(.clk, .WC, .WS, .XZeroE, .YZeroE, .XInfE, .EarlyTermShiftDiv2E,
.YInfE, .XNaNE, .YNaNE, .DivStickyE, .DivNegStickyE, .DivStart, .DivDone);
endmodule
@ -130,28 +129,35 @@ endmodule
// Submodules //
////////////////
/////////////
// counter //
/////////////
module divcounter(input logic clk,
input logic DivStart,
output logic DivDone);
module earlytermination(
input logic clk,
input logic [`DIVLEN+3:0] WS, WC,
input logic XInfE, YInfE,
input logic XZeroE, YZeroE,
input logic XNaNE, YNaNE,
input logic DivStart,
output logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2E,
output logic DivStickyE,
output logic DivNegStickyE,
output logic DivDone);
logic [5:0] count;
// This block of control logic sequences the divider
// through its iterations. You may modify it if you
// build a divider which completes in fewer iterations.
// You are not responsible for the (trivial) circuit
// design of the block.
logic [$clog2(`DIVLEN/2+3)-1:0] Count;
logic WZero;
assign WZero = (WS+WC == 0)|XZeroE|YZeroE|XInfE|YInfE|XNaNE|YNaNE; //*** temporary
// *** rather than Counting should just be able to check if one of the two msbs of the quotent is 1 then stop???
assign DivDone = (DivStickyE | WZero);
assign DivStickyE = ~|Count;
assign DivNegStickyE = $signed(WS+WC) < 0;
assign EarlyTermShiftDiv2E = Count;
// +1 for setup
// `DIVLEN/2 to get required number of bits
// +1 for possible .5 and round bit
// Count down Counter
always @(posedge clk)
begin
DivDone = 0;
if (count == `DIVLEN/2+1) DivDone <= #1 1;
else if (DivDone | DivStart) DivDone <= #1 0;
if (DivStart) count <= #1 0;
else count <= #1 count+1;
if (DivStart) Count <= #1 `DIVLEN/2+2;
else Count <= #1 Count-1;
end
endmodule
@ -237,7 +243,7 @@ module srtpreproc (
input logic Signed, // Interpret integers as signed 2's complement
input logic Int, // Choose integer inputs
input logic Sqrt, // perform square root, not divide
output logic [`DIVLEN:0] X,
output logic [`DIVLEN-1:0] X,
output logic [`DIVLEN-1:0] Dpreproc,
output logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt,
output logic [$clog2(`XLEN+1)-1:0] intExp, // Quotient integer exponent
@ -245,7 +251,7 @@ module srtpreproc (
);
// logic [`XLEN-1:0] PosA, PosB;
// logic [`DIVLEN-1:0] ExtraA, ExtraB, PreprocA, PreprocB, PreprocX, PreprocY;
logic [`DIVLEN:0] PreprocA, PreprocX;
logic [`DIVLEN-1:0] PreprocA, PreprocX;
logic [`DIVLEN-1:0] PreprocB, PreprocY;
// assign PosA = (Signed & SrcA[`XLEN - 1]) ? -SrcA : SrcA;
@ -263,7 +269,7 @@ module srtpreproc (
// assign PreprocA = ExtraA << zeroCntA;
// assign PreprocB = ExtraB << (zeroCntB + 1);
assign PreprocX = {XManE<<XZeroCnt, {`DIVLEN-`NF{1'b0}}};
assign PreprocX = {XManE[`NF-1:0]<<XZeroCnt, {`DIVLEN-`NF{1'b0}}};
assign PreprocY = {YManE[`NF-1:0]<<YZeroCnt, {`DIVLEN-`NF{1'b0}}};
@ -300,7 +306,7 @@ module otfc4 (
// if starting a new divison set Q to 0 and QM to -1
mux2 #(`DIVLEN+3) Qmux(QNext, {`DIVLEN+3{1'b0}}, DivStart, QMux);
mux2 #(`DIVLEN+3) QMmux(QMNext, {`DIVLEN+3{1'b1}}, DivStart, QMMux);
flop #(`DIVLEN+3) Qreg(clk, QMux, Quot);
flop #(`DIVLEN+3) Qreg(clk, QMux, Quot); // *** have to connect Quot directly to M stage
flop #(`DIVLEN+3) QMreg(clk, QMMux, QM);
// shift Q (quotent) and QM (quotent-1)
@ -331,8 +337,7 @@ module otfc4 (
QMNext = {QMR, 2'b11};
end
end
// Quot is in the range [.5, 2) so normalize the result if nesissary
// assign Quot = Q[`DIVLEN+2] ? Q[`DIVLEN+1:2] : Q[`DIVLEN:1];
// Final Quoteint is in the range [.5, 2)
endmodule
@ -371,7 +376,7 @@ module expcalc(
output logic [`NE+1:0] DivCalcExp
);
// correct exponent for denormal shifts
// correct exponent for denormalized input's normalization shifts
assign DivCalcExp = (XExpE - XZeroCnt - YExpE + YZeroCnt + (`NE)'(`BIAS))&{`NE+2{~XZeroE}};
endmodule

View File

@ -55,6 +55,7 @@ module testbenchfp;
logic [`LOGCVTLEN-1:0] CvtShiftAmtE; // how much to shift by
logic [`DIVLEN+2:0] Quot;
logic CvtResDenormUfE;
logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2;
logic DivStart, DivDone;
@ -651,7 +652,7 @@ module testbenchfp;
.XInfM(XInf), .YInfM(YInf), .ZInfM(ZInf), .CvtResSgnM(CvtResSgnE), .FWriteIntM(WriteIntVal),
.XSNaNM(XSNaN), .YSNaNM(YSNaN), .ZSNaNM(ZSNaN), .CvtLzcInM(CvtLzcInE), .IntZeroM(IntZeroE),
.KillProdM(KillProdE), .AddendStickyM(AddendStickyE), .ProdExpM(ProdExpE),
.SumM(SumE), .NegSumM(NegSumE), .InvZM(InvZE), .FmaNormCntM(FmaNormCntE), .ZSgnEffM(ZSgnEffE), .PSgnM(PSgnE), .FmtM(ModFmt), .FrmM(FrmVal),
.SumM(SumE), .NegSumM(NegSumE), .InvZM(InvZE), .FmaNormCntM(FmaNormCntE), .EarlyTermShiftDiv2M(EarlyTermShiftDiv2), .ZSgnEffM(ZSgnEffE), .PSgnM(PSgnE), .FmtM(ModFmt), .FrmM(FrmVal),
.PostProcFlgM(Flg), .PostProcResM(FpRes), .FCvtIntResM(IntRes));
fcvt fcvt (.XSgnE(XSgn), .XExpE(XExp), .XManE(XMan), .ForwardedSrcAE(SrcA), .FWriteIntE(WriteIntVal),
@ -660,9 +661,9 @@ module testbenchfp;
fcmp fcmp (.FmtE(ModFmt), .FOpCtrlE(OpCtrlVal), .XSgnE(XSgn), .YSgnE(YSgn), .XExpE(XExp), .YExpE(YExp),
.XManE(XMan), .YManE(YMan), .XZeroE(XZero), .YZeroE(YZero), .CmpIntResE(CmpRes),
.XNaNE(XNaN), .YNaNE(YNaN), .XSNaNE(XSNaN), .YSNaNE(YSNaN), .FSrcXE(X), .FSrcYE(Y), .CmpNVE(CmpFlg[4]), .CmpFpResE(FpCmpRes));
srtradix4 srtradix4(.clk, .DivStart, .XExpE(XExp), .YExpE(YExp), .DivCalcExpE(DivCalcExp), .XZeroE(XZero), .DivStickyE(DivSticky),
.XManE(XMan), .YManE(YMan), .SrcA('0), .SrcB('0), .W64(1'b0), .Signed(1'b0), .Int(1'b0), .Sqrt(OpCtrlVal[0]),
.DivNegStickyE(DivNegSticky), .DivDone, .Quot, .Rem());
srtradix4 srtradix4(.clk, .DivStart, .XExpE(XExp), .YExpE(YExp), .DivCalcExpE(DivCalcExp), .XZeroE(XZero), .YZeroE(YZero), .DivStickyE(DivSticky),
.XManE(XMan), .YManE(YMan), .SrcA('0), .SrcB('0), .W64(1'b0), .Signed(1'b0), .Int(1'b0), .Sqrt(OpCtrlVal[0]), .XNaNE(XNaN), .YNaNE(YNaN),
.XInfE(XInf), .YInfE(YInf), .DivNegStickyE(DivNegSticky), .EarlyTermShiftDiv2E(EarlyTermShiftDiv2), .DivDone, .Quot, .Rem());
assign CmpFlg[3:0] = 0;
@ -815,8 +816,9 @@ end
///////////////////////////////////////////////////////////////////////////////////////////////
// check if the non-fma test is correct
if(~((Res === Ans | NaNGood | NaNGood === 1'bx) & (ResFlg === AnsFlg | AnsFlg === 5'bx))&(DivDone^~(UnitVal == `DIVUNIT))&(UnitVal !== `CVTINTUNIT)&(UnitVal !== `CMPUNIT)) begin
// check if result is correct
// - wait till the division result is done or one extra cylcle for early termination (to simulate the EM pipline stage)
if(~((Res === Ans | NaNGood | NaNGood === 1'bx) & (ResFlg === AnsFlg | AnsFlg === 5'bx))&((~DivStart&DivDone)^~(UnitVal == `DIVUNIT))&(UnitVal !== `CVTINTUNIT)&(UnitVal !== `CMPUNIT)) begin
errors += 1;
$display("There is an error in %s", Tests[TestNum]);
$display("inputs: %h %h %h\nSrcA: %h\n Res: %h %h\n Ans: %h %h", X, Y, Z, SrcA, Res, ResFlg, Ans, AnsFlg);
@ -839,7 +841,7 @@ end
$stop;
end
if(DivDone|(UnitVal != `DIVUNIT)) VectorNum += 1; // increment the vector
if((~DivStart&DivDone)|(UnitVal != `DIVUNIT)) VectorNum += 1; // increment the vector
if (TestVectors[VectorNum][0] === 1'bx & Tests[TestNum] !== "") begin // if reached the end of file