diff --git a/pipelined/regression/wave-fpu.do b/pipelined/regression/wave-fpu.do index a58400cca..7dfec7e24 100644 --- a/pipelined/regression/wave-fpu.do +++ b/pipelined/regression/wave-fpu.do @@ -22,7 +22,7 @@ add wave -group {Divide} -noupdate /testbenchfp/srtradix4/* add wave -group {Divide} -noupdate /testbenchfp/srtradix4/qsel4/* add wave -group {Divide} -noupdate /testbenchfp/srtradix4/otfc4/* add wave -group {Divide} -noupdate /testbenchfp/srtradix4/preproc/* -add wave -group {Divide} -noupdate /testbenchfp/srtradix4/divcounter/* +add wave -group {Divide} -noupdate /testbenchfp/srtradix4/earlytermination/* add wave -group {Divide} -noupdate /testbenchfp/srtradix4/expcalc/* add wave -group {Testbench} -noupdate /testbenchfp/* add wave -group {Testbench} -noupdate /testbenchfp/readvectors/* diff --git a/pipelined/src/fpu/divshiftcalc.sv b/pipelined/src/fpu/divshiftcalc.sv index bee4d09c2..d867efc44 100644 --- a/pipelined/src/fpu/divshiftcalc.sv +++ b/pipelined/src/fpu/divshiftcalc.sv @@ -3,6 +3,8 @@ module divshiftcalc( input logic [`DIVLEN+2:0] Quot, input logic [`NE+1:0] DivCalcExpM, + input logic [`FMTBITS-1:0] FmtM, + input logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2M, output logic [$clog2(`NORMSHIFTSZ)-1:0] DivShiftAmt, output logic [`NORMSHIFTSZ-1:0] DivShiftIn, output logic [`NE+1:0] CorrDivExp @@ -10,30 +12,81 @@ module divshiftcalc( logic ResDenorm; logic [`NE+1:0] DenormShift; logic [`NE+1:0] NormShift; + logic [`NE+1:0] Nf, NfPlus1; // is the result denromalized // if the exponent is 1 then the result needs to be normalized then the result is denormalizes assign ResDenorm = DivCalcExpM[`NE+1]|(~|DivCalcExpM[`NE+1:1]&~(DivCalcExpM[0]&Quot[`DIVLEN+2])); + // select the proper fraction length + if (`FPSIZES == 1) begin + assign Nf = (`NE+2)'(`NF); + assign NfPlus1 = (`NE+2)'(`NF+1); + + end else if (`FPSIZES == 2) begin + assign Nf = FmtM ? (`NE+2)'(`NF) : (`NE+2)'(`NF1); + assign NfPlus1 = FmtM ?
(`NE+2)'(`NF+1) : (`NE+2)'(`NF1+1); + + end else if (`FPSIZES == 3) begin + always_comb + case (FmtM) + `FMT: begin + Nf = (`NE+2)'(`NF); + NfPlus1 = (`NE+2)'(`NF+1); + end + `FMT1: begin + Nf = (`NE+2)'(`NF1); + NfPlus1 = (`NE+2)'(`NF1+1); + end + `FMT2: begin + Nf = (`NE+2)'(`NF2); + NfPlus1 = (`NE+2)'(`NF2+1); + end + default: begin + Nf = 1'bx; + NfPlus1 = 1'bx; + end + endcase + end else if (`FPSIZES == 4) begin + always_comb + case (FmtM) + 2'h3: begin + Nf = (`NE+2)'(`Q_NF); + NfPlus1 = (`NE+2)'(`Q_NF+1); + end + 2'h1: begin + Nf = (`NE+2)'(`D_NF); + NfPlus1 = (`NE+2)'(`D_NF+1); + end + 2'h0: begin + Nf = (`NE+2)'(`S_NF); + NfPlus1 = (`NE+2)'(`S_NF+1); + end + 2'h2: begin + Nf = (`NE+2)'(`H_NF); + NfPlus1 = (`NE+2)'(`H_NF+1); + end + endcase + end // if the result is denormalized // 00000000x.xxxxxx... Exp = DivCalcExp // .00000000xxxxxxx... >> NF+1 Exp = DivCalcExp+NF+1 // .000xxxxxxxxxxxx... << DivCalcExp+NF+1 Exp = 0 // .0000xxxxxxxxxxx... >> 1 Exp = 1 // Left shift amount = DivCalcExp+NF+1-1 - assign DenormShift = (`NE+2)'(`NF)+DivCalcExpM; - // if the result is denormalized + assign DenormShift = Nf+DivCalcExpM; + // if the result is normalized // 00000000x.xxxxxx... Exp = DivCalcExp // .00000000xxxxxxx... >> NF+1 Exp = DivCalcExp+NF+1 // 00000000x.xxxxxx... << NF+1 Exp = DivCalcExp // 00000000xx.xxxxx... << 1? Exp = DivCalcExp-1 // Left shift amount = NF+1 plus 1 if normalization required - assign NormShift = (`NE+2)'(`NF+1) + {(`NE+1)'(0), ~Quot[`DIVLEN+2]}; + assign NormShift = NfPlus1 + {(`NE+1)'(0), ~Quot[`DIVLEN+2]}; // if the shift amount is negitive then dont shift (keep sticky bit) - assign DivShiftAmt = ResDenorm ? DenormShift[$clog2(`NORMSHIFTSZ)-1:0]&{$clog2(`NORMSHIFTSZ){~DenormShift[`NE+1]}} : NormShift[$clog2(`NORMSHIFTSZ)-1:0]; + assign DivShiftAmt = (ResDenorm ? 
DenormShift[$clog2(`NORMSHIFTSZ)-1:0]&{$clog2(`NORMSHIFTSZ){~DenormShift[`NE+1]}} : NormShift[$clog2(`NORMSHIFTSZ)-1:0])+{{$clog2(`NORMSHIFTSZ)-$clog2(`DIVLEN/2+3)-1{1'b0}}, EarlyTermShiftDiv2M, 1'b0}; // *** may be able to reduce shifter size assign DivShiftIn = {{`NF{1'b0}}, Quot[`DIVLEN+2:0], {`NORMSHIFTSZ-`DIVLEN-3-`NF{1'b0}}}; - // the quotent is in the range [.5,2) + // the quotient is in the range [.5,2) if there is no early termination // if the quotent < 1 and not denormal then subtract 1 to account for the normalization shift assign CorrDivExp = (ResDenorm&~DenormShift[`NE+1]) ? (`NE+2)'(0) : DivCalcExpM - {(`NE+1)'(0), ~Quot[`DIVLEN+2]}; diff --git a/pipelined/src/fpu/fpu.sv b/pipelined/src/fpu/fpu.sv index 2f43b27d4..aba1a8f48 100755 --- a/pipelined/src/fpu/fpu.sv +++ b/pipelined/src/fpu/fpu.sv @@ -127,6 +127,7 @@ module fpu ( logic [`NE+1:0] DivCalcExpM; logic DivNegStickyM; logic DivStickyM; + logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2M; // result and flag signals logic [63:0] FDivResM, FDivResW; // divide/squareroot result @@ -357,7 +358,7 @@ module fpu ( assign FpLoadM = FResSelM[1]; - postprocess postprocess(.XSgnM, .YSgnM, .ZExpM, .XManM, .YManM, .ZManM, .FrmM, .FmtM, .ProdExpM, + postprocess postprocess(.XSgnM, .YSgnM, .ZExpM, .XManM, .YManM, .ZManM, .FrmM, .FmtM, .ProdExpM, .EarlyTermShiftDiv2M, .AddendStickyM, .KillProdM, .XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, .Quot, .ZInfM, .XNaNM, .YNaNM, .ZNaNM, .XSNaNM, .YSNaNM, .ZSNaNM, .SumM, .DivCalcExpM, .NegSumM, .InvZM, .ZDenormM, .ZSgnEffM, .PSgnM, .FOpCtrlM, .FmaNormCntM, .DivNegStickyM, diff --git a/pipelined/src/fpu/postprocess.sv b/pipelined/src/fpu/postprocess.sv index 6d80f661d..217e3f586 100644 --- a/pipelined/src/fpu/postprocess.sv +++ b/pipelined/src/fpu/postprocess.sv @@ -49,6 +49,7 @@ module postprocess( input logic ZSgnEffM, // the modified Z sign - depends on instruction input logic PSgnM, // the product's sign input logic [2:0] FOpCtrlM, // choose which opperation (look below
for values) + input logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2M, input logic [$clog2(3*`NF+7)-1:0] FmaNormCntM, // the normalization shift count input logic [`NE:0] CvtCalcExpM, // the calculated expoent input logic [`NE+1:0] DivCalcExpM, // the calculated expoent @@ -143,7 +144,7 @@ module postprocess( .XZeroM, .IntToFp, .OutFmt, .CvtResUf, .CvtShiftIn); fmashiftcalc fmashiftcalc(.SumM, .ZExpM, .ProdExpM, .FmaNormCntM, .FmtM, .KillProdM, .ConvNormSumExp, .ZDenormM, .SumZero, .PreResultDenorm, .FmaShiftAmt, .FmaShiftIn); - divshiftcalc divshiftcalc(.Quot, .DivCalcExpM, .CorrDivExp, .DivShiftAmt, .DivShiftIn); + divshiftcalc divshiftcalc(.FmtM, .Quot, .DivCalcExpM, .EarlyTermShiftDiv2M, .CorrDivExp, .DivShiftAmt, .DivShiftIn); always_comb case(PostProcSelM) diff --git a/pipelined/src/fpu/resultselect.sv b/pipelined/src/fpu/resultselect.sv index a0bf86d85..d6d15e46f 100644 --- a/pipelined/src/fpu/resultselect.sv +++ b/pipelined/src/fpu/resultselect.sv @@ -211,7 +211,7 @@ module resultselect( // - do so if the res underflows, is zero (the exp doesnt calculate correctly). or the integer input is 0 // - dont set to zero if fp input is zero but not using the fp input // - dont set to zero if int input is zero but not using the int input - assign KillRes = CvtOp ? (CvtResUf|(XZeroM&~IntToFp)|(IntZeroM&IntToFp)) : FullResExp[`NE+1] | (YInfM&DivOp&~XInfM);//Underflow & ~ResDenorm & (ResExp!=1); + assign KillRes = CvtOp ? 
(CvtResUf|(XZeroM&~IntToFp)|(IntZeroM&IntToFp)) : FullResExp[`NE+1] | (((YInfM&~XInfM)|XZeroM)&DivOp);//Underflow & ~ResDenorm & (ResExp!=1); assign SelOfRes = Overflow|DivByZero|(InfIn&~(YInfM&DivOp)); // output infinity with result sign if divide by zero if(`IEEE754) begin diff --git a/pipelined/srt/srt-radix4.sv b/pipelined/srt/srt-radix4.sv index a49838ace..179fbf45a 100644 --- a/pipelined/srt/srt-radix4.sv +++ b/pipelined/srt/srt-radix4.sv @@ -36,11 +36,14 @@ module srtradix4 ( input logic [`NE-1:0] XExpE, YExpE, input logic [`NF:0] XManE, YManE, input logic [`XLEN-1:0] SrcA, SrcB, - input logic XZeroE, + input logic XInfE, YInfE, + input logic XZeroE, YZeroE, + input logic XNaNE, YNaNE, input logic W64, // 32-bit ints on XLEN=64 input logic Signed, // Interpret integers as signed 2's complement input logic Int, // Choose integer inputs input logic Sqrt, // perform square root, not divide + output logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2E, output logic DivDone, output logic DivStickyE, output logic DivNegStickyE, @@ -49,10 +52,9 @@ module srtradix4 ( output logic [`NE+1:0] DivCalcExpE ); - // logic qp, qz, qm; // quotient is +1, 0, or -1 logic [3:0] q; logic [`NE+1:0] DivCalcExp; - logic [`DIVLEN:0] X; + logic [`DIVLEN-1:0] X; logic [`DIVLEN-1:0] Dpreproc; logic [`DIVLEN+3:0] WS, WSA, WSN; logic [`DIVLEN+3:0] WC, WCA, WCN; @@ -68,13 +70,11 @@ module srtradix4 ( // When start is asserted, the inputs are loaded into the divider. // Otherwise, the divisor is retained and the partial remainder // is fed back for the next iteration. - // - assumed one is added here since all numbers are normlaized - // *** wait what about zero? is that specal case? can the divider handle it? // - when the start signal is asserted X and 0 are loaded into WS and WC // - otherwise load WSA into the flipflop - // *** what does N and A stand for? 
- // *** change shift amount for radix4 - mux2 #(`DIVLEN+4) wsmux({WSA[`DIVLEN+1:0], 2'b0}, {3'b000, X}, DivStart, WSN); + // - the assumed one is added to D since it's always normalized (and X/0 is a special case handled by result selection) + // - ~XZeroE is used as the assumed one of X to avoid creating a sticky bit when X is zero - all other numbers are normalized + mux2 #(`DIVLEN+4) wsmux({WSA[`DIVLEN+1:0], 2'b0}, {3'b000, ~XZeroE, X}, DivStart, WSN); flop #(`DIVLEN+4) wsflop(clk, WSN, WS); mux2 #(`DIVLEN+4) wcmux({WCA[`DIVLEN+1:0], 2'b0}, {`DIVLEN+4{1'b0}}, DivStart, WCN); flop #(`DIVLEN+4) wcflop(clk, WCN, WC); @@ -117,12 +117,11 @@ module srtradix4 ( //*** change for radix 4 otfc4 otfc4(.clk, .DivStart, .q, .Quot); - assign DivStickyE = (WS+WC) != 0; //replace with early termination - assign DivNegStickyE = $signed(WS+WC) < 0; //replace with early termination expcalc expcalc(.XExpE, .YExpE, .XZeroE, .XZeroCnt, .YZeroCnt, .DivCalcExp); - divcounter divcounter(clk, DivStart, DivDone); + earlytermination earlytermination(.clk, .WC, .WS, .XZeroE, .YZeroE, .XInfE, .EarlyTermShiftDiv2E, + .YInfE, .XNaNE, .YNaNE, .DivStickyE, .DivNegStickyE, .DivStart, .DivDone); endmodule @@ -130,28 +129,35 @@ endmodule // Submodules // //////////////// -///////////// -// counter // -///////////// -module divcounter(input logic clk, - input logic DivStart, - output logic DivDone); +module earlytermination( + input logic clk, + input logic [`DIVLEN+3:0] WS, WC, + input logic XInfE, YInfE, + input logic XZeroE, YZeroE, + input logic XNaNE, YNaNE, + input logic DivStart, + output logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2E, + output logic DivStickyE, + output logic DivNegStickyE, + output logic DivDone); - logic [5:0] count; - - // This block of control logic sequences the divider - // through its iterations. You may modify it if you - // build a divider which completes in fewer iterations. - // You are not responsible for the (trivial) circuit - // design of the block.
+ logic [$clog2(`DIVLEN/2+3)-1:0] Count; + logic WZero; + assign WZero = (WS+WC == 0)|XZeroE|YZeroE|XInfE|YInfE|XNaNE|YNaNE; //*** temporary + // *** rather than counting, it should be possible to stop once one of the two msbs of the quotient is 1??? + assign DivDone = (DivStickyE | WZero); + assign DivStickyE = ~|Count; + assign DivNegStickyE = $signed(WS+WC) < 0; + assign EarlyTermShiftDiv2E = Count; + // +1 for setup + // `DIVLEN/2 to get required number of bits + // +1 for possible .5 and round bit + // Count down Counter always @(posedge clk) begin - DivDone = 0; - if (count == `DIVLEN/2+1) DivDone <= #1 1; - else if (DivDone | DivStart) DivDone <= #1 0; - if (DivStart) count <= #1 0; - else count <= #1 count+1; + if (DivStart) Count <= #1 `DIVLEN/2+2; + else Count <= #1 Count-1; end endmodule @@ -237,7 +243,7 @@ module srtpreproc ( input logic Signed, // Interpret integers as signed 2's complement input logic Int, // Choose integer inputs input logic Sqrt, // perform square root, not divide - output logic [`DIVLEN:0] X, + output logic [`DIVLEN-1:0] X, output logic [`DIVLEN-1:0] Dpreproc, output logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt, output logic [$clog2(`XLEN+1)-1:0] intExp, // Quotient integer exponent @@ -245,7 +251,7 @@ module srtpreproc ( ); // logic [`XLEN-1:0] PosA, PosB; // logic [`DIVLEN-1:0] ExtraA, ExtraB, PreprocA, PreprocB, PreprocX, PreprocY; - logic [`DIVLEN:0] PreprocA, PreprocX; + logic [`DIVLEN-1:0] PreprocA, PreprocX; logic [`DIVLEN-1:0] PreprocB, PreprocY; // assign PosA = (Signed & SrcA[`XLEN - 1]) ? -SrcA : SrcA; @@ -263,7 +269,7 @@ module srtpreproc ( // assign PreprocA = ExtraA << zeroCntA; // assign PreprocB = ExtraB << (zeroCntB + 1); - assign PreprocX = {XManE<