diff --git a/pipelined/config/shared/wally-shared.vh b/pipelined/config/shared/wally-shared.vh index 14924fcc..cd5bb05e 100644 --- a/pipelined/config/shared/wally-shared.vh +++ b/pipelined/config/shared/wally-shared.vh @@ -102,7 +102,7 @@ `define CORRSHIFTSZ ((`DIVRESLEN+`NF) > (3*`NF+8) ? (`DIVRESLEN+`NF) : (3*`NF+6)) // division constants -`define RADIX 32'h2 +`define RADIX 32'h4 `define DIVCOPIES 32'h3 `define DIVLEN ((`NF < `XLEN) ? (`XLEN) : (`NF + 3)) // `define DIVN (`NF < `XLEN ? `XLEN : `NF+1) // length of input diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv index 5b740f5a..43f7687c 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv @@ -61,6 +61,7 @@ module fdivsqrt( logic [`DIVb+1:0] FirstC; logic Firstun; logic WZero; + logic SpecialCaseM; fdivsqrtpreproc fdivsqrtpreproc( .clk, .DivStart(DivStartE), .Xm(XmE), .QeM, .Xe(XeE), .Fmt(FmtE), .Ye(YeE), @@ -69,11 +70,11 @@ module fdivsqrt( .clk, .reset, .FmtE, .XsE, .SqrtE, .DivBusy, .DivStart(DivStartE),.StallE, .StallM, .DivDone, .XZeroE, .YZeroE, .XNaNE, .YNaNE, - .XInfE, .YInfE, .WZero); + .XInfE, .YInfE, .WZero, .SpecialCaseM); fdivsqrtiter fdivsqrtiter( .clk, .Firstun, .D, .FirstU, .FirstUM, .FirstC, .SqrtE, .SqrtM, .X,.Dpreproc, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN, .DivStart(DivStartE), .Xe(XeE), .Ye(YeE), .XZeroE, .YZeroE, .DivBusy); - fdivsqrtpostproc fdivsqrtpostproc(.WS, .WC, .D, .FirstU, .FirstUM, .FirstC, .Firstun, .SqrtM, .QmM, .WZero, .DivSM); + fdivsqrtpostproc fdivsqrtpostproc(.WS, .WC, .D, .FirstU, .FirstUM, .FirstC, .Firstun, .SqrtM, .SpecialCaseM, .QmM, .WZero, .DivSM); endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtfgen4.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtfgen4.sv index 08b2dfab..9b0427aa 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtfgen4.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtfgen4.sv @@ -31,7 +31,7 @@ `include "wally-config.vh" module fdivsqrtfgen4 ( - input logic [3:0] u, + input logic [3:0] udigit, input logic [`DIVb+3:0] C, U, UM, output logic [`DIVb+3:0] F ); @@ -47,9 +47,9 @@ module fdivsqrtfgen4 ( // Choose which adder input will be used always_comb - if (u[3]) F = F2; - else if (u[2]) F = F1; - else if (U[1]) F = FN1; - else if (u[0]) F = FN2; + if (udigit[3]) F = F2; + else if (udigit[2]) F = F1; + else if (udigit[1]) F = FN1; + else if (udigit[0]) F = FN2; else F = F0; endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtfsm.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtfsm.sv index cc1294f2..db11dcef 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtfsm.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtfsm.sv @@ -44,18 +44,20 @@ module fdivsqrtfsm( input logic StallM, input logic WZero, output logic DivDone, - output logic DivBusy + output logic DivBusy, + output logic SpecialCaseM ); typedef enum logic [1:0] {IDLE, BUSY, DONE} statetype; statetype state; logic [`DURLEN-1:0] step; - logic SpecialCase; logic [`DURLEN-1:0] cycles; + logic SpecialCaseE; // terminate immediately on special cases - assign SpecialCase = XZeroE | (YZeroE&~SqrtE) | XInfE | YInfE | XNaNE | YNaNE | (XsE&SqrtE); + assign SpecialCaseE = XZeroE | (YZeroE&~SqrtE) | XInfE | YInfE | XNaNE | YNaNE | (XsE&SqrtE); + flopenr #(1) SpecialCaseReg(clk, reset, ~StallM, SpecialCaseE, SpecialCaseM); // save SpecialCase for checking in fdivsqrtpostproc // DIVN = `NF+3 // NS = NF + 1 @@ -103,7 +105,7 @@ module fdivsqrtfsm( step <= cycles; // *** this should be adjusted to depend on the precision; sqrt should use one fewer step becasue firststep=1 // $display("Setting Nf = %d fbits %d cycles = %d FmtE %d FPSIZES = %d Q_NF = %d num = %d denom = %d\n", Nf, fbits, cycles, FmtE, `FPSIZES, `Q_NF, // (fbits +(`LOGR*`DIVCOPIES)-1), (`LOGR*`DIVCOPIES)); - if (SpecialCase) state <= #1 DONE; + if (SpecialCaseE) state <= #1 DONE; else state <= #1 BUSY; end else if (DivDone) begin if (StallM) state <= #1 DONE; diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv index 795879cb..e0acd0ed 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv @@ -37,6 +37,7 @@ module fdivsqrtpostproc( input logic [`DIVb+1:0] FirstC, input logic Firstun, input logic SqrtM, + input logic SpecialCaseM, output logic [`DIVb:0] QmM, output logic WZero, output logic DivSM @@ -64,7 +65,7 @@ module fdivsqrtpostproc( end else begin assign WZero = weq0; end - assign DivSM = ~WZero; + assign DivSM = ~WZero & ~(SpecialCaseM & SqrtM); // ***unsure why SpecialCaseM has to be gated by SqrtM, but otherwise fails regression on divide // Determine if sticky bit is negative assign W = WC+WS; diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv index 9b357862..2a6f6a9e 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv @@ -61,6 +61,8 @@ module fdivsqrtpreproc ( assign SqrtX = Xe[0]^XZeroCnt[0] ? {1'b0, ~XZero, PreprocX} : {~XZero, PreprocX, 1'b0}; assign DivX = {3'b000, ~XZero, PreprocX, {`DIVb-`NF{1'b0}}}; + + // *** explain why X is shifted between radices if (`RADIX == 2) assign X = Sqrt ? {3'b111, SqrtX, {`DIVb-1-`NF{1'b0}}} : DivX; else assign X = Sqrt ? {2'b11, SqrtX, {`DIVb-1-`NF{1'b0}}, 1'b0} : DivX; assign Dpreproc = {PreprocY, {`DIVN-1-`NF{1'b0}}}; diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtqsel4.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtqsel4.sv index f0a6cae0..4379724f 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtqsel4.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtqsel4.sv @@ -35,7 +35,7 @@ module fdivsqrtqsel4 ( input logic [4:0] Smsbs, input logic [`DIVb+3:0] WS, WC, input logic Sqrt, j1, - output logic [3:0] u + output logic [3:0] udigit ); logic [6:0] Wmsbs; logic [7:0] PreWmsbs; @@ -107,6 +107,6 @@ module fdivsqrtqsel4 ( else if (Smsbs == 5'b10000) A = 3'b111; else A = Smsbs[2:0]; end else A = Dmsbs; - assign u = USel4[{A,Wmsbs}]; + assign udigit = USel4[{A,Wmsbs}]; endmodule diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtstage4.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtstage4.sv index 9fa655c3..e463762a 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtstage4.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtstage4.sv @@ -46,7 +46,7 @@ module fdivsqrtstage4 ( /* verilator lint_on UNOPTFLAT */ logic [`DIVb+3:0] Dsel; - logic [3:0] u; + logic [3:0] udigit; logic [`DIVb+3:0] F; logic [`DIVb+3:0] AddIn; logic [4:0] Smsbs; @@ -61,11 +61,11 @@ module fdivsqrtstage4 ( // 0010 = -1 // 0001 = -2 assign Smsbs = U[`DIVb:`DIVb-4]; - fdivsqrtqsel4 qsel4(.D, .Smsbs, .WS, .WC, .Sqrt(SqrtM), .j1, .u); - fdivsqrtfgen4 fgen4(.u, .C({2'b11, CNext}), .U({3'b000, U}), .UM({3'b000, UM}), .F); + fdivsqrtqsel4 qsel4(.D, .Smsbs, .WS, .WC, .Sqrt(SqrtM), .j1, .udigit); + fdivsqrtfgen4 fgen4(.udigit, .C({2'b11, CNext}), .U({3'b000, U}), .UM({3'b000, UM}), .F); always_comb - case (u) + case (udigit) 4'b1000: Dsel = DBar2; 4'b0100: Dsel = DBar; 4'b0000: Dsel = '0; @@ -77,10 +77,10 @@ module fdivsqrtstage4 ( // Partial Product Generation // WSA, WCA = WS + WC - qD assign AddIn = SqrtM ? F : Dsel; - assign CarryIn = ~SqrtM & (u[3] | u[2]); // +1 for 2's complement of -D and -2D + assign CarryIn = ~SqrtM & (udigit[3] | udigit[2]); // +1 for 2's complement of -D and -2D csa #(`DIVb+4) csa(WS, WC, AddIn, CarryIn, WSA, WCA); - fdivsqrtuotfc4 fdivsqrtuotfc4(.u, .Sqrt(SqrtM), .C(CNext[`DIVb:0]), .U, .UM, .UNext, .UMNext); + fdivsqrtuotfc4 fdivsqrtuotfc4(.udigit, .Sqrt(SqrtM), .C(CNext[`DIVb:0]), .U, .UM, .UNext, .UMNext); assign un = 0; // unused for radix 4 endmodule diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtuotfc4.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtuotfc4.sv index c3c64bbb..d0524ac8 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtuotfc4.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtuotfc4.sv @@ -31,7 +31,7 @@ `include "wally-config.vh" module fdivsqrtuotfc4( - input logic [3:0] u, + input logic [3:0] udigit, input logic Sqrt, input logic [`DIVb:0] U, UM, input logic [`DIVb:0] C, @@ -47,19 +47,19 @@ module fdivsqrtuotfc4( assign K3 = (C & ~(C << 2)); // 3K always_comb begin - if (u[3]) begin + if (udigit[3]) begin UNext = U | K2; UMNext = U | K1; - end else if (u[2]) begin + end else if (udigit[2]) begin UNext = U | K1; UMNext = U; - end else if (u[1]) begin + end else if (udigit[1]) begin UNext = UM | K3; UMNext = UM | K2; - end else if (u[0]) begin + end else if (udigit[0]) begin UNext = UM | K2; UMNext = UM | K1; - end else begin // digit = 0 + end else begin // udigit = 0 UNext = U; UMNext = UM | K3; end diff --git a/pipelined/src/fpu/postproc/divshiftcalc.sv b/pipelined/src/fpu/postproc/divshiftcalc.sv index 2b1128ea..cb671a80 100644 --- a/pipelined/src/fpu/postproc/divshiftcalc.sv +++ b/pipelined/src/fpu/postproc/divshiftcalc.sv @@ -73,8 +73,10 @@ module divshiftcalc( assign DivDenormShiftAmt = DivDenormShiftPos ? DivDenormShift[`LOGNORMSHIFTSZ-1:0] : '0; assign DivShiftAmt = DivResDenorm ? DivDenormShiftAmt : NormShift; + // *** explain why radix 4 division needs a left shift by 1 + // *** can this shift be moved into the shiftcorrection logic? if (`RADIX == 4) - assign DivShiftIn = {{`NF{1'b0}}, DivQm[`DIVb-1:0], {`NORMSHIFTSZ-`DIVb+2-`NF{1'b0}}}; + assign DivShiftIn = Sqrt ? {{`NF{1'b0}}, DivQm, {`NORMSHIFTSZ-`DIVb+1-`NF{1'b0}}} : {{`NF{1'b0}}, DivQm[`DIVb-1:0], {`NORMSHIFTSZ-`DIVb+2-`NF{1'b0}}}; else assign DivShiftIn = {{`NF{1'b0}}, DivQm, {`NORMSHIFTSZ-`DIVb+1-`NF{1'b0}}}; endmodule diff --git a/pipelined/src/fpu/flags.sv b/pipelined/src/fpu/postproc/flags.sv similarity index 97% rename from pipelined/src/fpu/flags.sv rename to pipelined/src/fpu/postproc/flags.sv index 952e0c02..c56bc651 100644 --- a/pipelined/src/fpu/flags.sv +++ b/pipelined/src/fpu/postproc/flags.sv @@ -128,10 +128,12 @@ module flags( // | | | | | and if the input isnt infinity or NaN // | | | | | | assign Underflow = ((FullRe[`NE+1] | (FullRe == 0) | ((FullRe == 1) & (Me == 0) & ~(UfPlus1&G)))&(R|S|G))&~(InfIn|NaNIn|DivByZero|Invalid); + //assign Underflow = ((FullRe[`NE+1] | (FullRe == 0) | ((FullRe == 1) & (Me == 0) & ~(UfPlus1&G)))&(R|S|G))&~(InfIn|NaNIn|DivByZero|Invalid|XZero); // Set Inexact flag if the res is diffrent from what would be outputed given infinite precision // - Don't set the underflow flag if an underflowed res isn't outputed assign FpInexact = (S|G|Overflow|R)&~(InfIn|NaNIn|DivByZero|Invalid); + //assign FpInexact = (S|G|Overflow|R)&~(InfIn|NaNIn|DivByZero|Invalid|XZero); // if the res is too small to be represented and not 0 // | and if the res is not invalid (outside the integer bounds)