From 437fd52bf62a6db6f3d3c1a1132303666a51c1ec Mon Sep 17 00:00:00 2001 From: David Harris Date: Tue, 20 Sep 2022 20:05:00 -0700 Subject: [PATCH] Gated sticky bit in fdiv with SpecialCase --- pipelined/config/shared/wally-shared.vh | 4 ++-- pipelined/src/fpu/fdivsqrt/fdivsqrt.sv | 5 +++-- pipelined/src/fpu/fdivsqrt/fdivsqrtfsm.sv | 4 ++-- pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv | 3 ++- pipelined/src/fpu/{ => postproc}/flags.sv | 2 ++ 5 files changed, 11 insertions(+), 7 deletions(-) rename pipelined/src/fpu/{ => postproc}/flags.sv (97%) diff --git a/pipelined/config/shared/wally-shared.vh b/pipelined/config/shared/wally-shared.vh index 78481603..cd5bb05e 100644 --- a/pipelined/config/shared/wally-shared.vh +++ b/pipelined/config/shared/wally-shared.vh @@ -102,8 +102,8 @@ `define CORRSHIFTSZ ((`DIVRESLEN+`NF) > (3*`NF+8) ? (`DIVRESLEN+`NF) : (3*`NF+6)) // division constants -`define RADIX 32'h2 -`define DIVCOPIES 32'h1 +`define RADIX 32'h4 +`define DIVCOPIES 32'h3 `define DIVLEN ((`NF < `XLEN) ? (`XLEN) : (`NF + 3)) // `define DIVN (`NF < `XLEN ? `XLEN : `NF+1) // length of input `define DIVN (`NF < `XLEN ? 
`XLEN : `NF+3) // length of input diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv index 5b740f5a..49ecb5a1 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv @@ -61,6 +61,7 @@ module fdivsqrt( logic [`DIVb+1:0] FirstC; logic Firstun; logic WZero; + logic SpecialCase; fdivsqrtpreproc fdivsqrtpreproc( .clk, .DivStart(DivStartE), .Xm(XmE), .QeM, .Xe(XeE), .Fmt(FmtE), .Ye(YeE), @@ -69,11 +70,11 @@ module fdivsqrt( .clk, .reset, .FmtE, .XsE, .SqrtE, .DivBusy, .DivStart(DivStartE),.StallE, .StallM, .DivDone, .XZeroE, .YZeroE, .XNaNE, .YNaNE, - .XInfE, .YInfE, .WZero); + .XInfE, .YInfE, .WZero, .SpecialCase); fdivsqrtiter fdivsqrtiter( .clk, .Firstun, .D, .FirstU, .FirstUM, .FirstC, .SqrtE, .SqrtM, .X,.Dpreproc, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN, .DivStart(DivStartE), .Xe(XeE), .Ye(YeE), .XZeroE, .YZeroE, .DivBusy); - fdivsqrtpostproc fdivsqrtpostproc(.WS, .WC, .D, .FirstU, .FirstUM, .FirstC, .Firstun, .SqrtM, .QmM, .WZero, .DivSM); + fdivsqrtpostproc fdivsqrtpostproc(.WS, .WC, .D, .FirstU, .FirstUM, .FirstC, .Firstun, .SqrtM, .SpecialCase, .QmM, .WZero, .DivSM); endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtfsm.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtfsm.sv index cc1294f2..0eef83eb 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtfsm.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtfsm.sv @@ -44,14 +44,14 @@ module fdivsqrtfsm( input logic StallM, input logic WZero, output logic DivDone, - output logic DivBusy + output logic DivBusy, + output logic SpecialCase ); typedef enum logic [1:0] {IDLE, BUSY, DONE} statetype; statetype state; logic [`DURLEN-1:0] step; - logic SpecialCase; logic [`DURLEN-1:0] cycles; // terminate immediately on special cases diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv index 795879cb..564b0766 100644 --- 
a/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv @@ -37,6 +37,7 @@ module fdivsqrtpostproc( input logic [`DIVb+1:0] FirstC, input logic Firstun, input logic SqrtM, + input logic SpecialCase, output logic [`DIVb:0] QmM, output logic WZero, output logic DivSM @@ -64,7 +65,7 @@ module fdivsqrtpostproc( end else begin assign WZero = weq0; end - assign DivSM = ~WZero; + assign DivSM = ~WZero & ~(SpecialCase & SqrtM); // *** unsure why SpecialCase has to be gated by SqrtM, but otherwise fails regression on divide // Determine if sticky bit is negative assign W = WC+WS; diff --git a/pipelined/src/fpu/flags.sv b/pipelined/src/fpu/postproc/flags.sv similarity index 97% rename from pipelined/src/fpu/flags.sv rename to pipelined/src/fpu/postproc/flags.sv index 952e0c02..c56bc651 100644 --- a/pipelined/src/fpu/flags.sv +++ b/pipelined/src/fpu/postproc/flags.sv @@ -128,10 +128,12 @@ module flags( // | | | | | and if the input isnt infinity or NaN // | | | | | | assign Underflow = ((FullRe[`NE+1] | (FullRe == 0) | ((FullRe == 1) & (Me == 0) & ~(UfPlus1&G)))&(R|S|G))&~(InfIn|NaNIn|DivByZero|Invalid); + //assign Underflow = ((FullRe[`NE+1] | (FullRe == 0) | ((FullRe == 1) & (Me == 0) & ~(UfPlus1&G)))&(R|S|G))&~(InfIn|NaNIn|DivByZero|Invalid|XZero); // Set Inexact flag if the res is diffrent from what would be outputed given infinite precision // - Don't set the underflow flag if an underflowed res isn't outputed assign FpInexact = (S|G|Overflow|R)&~(InfIn|NaNIn|DivByZero|Invalid); + //assign FpInexact = (S|G|Overflow|R)&~(InfIn|NaNIn|DivByZero|Invalid|XZero); // if the res is too small to be represented and not 0 // | and if the res is not invalid (outside the integer bounds)