From fb890d621d252202c8fb57eb3c86a3f3be7aa3bd Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Wed, 20 Jul 2022 02:27:39 +0000 Subject: [PATCH] moved ctrl signal registers into fctrl, also a lot of code cleaning --- pipelined/src/fpu/divsqrt.sv | 20 +- pipelined/src/fpu/fclassify.sv | 38 +-- pipelined/src/fpu/fcmp.sv | 138 ++++++----- pipelined/src/fpu/fctrl.sv | 51 +++- pipelined/src/fpu/fcvt.sv | 12 +- pipelined/src/fpu/fhazard.sv | 28 +-- pipelined/src/fpu/fma.sv | 18 +- pipelined/src/fpu/fmashiftcalc.sv | 8 +- pipelined/src/fpu/fpu.sv | 336 +++++++++++++-------------- pipelined/src/fpu/fsgninj.sv | 45 ++-- pipelined/src/fpu/otfc.sv | 2 +- pipelined/src/fpu/postprocess.sv | 20 +- pipelined/src/fpu/qsel.sv | 2 +- pipelined/src/fpu/shiftcorrection.sv | 6 +- pipelined/src/fpu/srt.sv | 26 +-- pipelined/src/fpu/srtfsm.sv | 6 +- pipelined/src/fpu/unpack.sv | 45 ++-- pipelined/src/fpu/unpackinput.sv | 38 +-- 18 files changed, 439 insertions(+), 400 deletions(-) diff --git a/pipelined/src/fpu/divsqrt.sv b/pipelined/src/fpu/divsqrt.sv index ffc60026..a2f0ba8e 100644 --- a/pipelined/src/fpu/divsqrt.sv +++ b/pipelined/src/fpu/divsqrt.sv @@ -34,20 +34,20 @@ module divsqrt( input logic clk, input logic reset, input logic [`FMTBITS-1:0] FmtE, - input logic [`NF:0] XManE, YManE, - input logic [`NE-1:0] XExpE, YExpE, + input logic [`NF:0] XmE, YmE, + input logic [`NE-1:0] XeE, YeE, input logic XInfE, YInfE, input logic XZeroE, YZeroE, input logic XNaNE, YNaNE, input logic DivStartE, input logic StallM, - input logic StallE, - output logic DivStickyM, + input logic StallE, + output logic DivSM, output logic DivBusy, output logic DivDone, - output logic [`NE+1:0] DivCalcExpM, + output logic [`NE+1:0] QeM, output logic [`DURLEN-1:0] EarlyTermShiftM, - output logic [`QLEN-1-(`RADIX/4):0] QuotM + output logic [`QLEN-1-(`RADIX/4):0] QmM // output logic [`XLEN-1:0] RemM, ); @@ -60,10 +60,10 @@ module divsqrt( logic [`DURLEN-1:0] Dur; logic NegSticky; - srtpreproc 
srtpreproc(.Xm(XManE), .Dur, .Ym(YManE), .X,.Dpreproc, .XZeroCnt, .YZeroCnt); + srtpreproc srtpreproc(.Xm(XmE), .Dur, .Ym(YmE), .X,.Dpreproc, .XZeroCnt, .YZeroCnt); - srtfsm srtfsm(.reset, .NextWSN, .NextWCN, .WS, .WC, .Dur, .DivBusy, .clk, .DivStart(DivStartE),.StallE, .StallM, .DivDone, .XZeroE, .YZeroE, .DivStickyE(DivStickyM), .XNaNE, .YNaNE, + srtfsm srtfsm(.reset, .NextWSN, .NextWCN, .WS, .WC, .Dur, .DivBusy, .clk, .DivStart(DivStartE),.StallE, .StallM, .DivDone, .XZeroE, .YZeroE, .DivSE(DivSM), .XNaNE, .YNaNE, .StickyWSA, .XInfE, .YInfE, .NegSticky(NegSticky), .EarlyTermShiftE(EarlyTermShiftM)); - srt srt(.clk, .FmtE, .X,.Dpreproc, .NegSticky, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN, .DivStart(DivStartE), .Xe(XExpE), .Ye(YExpE), .XZeroE, .YZeroE, - .StickyWSA, .DivBusy, .Quot(QuotM), .Rem(), .DivCalcExpM); + srt srt(.clk, .FmtE, .X,.Dpreproc, .NegSticky, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN, .DivStart(DivStartE), .Xe(XeE), .Ye(YeE), .XZeroE, .YZeroE, + .StickyWSA, .DivBusy, .Qm(QmM), .Rem(), .QeM); endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/fclassify.sv b/pipelined/src/fpu/fclassify.sv index 6c7ab451..6aaec00a 100644 --- a/pipelined/src/fpu/fclassify.sv +++ b/pipelined/src/fpu/fclassify.sv @@ -29,29 +29,29 @@ `include "wally-config.vh" module fclassify ( - input logic XSgnE, // sign bit - input logic XNaNE, // is NaN - input logic XSNaNE, // is signaling NaN - input logic XDenormE, // is denormal - input logic XZeroE, // is zero - input logic XInfE, // is infinity - output logic [`XLEN-1:0] ClassResE // classify result - ); + input logic Xs, // sign bit + input logic XNaN, // is NaN + input logic XSNaN, // is signaling NaN + input logic XDenorm,// is denormal + input logic XZero, // is zero + input logic XInf, // is infinity + output logic [`XLEN-1:0] ClassRes// classify result +); logic PInf, PZero, PNorm, PDenorm; logic NInf, NZero, NNorm, NDenorm; - logic XNormE; + 
logic XNorm; // determine the sub categories - assign XNormE = ~(XNaNE | XInfE | XDenormE | XZeroE); - assign PInf = ~XSgnE&XInfE; - assign NInf = XSgnE&XInfE; - assign PNorm = ~XSgnE&XNormE; - assign NNorm = XSgnE&XNormE; - assign PDenorm = ~XSgnE&XDenormE; - assign NDenorm = XSgnE&XDenormE; - assign PZero = ~XSgnE&XZeroE; - assign NZero = XSgnE&XZeroE; + assign XNorm= ~(XNaN | XInf| XDenorm| XZero); + assign PInf = ~Xs&XInf; + assign NInf = Xs&XInf; + assign PNorm = ~Xs&XNorm; + assign NNorm = Xs&XNorm; + assign PDenorm = ~Xs&XDenorm; + assign NDenorm = Xs&XDenorm; + assign PZero = ~Xs&XZero; + assign NZero = Xs&XZero; // determine sub category and combine into the result // bit 0 - -Inf @@ -64,6 +64,6 @@ module fclassify ( // bit 7 - +Inf // bit 8 - signaling NaN // bit 9 - quiet NaN - assign ClassResE = {{`XLEN-10{1'b0}}, XNaNE&~XSNaNE, XSNaNE, PInf, PNorm, PDenorm, PZero, NZero, NDenorm, NNorm, NInf}; + assign ClassRes = {{`XLEN-10{1'b0}}, XNaN&~XSNaN, XSNaN, PInf, PNorm, PDenorm, PZero, NZero, NDenorm, NNorm, NInf}; endmodule diff --git a/pipelined/src/fpu/fcmp.sv b/pipelined/src/fpu/fcmp.sv index 9c675784..48ff536f 100755 --- a/pipelined/src/fpu/fcmp.sv +++ b/pipelined/src/fpu/fcmp.sv @@ -27,9 +27,10 @@ // TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE // OR OTHER DEALINGS IN THE SOFTWARE. 
//////////////////////////////////////////////////////////////////////////////////////////////// + `include "wally-config.vh" -// FOpCtrlE values +// OpCtrl values // 110 min // 101 max // 010 equal @@ -37,36 +38,32 @@ // 011 less than or equal module fcmp ( - input logic [`FMTBITS-1:0] FmtE, // precision 1 = double 0 = single - input logic [2:0] FOpCtrlE, // see above table - input logic XSgnE, YSgnE, // input signs - input logic [`NE-1:0] XExpE, YExpE, // input exponents - input logic [`NF:0] XManE, YManE, // input mantissa - input logic XZeroE, YZeroE, // is zero - input logic XNaNE, YNaNE, // is NaN - input logic XSNaNE, YSNaNE, // is signaling NaN - input logic [`FLEN-1:0] FSrcXE, FSrcYE, // original, non-converted to double, inputs - output logic CmpNVE, // invalid flag - output logic [`FLEN-1:0] CmpFpResE, // compare resilt - output logic [`XLEN-1:0] CmpIntResE // compare resilt + input logic [`FMTBITS-1:0] Fmt, // format of fp number + input logic [2:0] OpCtrl, // see above table + input logic Xs, Ys, // input signs + input logic [`NE-1:0] Xe, Ye, // input exponents + input logic [`NF:0] Xm, Ym, // input mantissa + input logic XZero, YZero, // is zero + input logic XNaN, YNaN, // is NaN + input logic XSNaN, YSNaN, // is signaling NaN + input logic [`FLEN-1:0] X, Y, // original inputs (before unpacker) + output logic CmpNV, // invalid flag + output logic [`FLEN-1:0] CmpFpRes, // compare floating-point result + output logic [`XLEN-1:0] CmpIntRes // compare integer result ); - logic LTabs, LT, EQ; // is X < or > or = Y - logic [`FLEN-1:0] NaNRes; - logic BothZero, EitherNaN, EitherSNaN; + logic LTabs, LT, EQ; // is X < or > or = Y + logic [`FLEN-1:0] NaNRes; // NaN result + logic BothZero; // are both inputs zero + logic EitherNaN, EitherSNaN; // are either input a (signaling) NaN - assign LTabs= {1'b0, XExpE, XManE} < {1'b0, YExpE, YManE}; // unsigned comparison, treating FP as integers - assign LT = (XSgnE & ~YSgnE) | (XSgnE & YSgnE & ~LTabs & ~EQ) | (~XSgnE 
& ~YSgnE & LTabs); - // assign LT = {~XSgnE, XExpE, XManE[`NF-1:0]} < {~YSgnE, YExpE, YManE[`NF-1:0]}; // *** James look at whether we can simplify to this, but it fails regression + assign LTabs= {1'b0, Xe, Xm} < {1'b0, Ye, Ym}; // unsigned comparison, treating FP as integers + assign LT = (Xs & ~Ys) | (Xs & Ys & ~LTabs & ~EQ) | (~Xs & ~Ys & LTabs); // signed comparison + assign EQ = (X == Y); - //assign LT = $signed({XSgnE, XExpE, XManE[`NF-1:0]}) < $signed({YSgnE, YExpE, YManE[`NF-1:0]}); - //assign LT = XInt < YInt; -// assign LT = XSgnE^YSgnE ? XSgnE : XExpE==YExpE ? ((XManE Y + else // MIN + if(XNaN) + if(YNaN) CmpFpRes = NaNRes; // X = NaN Y = NaN + else CmpFpRes = Y; // X = NaN Y != NaN + else + if(YNaN) CmpFpRes = X; // X != NaN Y = NaN + else // X,Y != NaN + if(LT) CmpFpRes = X; // X < Y + else CmpFpRes = Y; // X > Y + + // LT/LE/EQ + // - -0 = 0 + // - inf = inf and -inf = -inf + // - return 0 if comparison with NaN (unordered) + assign CmpIntRes = {(`XLEN-1)'(0), (((EQ|BothZero)&OpCtrl[1])|(LT&OpCtrl[0]&~BothZero))&~EitherNaN}; endmodule diff --git a/pipelined/src/fpu/fctrl.sv b/pipelined/src/fpu/fctrl.sv index 5c553e86..85047248 100755 --- a/pipelined/src/fpu/fctrl.sv +++ b/pipelined/src/fpu/fctrl.sv @@ -29,25 +29,41 @@ `include "wally-config.vh" module fctrl ( + input logic clk, + input logic reset, + input logic StallE, StallM, StallW, // stall signals + input logic FlushE, FlushM, FlushW, // flush signals + input logic [31:0] InstrD, input logic [6:0] Funct7D, // bits 31:25 of instruction - may contain percision input logic [6:0] OpD, // bits 6:0 of instruction input logic [4:0] Rs2D, // bits 24:20 of instruction input logic [2:0] Funct3D, // bits 14:12 of instruction - may contain rounding mode input logic [2:0] FRM_REGW, // rounding mode from CSR input logic [1:0] STATUS_FS, // is FPU enabled? 
+ input logic FDivBusyE, // is the divider busy output logic IllegalFPUInstrD, // Is the instruction an illegal fpu instruction - output logic FRegWriteD, // FP register write enable - output logic FDivStartD, // Start division or squareroot - output logic [1:0] FResSelD, // select result to be written to fp register - output logic [2:0] FOpCtrlD, // chooses which opperation to do - specifics shown at bottom of module and in each unit - output logic [1:0] PostProcSelD, - output logic [`FMTBITS-1:0] FmtD, // precision - single-0 double-1 - output logic [2:0] FrmD, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude - output logic FWriteIntD // is the result written to the integer register + output logic FRegWriteM, FRegWriteW, // FP register write enable + output logic [2:0] FrmM, // FP rounding mode + output logic [`FMTBITS-1:0] FmtE, FmtM, // FP format + output logic DivStartE, // Start division or squareroot + output logic FWriteIntE, FWriteIntM, // Write to integer register + output logic [2:0] OpCtrlE, OpCtrlM, // Select which operation to do in each component + output logic [1:0] FResSelE, FResSelM, FResSelW, // Select one of the results that finish in the memory stage + output logic [1:0] PostProcSelE, PostProcSelM, // select result in the post processing unit + output logic [4:0] Adr1E, Adr2E, Adr3E // addresses of each input ); `define FCTRLW 11 logic [`FCTRLW-1:0] ControlsD; + logic FRegWriteD; // FP register write enable + logic DivStartD; // Start division or squareroot + logic FWriteIntD; // integer register write enable + logic FRegWriteE; // FP register write enable + logic [2:0] OpCtrlD; // Select which operation to do in each component + logic [1:0] PostProcSelD; // select result in the post processing unit + logic [1:0] FResSelD; // Select one of the results that finish in the memory stage + logic [2:0] FrmD, FrmE; // FP rounding mode + logic
[`FMTBITS-1:0] FmtD; // FP format //*** will putting x for don't cares reduce area in synthisis??? // FPU Instruction Decoder always_comb @@ -130,7 +146,7 @@ module fctrl ( endcase // unswizzle control bits - assign {FRegWriteD, FWriteIntD, FResSelD, PostProcSelD, FOpCtrlD, FDivStartD, IllegalFPUInstrD} = ControlsD; + assign {FRegWriteD, FWriteIntD, FResSelD, PostProcSelD, OpCtrlD, DivStartD, IllegalFPUInstrD} = ControlsD; // rounding modes: // 000 - round to nearest, ties to even @@ -168,7 +184,7 @@ module fctrl ( // 10 fma // Other Sel: -// Ctrl signal = {FOpCtrl[2], &FOpctrl[1:0]} +// Ctrl signal = {OpCtrl[2], &OpCtrl[1:0]} // 000 - sign 00 // 001 - negate sign 00 // 010 - xor sign 00 @@ -205,5 +221,20 @@ module fctrl ( // 01 - negate sign // 10 - xor sign + // D/E pipeline register + flopenrc #(12+`FMTBITS) DECtrlReg3(clk, reset, FlushE, ~StallE, + {FRegWriteD, PostProcSelD, FResSelD, FrmD, FmtD, OpCtrlD, FWriteIntD}, + {FRegWriteE, PostProcSelE, FResSelE, FrmE, FmtE, OpCtrlE, FWriteIntE}); + flopenrc #(15) DEAdrReg(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]}, + {Adr1E, Adr2E, Adr3E}); + flopenrc #(1) DEDivStartReg(clk, reset, FlushE, ~StallE|FDivBusyE, DivStartD, DivStartE); + // E/M pipeline register + flopenrc #(12+int'(`FMTBITS)) EMCtrlReg (clk, reset, FlushM, ~StallM, + {FRegWriteE, FResSelE, PostProcSelE, FrmE, FmtE, OpCtrlE, FWriteIntE}, + {FRegWriteM, FResSelM, PostProcSelM, FrmM, FmtM, OpCtrlM, FWriteIntM}); + // M/W pipeline register + flopenrc #(3) MWCtrlReg(clk, reset, FlushW, ~StallW, + {FRegWriteM, FResSelM}, + {FRegWriteW, FResSelW}); endmodule diff --git a/pipelined/src/fpu/fcvt.sv b/pipelined/src/fpu/fcvt.sv index 9d7f2d62..d2967887 100644 --- a/pipelined/src/fpu/fcvt.sv +++ b/pipelined/src/fpu/fcvt.sv @@ -35,7 +35,7 @@ module fcvt ( input logic [`NE-1:0] Xe, // input's exponent input logic [`NF:0] Xm, // input's fraction input logic [`XLEN-1:0] Int, // integer input - from IEU - input logic [2:0] FOpCtrl, //
choose which opperation (look below for values) + input logic [2:0] OpCtrl, // choose which opperation (look below for values) input logic ToInt, // is fp->int (since it's writting to the integer register) input logic XZero, // is the input zero input logic XDenorm, // is the input denormalized @@ -73,17 +73,17 @@ module fcvt ( // seperate OpCtrl for code readability - assign Signed = FOpCtrl[0]; - assign Int64 = FOpCtrl[1]; - assign IntToFp = FOpCtrl[2]; + assign Signed = OpCtrl[0]; + assign Int64 = OpCtrl[1]; + assign IntToFp = OpCtrl[2]; // choose the ouptut format depending on the opperation // - fp -> fp: OpCtrl contains the percision of the output // - int -> fp: Fmt contains the percision of the output if (`FPSIZES == 2) - assign OutFmt = IntToFp ? Fmt : (FOpCtrl[1:0] == `FMT); + assign OutFmt = IntToFp ? Fmt : (OpCtrl[1:0] == `FMT); else if (`FPSIZES == 3 | `FPSIZES == 4) - assign OutFmt = IntToFp ? Fmt : FOpCtrl[1:0]; + assign OutFmt = IntToFp ? Fmt : OpCtrl[1:0]; /////////////////////////////////////////////////////////////////////////// diff --git a/pipelined/src/fpu/fhazard.sv b/pipelined/src/fpu/fhazard.sv index ca31d904..36a0ff82 100644 --- a/pipelined/src/fpu/fhazard.sv +++ b/pipelined/src/fpu/fhazard.sv @@ -31,20 +31,20 @@ `include "wally-config.vh" module fhazard( - input logic [4:0] Adr1E, Adr2E, Adr3E, // read data adresses - input logic FRegWriteM, FRegWriteW, // is the fp register being written to - input logic [4:0] RdM, RdW, // the adress being written to - input logic [1:0] FResSelM, // the result being selected + input logic [4:0] Adr1E, Adr2E, Adr3E, // read data adresses + input logic FRegWriteM, FRegWriteW, // is the fp register being written to + input logic [4:0] RdM, RdW, // the adress being written to + input logic [1:0] FResSelM, // the result being selected output logic FStallD, // stall the decode stage - output logic [1:0] FForwardXE, FForwardYE, FForwardZE // select a forwarded value + output logic [1:0] ForwardXE, ForwardYE, 
ForwardZE // select a forwarded value ); always_comb begin // set defaults - FForwardXE = 2'b00; // choose FRD1E - FForwardYE = 2'b00; // choose FRD2E - FForwardZE = 2'b00; // choose FRD3E + ForwardXE = 2'b00; // choose FRD1E + ForwardYE = 2'b00; // choose FRD2E + ForwardZE = 2'b00; // choose FRD3E FStallD = 0; //*** this hazard unit is waiting for all three inputs, change so that if an input isnt used then don't wait @@ -52,28 +52,28 @@ module fhazard( // if the needed value is in the memory stage - input 1 if ((Adr1E == RdM) & FRegWriteM) // if the result will be FResM (can be taken from the memory stage) - if(FResSelM == 2'b00) FForwardXE = 2'b10; // choose FResM + if(FResSelM == 2'b00) ForwardXE = 2'b10; // choose FResM else FStallD = 1; // otherwise stall // if the needed value is in the writeback stage - else if ((Adr1E == RdW) & FRegWriteW) FForwardXE = 2'b01; // choose FPUResult64W + else if ((Adr1E == RdW) & FRegWriteW) ForwardXE = 2'b01; // choose FPUResult64W // if the needed value is in the memory stage - input 2 if ((Adr2E == RdM) & FRegWriteM) // if the result will be FResM (can be taken from the memory stage) - if(FResSelM == 2'b00) FForwardYE = 2'b10; // choose FResM + if(FResSelM == 2'b00) ForwardYE = 2'b10; // choose FResM else FStallD = 1; // otherwise stall // if the needed value is in the writeback stage - else if ((Adr2E == RdW) & FRegWriteW) FForwardYE = 2'b01; // choose FPUResult64W + else if ((Adr2E == RdW) & FRegWriteW) ForwardYE = 2'b01; // choose FPUResult64W // if the needed value is in the memory stage - input 3 if ((Adr3E == RdM) & FRegWriteM) // if the result will be FResM (can be taken from the memory stage) - if(FResSelM == 2'b00) FForwardZE = 2'b10; // choose FResM + if(FResSelM == 2'b00) ForwardZE = 2'b10; // choose FResM else FStallD = 1; // otherwise stall // if the needed value is in the writeback stage - else if ((Adr3E == RdW) & FRegWriteW) FForwardZE = 2'b01; // choose FPUResult64W + else if ((Adr3E == RdW) & FRegWriteW) 
ForwardZE = 2'b01; // choose FPUResult64W end diff --git a/pipelined/src/fpu/fma.sv b/pipelined/src/fpu/fma.sv index 3f4cc2ac..067147ee 100644 --- a/pipelined/src/fpu/fma.sv +++ b/pipelined/src/fpu/fma.sv @@ -34,7 +34,7 @@ module fma( input logic [`NE-1:0] Xe, Ye, Ze, // input's biased exponents in B(NE.0) format input logic [`NF:0] Xm, Ym, Zm, // input's significands in U(0.NF) format input logic XZero, YZero, ZZero, // is the input zero - input logic [2:0] FOpCtrl, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y) + input logic [2:0] OpCtrl, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y) input logic [`FMTBITS-1:0] Fmt, // format of the result single double half or quad output logic [`NE+1:0] Pe, // the product's exponent B(NE+2.0) format; adds 2 bits to allow for size of number and negative sign output logic ZmSticky, // sticky bit that is calculated during alignment @@ -46,7 +46,7 @@ module fma( output logic Ps, // the product's sign output logic Ss, // the sum's sign output logic [`NE+1:0] Se, - output logic [$clog2(3*`NF+7)-1:0] NCnt // normalization shift count + output logic [$clog2(3*`NF+7)-1:0] SCnt // normalization shift count ); logic [2*`NF+1:0] Pm; // the product's significand in U(2.2Nf) format @@ -72,7 +72,7 @@ module fma( // Alignment shifter /////////////////////////////////////////////////////////////////////////////// // calculate the signs and take the opperation into account - sign sign(.FOpCtrl, .Xs, .Ys, .Zs, .Ps, .As); + sign sign(.OpCtrl, .Xs, .Ys, .Zs, .Ps, .As); align align(.Ze, .Zm, .XZero, .YZero, .ZZero, .Xe, .Ye, .Am, .ZmSticky, .KillProd); @@ -85,7 +85,7 @@ module fma( add add(.Am, .Pm, .Ze, .Pe, .Ps, .As, .KillProd, .ZmSticky, .AmInv, .PmKilled, .NegSum, .InvA, .Sm, .Se, .Ss); - loa loa(.A(AmInv+{(3*`NF+6)'(0),InvA&~((ZmSticky&~KillProd))}), .P({PmKilled, 1'b0, InvA&Ps&ZmSticky&KillProd}), .NCnt); + loa 
loa(.A(AmInv+{(3*`NF+6)'(0),InvA&~((ZmSticky&~KillProd))}), .P({PmKilled, 1'b0, InvA&Ps&ZmSticky&KillProd}), .SCnt); endmodule @@ -120,7 +120,7 @@ endmodule module sign( - input logic [2:0] FOpCtrl, // opperation contol + input logic [2:0] OpCtrl, // opperation contol input logic Xs, Ys, Zs, // sign of the inputs output logic Ps, // the product's sign - takes opperation into account output logic As // aligned addend sign used in fma - takes opperation into account @@ -130,9 +130,9 @@ module sign( // Negate product's sign if FNMADD or FNMSUB // flip is negation opperation - assign Ps = Xs ^ Ys ^ (FOpCtrl[1]&~FOpCtrl[2]); + assign Ps = Xs ^ Ys ^ (OpCtrl[1]&~OpCtrl[2]); // flip if subtraction - assign As = Zs^FOpCtrl[0]; + assign As = Zs^OpCtrl[0]; endmodule @@ -275,7 +275,7 @@ endmodule module loa( // [Schmookler & Nowka, Leading zero anticipation and detection, IEEE Sym. Computer Arithmetic, 2001] input logic [3*`NF+6:0] A, // addend input logic [2*`NF+3:0] P, // product - output logic [$clog2(3*`NF+7)-1:0] NCnt // normalization shift count for the positive result + output logic [$clog2(3*`NF+7)-1:0] SCnt // normalization shift count for the positive result ); logic [3*`NF+6:0] T; @@ -300,6 +300,6 @@ module loa( // [Schmookler & Nowka, Leading zero anticipation and detection, IEE - lzc #(3*`NF+7) lzc (.num(f), .ZeroCnt(NCnt)); + lzc #(3*`NF+7) lzc (.num(f), .ZeroCnt(SCnt)); endmodule diff --git a/pipelined/src/fpu/fmashiftcalc.sv b/pipelined/src/fpu/fmashiftcalc.sv index d598efb7..7464149f 100644 --- a/pipelined/src/fpu/fmashiftcalc.sv +++ b/pipelined/src/fpu/fmashiftcalc.sv @@ -32,7 +32,7 @@ module fmashiftcalc( input logic [3*`NF+5:0] FmaSm, // the positive sum input logic [`NE-1:0] Ze, // exponent of Z input logic [`NE+1:0] FmaPe, // X exponent + Y exponent - bias - input logic [$clog2(3*`NF+7)-1:0] FmaNCnt, // normalization shift count + input logic [$clog2(3*`NF+7)-1:0] FmaSCnt, // normalization shift count input logic [`FMTBITS-1:0] Fmt, // precision 1 = 
double 0 = single input logic FmaKillProd, // is the product set to zero input logic [`NE+1:0] FmaSe, @@ -52,7 +52,7 @@ module fmashiftcalc( // Determine if the sum is zero assign FmaSZero = ~(|FmaSm); // calculate the sum's exponent - assign PreNormSumExp = FmaSe + {{`NE+2-$unsigned($clog2(3*`NF+7)){1'b1}}, ~FmaNCnt} + (`NE+2)'(`NF+4); + assign PreNormSumExp = FmaSe + {{`NE+2-$unsigned($clog2(3*`NF+7)){1'b1}}, ~FmaSCnt} + (`NE+2)'(`NF+4); //convert the sum's exponent into the proper percision if (`FPSIZES == 1) begin @@ -152,7 +152,7 @@ module fmashiftcalc( // - shift once if killing a product and the result is denormalized assign FmaShiftIn = {3'b0, FmaSm}; if (`FPSIZES == 1) - assign FmaShiftAmt = FmaPreResultDenorm ? FmaSe[$clog2(3*`NF+7)-1:0]+($clog2(3*`NF+7))'(`NF+3): FmaNCnt+1; + assign FmaShiftAmt = FmaPreResultDenorm ? FmaSe[$clog2(3*`NF+7)-1:0]+($clog2(3*`NF+7))'(`NF+3): FmaSCnt+1; else - assign FmaShiftAmt = FmaPreResultDenorm ? FmaSe[$clog2(3*`NF+7)-1:0]+($clog2(3*`NF+7))'(`NF+3)+BiasCorr[$clog2(3*`NF+7)-1:0]: FmaNCnt+1; + assign FmaShiftAmt = FmaPreResultDenorm ? FmaSe[$clog2(3*`NF+7)-1:0]+($clog2(3*`NF+7))'(`NF+3)+BiasCorr[$clog2(3*`NF+7)-1:0]: FmaSCnt+1; endmodule diff --git a/pipelined/src/fpu/fpu.sv b/pipelined/src/fpu/fpu.sv index a9c0ac24..6d9b9cf4 100755 --- a/pipelined/src/fpu/fpu.sv +++ b/pipelined/src/fpu/fpu.sv @@ -30,28 +30,28 @@ `include "wally-config.vh" module fpu ( - input logic clk, - input logic reset, - input logic [2:0] FRM_REGW, // Rounding mode from CSR - input logic [31:0] InstrD, // instruction from IFU - input logic [`FLEN-1:0] ReadDataW,// Read data from memory - input logic [`XLEN-1:0] ForwardedSrcAE, // Integer input being processed (from IEU) - input logic StallE, StallM, StallW, // stall signals from HZU - input logic FlushE, FlushM, FlushW, // flush signals from HZU - input logic [4:0] RdM, RdW, // which FP register to write to (from IEU) - input logic [1:0] STATUS_FS, // Is floating-point enabled? 
- output logic FRegWriteM, // FP register write enable - output logic FpLoadStoreM, // Fp load instruction? - output logic FStore2, - output logic FStallD, // Stall the decode stage - output logic FWriteIntE, // integer register write enables - output logic [`XLEN-1:0] FWriteDataE, // Data to be written to memory - output logic [`FLEN-1:0] FWriteDataM, // Data to be written to memory - output logic [`XLEN-1:0] FIntResM, // data to be written to integer register - output logic [`XLEN-1:0] FCvtIntResW, // data to be written to integer register - output logic [1:0] FResSelW, - output logic FDivBusyE, // Is the divide/sqrt unit busy (stall execute stage) - output logic IllegalFPUInstrD, // Is the instruction an illegal fpu instruction + input logic clk, + input logic reset, + input logic [2:0] FRM_REGW, // Rounding mode (from CSR) + input logic [31:0] InstrD, // instruction (from IFU) + input logic [`FLEN-1:0] ReadDataW, // Read data (from LSU) + input logic [`XLEN-1:0] ForwardedSrcAE, // Integer input (from IEU) + input logic StallE, StallM, StallW, // stall signals (from HZU) + input logic FlushE, FlushM, FlushW, // flush signals (from HZU) + input logic [4:0] RdM, RdW, // which FP register to write to (from IEU) + input logic [1:0] STATUS_FS, // Is floating-point enabled? (From privileged unit) + output logic FRegWriteM, // FP register write enable (to privileged unit) + output logic FpLoadStoreM, // Fp load instruction? 
(to LSU) + output logic FStore2, // store two words into memory (to LSU) + output logic FStallD, // Stall the decode stage (To HZU) + output logic FWriteIntE, // integer register write enable (to IEU) + output logic [`XLEN-1:0] FWriteDataE, // Data to be written to memory (to IEU) - only used if `XLEN >`FLEN + output logic [`FLEN-1:0] FWriteDataM, // Data to be written to memory (to IEU) - only used if `XLEN <`FLEN + output logic [`XLEN-1:0] FIntResM, // data to be written to integer register (to IEU) + output logic [`XLEN-1:0] FCvtIntResW, // convert result to be written to integer register (to IEU) + output logic [1:0] FResSelW, // final result selection (to IEU) + output logic FDivBusyE, // Is the divide/sqrt unit busy (stall execute stage) (to HZU) + output logic IllegalFPUInstrD, // Is the instruction an illegal fpu instruction (to privileged unit) output logic [4:0] SetFflagsM // FPU flags (to privileged unit) ); @@ -62,99 +62,88 @@ module fpu ( // - sets the underflow after rounding // control signals - logic FRegWriteD, FRegWriteE, FRegWriteW; // FP register write enable - logic [2:0] FrmD, FrmE, FrmM; // FP rounding mode - logic [`FMTBITS-1:0] FmtD, FmtE, FmtM, FmtW; // FP precision 0-single 1-double - logic FDivStartD, FDivStartE; // Start division or squareroot - logic FWriteIntD; // Write to integer register - logic FWriteIntM; // Write to integer register - logic [1:0] FForwardXE, FForwardYE, FForwardZE; // forwarding mux control signals - logic [2:0] FOpCtrlD, FOpCtrlE, FOpCtrlM; // Select which opperation to do in each component - logic [1:0] FResSelD, FResSelE, FResSelM; // Select one of the results that finish in the memory stage - logic [1:0] PostProcSelD, PostProcSelE, PostProcSelM; // select result in the post processing unit - logic [4:0] Adr1E, Adr2E, Adr3E; // adresses of each input + logic FRegWriteW; // FP register write enable + logic [2:0] FrmM; // FP rounding mode + logic [`FMTBITS-1:0] FmtE, FmtM; // FP precision 0-single 1-double +
logic DivStartE; // Start division or squareroot + logic FWriteIntM; // Write to integer register + logic [1:0] ForwardXE, ForwardYE, ForwardZE; // forwarding mux control signals + logic [2:0] OpCtrlE, OpCtrlM; // Select which opperation to do in each component + logic [1:0] FResSelE, FResSelM; // Select one of the results that finish in the memory stage + logic [1:0] PostProcSelE, PostProcSelM; // select result in the post processing unit + logic [4:0] Adr1E, Adr2E, Adr3E; // adresses of each input // regfile signals - logic [`FLEN-1:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage - logic [`FLEN-1:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage - logic [`FLEN-1:0] FSrcXE; // Input 1 to the various units (after forwarding) - logic [`XLEN-1:0] IntSrcXE; // Input 1 to the various units (after forwarding) - logic [`FLEN-1:0] FPreSrcYE, FSrcYE; // Input 2 to the various units (after forwarding) - logic [`FLEN-1:0] FPreSrcZE, FSrcZE; // Input 3 to the various units (after forwarding) + logic [`FLEN-1:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage + logic [`FLEN-1:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage + logic [`FLEN-1:0] XE; // Input 1 to the various units (after forwarding) + logic [`XLEN-1:0] IntSrcXE; // Input 1 to the various units (after forwarding) + logic [`FLEN-1:0] PreYE, YE; // Input 2 to the various units (after forwarding) + logic [`FLEN-1:0] PreZE, ZE; // Input 3 to the various units (after forwarding) // unpacking signals - logic XSgnE, YSgnE, ZSgnE; // input's sign - execute stage - logic XSgnM, YSgnM; // input's sign - memory stage - logic [`NE-1:0] XExpE, YExpE, ZExpE; // input's exponent - execute stage - logic [`NE-1:0] ZExpM; // input's exponent - memory stage - logic [`NF:0] XManE, YManE, ZManE; // input's fraction - execute stage - logic [`NF:0] XManM, YManM, ZManM; // input's fraction - memory stage - logic XNaNE, YNaNE, ZNaNE; // is the input a NaN - 
execute stage - logic XNaNM, YNaNM, ZNaNM; // is the input a NaN - memory stage - logic XNaNQ, YNaNQ; // is the input a NaN - divide - logic XSNaNE, YSNaNE, ZSNaNE; // is the input a signaling NaN - execute stage - logic XSNaNM, YSNaNM, ZSNaNM; // is the input a signaling NaN - memory stage - logic XDenormE, ZDenormE, ZDenormM; // is the input denormalized - logic XZeroE, YZeroE, ZZeroE; // is the input zero - execute stage - logic XZeroM, YZeroM, ZZeroM; // is the input zero - memory stage - logic XZeroQ, YZeroQ; // is the input zero - divide - logic XInfE, YInfE, ZInfE; // is the input infinity - execute stage - logic XInfM, YInfM, ZInfM; // is the input infinity - memory stage - logic XInfQ, YInfQ; // is the input infinity - divide - logic XExpMaxE; // is the exponent all ones (max value) - logic FmtQ; - logic FOpCtrlQ; + logic XsE, YsE, ZsE; // input's sign - execute stage + logic XsM, YsM; // input's sign - memory stage + logic [`NE-1:0] XeE, YeE, ZeE; // input's exponent - execute stage + logic [`NE-1:0] ZeM; // input's exponent - memory stage + logic [`NF:0] XmE, YmE, ZmE; // input's fraction - execute stage + logic [`NF:0] XmM, YmM, ZmM; // input's fraction - memory stage + logic XNaNE, YNaNE, ZNaNE; // is the input a NaN - execute stage + logic XNaNM, YNaNM, ZNaNM; // is the input a NaN - memory stage + logic XNaNQ, YNaNQ; // is the input a NaN - divide + logic XSNaNE, YSNaNE, ZSNaNE; // is the input a signaling NaN - execute stage + logic XSNaNM, YSNaNM, ZSNaNM; // is the input a signaling NaN - memory stage + logic XDenormE, ZDenormE, ZDenormM; // is the input denormalized + logic XZeroE, YZeroE, ZZeroE; // is the input zero - execute stage + logic XZeroM, YZeroM, ZZeroM; // is the input zero - memory stage + logic XInfE, YInfE, ZInfE; // is the input infinity - execute stage + logic XInfM, YInfM, ZInfM; // is the input infinity - memory stage + logic XExpMaxE; // is the exponent all ones (max value) // Fma Signals - logic [3*`NF+5:0] SumE, SumM; - logic 
[`NE+1:0] ProdExpE, ProdExpM; - logic AddendStickyE, AddendStickyM; - logic [`NE+1:0] SeE,SeM; - logic KillProdE, KillProdM; - logic InvAE, InvAM; - logic NegSumE, NegSumM; - logic ZSgnEffE, ZSgnEffM; - logic PSgnE, PSgnM; - logic SsE, SsM; - logic [$clog2(3*`NF+7)-1:0] FmaNormCntE, FmaNormCntM; + logic [3*`NF+5:0] SmE, SmM; + logic [`NE+1:0] PeE, PeM; + logic ZmStickyE, ZmStickyM; + logic [`NE+1:0] SeE,SeM; + logic KillProdE, KillProdM; + logic InvAE, InvAM; + logic NegSumE, NegSumM; + logic AsE, AsM; + logic PsE, PsM; + logic SsE, SsM; + logic [$clog2(3*`NF+7)-1:0] SCntE, SCntM; // Cvt Signals - logic [`NE:0] CvtCalcExpE, CvtCalcExpM; // the calculated expoent - logic [`LOGCVTLEN-1:0] CvtShiftAmtE, CvtShiftAmtM; // how much to shift by + logic [`NE:0] CeE, CeM; // the calculated expoent + logic [`LOGCVTLEN-1:0] CvtShiftAmtE, CvtShiftAmtM; // how much to shift by logic CvtResDenormUfE, CvtResDenormUfM;// does the result underflow or is denormalized - logic CvtResSgnE, CvtResSgnM; // the result's sign + logic CsE, CsM; // the result's sign logic IntZeroE, IntZeroM; // is the integer zero? 
- logic [`CVTLEN-1:0] CvtLzcInE, CvtLzcInM; // input to the Leading Zero Counter (priority encoder) + logic [`CVTLEN-1:0] CvtLzcInE, CvtLzcInM; // input to the Leading Zero Counter (priority encoder) //divide signals - logic [`QLEN-1-(`RADIX/4):0] QuotM; - logic [`NE+1:0] DivCalcExpE, DivCalcExpM; - logic DivStickyE, DivStickyM; - logic DivDoneM; - logic [`DURLEN-1:0] EarlyTermShiftM; + logic [`QLEN-1-(`RADIX/4):0] QmM; + logic [`NE+1:0] QeE, QeM; + logic DivSE, DivSM; + logic DivDoneM; + logic [`DURLEN-1:0] EarlyTermShiftM; // result and flag signals - logic [63:0] FDivResM, FDivResW; // divide/squareroot result - logic [4:0] FDivFlgM; // divide/squareroot flags - logic [`FLEN-1:0] ReadResW; // read result (load instruction) - logic [`XLEN-1:0] ClassResE; // classify result - logic [`XLEN-1:0] FIntResE; // classify result - logic [`FLEN-1:0] FpResM, FpResW; // classify result - logic [`FLEN-1:0] PostProcResM; // classify result - logic [4:0] PostProcFlgM; // classify result + logic [`XLEN-1:0] ClassResE; // classify result + logic [`XLEN-1:0] FIntResE; // integer result + logic [`FLEN-1:0] FpResM, FpResW; // floating-point result + logic [`FLEN-1:0] PostProcResM; // post-processing unit result + logic [4:0] PostProcFlgM; // post-processing unit flags logic [`XLEN-1:0] FCvtIntResM; - logic [`FLEN-1:0] CmpFpResE; // compare result - logic [`XLEN-1:0] CmpIntResE; // compare result - logic CmpNVE; // compare invalid flag (Not Valid) - logic [`FLEN-1:0] SgnResE; // sign injection result - logic [`FLEN-1:0] PreFpResE, PreFpResM, PreFpResW; // selected result that is ready in the memory stage - logic PreNVE, PreNVM; // selected flag that is ready in the memory stage - logic [`FLEN-1:0] FPUResultW; // final FP result being written to the FP register + logic [`FLEN-1:0] CmpFpResE; // compare result + logic [`XLEN-1:0] CmpIntResE; // compare result + logic CmpNVE; // compare invalid flag (Not Valid) + logic [`FLEN-1:0] SgnResE; // sign injection result + logic [`FLEN-1:0] PreFpResE, PreFpResM; //
selected result that is ready in the memory stage + logic PreNVE, PreNVM; // selected flag that is ready in the memory stage + logic [`FLEN-1:0] FPUResultW; // final FP result being written to the FP register // other signals - logic FDivSqrtDoneE; // is divide done - logic [63:0] DivInput1E, DivInput2E; // inputs to divide/squareroot unit - logic load_preload; // enable for FF on fpdivsqrt - logic [`FLEN-1:0] AlignedSrcAE; // align SrcA to the floating point format + logic [`FLEN-1:0] AlignedSrcAE; // align SrcA to the floating point format logic [`FLEN-1:0] BoxedZeroE; // Zero value for Z for multiplication, with NaN boxing if needed logic [`FLEN-1:0] BoxedOneE; // Zero value for Z for multiplication, with NaN boxing if needed @@ -171,9 +160,11 @@ module fpu ( ////////////////////////////////////////////////////////////////////////////////////////// // calculate FP control signals - fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .FRM_REGW, .STATUS_FS, - .IllegalFPUInstrD, .FRegWriteD, .FDivStartD, .FResSelD, .FOpCtrlD, .PostProcSelD, - .FmtD, .FrmD, .FWriteIntD); + fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .InstrD, + .StallE, .StallM, .StallW, .FlushE, .FlushM, .FlushW, .FRM_REGW, .STATUS_FS, .FDivBusyE, + .reset, .clk, .IllegalFPUInstrD, .FRegWriteM, .FRegWriteW, .FrmM, .FmtE, .FmtM, + .DivStartE, .FWriteIntE, .FWriteIntM, .OpCtrlE, .OpCtrlM, + .FResSelE, .FResSelM, .FResSelW, .PostProcSelE, .PostProcSelM, .Adr1E, .Adr2E, .Adr3E); // FP register file fregfile fregfile (.clk, .reset, .we4(FRegWriteW), @@ -185,12 +176,6 @@ module fpu ( flopenrc #(`FLEN) DEReg1(clk, reset, FlushE, ~StallE, FRD1D, FRD1E); flopenrc #(`FLEN) DEReg2(clk, reset, FlushE, ~StallE, FRD2D, FRD2E); flopenrc #(`FLEN) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E); - flopenrc #(15) DEAdrReg(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]}, - 
{Adr1E, Adr2E, Adr3E}); - flopenrc #(12+`FMTBITS) DECtrlReg3(clk, reset, FlushE, ~StallE, - {FRegWriteD, PostProcSelD, FResSelD, FrmD, FmtD, FOpCtrlD, FWriteIntD}, - {FRegWriteE, PostProcSelE, FResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE}); - flopenrc #(1) DEDivStartReg(clk, reset, FlushE, ~StallE|FDivBusyE, FDivStartD, FDivStartE); // EXECUTION STAGE @@ -207,12 +192,12 @@ module fpu ( // Hazard unit for FPU // - determines if any forwarding or stalls are needed fhazard fhazard(.Adr1E, .Adr2E, .Adr3E, .FRegWriteM, .FRegWriteW, .RdM, .RdW, .FResSelM, - .FStallD, .FForwardXE, .FForwardYE, .FForwardZE); + .FStallD, .ForwardXE, .ForwardYE, .ForwardZE); // forwarding muxs - mux3 #(`FLEN) fxemux (FRD1E, FPUResultW, PreFpResM, FForwardXE, FSrcXE); - mux3 #(`FLEN) fyemux (FRD2E, FPUResultW, PreFpResM, FForwardYE, FPreSrcYE); - mux3 #(`FLEN) fzemux (FRD3E, FPUResultW, PreFpResM, FForwardZE, FPreSrcZE); + mux3 #(`FLEN) fxemux (FRD1E, FPUResultW, PreFpResM, ForwardXE, XE); + mux3 #(`FLEN) fyemux (FRD2E, FPUResultW, PreFpResM, ForwardYE, PreYE); + mux3 #(`FLEN) fzemux (FRD3E, FPUResultW, PreFpResM, ForwardZE, PreZE); generate @@ -227,7 +212,7 @@ module fpu ( endgenerate - mux2 #(`FLEN) fyaddmux (FPreSrcYE, BoxedOneE, FOpCtrlE[2]&FOpCtrlE[1]&(FResSelE==2'b01)&(PostProcSelE==2'b10), FSrcYE); // Force Z to be 0 for multiply instructions + mux2 #(`FLEN) fyaddmux (PreYE, BoxedOneE, OpCtrlE[2]&OpCtrlE[1]&(FResSelE==2'b01)&(PostProcSelE==2'b10), YE); // Force Z to be 0 for multiply instructions // Force Z to be 0 for multiply instructions generate @@ -241,55 +226,76 @@ module fpu ( (`FLEN)'(0), FmtE, BoxedZeroE); // NaN boxing zeroes endgenerate - mux3 #(`FLEN) fzmulmux (FPreSrcZE, BoxedZeroE, FPreSrcYE, {FOpCtrlE[2]&FOpCtrlE[1], FOpCtrlE[2]&~FOpCtrlE[1]}, FSrcZE); + mux3 #(`FLEN) fzmulmux (PreZE, BoxedZeroE, PreYE, {OpCtrlE[2]&OpCtrlE[1], OpCtrlE[2]&~OpCtrlE[1]}, ZE); // unpack unit // - splits FP inputs into their various parts // - does some classifications (SNaN, NaN, Denorm, Norm, 
Zero, Infifnity) - unpack unpack (.X(FSrcXE), .Y(FSrcYE), .Z(FSrcZE), .FmtE, - .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE, - .XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE, .XDenormE, .ZDenormE, - .XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE, .XExpMaxE); + unpack unpack (.X(XE), .Y(YE), .Z(ZE), .Fmt(FmtE), .Xs(XsE), .Ys(YsE), .Zs(ZsE), + .Xe(XeE), .Ye(YeE), .Ze(ZeE), .Xm(XmE), .Ym(YmE), .Zm(ZmE), + .XNaN(XNaNE), .YNaN(YNaNE), .ZNaN(ZNaNE), .XSNaN(XSNaNE), + .YSNaN(YSNaNE), .ZSNaN(ZSNaNE), .XDenorm(XDenormE), .ZDenorm(ZDenormE), + .XZero(XZeroE), .YZero(YZeroE), .ZZero(ZZeroE), .XInf(XInfE), .YInf(YInfE), + .ZInf(ZInfE), .XExpMax(XExpMaxE)); - // fma - does multiply, add, and multiply-add instructions - fma fma (.Xs(XSgnE), .Ys(YSgnE), .Zs(ZSgnE), - .Xe(XExpE), .Ye(YExpE), .Ze(ZExpE), - .Xm(XManE), .Ym(YManE), .Zm(ZManE), + // fused multiply add + // - fadd/fsub + // - fmul + // - fmadd/fnmadd/fmsub/fnmsub + fma fma (.Xs(XsE), .Ys(YsE), .Zs(ZsE), + .Xe(XeE), .Ye(YeE), .Ze(ZeE), + .Xm(XmE), .Ym(YmE), .Zm(ZmE), .XZero(XZeroE), .YZero(YZeroE), .ZZero(ZZeroE), - .FOpCtrl(FOpCtrlE), .Fmt(FmtE), - .As(ZSgnEffE), .Ps(PSgnE), .Ss(SsE), .Se(SeE), - .Sm(SumE), .Pe(ProdExpE), - .NegSum(NegSumE), .InvA(InvAE), .NCnt(FmaNormCntE), - .ZmSticky(AddendStickyE), .KillProd(KillProdE)); + .OpCtrl(OpCtrlE), .Fmt(FmtE), + .As(AsE), .Ps(PsE), .Ss(SsE), .Se(SeE), + .Sm(SmE), .Pe(PeE), + .NegSum(NegSumE), .InvA(InvAE), .SCnt(SCntE), + .ZmSticky(ZmStickyE), .KillProd(KillProdE)); - divsqrt divsqrt(.clk, .reset, .FmtE, .XManE, .YManE, .XExpE, .YExpE, - .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .DivStartE(FDivStartE), - .StallE, .StallM, .DivStickyM, .DivBusy(FDivBusyE), .DivCalcExpM, //***change divbusyE to M signal - .EarlyTermShiftM, .QuotM, .DivDone(DivDoneM)); - // other FP execution units - fcmp fcmp (.FmtE, .FOpCtrlE, .XSgnE, .YSgnE, .XExpE, .YExpE, .XManE, .YManE, - .XZeroE, .YZeroE, .XNaNE, .YNaNE, .XSNaNE, .YSNaNE, .FSrcXE, .FSrcYE, 
.CmpNVE, .CmpFpResE, .CmpIntResE); - fsgninj fsgninj(.SgnOpCodeE(FOpCtrlE[1:0]), .XSgnE, .YSgnE, .FSrcXE, .FmtE, .SgnResE); - fclassify fclassify (.XSgnE, .XDenormE, .XZeroE, .XNaNE, .XInfE, .XSNaNE, .ClassResE); + // divide and squareroot + // - fdiv + // - fsqrt + // *** add other opperations + divsqrt divsqrt(.clk, .reset, .FmtE, .XmE, .YmE, .XeE, .YeE, + .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .DivStartE(DivStartE), + .StallE, .StallM, .DivSM, .DivBusy(FDivBusyE), .QeM, //***change divbusyE to M signal + .EarlyTermShiftM, .QmM, .DivDone(DivDoneM)); + // compare + // - fmin/fmax + // - flt/fle/feq + fcmp fcmp (.Fmt(FmtE), .OpCtrl(OpCtrlE), .Xs(XsE), .Ys(YsE), .Xe(XeE), .Ye(YeE), + .Xm(XmE), .Ym(YmE), .XZero(XZeroE), .YZero(YZeroE), .XNaN(XNaNE), .YNaN(YNaNE), + .XSNaN(XSNaNE), .YSNaN(YSNaNE), .X(XE), .Y(YE), .CmpNV(CmpNVE), + .CmpFpRes(CmpFpResE), .CmpIntRes(CmpIntResE)); + // sign injection + // - fsgnj/fsgnjx/fsgnjn + fsgninj fsgninj(.OpCtrl(OpCtrlE[1:0]), .Xs(XsE), .Ys(YsE), .X(XE), .Fmt(FmtE), .SgnRes(SgnResE)); - fcvt fcvt (.Xs(XSgnE), .Xe(XExpE), .Xm(XManE), .Int(ForwardedSrcAE), .FOpCtrl(FOpCtrlE), - .ToInt(FWriteIntE), .XZero(XZeroE), .XDenorm(XDenormE), .Fmt(FmtE), .Ce(CvtCalcExpE), - .ShiftAmt(CvtShiftAmtE), .ResDenormUf(CvtResDenormUfE), .Cs(CvtResSgnE), .IntZero(IntZeroE), + // classify + // - fclass + fclassify fclassify (.Xs(XsE), .XDenorm(XDenormE), .XZero(XZeroE), .XNaN(XNaNE), + .XInf(XInfE), .XSNaN(XSNaNE), .ClassRes(ClassResE)); + + // convert + // - fcvt.*.* + fcvt fcvt (.Xs(XsE), .Xe(XeE), .Xm(XmE), .Int(ForwardedSrcAE), .OpCtrl(OpCtrlE), + .ToInt(FWriteIntE), .XZero(XZeroE), .XDenorm(XDenormE), .Fmt(FmtE), .Ce(CeE), + .ShiftAmt(CvtShiftAmtE), .ResDenormUf(CvtResDenormUfE), .Cs(CsE), .IntZero(IntZeroE), .LzcIn(CvtLzcInE)); // data to be stored in memory - to IEU // - FP uses NaN-blocking format // - if there are any unsused bits the most significant bits are filled with 1s if (`LLEN==`XLEN) begin - assign FWriteDataE = 
FSrcYE[`XLEN-1:0]; + assign FWriteDataE = YE[`XLEN-1:0]; end else begin logic [`FLEN-1:0] FWriteDataE; if(`FMTBITS == 2) assign FStore2 = FmtM == `FMT; else assign FStore2 = FmtM; - if (`FPSIZES==1) assign FWriteDataE = FSrcYE; - else if (`FPSIZES==2) assign FWriteDataE = FmtE ? FSrcYE : {2{FSrcYE[`LEN1-1:0]}}; - else assign FWriteDataE = FmtE == `FMT ? FSrcYE : {2{FSrcYE[`LEN1-1:0]}}; + if (`FPSIZES==1) assign FWriteDataE = YE; + else if (`FPSIZES==2) assign FWriteDataE = FmtE ? YE : {2{YE[`LEN1-1:0]}}; + else assign FWriteDataE = FmtE == `FMT ? YE : {2{YE[`LEN1-1:0]}}; flopenrc #(`FLEN) EMWriteDataReg (clk, reset, FlushM, ~StallM, FWriteDataE, FWriteDataM); end @@ -306,14 +312,14 @@ module fpu ( {{`FLEN-`XLEN{1'b1}}, ForwardedSrcAE}, FmtE, AlignedSrcAE); // NaN boxing zeroes endgenerate // select a result that may be written to the FP register - mux3 #(`FLEN) FResMux(SgnResE, AlignedSrcAE, CmpFpResE, {FOpCtrlE[2], &FOpCtrlE[1:0]}, PreFpResE); - assign PreNVE = CmpNVE&(FOpCtrlE[2]|FWriteIntE); + mux3 #(`FLEN) FResMux(SgnResE, AlignedSrcAE, CmpFpResE, {OpCtrlE[2], &OpCtrlE[1:0]}, PreFpResE); + assign PreNVE = CmpNVE&(OpCtrlE[2]|FWriteIntE); // select the result that may be written to the integer register - to IEU if (`FLEN>`XLEN) - assign IntSrcXE = FSrcXE[`XLEN-1:0]; + assign IntSrcXE = XE[`XLEN-1:0]; else - assign IntSrcXE = {{`XLEN-`FLEN{FSrcXE[`FLEN-1:0]}}, FSrcXE}; + assign IntSrcXE = {{`XLEN-`FLEN{XE[`FLEN-1:0]}}, XE}; mux3 #(`XLEN) IntResMux (ClassResE, IntSrcXE, CmpIntResE, {~FResSelE[1], FResSelE[0]}, FIntResE); // *** DH 5/25/22: CvtRes will move to mem stage. 
Premux in execute to save area, then make sure stalls are ok @@ -321,27 +327,24 @@ module fpu ( // E/M pipe registers - // flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, FSrcXE, FSrcXM); - flopenrc #(`NF+2) EMFpReg2 (clk, reset, FlushM, ~StallM, {XSgnE,XManE}, {XSgnM,XManM}); - flopenrc #(`NF+2) EMFpReg3 (clk, reset, FlushM, ~StallM, {YSgnE,YManE}, {YSgnM,YManM}); - flopenrc #(`FLEN) EMFpReg4 (clk, reset, FlushM, ~StallM, {ZExpE,ZManE}, {ZExpM,ZManM}); + // flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, XE, FSrcXM); + flopenrc #(`NF+2) EMFpReg2 (clk, reset, FlushM, ~StallM, {XsE,XmE}, {XsM,XmM}); + flopenrc #(`NF+2) EMFpReg3 (clk, reset, FlushM, ~StallM, {YsE,YmE}, {YsM,YmM}); + flopenrc #(`FLEN) EMFpReg4 (clk, reset, FlushM, ~StallM, {ZeE,ZmE}, {ZeM,ZmM}); flopenrc #(`XLEN) EMFpReg6 (clk, reset, FlushM, ~StallM, FIntResE, FIntResM); flopenrc #(`FLEN) EMFpReg7 (clk, reset, FlushM, ~StallM, PreFpResE, PreFpResM); flopenrc #(13) EMFpReg5 (clk, reset, FlushM, ~StallM, {XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE, ZDenormE}, {XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM, ZDenormM}); flopenrc #(1) EMRegCmpFlg (clk, reset, FlushM, ~StallM, PreNVE, PreNVM); - flopenrc #(12+int'(`FMTBITS)) EMCtrlReg (clk, reset, FlushM, ~StallM, - {FRegWriteE, FResSelE, PostProcSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE}, - {FRegWriteM, FResSelM, PostProcSelM, FrmM, FmtM, FOpCtrlM, FWriteIntM}); - flopenrc #(3*`NF+6) EMRegFma2(clk, reset, FlushM, ~StallM, SumE, SumM); - flopenrc #(`NE+2) EMRegFma3(clk, reset, FlushM, ~StallM, ProdExpE, ProdExpM); + flopenrc #(3*`NF+6) EMRegFma2(clk, reset, FlushM, ~StallM, SmE, SmM); + flopenrc #(`NE+2) EMRegFma3(clk, reset, FlushM, ~StallM, PeE, PeM); flopenrc #($clog2(3*`NF+7)+9+`NE) EMRegFma4(clk, reset, FlushM, ~StallM, - {AddendStickyE, KillProdE, InvAE, FmaNormCntE, NegSumE, ZSgnEffE, PSgnE, SsE, SeE}, - {AddendStickyM, KillProdM, InvAM, FmaNormCntM, 
NegSumM, ZSgnEffM, PSgnM, SsM, SeM}); + {ZmStickyE, KillProdE, InvAE, SCntE, NegSumE, AsE, PsE, SsE, SeE}, + {ZmStickyM, KillProdM, InvAM, SCntM, NegSumM, AsM, PsM, SsM, SeM}); flopenrc #(`NE+`LOGCVTLEN+`CVTLEN+4) EMRegCvt(clk, reset, FlushM, ~StallM, - {CvtCalcExpE, CvtShiftAmtE, CvtResDenormUfE, CvtResSgnE, IntZeroE, CvtLzcInE}, - {CvtCalcExpM, CvtShiftAmtM, CvtResDenormUfM, CvtResSgnM, IntZeroM, CvtLzcInM}); + {CeE, CvtShiftAmtE, CvtResDenormUfE, CsE, IntZeroE, CvtLzcInE}, + {CeM, CvtShiftAmtM, CvtResDenormUfM, CsM, IntZeroM, CvtLzcInM}); // BEGIN MEMORY STAGE @@ -357,11 +360,11 @@ module fpu ( assign FpLoadStoreM = FResSelM[1]; - postprocess postprocess(.Xs(XSgnM), .Ys(YSgnM), .Ze(ZExpM), .Xm(XManM), .Ym(YManM), .Zm(ZManM), .Frm(FrmM), .Fmt(FmtM), .FmaPe(ProdExpM), .DivEarlyTermShift(EarlyTermShiftM), - .FmaZmS(AddendStickyM), .FmaKillProd(KillProdM), .XZero(XZeroM), .YZero(YZeroM), .ZZero(ZZeroM), .XInf(XInfM), .YInf(YInfM), .DivQm(QuotM), .FmaSs(SsM), - .ZInf(ZInfM), .XNaN(XNaNM), .YNaN(YNaNM), .ZNaN(ZNaNM), .XSNaN(XSNaNM), .YSNaN(YSNaNM), .ZSNaN(ZSNaNM), .FmaSm(SumM), .DivQe(DivCalcExpM), .DivDone(DivDoneM), - .FmaNegSum(NegSumM), .FmaInvA(InvAM), .ZDenorm(ZDenormM), .FmaAs(ZSgnEffM), .FmaPs(PSgnM), .FOpCtrl(FOpCtrlM), .FmaNCnt(FmaNormCntM), .FmaSe(SeM), - .CvtCe(CvtCalcExpM), .CvtResDenormUf(CvtResDenormUfM),.CvtShiftAmt(CvtShiftAmtM), .CvtCs(CvtResSgnM), .ToInt(FWriteIntM), .DivS(DivStickyM), + postprocess postprocess(.Xs(XsM), .Ys(YsM), .Ze(ZeM), .Xm(XmM), .Ym(YmM), .Zm(ZmM), .Frm(FrmM), .Fmt(FmtM), .FmaPe(PeM), .DivEarlyTermShift(EarlyTermShiftM), + .FmaZmS(ZmStickyM), .FmaKillProd(KillProdM), .XZero(XZeroM), .YZero(YZeroM), .ZZero(ZZeroM), .XInf(XInfM), .YInf(YInfM), .DivQm(QmM), .FmaSs(SsM), + .ZInf(ZInfM), .XNaN(XNaNM), .YNaN(YNaNM), .ZNaN(ZNaNM), .XSNaN(XSNaNM), .YSNaN(YSNaNM), .ZSNaN(ZSNaNM), .FmaSm(SmM), .DivQe(QeM), .DivDone(DivDoneM), + .FmaNegSum(NegSumM), .FmaInvA(InvAM), .ZDenorm(ZDenormM), .FmaAs(AsM), .FmaPs(PsM), .OpCtrl(OpCtrlM), 
.FmaSCnt(SCntM), .FmaSe(SeM), + .CvtCe(CeM), .CvtResDenormUf(CvtResDenormUfM),.CvtShiftAmt(CvtShiftAmtM), .CvtCs(CsM), .ToInt(FWriteIntM), .DivS(DivSM), .CvtLzcIn(CvtLzcInM), .IntZero(IntZeroM), .PostProcSel(PostProcSelM), .PostProcRes(PostProcResM), .PostProcFlg(PostProcFlgM), .FCvtIntRes(FCvtIntResM)); // FPU flag selection - to privileged @@ -371,9 +374,6 @@ module fpu ( // M/W pipe registers flopenrc #(`FLEN) MWRegFp(clk, reset, FlushW, ~StallW, FpResM, FpResW); flopenrc #(`XLEN) MWRegInt(clk, reset, FlushW, ~StallW, FCvtIntResM, FCvtIntResW); - flopenrc #(4+int'(`FMTBITS-1)) MWCtrlReg(clk, reset, FlushW, ~StallW, - {FRegWriteM, FResSelM, FmtM}, - {FRegWriteW, FResSelW, FmtW}); // BEGIN WRITEBACK STAGE diff --git a/pipelined/src/fpu/fsgninj.sv b/pipelined/src/fpu/fsgninj.sv index 17d15669..a5b7e774 100755 --- a/pipelined/src/fpu/fsgninj.sv +++ b/pipelined/src/fpu/fsgninj.sv @@ -26,60 +26,59 @@ // TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE // OR OTHER DEALINGS IN THE SOFTWARE. 
//////////////////////////////////////////////////////////////////////////////////////////////// + `include "wally-config.vh" module fsgninj ( - input logic XSgnE, YSgnE, // X and Y sign bits - input logic [`FLEN-1:0] FSrcXE, // X - input logic [`FMTBITS-1:0] FmtE, // precision 1 = double 0 = single - input logic [1:0] SgnOpCodeE, // operation control - output logic [`FLEN-1:0] SgnResE // result + input logic Xs, Ys, // X and Y sign bits + input logic [`FLEN-1:0] X, // X + input logic [`FMTBITS-1:0] Fmt, // format + input logic [1:0] OpCtrl, // operation control + output logic [`FLEN-1:0] SgnRes // result ); logic ResSgn; - //op code designation: - // - //00 - fsgnj - directly copy over sign value of FSrcYE - //01 - fsgnjn - negate sign value of FSrcYE - //10 - fsgnjx - XOR sign values of FSrcXE & FSrcYE - // + // OpCtrl: + // 00 - fsgnj - directly copy over sign value of Y + // 01 - fsgnjn - negate sign value of Y + // 10 - fsgnjx - XOR sign values of X and Y // calculate the result's sign - assign ResSgn = (SgnOpCodeE[1] ? XSgnE : SgnOpCodeE[0]) ^ YSgnE; + assign ResSgn = (OpCtrl[1] ? Xs : OpCtrl[0]) ^ Ys; // format final result based on precision // - uses NaN-blocking format // - if there are any unsused bits the most significant bits are filled with 1s if (`FPSIZES == 1) - assign SgnResE = {ResSgn, FSrcXE[`FLEN-2:0]}; + assign SgnRes = {ResSgn, X[`FLEN-2:0]}; else if (`FPSIZES == 2) - assign SgnResE = {~FmtE|ResSgn, FSrcXE[`FLEN-2:`LEN1], FmtE ? FSrcXE[`LEN1-1] : ResSgn, FSrcXE[`LEN1-2:0]}; + assign SgnRes = {~Fmt|ResSgn, X[`FLEN-2:`LEN1], Fmt ? 
X[`LEN1-1] : ResSgn, X[`LEN1-2:0]}; else if (`FPSIZES == 3) begin logic [2:0] SgnBits; always_comb - case (FmtE) - `FMT: SgnBits = {ResSgn, FSrcXE[`LEN1-1], FSrcXE[`LEN2-1]}; - `FMT1: SgnBits = {1'b1, ResSgn, FSrcXE[`LEN2-1]}; + case (Fmt) + `FMT: SgnBits = {ResSgn, X[`LEN1-1], X[`LEN2-1]}; + `FMT1: SgnBits = {1'b1, ResSgn, X[`LEN2-1]}; `FMT2: SgnBits = {2'b11, ResSgn}; default: SgnBits = {3{1'bx}}; endcase - assign SgnResE = {SgnBits[2], FSrcXE[`FLEN-2:`LEN1], SgnBits[1], FSrcXE[`LEN1-2:`LEN2], SgnBits[0], FSrcXE[`LEN2-2:0]}; + assign SgnRes = {SgnBits[2], X[`FLEN-2:`LEN1], SgnBits[1], X[`LEN1-2:`LEN2], SgnBits[0], X[`LEN2-2:0]}; end else if (`FPSIZES == 4) begin logic [3:0] SgnBits; always_comb - case (FmtE) - `Q_FMT: SgnBits = {ResSgn, FSrcXE[`D_LEN-1], FSrcXE[`S_LEN-1], FSrcXE[`H_LEN-1]}; - `D_FMT: SgnBits = {1'b1, ResSgn, FSrcXE[`S_LEN-1], FSrcXE[`H_LEN-1]}; - `S_FMT: SgnBits = {2'b11, ResSgn, FSrcXE[`H_LEN-1]}; + case (Fmt) + `Q_FMT: SgnBits = {ResSgn, X[`D_LEN-1], X[`S_LEN-1], X[`H_LEN-1]}; + `D_FMT: SgnBits = {1'b1, ResSgn, X[`S_LEN-1], X[`H_LEN-1]}; + `S_FMT: SgnBits = {2'b11, ResSgn, X[`H_LEN-1]}; `H_FMT: SgnBits = {3'b111, ResSgn}; endcase - assign SgnResE = {SgnBits[3], FSrcXE[`Q_LEN-2:`D_LEN], SgnBits[2], FSrcXE[`D_LEN-2:`S_LEN], SgnBits[1], FSrcXE[`S_LEN-2:`H_LEN], SgnBits[0], FSrcXE[`H_LEN-2:0]}; + assign SgnRes = {SgnBits[3], X[`Q_LEN-2:`D_LEN], SgnBits[2], X[`D_LEN-2:`S_LEN], SgnBits[1], X[`S_LEN-2:`H_LEN], SgnBits[0], X[`H_LEN-2:0]}; end endmodule diff --git a/pipelined/src/fpu/otfc.sv b/pipelined/src/fpu/otfc.sv index 8d11273a..66af5b3c 100644 --- a/pipelined/src/fpu/otfc.sv +++ b/pipelined/src/fpu/otfc.sv @@ -107,6 +107,6 @@ module otfc4 ( QMNext = {QMR, 2'b11}; end end - // Final Quoteint is in the range [.5, 2) + // Final Quotient is in the range [.5, 2) endmodule diff --git a/pipelined/src/fpu/postprocess.sv b/pipelined/src/fpu/postprocess.sv index de3c4f30..d3169d47 100644 --- a/pipelined/src/fpu/postprocess.sv +++
b/pipelined/src/fpu/postprocess.sv @@ -36,7 +36,7 @@ module postprocess ( input logic [`NF:0] Xm, Ym, Zm, // input mantissas input logic [2:0] Frm, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude input logic [`FMTBITS-1:0] Fmt, // precision 1 = double 0 = single - input logic [2:0] FOpCtrl, // choose which opperation (look below for values) + input logic [2:0] OpCtrl, // choose which opperation (look below for values) input logic XZero, YZero, ZZero, // inputs are zero input logic XInf, YInf, ZInf, // inputs are infinity input logic XNaN, YNaN, ZNaN, // inputs are NaN @@ -54,7 +54,7 @@ module postprocess ( input logic FmaNegSum, // was the sum negitive input logic FmaInvA, // do you invert Z input logic FmaSs, - input logic [$clog2(3*`NF+7)-1:0] FmaNCnt, // the normalization shift count + input logic [$clog2(3*`NF+7)-1:0] FmaSCnt, // the normalization shift count //divide signals input logic [`DURLEN-1:0] DivEarlyTermShift, input logic DivS, @@ -125,14 +125,14 @@ module postprocess ( logic Sqrt; // signals to help readability - assign Signed = FOpCtrl[0]; - assign Int64 = FOpCtrl[1]; - assign IntToFp = FOpCtrl[2]; - assign Mult = FOpCtrl[2]&~FOpCtrl[1]&~FOpCtrl[0]; + assign Signed = OpCtrl[0]; + assign Int64 = OpCtrl[1]; + assign IntToFp = OpCtrl[2]; + assign Mult = OpCtrl[2]&~OpCtrl[1]&~OpCtrl[0]; assign CvtOp = (PostProcSel == 2'b00); assign FmaOp = (PostProcSel == 2'b10); assign DivOp = (PostProcSel == 2'b01)&DivDone; - assign Sqrt = FOpCtrl[0]; + assign Sqrt = OpCtrl[0]; // is there an input of infinity or NaN being used assign InfIn = (XInf&~(IntToFp&CvtOp))|(YInf&~CvtOp)|(ZInf&FmaOp); @@ -142,9 +142,9 @@ module postprocess ( // - fp -> fp: OpCtrl contains the percision of the output // - otherwise: Fmt contains the percision of the output if (`FPSIZES == 2) - assign OutFmt = IntToFp|~CvtOp ? 
Fmt : (FOpCtrl[1:0] == `FMT); + assign OutFmt = IntToFp|~CvtOp ? Fmt : (OpCtrl[1:0] == `FMT); else if (`FPSIZES == 3 | `FPSIZES == 4) - assign OutFmt = IntToFp|~CvtOp ? Fmt : FOpCtrl[1:0]; + assign OutFmt = IntToFp|~CvtOp ? Fmt : OpCtrl[1:0]; /////////////////////////////////////////////////////////////////////////////// // Normalization @@ -152,7 +152,7 @@ module postprocess ( cvtshiftcalc cvtshiftcalc(.ToInt, .CvtCe, .CvtResDenormUf, .Xm, .CvtLzcIn, .XZero, .IntToFp, .OutFmt, .CvtResUf, .CvtShiftIn); - fmashiftcalc fmashiftcalc(.FmaSm, .Ze, .FmaPe, .FmaNCnt, .Fmt, .FmaKillProd, .NormSumExp, .FmaSe, + fmashiftcalc fmashiftcalc(.FmaSm, .Ze, .FmaPe, .FmaSCnt, .Fmt, .FmaKillProd, .NormSumExp, .FmaSe, .FmaSZero, .FmaPreResultDenorm, .FmaShiftAmt, .FmaShiftIn); divshiftcalc divshiftcalc(.Fmt, .DivQe, .DivQm, .DivEarlyTermShift, .DivResDenorm, .DivDenormShift, .DivShiftAmt, .DivShiftIn); diff --git a/pipelined/src/fpu/qsel.sv b/pipelined/src/fpu/qsel.sv index 396ca776..202b3ee8 100644 --- a/pipelined/src/fpu/qsel.sv +++ b/pipelined/src/fpu/qsel.sv @@ -42,7 +42,7 @@ module qsel2 ( // *** eventually just change to 4 bits // for efficiency. You can probably optimize your logic to // select the proper divisor with less delay. 
- // Quotient equations from EE371 lecture notes 13-20 + // Quotient equations from EE371 lecture notes 13-20 assign p = ps ^ pc; assign g = ps & pc; diff --git a/pipelined/src/fpu/shiftcorrection.sv b/pipelined/src/fpu/shiftcorrection.sv index 514edbee..6329ffe2 100644 --- a/pipelined/src/fpu/shiftcorrection.sv +++ b/pipelined/src/fpu/shiftcorrection.sv @@ -43,7 +43,7 @@ module shiftcorrection( output logic [`NE+1:0] FmaMe // exponent of the normalized sum ); logic [3*`NF+5:0] CorrSumShifted; // the shifted sum after LZA correction - logic [`CORRSHIFTSZ-1:0] CorrQuotShifted; + logic [`CORRSHIFTSZ-1:0] CorrQmShifted; logic ResDenorm; // is the result denormalized logic LZAPlus1, LZAPlus2; // add one or two to the sum's exponent due to LZA correction @@ -53,11 +53,11 @@ module shiftcorrection( // the only possible mantissa for a plus two is all zeroes - a one has to propigate all the way through a sum. so we can leave the bottom statement alone assign CorrSumShifted = LZAPlus1 ? Shifted[`NORMSHIFTSZ-3:1] : Shifted[`NORMSHIFTSZ-4:0]; // if the msb is 1 or the exponent was one, but the shifted quotent was < 1 (Denorm) - assign CorrQuotShifted = (LZAPlus2|(DivQe==1&~LZAPlus2)) ? Shifted[`NORMSHIFTSZ-2:`NORMSHIFTSZ-`CORRSHIFTSZ-1] : Shifted[`NORMSHIFTSZ-3:`NORMSHIFTSZ-`CORRSHIFTSZ-2]; + assign CorrQmShifted = (LZAPlus2|(DivQe==1&~LZAPlus2)) ?
Shifted[`NORMSHIFTSZ-2:`NORMSHIFTSZ-`CORRSHIFTSZ-1] : Shifted[`NORMSHIFTSZ-3:`NORMSHIFTSZ-`CORRSHIFTSZ-2]; // if the result of the divider was calculated to be denormalized, then the result was correctly normalized, so select the top shifted bits always_comb if(FmaOp) Mf = {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+6){1'b0}}}; - else if (DivOp&~DivResDenorm) Mf = CorrQuotShifted; + else if (DivOp&~DivResDenorm) Mf = CorrQmShifted; else Mf = Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ]; // Determine sum's exponent // if plus1 If plus2 if said denorm but norm plus 1 if said denorm but norm plus 2 diff --git a/pipelined/src/fpu/srt.sv b/pipelined/src/fpu/srt.sv index ee5ae9a3..7e9f9922 100644 --- a/pipelined/src/fpu/srt.sv +++ b/pipelined/src/fpu/srt.sv @@ -37,15 +37,15 @@ module srt( input logic [`FMTBITS-1:0] FmtE, input logic [`NE-1:0] Xe, Ye, input logic XZeroE, YZeroE, - input logic [`DIVLEN-1:0] X, - input logic [`DIVLEN-1:0] Dpreproc, - input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt, - input logic NegSticky, - output logic [`QLEN-1-(`RADIX/4):0] Quot, + input logic [`DIVLEN-1:0] X, + input logic [`DIVLEN-1:0] Dpreproc, + input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt, + input logic NegSticky, + output logic [`QLEN-1-(`RADIX/4):0] Qm, output logic [`DIVLEN+3:0] NextWSN, NextWCN, output logic [`DIVLEN+3:0] StickyWSA, output logic [`DIVLEN+3:0] FirstWS, FirstWC, - output logic [`NE+1:0] DivCalcExpM, + output logic [`NE+1:0] QeM, output logic [`XLEN-1:0] Rem ); @@ -62,7 +62,7 @@ module srt( /* verilator lint_on UNOPTFLAT */ logic [`DIVLEN+3:0] WSN, WCN; logic [`DIVLEN+3:0] D, DBar, D2, DBar2; - logic [`NE+1:0] DivCalcExp; + logic [`NE+1:0] Qe; logic [$clog2(`XLEN+1)-1:0] intExp; logic intSign; logic [`QLEN-1:0] QMMux; @@ -88,7 +88,7 @@ module srt( mux2 #(`DIVLEN+4) wcmux(NextWCN, {`DIVLEN+4{1'b0}}, DivStart, WCN); flopen #(`DIVLEN+4) wcflop(clk, DivStart|DivBusy, WCN, WC[0]); flopen #(`DIVLEN+4) dflop(clk, DivStart, {4'b0001, Dpreproc}, D); - flopen 
#(`NE+2) expflop(clk, DivStart, DivCalcExp, DivCalcExpM); + flopen #(`NE+2) expflop(clk, DivStart, Qe, QeM); // Divisor Selections @@ -123,7 +123,7 @@ module srt( flopenr #(`QLEN) Qreg(clk, DivStart, DivBusy, QNext[`DIVCOPIES-1], Q[0]); flopen #(`QLEN) QMreg(clk, DivBusy, QMMux, QM[0]); - assign Quot = NegSticky ? QM[0][`QLEN-1-(`RADIX/4):0] : Q[0][`QLEN-1-(`RADIX/4):0]; + assign Qm = NegSticky ? QM[0][`QLEN-1-(`RADIX/4):0] : Q[0][`QLEN-1-(`RADIX/4):0]; assign FirstWS = WS[0]; assign FirstWC = WC[0]; if(`RADIX==2) @@ -132,7 +132,7 @@ module srt( else assign StickyWSA = {WSA[1][`DIVLEN+2:0], 1'b0}; - expcalc expcalc(.FmtE, .Xe, .Ye, .XZeroE, .XZeroCnt, .YZeroCnt, .DivCalcExp); + expcalc expcalc(.FmtE, .Xe, .Ye, .XZeroE, .XZeroCnt, .YZeroCnt, .Qe); endmodule @@ -155,7 +155,7 @@ module divinteration ( logic [3:0] q; logic qp, qz;//, qn; - // Quotient Selection logic + // Quotient Selection logic // Given partial remainder, select quotient of +1, 0, or -1 (qp, qz, pm) // q encoding: // 1000 = +2 @@ -226,7 +226,7 @@ module expcalc( input logic [`NE-1:0] Xe, Ye, input logic XZeroE, input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt, - output logic [`NE+1:0] DivCalcExp + output logic [`NE+1:0] Qe ); logic [`NE-2:0] Bias; @@ -255,5 +255,5 @@ module expcalc( endcase end // correct exponent for denormalized input's normalization shifts - assign DivCalcExp = ({2'b0, Xe} - {{`NE+1-$unsigned($clog2(`NF+2)){1'b0}}, XZeroCnt} - {2'b0, Ye} + {{`NE+1-$unsigned($clog2(`NF+2)){1'b0}}, YZeroCnt} + {3'b0, Bias})&{`NE+2{~XZeroE}}; + assign Qe = ({2'b0, Xe} - {{`NE+1-$unsigned($clog2(`NF+2)){1'b0}}, XZeroCnt} - {2'b0, Ye} + {{`NE+1-$unsigned($clog2(`NF+2)){1'b0}}, YZeroCnt} + {3'b0, Bias})&{`NE+2{~XZeroE}}; endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/srtfsm.sv b/pipelined/src/fpu/srtfsm.sv index 634ecc1d..597f96cd 100644 --- a/pipelined/src/fpu/srtfsm.sv +++ b/pipelined/src/fpu/srtfsm.sv @@ -43,7 +43,7 @@ module srtfsm( input logic [`DIVLEN+3:0] StickyWSA, input
logic [`DURLEN-1:0] Dur, output logic [`DURLEN-1:0] EarlyTermShiftE, - output logic DivStickyE, + output logic DivSE, output logic DivDone, output logic NegSticky, output logic DivBusy @@ -65,9 +65,9 @@ module srtfsm( // this is only a problem on radix 2 (and pssibly maximally redundant 4) since minimally redundant // radix-4 division can't create a QM that continually adds 0's if (`RADIX == 2) - assign DivStickyE = |W&~(StickyWSA == WS); + assign DivSE = |W&~(StickyWSA == WS); else - assign DivStickyE = |W; + assign DivSE = |W; assign DivDone = (state == DONE); assign W = WC+WS; assign NegSticky = W[`DIVLEN+3]; //*** is there a better way to do this??? diff --git a/pipelined/src/fpu/unpack.sv b/pipelined/src/fpu/unpack.sv index 71cad187..050839c2 100644 --- a/pipelined/src/fpu/unpack.sv +++ b/pipelined/src/fpu/unpack.sv @@ -30,35 +30,34 @@ module unpack ( input logic [`FLEN-1:0] X, Y, Z, // inputs from register file - input logic [`FMTBITS-1:0] FmtE, // format signal 00 - single 01 - double 11 - quad 10 - half - output logic XSgnE, YSgnE, ZSgnE, // sign bits of XYZ - output logic [`NE-1:0] XExpE, YExpE, ZExpE, // exponents of XYZ (converted to largest supported precision) - output logic [`NF:0] XManE, YManE, ZManE, // mantissas of XYZ (converted to largest supported precision) - output logic XNaNE, YNaNE, ZNaNE, // is XYZ a NaN - output logic XSNaNE, YSNaNE, ZSNaNE, // is XYZ a signaling NaN - output logic XDenormE, ZDenormE, // is XYZ denormalized - output logic XZeroE, YZeroE, ZZeroE, // is XYZ zero - output logic XInfE, YInfE, ZInfE, // is XYZ infinity - output logic XExpMaxE // does X have the maximum exponent (NaN or Inf) + input logic [`FMTBITS-1:0] Fmt, // format signal 00 - single 01 - double 11 - quad 10 - half + output logic Xs, Ys, Zs, // sign bits of XYZ + output logic [`NE-1:0] Xe, Ye, Ze, // exponents of XYZ (converted to largest supported precision) + output logic [`NF:0] Xm, Ym, Zm, // mantissas of XYZ (converted to largest supported precision) + 
output logic XNaN, YNaN, ZNaN, // is XYZ a NaN + output logic XSNaN, YSNaN, ZSNaN, // is XYZ a signaling NaN + output logic XDenorm, ZDenorm, // is XYZ denormalized + output logic XZero, YZero, ZZero, // is XYZ zero + output logic XInf, YInf, ZInf, // is XYZ infinity + output logic XExpMax // does X have the maximum exponent (NaN or Inf) ); - logic [`NF-1:0] XFracE, YFracE, ZFracE; //Fraction of XYZ logic XExpNonZero, YExpNonZero, ZExpNonZero; // is the exponent of XYZ non-zero logic XFracZero, YFracZero, ZFracZero; // is the fraction zero - logic YExpMaxE, ZExpMaxE; // is the exponent all 1s + logic YExpMax, ZExpMax; // is the exponent all 1s - unpackinput unpackinputX (.In(X), .FmtE, .Sgn(XSgnE), .Exp(XExpE), .Man(XManE), - .NaN(XNaNE), .SNaN(XSNaNE), .ExpNonZero(XExpNonZero), - .Zero(XZeroE), .Inf(XInfE), .ExpMax(XExpMaxE), .FracZero(XFracZero)); + unpackinput unpackinputX (.In(X), .Fmt, .Sgn(Xs), .Exp(Xe), .Man(Xm), + .NaN(XNaN), .SNaN(XSNaN), .ExpNonZero(XExpNonZero), + .Zero(XZero), .Inf(XInf), .ExpMax(XExpMax), .FracZero(XFracZero)); - unpackinput unpackinputY (.In(Y), .FmtE, .Sgn(YSgnE), .Exp(YExpE), .Man(YManE), - .NaN(YNaNE), .SNaN(YSNaNE), .ExpNonZero(YExpNonZero), - .Zero(YZeroE), .Inf(YInfE), .ExpMax(YExpMaxE), .FracZero(YFracZero)); + unpackinput unpackinputY (.In(Y), .Fmt, .Sgn(Ys), .Exp(Ye), .Man(Ym), + .NaN(YNaN), .SNaN(YSNaN), .ExpNonZero(YExpNonZero), + .Zero(YZero), .Inf(YInf), .ExpMax(YExpMax), .FracZero(YFracZero)); - unpackinput unpackinputZ (.In(Z), .FmtE, .Sgn(ZSgnE), .Exp(ZExpE), .Man(ZManE), - .NaN(ZNaNE), .SNaN(ZSNaNE), .ExpNonZero(ZExpNonZero), - .Zero(ZZeroE), .Inf(ZInfE), .ExpMax(ZExpMaxE), .FracZero(ZFracZero)); + unpackinput unpackinputZ (.In(Z), .Fmt, .Sgn(Zs), .Exp(Ze), .Man(Zm), + .NaN(ZNaN), .SNaN(ZSNaN), .ExpNonZero(ZExpNonZero), + .Zero(ZZero), .Inf(ZInf), .ExpMax(ZExpMax), .FracZero(ZFracZero)); // is the input denormalized - assign XDenormE = ~XExpNonZero & ~XFracZero; - assign ZDenormE = ~ZExpNonZero & ~ZFracZero; + assign 
XDenorm = ~XExpNonZero & ~XFracZero; + assign ZDenorm = ~ZExpNonZero & ~ZFracZero; endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/unpackinput.sv b/pipelined/src/fpu/unpackinput.sv index 2b078cc6..7be92250 100644 --- a/pipelined/src/fpu/unpackinput.sv +++ b/pipelined/src/fpu/unpackinput.sv @@ -30,7 +30,7 @@ module unpackinput ( input logic [`FLEN-1:0] In, // inputs from register file - input logic [`FMTBITS-1:0] FmtE, // format signal 00 - single 01 - double 11 - quad 10 - half + input logic [`FMTBITS-1:0] Fmt, // format signal 00 - single 01 - double 11 - quad 10 - half output logic Sgn, // sign bits of XYZ output logic [`NE-1:0] Exp, // exponents of XYZ (converted to largest supported precision) output logic [`NF:0] Man, // mantissas of XYZ (converted to largest supported precision) @@ -74,16 +74,16 @@ module unpackinput ( // quad and half // double and half - assign BadNaNBox = ~(FmtE|(&In[`FLEN-1:`LEN1])); // Check NaN boxing + assign BadNaNBox = ~(Fmt|(&In[`FLEN-1:`LEN1])); // Check NaN boxing // choose sign bit depending on format - 1=larger precsion 0=smaller precision - assign Sgn = FmtE ? In[`FLEN-1] : In[`LEN1-1]; + assign Sgn = Fmt ? In[`FLEN-1] : In[`LEN1-1]; // extract the fraction, add trailing zeroes to the mantissa if nessisary - assign Frac = FmtE ? In[`NF-1:0] : {In[`NF1-1:0], (`NF-`NF1)'(0)}; + assign Frac = Fmt ? In[`NF-1:0] : {In[`NF1-1:0], (`NF-`NF1)'(0)}; // is the exponent non-zero - assign ExpNonZero = FmtE ? |In[`FLEN-2:`NF] : |In[`LEN1-2:`NF1]; + assign ExpNonZero = Fmt ? |In[`FLEN-2:`NF] : |In[`LEN1-2:`NF1]; // example double to single conversion: // 1023 = 0011 1111 1111 @@ -95,10 +95,10 @@ module unpackinput ( // extract the exponent, converting the smaller exponent into the larger precision if nessisary // - if the original precision had a denormal number convert the exponent value 1 - assign Exp = FmtE ? 
{In[`FLEN-2:`NF+1], In[`NF]|~ExpNonZero} : {In[`LEN1-2], {`NE-`NE1{~In[`LEN1-2]}}, In[`LEN1-3:`NF1+1], In[`NF1]|~ExpNonZero}; + assign Exp = Fmt ? {In[`FLEN-2:`NF+1], In[`NF]|~ExpNonZero} : {In[`LEN1-2], {`NE-`NE1{~In[`LEN1-2]}}, In[`LEN1-3:`NF1+1], In[`NF1]|~ExpNonZero}; // is the exponent all 1's - assign ExpMax = FmtE ? &In[`FLEN-2:`NF] : &In[`LEN1-2:`NF1]; + assign ExpMax = Fmt ? &In[`FLEN-2:`NF] : &In[`LEN1-2:`NF1]; end else if (`FPSIZES == 3) begin // three floating point precsions supported @@ -122,7 +122,7 @@ module unpackinput ( // Check NaN boxing always_comb - case (FmtE) + case (Fmt) `FMT: BadNaNBox = 0; `FMT1: BadNaNBox = ~&In[`FLEN-1:`LEN1]; `FMT2: BadNaNBox = ~&In[`FLEN-1:`LEN2]; @@ -131,7 +131,7 @@ module unpackinput ( // extract the sign bit always_comb - case (FmtE) + case (Fmt) `FMT: Sgn = In[`FLEN-1]; `FMT1: Sgn = In[`LEN1-1]; `FMT2: Sgn = In[`LEN2-1]; @@ -140,7 +140,7 @@ module unpackinput ( // extract the fraction always_comb - case (FmtE) + case (Fmt) `FMT: Frac = In[`NF-1:0]; `FMT1: Frac = {In[`NF1-1:0], (`NF-`NF1)'(0)}; `FMT2: Frac = {In[`NF2-1:0], (`NF-`NF2)'(0)}; @@ -149,7 +149,7 @@ module unpackinput ( // is the exponent non-zero always_comb - case (FmtE) + case (Fmt) `FMT: ExpNonZero = |In[`FLEN-2:`NF]; // if input is largest precision (`FLEN - ie quad or double) `FMT1: ExpNonZero = |In[`LEN1-2:`NF1]; // if input is larger precsion (`LEN1 - double or single) `FMT2: ExpNonZero = |In[`LEN2-2:`NF2]; // if input is smallest precsion (`LEN2 - single or half) @@ -166,7 +166,7 @@ module unpackinput ( // convert the larger precision's exponent to use the largest precision's bias always_comb - case (FmtE) + case (Fmt) `FMT: Exp = {In[`FLEN-2:`NF+1], In[`NF]|~ExpNonZero}; `FMT1: Exp = {In[`LEN1-2], {`NE-`NE1{~In[`LEN1-2]}}, In[`LEN1-3:`NF1+1], In[`NF1]|~ExpNonZero}; `FMT2: Exp = {In[`LEN2-2], {`NE-`NE2{~In[`LEN2-2]}}, In[`LEN2-3:`NF2+1], In[`NF2]|~ExpNonZero}; @@ -175,7 +175,7 @@ module unpackinput ( // is the exponent all 1's always_comb - case 
(FmtE) + case (Fmt) `FMT: ExpMax = &In[`FLEN-2:`NF]; `FMT1: ExpMax = &In[`LEN1-2:`NF1]; `FMT2: ExpMax = &In[`LEN2-2:`NF2]; @@ -194,7 +194,7 @@ module unpackinput ( // Check NaN boxing always_comb - case (FmtE) + case (Fmt) 2'b11: BadNaNBox = 0; 2'b01: BadNaNBox = ~&In[`Q_LEN-1:`D_LEN]; 2'b00: BadNaNBox = ~&In[`Q_LEN-1:`S_LEN]; @@ -203,7 +203,7 @@ module unpackinput ( // extract sign bit always_comb - case (FmtE) + case (Fmt) 2'b11: Sgn = In[`Q_LEN-1]; 2'b01: Sgn = In[`D_LEN-1]; 2'b00: Sgn = In[`S_LEN-1]; @@ -213,7 +213,7 @@ module unpackinput ( // extract the fraction always_comb - case (FmtE) + case (Fmt) 2'b11: Frac = In[`Q_NF-1:0]; 2'b01: Frac = {In[`D_NF-1:0], (`Q_NF-`D_NF)'(0)}; 2'b00: Frac = {In[`S_NF-1:0], (`Q_NF-`S_NF)'(0)}; @@ -222,7 +222,7 @@ module unpackinput ( // is the exponent non-zero always_comb - case (FmtE) + case (Fmt) 2'b11: ExpNonZero = |In[`Q_LEN-2:`Q_NF]; 2'b01: ExpNonZero = |In[`D_LEN-2:`D_NF]; 2'b00: ExpNonZero = |In[`S_LEN-2:`S_NF]; @@ -240,7 +240,7 @@ module unpackinput ( // convert the double precsion exponent into quad precsion always_comb - case (FmtE) + case (Fmt) 2'b11: Exp = {In[`Q_LEN-2:`Q_NF+1], In[`Q_NF]|~ExpNonZero}; 2'b01: Exp = {In[`D_LEN-2], {`Q_NE-`D_NE{~In[`D_LEN-2]}}, In[`D_LEN-3:`D_NF+1], In[`D_NF]|~ExpNonZero}; 2'b00: Exp = {In[`S_LEN-2], {`Q_NE-`S_NE{~In[`S_LEN-2]}}, In[`S_LEN-3:`S_NF+1], In[`S_NF]|~ExpNonZero}; @@ -250,7 +250,7 @@ module unpackinput ( // is the exponent all 1's always_comb - case (FmtE) + case (Fmt) 2'b11: ExpMax = &In[`Q_LEN-2:`Q_NF]; 2'b01: ExpMax = &In[`D_LEN-2:`D_NF]; 2'b00: ExpMax = &In[`S_LEN-2:`S_NF];