moved ctrl signal registers into fctrl, also a lot of code cleaning

This commit is contained in:
Katherine Parry 2022-07-20 02:27:39 +00:00
parent cce57fdcc5
commit b26297e874
18 changed files with 439 additions and 400 deletions

View File

@ -34,20 +34,20 @@ module divsqrt(
input logic clk, input logic clk,
input logic reset, input logic reset,
input logic [`FMTBITS-1:0] FmtE, input logic [`FMTBITS-1:0] FmtE,
input logic [`NF:0] XManE, YManE, input logic [`NF:0] XmE, YmE,
input logic [`NE-1:0] XExpE, YExpE, input logic [`NE-1:0] XeE, YeE,
input logic XInfE, YInfE, input logic XInfE, YInfE,
input logic XZeroE, YZeroE, input logic XZeroE, YZeroE,
input logic XNaNE, YNaNE, input logic XNaNE, YNaNE,
input logic DivStartE, input logic DivStartE,
input logic StallM, input logic StallM,
input logic StallE, input logic StallE,
output logic DivStickyM, output logic DivSM,
output logic DivBusy, output logic DivBusy,
output logic DivDone, output logic DivDone,
output logic [`NE+1:0] DivCalcExpM, output logic [`NE+1:0] QeM,
output logic [`DURLEN-1:0] EarlyTermShiftM, output logic [`DURLEN-1:0] EarlyTermShiftM,
output logic [`QLEN-1-(`RADIX/4):0] QuotM output logic [`QLEN-1-(`RADIX/4):0] QmM
// output logic [`XLEN-1:0] RemM, // output logic [`XLEN-1:0] RemM,
); );
@ -60,10 +60,10 @@ module divsqrt(
logic [`DURLEN-1:0] Dur; logic [`DURLEN-1:0] Dur;
logic NegSticky; logic NegSticky;
srtpreproc srtpreproc(.Xm(XManE), .Dur, .Ym(YManE), .X,.Dpreproc, .XZeroCnt, .YZeroCnt); srtpreproc srtpreproc(.Xm(XmE), .Dur, .Ym(YmE), .X,.Dpreproc, .XZeroCnt, .YZeroCnt);
srtfsm srtfsm(.reset, .NextWSN, .NextWCN, .WS, .WC, .Dur, .DivBusy, .clk, .DivStart(DivStartE),.StallE, .StallM, .DivDone, .XZeroE, .YZeroE, .DivStickyE(DivStickyM), .XNaNE, .YNaNE, srtfsm srtfsm(.reset, .NextWSN, .NextWCN, .WS, .WC, .Dur, .DivBusy, .clk, .DivStart(DivStartE),.StallE, .StallM, .DivDone, .XZeroE, .YZeroE, .DivSE(DivSM), .XNaNE, .YNaNE,
.StickyWSA, .XInfE, .YInfE, .NegSticky(NegSticky), .EarlyTermShiftE(EarlyTermShiftM)); .StickyWSA, .XInfE, .YInfE, .NegSticky(NegSticky), .EarlyTermShiftE(EarlyTermShiftM));
srt srt(.clk, .FmtE, .X,.Dpreproc, .NegSticky, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN, .DivStart(DivStartE), .Xe(XExpE), .Ye(YExpE), .XZeroE, .YZeroE, srt srt(.clk, .FmtE, .X,.Dpreproc, .NegSticky, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN, .DivStart(DivStartE), .Xe(XeE), .Ye(YeE), .XZeroE, .YZeroE,
.StickyWSA, .DivBusy, .Quot(QuotM), .Rem(), .DivCalcExpM); .StickyWSA, .DivBusy, .Qm(QmM), .Rem(), .QeM);
endmodule endmodule

View File

@ -29,29 +29,29 @@
`include "wally-config.vh" `include "wally-config.vh"
module fclassify ( module fclassify (
input logic XSgnE, // sign bit input logic Xs, // sign bit
input logic XNaNE, // is NaN input logic XNaN, // is NaN
input logic XSNaNE, // is signaling NaN input logic XSNaN, // is signaling NaN
input logic XDenormE, // is denormal input logic XDenorm,// is denormal
input logic XZeroE, // is zero input logic XZero, // is zero
input logic XInfE, // is infinity input logic XInf, // is infinity
output logic [`XLEN-1:0] ClassResE // classify result output logic [`XLEN-1:0] ClassRes// classify result
); );
logic PInf, PZero, PNorm, PDenorm; logic PInf, PZero, PNorm, PDenorm;
logic NInf, NZero, NNorm, NDenorm; logic NInf, NZero, NNorm, NDenorm;
logic XNormE; logic XNorm;
// determine the sub categories // determine the sub categories
assign XNormE = ~(XNaNE | XInfE | XDenormE | XZeroE); assign XNorm= ~(XNaN | XInf| XDenorm| XZero);
assign PInf = ~XSgnE&XInfE; assign PInf = ~Xs&XInf;
assign NInf = XSgnE&XInfE; assign NInf = Xs&XInf;
assign PNorm = ~XSgnE&XNormE; assign PNorm = ~Xs&XNorm;
assign NNorm = XSgnE&XNormE; assign NNorm = Xs&XNorm;
assign PDenorm = ~XSgnE&XDenormE; assign PDenorm = ~Xs&XDenorm;
assign NDenorm = XSgnE&XDenormE; assign NDenorm = Xs&XDenorm;
assign PZero = ~XSgnE&XZeroE; assign PZero = ~Xs&XZero;
assign NZero = XSgnE&XZeroE; assign NZero = Xs&XZero;
// determine sub category and combine into the result // determine sub category and combine into the result
// bit 0 - -Inf // bit 0 - -Inf
@ -64,6 +64,6 @@ module fclassify (
// bit 7 - +Inf // bit 7 - +Inf
// bit 8 - signaling NaN // bit 8 - signaling NaN
// bit 9 - quiet NaN // bit 9 - quiet NaN
assign ClassResE = {{`XLEN-10{1'b0}}, XNaNE&~XSNaNE, XSNaNE, PInf, PNorm, PDenorm, PZero, NZero, NDenorm, NNorm, NInf}; assign ClassRes = {{`XLEN-10{1'b0}}, XNaN&~XSNaN, XSNaN, PInf, PNorm, PDenorm, PZero, NZero, NDenorm, NNorm, NInf};
endmodule endmodule

View File

@ -27,9 +27,10 @@
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE // TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
// OR OTHER DEALINGS IN THE SOFTWARE. // OR OTHER DEALINGS IN THE SOFTWARE.
//////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh" `include "wally-config.vh"
// FOpCtrlE values // OpCtrl values
// 110 min // 110 min
// 101 max // 101 max
// 010 equal // 010 equal
@ -37,36 +38,32 @@
// 011 less than or equal // 011 less than or equal
module fcmp ( module fcmp (
input logic [`FMTBITS-1:0] FmtE, // precision 1 = double 0 = single input logic [`FMTBITS-1:0] Fmt, // format of fp number
input logic [2:0] FOpCtrlE, // see above table input logic [2:0] OpCtrl, // see above table
input logic XSgnE, YSgnE, // input signs input logic Xs, Ys, // input signs
input logic [`NE-1:0] XExpE, YExpE, // input exponents input logic [`NE-1:0] Xe, Ye, // input exponents
input logic [`NF:0] XManE, YManE, // input mantissa input logic [`NF:0] Xm, Ym, // input mantissa
input logic XZeroE, YZeroE, // is zero input logic XZero, YZero, // is zero
input logic XNaNE, YNaNE, // is NaN input logic XNaN, YNaN, // is NaN
input logic XSNaNE, YSNaNE, // is signaling NaN input logic XSNaN, YSNaN, // is signaling NaN
input logic [`FLEN-1:0] FSrcXE, FSrcYE, // original, non-converted to double, inputs input logic [`FLEN-1:0] X, Y, // original inputs (before unpacker)
output logic CmpNVE, // invalid flag output logic CmpNV, // invalid flag
output logic [`FLEN-1:0] CmpFpResE, // compare resilt output logic [`FLEN-1:0] CmpFpRes, // compare floating-point result
output logic [`XLEN-1:0] CmpIntResE // compare resilt output logic [`XLEN-1:0] CmpIntRes // compare integer result
); );
logic LTabs, LT, EQ; // is X < or > or = Y logic LTabs, LT, EQ; // is X < or > or = Y
logic [`FLEN-1:0] NaNRes; logic [`FLEN-1:0] NaNRes; // NaN result
logic BothZero, EitherNaN, EitherSNaN; logic BothZero; // are both inputs zero
logic EitherNaN, EitherSNaN; // are either input a (signaling) NaN
assign LTabs= {1'b0, XExpE, XManE} < {1'b0, YExpE, YManE}; // unsigned comparison, treating FP as integers assign LTabs= {1'b0, Xe, Xm} < {1'b0, Ye, Ym}; // unsigned comparison, treating FP as integers
assign LT = (XSgnE & ~YSgnE) | (XSgnE & YSgnE & ~LTabs & ~EQ) | (~XSgnE & ~YSgnE & LTabs); assign LT = (Xs & ~Ys) | (Xs & Ys & ~LTabs & ~EQ) | (~Xs & ~Ys & LTabs); // signed comparison
// assign LT = {~XSgnE, XExpE, XManE[`NF-1:0]} < {~YSgnE, YExpE, YManE[`NF-1:0]}; // *** James look at whether we can simplify to this, but it fails regression assign EQ = (X == Y);
//assign LT = $signed({XSgnE, XExpE, XManE[`NF-1:0]}) < $signed({YSgnE, YExpE, YManE[`NF-1:0]}); assign BothZero = XZero&YZero;
//assign LT = XInt < YInt; assign EitherNaN = XNaN|YNaN;
// assign LT = XSgnE^YSgnE ? XSgnE : XExpE==YExpE ? ((XManE<YManE)^XSgnE)&~EQ : (XExpE<YExpE)^XSgnE; assign EitherSNaN = XSNaN|YSNaN;
assign EQ = (FSrcXE == FSrcYE);
assign BothZero = XZeroE&YZeroE;
assign EitherNaN = XNaNE|YNaNE;
assign EitherSNaN = XSNaNE|YSNaNE;
// flags // flags
@ -74,78 +71,91 @@ module fcmp (
// LT/LE - signaling - sets invalid if NaN input // LT/LE - signaling - sets invalid if NaN input
// EQ - quiet - sets invalid if signaling NaN input // EQ - quiet - sets invalid if signaling NaN input
always_comb begin always_comb begin
case (FOpCtrlE[2:0]) case (OpCtrl[2:0])
3'b110: CmpNVE = EitherSNaN;//min 3'b110: CmpNV = EitherSNaN;//min
3'b101: CmpNVE = EitherSNaN;//max 3'b101: CmpNV = EitherSNaN;//max
3'b010: CmpNVE = EitherSNaN;//equal 3'b010: CmpNV = EitherSNaN;//equal
3'b001: CmpNVE = EitherNaN;//less than 3'b001: CmpNV = EitherNaN;//less than
3'b011: CmpNVE = EitherNaN;//less than or equal 3'b011: CmpNV = EitherNaN;//less than or equal
default: CmpNVE = 1'bx; default: CmpNV = 1'bx;
endcase endcase
end end
// Min/Max
// - outputs the min/max of X and Y
// - -0 < 0
// - if both are NaN return quiet X
// - if one is a NaN output the non-NaN
// LT/LE/EQ
// - -0 = 0
// - inf = inf and -inf = -inf
// - return 0 if comparison with NaN (unordered)
// fmin/fmax of two NaNs returns a quiet NaN of the appropriate size // fmin/fmax of two NaNs returns a quiet NaN of the appropriate size
// for IEEE, return the payload of X // for IEEE, return the payload of X
// for RISC-V, return the canonical NaN // for RISC-V, return the canonical NaN
// select the NaN result
if (`FPSIZES == 1) if (`FPSIZES == 1)
if(`IEEE754) assign NaNRes = {XSgnE, {`NE{1'b1}}, 1'b1, XManE[`NF-2:0]}; if(`IEEE754) assign NaNRes = {Xs, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]};
else assign NaNRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}}; else assign NaNRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
else if (`FPSIZES == 2) else if (`FPSIZES == 2)
if(`IEEE754) assign NaNRes = FmtE ? {XSgnE, {`NE{1'b1}}, 1'b1, XManE[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, XSgnE, {`NE1{1'b1}}, 1'b1, XManE[`NF-2:`NF-`NF1]}; if(`IEEE754) assign NaNRes = Fmt ? {Xs, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, Xs, {`NE1{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF1]};
else assign NaNRes = FmtE ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)}; else assign NaNRes = Fmt ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
else if (`FPSIZES == 3) else if (`FPSIZES == 3)
always_comb always_comb
case (FmtE) case (Fmt)
`FMT: `FMT:
if(`IEEE754) NaNRes = {XSgnE, {`NE{1'b1}}, 1'b1, XManE[`NF-2:0]}; if(`IEEE754) NaNRes = {Xs, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]};
else NaNRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}}; else NaNRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
`FMT1: `FMT1:
if(`IEEE754) NaNRes = {{`FLEN-`LEN1{1'b1}}, XSgnE, {`NE1{1'b1}}, 1'b1, XManE[`NF-2:`NF-`NF1]}; if(`IEEE754) NaNRes = {{`FLEN-`LEN1{1'b1}}, Xs, {`NE1{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF1]};
else NaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)}; else NaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
`FMT2: `FMT2:
if(`IEEE754) NaNRes = {{`FLEN-`LEN2{1'b1}}, XSgnE, {`NE2{1'b1}}, 1'b1, XManE[`NF-2:`NF-`NF2]}; if(`IEEE754) NaNRes = {{`FLEN-`LEN2{1'b1}}, Xs, {`NE2{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF2]};
else NaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)}; else NaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)};
default: NaNRes = {`FLEN{1'bx}}; default: NaNRes = {`FLEN{1'bx}};
endcase endcase
else if (`FPSIZES == 4) else if (`FPSIZES == 4)
always_comb always_comb
case (FmtE) case (Fmt)
2'h3: 2'h3:
if(`IEEE754) NaNRes = {XSgnE, {`NE{1'b1}}, 1'b1, XManE[`NF-2:0]}; if(`IEEE754) NaNRes = {Xs, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]};
else NaNRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}}; else NaNRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
2'h1: 2'h1:
if(`IEEE754) NaNRes = {{`FLEN-`D_LEN{1'b1}}, XSgnE, {`D_NE{1'b1}}, 1'b1, XManE[`NF-2:`NF-`D_NF]}; if(`IEEE754) NaNRes = {{`FLEN-`D_LEN{1'b1}}, Xs, {`D_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`D_NF]};
else NaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)}; else NaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)};
2'h0: 2'h0:
if(`IEEE754) NaNRes = {{`FLEN-`S_LEN{1'b1}}, XSgnE, {`S_NE{1'b1}}, 1'b1, XManE[`NF-2:`NF-`S_NF]}; if(`IEEE754) NaNRes = {{`FLEN-`S_LEN{1'b1}}, Xs, {`S_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`S_NF]};
else NaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)}; else NaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)};
2'h2: 2'h2:
if(`IEEE754) NaNRes = {{`FLEN-`H_LEN{1'b1}}, XSgnE, {`H_NE{1'b1}}, 1'b1, XManE[`NF-2:`NF-`H_NF]}; if(`IEEE754) NaNRes = {{`FLEN-`H_LEN{1'b1}}, Xs, {`H_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`H_NF]};
else NaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)}; else NaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)};
endcase endcase
// when one input is a NaN -output the non-NaN
assign CmpFpResE = FOpCtrlE[0] ? XNaNE ? YNaNE ? NaNRes : FSrcYE // Max
: YNaNE ? FSrcXE : LT ? FSrcYE : FSrcXE :
XNaNE ? YNaNE ? NaNRes : FSrcYE // Min
: YNaNE ? FSrcXE : LT ? FSrcXE : FSrcYE;
assign CmpIntResE = {(`XLEN-1)'(0), (((EQ|BothZero)&FOpCtrlE[1])|(LT&FOpCtrlE[0]&~BothZero))&~EitherNaN}; // Min/Max
// - outputs the min/max of X and Y
// - -0 < 0
// - if both are NaN return quiet X
// - if one is a NaN output the non-NaN
always_comb
if(OpCtrl[0]) // MAX
if(XNaN)
if(YNaN) CmpFpRes = NaNRes; // X = NaN Y = NaN
else CmpFpRes = Y; // X = NaN Y != NaN
else
if(YNaN) CmpFpRes = X; // X != NaN Y = NaN
else // X,Y != NaN
if(LT) CmpFpRes = Y; // X < Y
else CmpFpRes = X; // X > Y
else // MIN
if(XNaN)
if(YNaN) CmpFpRes = NaNRes; // X = NaN Y = NaN
else CmpFpRes = Y; // X = NaN Y != NaN
else
if(YNaN) CmpFpRes = X; // X != NaN Y = NaN
else // X,Y != NaN
if(LT) CmpFpRes = X; // X < Y
else CmpFpRes = Y; // X > Y
// LT/LE/EQ
// - -0 = 0
// - inf = inf and -inf = -inf
// - return 0 if comparison with NaN (unordered)
assign CmpIntRes = {(`XLEN-1)'(0), (((EQ|BothZero)&OpCtrl[1])|(LT&OpCtrl[0]&~BothZero))&~EitherNaN};
endmodule endmodule

View File

@ -29,25 +29,41 @@
`include "wally-config.vh" `include "wally-config.vh"
module fctrl ( module fctrl (
input logic clk,
input logic reset,
input logic StallE, StallM, StallW, // stall signals
input logic FlushE, FlushM, FlushW, // flush signals
input logic [31:0] InstrD,
input logic [6:0] Funct7D, // bits 31:25 of instruction - may contain percision input logic [6:0] Funct7D, // bits 31:25 of instruction - may contain percision
input logic [6:0] OpD, // bits 6:0 of instruction input logic [6:0] OpD, // bits 6:0 of instruction
input logic [4:0] Rs2D, // bits 24:20 of instruction input logic [4:0] Rs2D, // bits 24:20 of instruction
input logic [2:0] Funct3D, // bits 14:12 of instruction - may contain rounding mode input logic [2:0] Funct3D, // bits 14:12 of instruction - may contain rounding mode
input logic [2:0] FRM_REGW, // rounding mode from CSR input logic [2:0] FRM_REGW, // rounding mode from CSR
input logic [1:0] STATUS_FS, // is FPU enabled? input logic [1:0] STATUS_FS, // is FPU enabled?
input logic FDivBusyE, // is the divider busy
output logic IllegalFPUInstrD, // Is the instruction an illegal fpu instruction output logic IllegalFPUInstrD, // Is the instruction an illegal fpu instruction
output logic FRegWriteD, // FP register write enable output logic FRegWriteM, FRegWriteW, // FP register write enable
output logic FDivStartD, // Start division or squareroot output logic [2:0] FrmM, // FP rounding mode
output logic [1:0] FResSelD, // select result to be written to fp register output logic [`FMTBITS-1:0] FmtE, FmtM, // FP format
output logic [2:0] FOpCtrlD, // chooses which opperation to do - specifics shown at bottom of module and in each unit output logic DivStartE, // Start division or squareroot
output logic [1:0] PostProcSelD, output logic FWriteIntE, FWriteIntM, // Write to integer register
output logic [`FMTBITS-1:0] FmtD, // precision - single-0 double-1 output logic [2:0] OpCtrlE, OpCtrlM, // Select which opperation to do in each component
output logic [2:0] FrmD, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude output logic [1:0] FResSelE, FResSelM, FResSelW, // Select one of the results that finish in the memory stage
output logic FWriteIntD // is the result written to the integer register output logic [1:0] PostProcSelE, PostProcSelM, // select result in the post processing unit
output logic [4:0] Adr1E, Adr2E, Adr3E // adresses of each input
); );
`define FCTRLW 11 `define FCTRLW 11
logic [`FCTRLW-1:0] ControlsD; logic [`FCTRLW-1:0] ControlsD;
logic FRegWriteD; // FP register write enable
logic DivStartD; // integer register write enable
logic FWriteIntD; // integer register write enable
logic FRegWriteE; // FP register write enable
logic [2:0] OpCtrlD; // Select which opperation to do in each component
logic [1:0] PostProcSelD; // select result in the post processing unit
logic [1:0] FResSelD; // Select one of the results that finish in the memory stage
logic [2:0] FrmD, FrmE; // FP rounding mode
logic [`FMTBITS-1:0] FmtD; // FP format
//*** will putting x for don't cares reduce area in synthisis??? //*** will putting x for don't cares reduce area in synthisis???
// FPU Instruction Decoder // FPU Instruction Decoder
always_comb always_comb
@ -130,7 +146,7 @@ module fctrl (
endcase endcase
// unswizzle control bits // unswizzle control bits
assign {FRegWriteD, FWriteIntD, FResSelD, PostProcSelD, FOpCtrlD, FDivStartD, IllegalFPUInstrD} = ControlsD; assign {FRegWriteD, FWriteIntD, FResSelD, PostProcSelD, OpCtrlD, DivStartD, IllegalFPUInstrD} = ControlsD;
// rounding modes: // rounding modes:
// 000 - round to nearest, ties to even // 000 - round to nearest, ties to even
@ -168,7 +184,7 @@ module fctrl (
// 10 fma // 10 fma
// Other Sel: // Other Sel:
// Ctrl signal = {FOpCtrl[2], &FOpctrl[1:0]} // Ctrl signal = {OpCtrl[2], &FOpctrl[1:0]}
// 000 - sign 00 // 000 - sign 00
// 001 - negate sign 00 // 001 - negate sign 00
// 010 - xor sign 00 // 010 - xor sign 00
@ -205,5 +221,20 @@ module fctrl (
// 01 - negate sign // 01 - negate sign
// 10 - xor sign // 10 - xor sign
// D/E pipleine register
flopenrc #(12+`FMTBITS) DECtrlReg3(clk, reset, FlushE, ~StallE,
{FRegWriteD, PostProcSelD, FResSelD, FrmD, FmtD, OpCtrlD, FWriteIntD},
{FRegWriteE, PostProcSelE, FResSelE, FrmE, FmtE, OpCtrlE, FWriteIntE});
flopenrc #(15) DEAdrReg(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]},
{Adr1E, Adr2E, Adr3E});
flopenrc #(1) DEDivStartReg(clk, reset, FlushE, ~StallE|FDivBusyE, DivStartD, DivStartE);
// E/M pipleine register
flopenrc #(12+int'(`FMTBITS)) EMCtrlReg (clk, reset, FlushM, ~StallM,
{FRegWriteE, FResSelE, PostProcSelE, FrmE, FmtE, OpCtrlE, FWriteIntE},
{FRegWriteM, FResSelM, PostProcSelM, FrmM, FmtM, OpCtrlM, FWriteIntM});
// M/W pipleine register
flopenrc #(3) MWCtrlReg(clk, reset, FlushW, ~StallW,
{FRegWriteM, FResSelM},
{FRegWriteW, FResSelW});
endmodule endmodule

View File

@ -35,7 +35,7 @@ module fcvt (
input logic [`NE-1:0] Xe, // input's exponent input logic [`NE-1:0] Xe, // input's exponent
input logic [`NF:0] Xm, // input's fraction input logic [`NF:0] Xm, // input's fraction
input logic [`XLEN-1:0] Int, // integer input - from IEU input logic [`XLEN-1:0] Int, // integer input - from IEU
input logic [2:0] FOpCtrl, // choose which opperation (look below for values) input logic [2:0] OpCtrl, // choose which opperation (look below for values)
input logic ToInt, // is fp->int (since it's writting to the integer register) input logic ToInt, // is fp->int (since it's writting to the integer register)
input logic XZero, // is the input zero input logic XZero, // is the input zero
input logic XDenorm, // is the input denormalized input logic XDenorm, // is the input denormalized
@ -73,17 +73,17 @@ module fcvt (
// seperate OpCtrl for code readability // seperate OpCtrl for code readability
assign Signed = FOpCtrl[0]; assign Signed = OpCtrl[0];
assign Int64 = FOpCtrl[1]; assign Int64 = OpCtrl[1];
assign IntToFp = FOpCtrl[2]; assign IntToFp = OpCtrl[2];
// choose the ouptut format depending on the opperation // choose the ouptut format depending on the opperation
// - fp -> fp: OpCtrl contains the percision of the output // - fp -> fp: OpCtrl contains the percision of the output
// - int -> fp: Fmt contains the percision of the output // - int -> fp: Fmt contains the percision of the output
if (`FPSIZES == 2) if (`FPSIZES == 2)
assign OutFmt = IntToFp ? Fmt : (FOpCtrl[1:0] == `FMT); assign OutFmt = IntToFp ? Fmt : (OpCtrl[1:0] == `FMT);
else if (`FPSIZES == 3 | `FPSIZES == 4) else if (`FPSIZES == 3 | `FPSIZES == 4)
assign OutFmt = IntToFp ? Fmt : FOpCtrl[1:0]; assign OutFmt = IntToFp ? Fmt : OpCtrl[1:0];
/////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////

View File

@ -31,20 +31,20 @@
`include "wally-config.vh" `include "wally-config.vh"
module fhazard( module fhazard(
input logic [4:0] Adr1E, Adr2E, Adr3E, // read data adresses input logic [4:0] Adr1E, Adr2E, Adr3E, // read data adresses
input logic FRegWriteM, FRegWriteW, // is the fp register being written to input logic FRegWriteM, FRegWriteW, // is the fp register being written to
input logic [4:0] RdM, RdW, // the adress being written to input logic [4:0] RdM, RdW, // the adress being written to
input logic [1:0] FResSelM, // the result being selected input logic [1:0] FResSelM, // the result being selected
output logic FStallD, // stall the decode stage output logic FStallD, // stall the decode stage
output logic [1:0] FForwardXE, FForwardYE, FForwardZE // select a forwarded value output logic [1:0] ForwardXE, ForwardYE, ForwardZE // select a forwarded value
); );
always_comb begin always_comb begin
// set defaults // set defaults
FForwardXE = 2'b00; // choose FRD1E ForwardXE = 2'b00; // choose FRD1E
FForwardYE = 2'b00; // choose FRD2E ForwardYE = 2'b00; // choose FRD2E
FForwardZE = 2'b00; // choose FRD3E ForwardZE = 2'b00; // choose FRD3E
FStallD = 0; FStallD = 0;
//*** this hazard unit is waiting for all three inputs, change so that if an input isnt used then don't wait //*** this hazard unit is waiting for all three inputs, change so that if an input isnt used then don't wait
@ -52,28 +52,28 @@ module fhazard(
// if the needed value is in the memory stage - input 1 // if the needed value is in the memory stage - input 1
if ((Adr1E == RdM) & FRegWriteM) if ((Adr1E == RdM) & FRegWriteM)
// if the result will be FResM (can be taken from the memory stage) // if the result will be FResM (can be taken from the memory stage)
if(FResSelM == 2'b00) FForwardXE = 2'b10; // choose FResM if(FResSelM == 2'b00) ForwardXE = 2'b10; // choose FResM
else FStallD = 1; // otherwise stall else FStallD = 1; // otherwise stall
// if the needed value is in the writeback stage // if the needed value is in the writeback stage
else if ((Adr1E == RdW) & FRegWriteW) FForwardXE = 2'b01; // choose FPUResult64W else if ((Adr1E == RdW) & FRegWriteW) ForwardXE = 2'b01; // choose FPUResult64W
// if the needed value is in the memory stage - input 2 // if the needed value is in the memory stage - input 2
if ((Adr2E == RdM) & FRegWriteM) if ((Adr2E == RdM) & FRegWriteM)
// if the result will be FResM (can be taken from the memory stage) // if the result will be FResM (can be taken from the memory stage)
if(FResSelM == 2'b00) FForwardYE = 2'b10; // choose FResM if(FResSelM == 2'b00) ForwardYE = 2'b10; // choose FResM
else FStallD = 1; // otherwise stall else FStallD = 1; // otherwise stall
// if the needed value is in the writeback stage // if the needed value is in the writeback stage
else if ((Adr2E == RdW) & FRegWriteW) FForwardYE = 2'b01; // choose FPUResult64W else if ((Adr2E == RdW) & FRegWriteW) ForwardYE = 2'b01; // choose FPUResult64W
// if the needed value is in the memory stage - input 3 // if the needed value is in the memory stage - input 3
if ((Adr3E == RdM) & FRegWriteM) if ((Adr3E == RdM) & FRegWriteM)
// if the result will be FResM (can be taken from the memory stage) // if the result will be FResM (can be taken from the memory stage)
if(FResSelM == 2'b00) FForwardZE = 2'b10; // choose FResM if(FResSelM == 2'b00) ForwardZE = 2'b10; // choose FResM
else FStallD = 1; // otherwise stall else FStallD = 1; // otherwise stall
// if the needed value is in the writeback stage // if the needed value is in the writeback stage
else if ((Adr3E == RdW) & FRegWriteW) FForwardZE = 2'b01; // choose FPUResult64W else if ((Adr3E == RdW) & FRegWriteW) ForwardZE = 2'b01; // choose FPUResult64W
end end

View File

@ -34,7 +34,7 @@ module fma(
input logic [`NE-1:0] Xe, Ye, Ze, // input's biased exponents in B(NE.0) format input logic [`NE-1:0] Xe, Ye, Ze, // input's biased exponents in B(NE.0) format
input logic [`NF:0] Xm, Ym, Zm, // input's significands in U(0.NF) format input logic [`NF:0] Xm, Ym, Zm, // input's significands in U(0.NF) format
input logic XZero, YZero, ZZero, // is the input zero input logic XZero, YZero, ZZero, // is the input zero
input logic [2:0] FOpCtrl, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y) input logic [2:0] OpCtrl, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y)
input logic [`FMTBITS-1:0] Fmt, // format of the result single double half or quad input logic [`FMTBITS-1:0] Fmt, // format of the result single double half or quad
output logic [`NE+1:0] Pe, // the product's exponent B(NE+2.0) format; adds 2 bits to allow for size of number and negative sign output logic [`NE+1:0] Pe, // the product's exponent B(NE+2.0) format; adds 2 bits to allow for size of number and negative sign
output logic ZmSticky, // sticky bit that is calculated during alignment output logic ZmSticky, // sticky bit that is calculated during alignment
@ -46,7 +46,7 @@ module fma(
output logic Ps, // the product's sign output logic Ps, // the product's sign
output logic Ss, // the sum's sign output logic Ss, // the sum's sign
output logic [`NE+1:0] Se, output logic [`NE+1:0] Se,
output logic [$clog2(3*`NF+7)-1:0] NCnt // normalization shift count output logic [$clog2(3*`NF+7)-1:0] SCnt // normalization shift count
); );
logic [2*`NF+1:0] Pm; // the product's significand in U(2.2Nf) format logic [2*`NF+1:0] Pm; // the product's significand in U(2.2Nf) format
@ -72,7 +72,7 @@ module fma(
// Alignment shifter // Alignment shifter
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
// calculate the signs and take the opperation into account // calculate the signs and take the opperation into account
sign sign(.FOpCtrl, .Xs, .Ys, .Zs, .Ps, .As); sign sign(.OpCtrl, .Xs, .Ys, .Zs, .Ps, .As);
align align(.Ze, .Zm, .XZero, .YZero, .ZZero, .Xe, .Ye, align align(.Ze, .Zm, .XZero, .YZero, .ZZero, .Xe, .Ye,
.Am, .ZmSticky, .KillProd); .Am, .ZmSticky, .KillProd);
@ -85,7 +85,7 @@ module fma(
add add(.Am, .Pm, .Ze, .Pe, .Ps, .As, .KillProd, .ZmSticky, .AmInv, .PmKilled, .NegSum, .InvA, .Sm, .Se, .Ss); add add(.Am, .Pm, .Ze, .Pe, .Ps, .As, .KillProd, .ZmSticky, .AmInv, .PmKilled, .NegSum, .InvA, .Sm, .Se, .Ss);
loa loa(.A(AmInv+{(3*`NF+6)'(0),InvA&~((ZmSticky&~KillProd))}), .P({PmKilled, 1'b0, InvA&Ps&ZmSticky&KillProd}), .NCnt); loa loa(.A(AmInv+{(3*`NF+6)'(0),InvA&~((ZmSticky&~KillProd))}), .P({PmKilled, 1'b0, InvA&Ps&ZmSticky&KillProd}), .SCnt);
endmodule endmodule
@ -120,7 +120,7 @@ endmodule
module sign( module sign(
input logic [2:0] FOpCtrl, // opperation contol input logic [2:0] OpCtrl, // opperation contol
input logic Xs, Ys, Zs, // sign of the inputs input logic Xs, Ys, Zs, // sign of the inputs
output logic Ps, // the product's sign - takes opperation into account output logic Ps, // the product's sign - takes opperation into account
output logic As // aligned addend sign used in fma - takes opperation into account output logic As // aligned addend sign used in fma - takes opperation into account
@ -130,9 +130,9 @@ module sign(
// Negate product's sign if FNMADD or FNMSUB // Negate product's sign if FNMADD or FNMSUB
// flip is negation opperation // flip is negation opperation
assign Ps = Xs ^ Ys ^ (FOpCtrl[1]&~FOpCtrl[2]); assign Ps = Xs ^ Ys ^ (OpCtrl[1]&~OpCtrl[2]);
// flip if subtraction // flip if subtraction
assign As = Zs^FOpCtrl[0]; assign As = Zs^OpCtrl[0];
endmodule endmodule
@ -275,7 +275,7 @@ endmodule
module loa( // [Schmookler & Nowka, Leading zero anticipation and detection, IEEE Sym. Computer Arithmetic, 2001] module loa( // [Schmookler & Nowka, Leading zero anticipation and detection, IEEE Sym. Computer Arithmetic, 2001]
input logic [3*`NF+6:0] A, // addend input logic [3*`NF+6:0] A, // addend
input logic [2*`NF+3:0] P, // product input logic [2*`NF+3:0] P, // product
output logic [$clog2(3*`NF+7)-1:0] NCnt // normalization shift count for the positive result output logic [$clog2(3*`NF+7)-1:0] SCnt // normalization shift count for the positive result
); );
logic [3*`NF+6:0] T; logic [3*`NF+6:0] T;
@ -300,6 +300,6 @@ module loa( // [Schmookler & Nowka, Leading zero anticipation and detection, IEE
lzc #(3*`NF+7) lzc (.num(f), .ZeroCnt(NCnt)); lzc #(3*`NF+7) lzc (.num(f), .ZeroCnt(SCnt));
endmodule endmodule

View File

@ -32,7 +32,7 @@ module fmashiftcalc(
input logic [3*`NF+5:0] FmaSm, // the positive sum input logic [3*`NF+5:0] FmaSm, // the positive sum
input logic [`NE-1:0] Ze, // exponent of Z input logic [`NE-1:0] Ze, // exponent of Z
input logic [`NE+1:0] FmaPe, // X exponent + Y exponent - bias input logic [`NE+1:0] FmaPe, // X exponent + Y exponent - bias
input logic [$clog2(3*`NF+7)-1:0] FmaNCnt, // normalization shift count input logic [$clog2(3*`NF+7)-1:0] FmaSCnt, // normalization shift count
input logic [`FMTBITS-1:0] Fmt, // precision 1 = double 0 = single input logic [`FMTBITS-1:0] Fmt, // precision 1 = double 0 = single
input logic FmaKillProd, // is the product set to zero input logic FmaKillProd, // is the product set to zero
input logic [`NE+1:0] FmaSe, input logic [`NE+1:0] FmaSe,
@ -52,7 +52,7 @@ module fmashiftcalc(
// Determine if the sum is zero // Determine if the sum is zero
assign FmaSZero = ~(|FmaSm); assign FmaSZero = ~(|FmaSm);
// calculate the sum's exponent // calculate the sum's exponent
assign PreNormSumExp = FmaSe + {{`NE+2-$unsigned($clog2(3*`NF+7)){1'b1}}, ~FmaNCnt} + (`NE+2)'(`NF+4); assign PreNormSumExp = FmaSe + {{`NE+2-$unsigned($clog2(3*`NF+7)){1'b1}}, ~FmaSCnt} + (`NE+2)'(`NF+4);
//convert the sum's exponent into the proper percision //convert the sum's exponent into the proper percision
if (`FPSIZES == 1) begin if (`FPSIZES == 1) begin
@ -152,7 +152,7 @@ module fmashiftcalc(
// - shift once if killing a product and the result is denormalized // - shift once if killing a product and the result is denormalized
assign FmaShiftIn = {3'b0, FmaSm}; assign FmaShiftIn = {3'b0, FmaSm};
if (`FPSIZES == 1) if (`FPSIZES == 1)
assign FmaShiftAmt = FmaPreResultDenorm ? FmaSe[$clog2(3*`NF+7)-1:0]+($clog2(3*`NF+7))'(`NF+3): FmaNCnt+1; assign FmaShiftAmt = FmaPreResultDenorm ? FmaSe[$clog2(3*`NF+7)-1:0]+($clog2(3*`NF+7))'(`NF+3): FmaSCnt+1;
else else
assign FmaShiftAmt = FmaPreResultDenorm ? FmaSe[$clog2(3*`NF+7)-1:0]+($clog2(3*`NF+7))'(`NF+3)+BiasCorr[$clog2(3*`NF+7)-1:0]: FmaNCnt+1; assign FmaShiftAmt = FmaPreResultDenorm ? FmaSe[$clog2(3*`NF+7)-1:0]+($clog2(3*`NF+7))'(`NF+3)+BiasCorr[$clog2(3*`NF+7)-1:0]: FmaSCnt+1;
endmodule endmodule

View File

@ -30,28 +30,28 @@
`include "wally-config.vh" `include "wally-config.vh"
module fpu ( module fpu (
input logic clk, input logic clk,
input logic reset, input logic reset,
input logic [2:0] FRM_REGW, // Rounding mode from CSR input logic [2:0] FRM_REGW, // Rounding mode (from CSR)
input logic [31:0] InstrD, // instruction from IFU input logic [31:0] InstrD, // instruction (from IFU)
input logic [`FLEN-1:0] ReadDataW,// Read data from memory input logic [`FLEN-1:0] ReadDataW, // Read data (from LSU)
input logic [`XLEN-1:0] ForwardedSrcAE, // Integer input being processed (from IEU) input logic [`XLEN-1:0] ForwardedSrcAE, // Integer input (from IEU)
input logic StallE, StallM, StallW, // stall signals from HZU input logic StallE, StallM, StallW, // stall signals (from HZU)
input logic FlushE, FlushM, FlushW, // flush signals from HZU input logic FlushE, FlushM, FlushW, // flush signals (from HZU)
input logic [4:0] RdM, RdW, // which FP register to write to (from IEU) input logic [4:0] RdM, RdW, // which FP register to write to (from IEU)
input logic [1:0] STATUS_FS, // Is floating-point enabled? input logic [1:0] STATUS_FS, // Is floating-point enabled? (From privileged unit)
output logic FRegWriteM, // FP register write enable output logic FRegWriteM, // FP register write enable (to privileged unit)
output logic FpLoadStoreM, // Fp load instruction? output logic FpLoadStoreM, // Fp load instruction? (to LSU)
output logic FStore2, output logic FStore2, // store two words into memory (to LSU)
output logic FStallD, // Stall the decode stage output logic FStallD, // Stall the decode stage (To HZU)
output logic FWriteIntE, // integer register write enables output logic FWriteIntE, // integer register write enable (to IEU)
output logic [`XLEN-1:0] FWriteDataE, // Data to be written to memory output logic [`XLEN-1:0] FWriteDataE, // Data to be written to memory (to IEU) - only used if `XLEN >`FLEN
output logic [`FLEN-1:0] FWriteDataM, // Data to be written to memory output logic [`FLEN-1:0] FWriteDataM, // Data to be written to memory (to IEU) - only used if `XLEN <`FLEN
output logic [`XLEN-1:0] FIntResM, // data to be written to integer register output logic [`XLEN-1:0] FIntResM, // data to be written to integer register (to IEU)
output logic [`XLEN-1:0] FCvtIntResW, // data to be written to integer register output logic [`XLEN-1:0] FCvtIntResW, // convert result to to be written to integer register (to IEU)
output logic [1:0] FResSelW, output logic [1:0] FResSelW, // final result selection (to IEU)
output logic FDivBusyE, // Is the divide/sqrt unit busy (stall execute stage) output logic FDivBusyE, // Is the divide/sqrt unit busy (stall execute stage) (to HZU)
output logic IllegalFPUInstrD, // Is the instruction an illegal fpu instruction output logic IllegalFPUInstrD, // Is the instruction an illegal fpu instruction (to privileged unit)
output logic [4:0] SetFflagsM // FPU flags (to privileged unit) output logic [4:0] SetFflagsM // FPU flags (to privileged unit)
); );
@ -62,99 +62,88 @@ module fpu (
// - sets the underflow after rounding // - sets the underflow after rounding
// control signals // control signals
logic FRegWriteD, FRegWriteE, FRegWriteW; // FP register write enable logic FRegWriteW; // FP register write enable
logic [2:0] FrmD, FrmE, FrmM; // FP rounding mode logic [2:0] FrmM; // FP rounding mode
logic [`FMTBITS-1:0] FmtD, FmtE, FmtM, FmtW; // FP precision 0-single 1-double logic [`FMTBITS-1:0] FmtE, FmtM; // FP precision 0-single 1-double
logic FDivStartD, FDivStartE; // Start division or squareroot logic DivStartE; // Start division or squareroot
logic FWriteIntD; // Write to integer register logic FWriteIntM; // Write to integer register
logic FWriteIntM; // Write to integer register logic [1:0] ForwardXE, ForwardYE, ForwardZE; // forwarding mux control signals
logic [1:0] FForwardXE, FForwardYE, FForwardZE; // forwarding mux control signals logic [2:0] OpCtrlE, OpCtrlM; // Select which opperation to do in each component
logic [2:0] FOpCtrlD, FOpCtrlE, FOpCtrlM; // Select which opperation to do in each component logic [1:0] FResSelE, FResSelM; // Select one of the results that finish in the memory stage
logic [1:0] FResSelD, FResSelE, FResSelM; // Select one of the results that finish in the memory stage logic [1:0] PostProcSelE, PostProcSelM; // select result in the post processing unit
logic [1:0] PostProcSelD, PostProcSelE, PostProcSelM; // select result in the post processing unit logic [4:0] Adr1E, Adr2E, Adr3E; // adresses of each input
logic [4:0] Adr1E, Adr2E, Adr3E; // adresses of each input
// regfile signals // regfile signals
logic [`FLEN-1:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage logic [`FLEN-1:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage
logic [`FLEN-1:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage logic [`FLEN-1:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage
logic [`FLEN-1:0] FSrcXE; // Input 1 to the various units (after forwarding) logic [`FLEN-1:0] XE; // Input 1 to the various units (after forwarding)
logic [`XLEN-1:0] IntSrcXE; // Input 1 to the various units (after forwarding) logic [`XLEN-1:0] IntSrcXE; // Input 1 to the various units (after forwarding)
logic [`FLEN-1:0] FPreSrcYE, FSrcYE; // Input 2 to the various units (after forwarding) logic [`FLEN-1:0] PreYE, YE; // Input 2 to the various units (after forwarding)
logic [`FLEN-1:0] FPreSrcZE, FSrcZE; // Input 3 to the various units (after forwarding) logic [`FLEN-1:0] PreZE, ZE; // Input 3 to the various units (after forwarding)
// unpacking signals // unpacking signals
logic XSgnE, YSgnE, ZSgnE; // input's sign - execute stage logic XsE, YsE, ZsE; // input's sign - execute stage
logic XSgnM, YSgnM; // input's sign - memory stage logic XsM, YsM; // input's sign - memory stage
logic [`NE-1:0] XExpE, YExpE, ZExpE; // input's exponent - execute stage logic [`NE-1:0] XeE, YeE, ZeE; // input's exponent - execute stage
logic [`NE-1:0] ZExpM; // input's exponent - memory stage logic [`NE-1:0] ZeM; // input's exponent - memory stage
logic [`NF:0] XManE, YManE, ZManE; // input's fraction - execute stage logic [`NF:0] XmE, YmE, ZmE; // input's fraction - execute stage
logic [`NF:0] XManM, YManM, ZManM; // input's fraction - memory stage logic [`NF:0] XmM, YmM, ZmM; // input's fraction - memory stage
logic XNaNE, YNaNE, ZNaNE; // is the input a NaN - execute stage logic XNaNE, YNaNE, ZNaNE; // is the input a NaN - execute stage
logic XNaNM, YNaNM, ZNaNM; // is the input a NaN - memory stage logic XNaNM, YNaNM, ZNaNM; // is the input a NaN - memory stage
logic XNaNQ, YNaNQ; // is the input a NaN - divide logic XNaNQ, YNaNQ; // is the input a NaN - divide
logic XSNaNE, YSNaNE, ZSNaNE; // is the input a signaling NaN - execute stage logic XSNaNE, YSNaNE, ZSNaNE; // is the input a signaling NaN - execute stage
logic XSNaNM, YSNaNM, ZSNaNM; // is the input a signaling NaN - memory stage logic XSNaNM, YSNaNM, ZSNaNM; // is the input a signaling NaN - memory stage
logic XDenormE, ZDenormE, ZDenormM; // is the input denormalized logic XDenormE, ZDenormE, ZDenormM; // is the input denormalized
logic XZeroE, YZeroE, ZZeroE; // is the input zero - execute stage logic XZeroE, YZeroE, ZZeroE; // is the input zero - execute stage
logic XZeroM, YZeroM, ZZeroM; // is the input zero - memory stage logic XZeroM, YZeroM, ZZeroM; // is the input zero - memory stage
logic XZeroQ, YZeroQ; // is the input zero - divide logic XInfE, YInfE, ZInfE; // is the input infinity - execute stage
logic XInfE, YInfE, ZInfE; // is the input infinity - execute stage logic XInfM, YInfM, ZInfM; // is the input infinity - memory stage
logic XInfM, YInfM, ZInfM; // is the input infinity - memory stage logic XExpMaxE; // is the exponent all ones (max value)
logic XInfQ, YInfQ; // is the input infinity - divide
logic XExpMaxE; // is the exponent all ones (max value)
logic FmtQ;
logic FOpCtrlQ;
// Fma Signals // Fma Signals
logic [3*`NF+5:0] SumE, SumM; logic [3*`NF+5:0] SmE, SmM;
logic [`NE+1:0] ProdExpE, ProdExpM; logic [`NE+1:0] PeE, PeM;
logic AddendStickyE, AddendStickyM; logic ZmStickyE, ZmStickyM;
logic [`NE+1:0] SeE,SeM; logic [`NE+1:0] SeE,SeM;
logic KillProdE, KillProdM; logic KillProdE, KillProdM;
logic InvAE, InvAM; logic InvAE, InvAM;
logic NegSumE, NegSumM; logic NegSumE, NegSumM;
logic ZSgnEffE, ZSgnEffM; logic AsE, AsM;
logic PSgnE, PSgnM; logic PsE, PsM;
logic SsE, SsM; logic SsE, SsM;
logic [$clog2(3*`NF+7)-1:0] FmaNormCntE, FmaNormCntM; logic [$clog2(3*`NF+7)-1:0] SCntE, SCntM;
// Cvt Signals // Cvt Signals
logic [`NE:0] CvtCalcExpE, CvtCalcExpM; // the calculated expoent logic [`NE:0] CeE, CeM; // the calculated expoent
logic [`LOGCVTLEN-1:0] CvtShiftAmtE, CvtShiftAmtM; // how much to shift by logic [`LOGCVTLEN-1:0] CvtShiftAmtE, CvtShiftAmtM; // how much to shift by
logic CvtResDenormUfE, CvtResDenormUfM;// does the result underflow or is denormalized logic CvtResDenormUfE, CvtResDenormUfM;// does the result underflow or is denormalized
logic CvtResSgnE, CvtResSgnM; // the result's sign logic CsE, CsM; // the result's sign
logic IntZeroE, IntZeroM; // is the integer zero? logic IntZeroE, IntZeroM; // is the integer zero?
logic [`CVTLEN-1:0] CvtLzcInE, CvtLzcInM; // input to the Leading Zero Counter (priority encoder) logic [`CVTLEN-1:0] CvtLzcInE, CvtLzcInM; // input to the Leading Zero Counter (priority encoder)
//divide signals //divide signals
logic [`QLEN-1-(`RADIX/4):0] QuotM; logic [`QLEN-1-(`RADIX/4):0] QmM;
logic [`NE+1:0] DivCalcExpE, DivCalcExpM; logic [`NE+1:0] QeE, QeM;
logic DivStickyE, DivStickyM; logic DivSE, DivSM;
logic DivDoneM; logic DivDoneM;
logic [`DURLEN-1:0] EarlyTermShiftM; logic [`DURLEN-1:0] EarlyTermShiftM;
// result and flag signals // result and flag signals
logic [63:0] FDivResM, FDivResW; // divide/squareroot result logic [`XLEN-1:0] ClassResE; // classify result
logic [4:0] FDivFlgM; // divide/squareroot flags logic [`XLEN-1:0] FIntResE; // classify result
logic [`FLEN-1:0] ReadResW; // read result (load instruction) logic [`FLEN-1:0] FpResM, FpResW; // classify result
logic [`XLEN-1:0] ClassResE; // classify result logic [`FLEN-1:0] PostProcResM; // classify result
logic [`XLEN-1:0] FIntResE; // classify result logic [4:0] PostProcFlgM; // classify result
logic [`FLEN-1:0] FpResM, FpResW; // classify result
logic [`FLEN-1:0] PostProcResM; // classify result
logic [4:0] PostProcFlgM; // classify result
logic [`XLEN-1:0] FCvtIntResM; logic [`XLEN-1:0] FCvtIntResM;
logic [`FLEN-1:0] CmpFpResE; // compare result logic [`FLEN-1:0] CmpFpResE; // compare result
logic [`XLEN-1:0] CmpIntResE; // compare result logic [`XLEN-1:0] CmpIntResE; // compare result
logic CmpNVE; // compare invalid flag (Not Valid) logic CmpNVE; // compare invalid flag (Not Valid)
logic [`FLEN-1:0] SgnResE; // sign injection result logic [`FLEN-1:0] SgnResE; // sign injection result
logic [`FLEN-1:0] PreFpResE, PreFpResM, PreFpResW; // selected result that is ready in the memory stage logic [`FLEN-1:0] PreFpResE, PreFpResM; // selected result that is ready in the memory stage
logic PreNVE, PreNVM; // selected flag that is ready in the memory stage logic PreNVE, PreNVM; // selected flag that is ready in the memory stage
logic [`FLEN-1:0] FPUResultW; // final FP result being written to the FP register logic [`FLEN-1:0] FPUResultW; // final FP result being written to the FP register
// other signals // other signals
logic FDivSqrtDoneE; // is divide done logic [`FLEN-1:0] AlignedSrcAE; // align SrcA to the floating point format
logic [63:0] DivInput1E, DivInput2E; // inputs to divide/squareroot unit
logic load_preload; // enable for FF on fpdivsqrt
logic [`FLEN-1:0] AlignedSrcAE; // align SrcA to the floating point format
logic [`FLEN-1:0] BoxedZeroE; // Zero value for Z for multiplication, with NaN boxing if needed logic [`FLEN-1:0] BoxedZeroE; // Zero value for Z for multiplication, with NaN boxing if needed
logic [`FLEN-1:0] BoxedOneE; // Zero value for Z for multiplication, with NaN boxing if needed logic [`FLEN-1:0] BoxedOneE; // Zero value for Z for multiplication, with NaN boxing if needed
@ -171,9 +160,11 @@ module fpu (
////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////
// calculate FP control signals // calculate FP control signals
fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .FRM_REGW, .STATUS_FS, fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .InstrD,
.IllegalFPUInstrD, .FRegWriteD, .FDivStartD, .FResSelD, .FOpCtrlD, .PostProcSelD, .StallE, .StallM, .StallW, .FlushE, .FlushM, .FlushW, .FRM_REGW, .STATUS_FS, .FDivBusyE,
.FmtD, .FrmD, .FWriteIntD); .reset, .clk, .IllegalFPUInstrD, .FRegWriteM, .FRegWriteW, .FrmM, .FmtE, .FmtM,
.DivStartE, .FWriteIntE, .FWriteIntM, .OpCtrlE, .OpCtrlM,
.FResSelE, .FResSelM, .FResSelW, .PostProcSelE, .PostProcSelM, .Adr1E, .Adr2E, .Adr3E);
// FP register file // FP register file
fregfile fregfile (.clk, .reset, .we4(FRegWriteW), fregfile fregfile (.clk, .reset, .we4(FRegWriteW),
@ -185,12 +176,6 @@ module fpu (
flopenrc #(`FLEN) DEReg1(clk, reset, FlushE, ~StallE, FRD1D, FRD1E); flopenrc #(`FLEN) DEReg1(clk, reset, FlushE, ~StallE, FRD1D, FRD1E);
flopenrc #(`FLEN) DEReg2(clk, reset, FlushE, ~StallE, FRD2D, FRD2E); flopenrc #(`FLEN) DEReg2(clk, reset, FlushE, ~StallE, FRD2D, FRD2E);
flopenrc #(`FLEN) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E); flopenrc #(`FLEN) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E);
flopenrc #(15) DEAdrReg(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]},
{Adr1E, Adr2E, Adr3E});
flopenrc #(12+`FMTBITS) DECtrlReg3(clk, reset, FlushE, ~StallE,
{FRegWriteD, PostProcSelD, FResSelD, FrmD, FmtD, FOpCtrlD, FWriteIntD},
{FRegWriteE, PostProcSelE, FResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE});
flopenrc #(1) DEDivStartReg(clk, reset, FlushE, ~StallE|FDivBusyE, FDivStartD, FDivStartE);
// EXECUTION STAGE // EXECUTION STAGE
@ -207,12 +192,12 @@ module fpu (
// Hazard unit for FPU // Hazard unit for FPU
// - determines if any forwarding or stalls are needed // - determines if any forwarding or stalls are needed
fhazard fhazard(.Adr1E, .Adr2E, .Adr3E, .FRegWriteM, .FRegWriteW, .RdM, .RdW, .FResSelM, fhazard fhazard(.Adr1E, .Adr2E, .Adr3E, .FRegWriteM, .FRegWriteW, .RdM, .RdW, .FResSelM,
.FStallD, .FForwardXE, .FForwardYE, .FForwardZE); .FStallD, .ForwardXE, .ForwardYE, .ForwardZE);
// forwarding muxs // forwarding muxs
mux3 #(`FLEN) fxemux (FRD1E, FPUResultW, PreFpResM, FForwardXE, FSrcXE); mux3 #(`FLEN) fxemux (FRD1E, FPUResultW, PreFpResM, ForwardXE, XE);
mux3 #(`FLEN) fyemux (FRD2E, FPUResultW, PreFpResM, FForwardYE, FPreSrcYE); mux3 #(`FLEN) fyemux (FRD2E, FPUResultW, PreFpResM, ForwardYE, PreYE);
mux3 #(`FLEN) fzemux (FRD3E, FPUResultW, PreFpResM, FForwardZE, FPreSrcZE); mux3 #(`FLEN) fzemux (FRD3E, FPUResultW, PreFpResM, ForwardZE, PreZE);
generate generate
@ -227,7 +212,7 @@ module fpu (
endgenerate endgenerate
mux2 #(`FLEN) fyaddmux (FPreSrcYE, BoxedOneE, FOpCtrlE[2]&FOpCtrlE[1]&(FResSelE==2'b01)&(PostProcSelE==2'b10), FSrcYE); // Force Z to be 0 for multiply instructions mux2 #(`FLEN) fyaddmux (PreYE, BoxedOneE, OpCtrlE[2]&OpCtrlE[1]&(FResSelE==2'b01)&(PostProcSelE==2'b10), YE); // Force Z to be 0 for multiply instructions
// Force Z to be 0 for multiply instructions // Force Z to be 0 for multiply instructions
generate generate
@ -241,55 +226,76 @@ module fpu (
(`FLEN)'(0), FmtE, BoxedZeroE); // NaN boxing zeroes (`FLEN)'(0), FmtE, BoxedZeroE); // NaN boxing zeroes
endgenerate endgenerate
mux3 #(`FLEN) fzmulmux (FPreSrcZE, BoxedZeroE, FPreSrcYE, {FOpCtrlE[2]&FOpCtrlE[1], FOpCtrlE[2]&~FOpCtrlE[1]}, FSrcZE); mux3 #(`FLEN) fzmulmux (PreZE, BoxedZeroE, PreYE, {OpCtrlE[2]&OpCtrlE[1], OpCtrlE[2]&~OpCtrlE[1]}, ZE);
// unpack unit // unpack unit
// - splits FP inputs into their various parts // - splits FP inputs into their various parts
// - does some classifications (SNaN, NaN, Denorm, Norm, Zero, Infifnity) // - does some classifications (SNaN, NaN, Denorm, Norm, Zero, Infifnity)
unpack unpack (.X(FSrcXE), .Y(FSrcYE), .Z(FSrcZE), .FmtE, unpack unpack (.X(XE), .Y(YE), .Z(ZE), .Fmt(FmtE), .Xs(XsE), .Ys(YsE), .Zs(ZsE),
.XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE, .Xe(XeE), .Ye(YeE), .Ze(ZeE), .Xm(XmE), .Ym(YmE), .Zm(ZmE),
.XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE, .XDenormE, .ZDenormE, .XNaN(XNaNE), .YNaN(YNaNE), .ZNaN(ZNaNE), .XSNaN(XSNaNE),
.XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE, .XExpMaxE); .YSNaN(YSNaNE), .ZSNaN(ZSNaNE), .XDenorm(XDenormE), .ZDenorm(ZDenormE),
.XZero(XZeroE), .YZero(YZeroE), .ZZero(ZZeroE), .XInf(XInfE), .YInf(YInfE),
.ZInf(ZInfE), .XExpMax(XExpMaxE));
// fma - does multiply, add, and multiply-add instructions // fused multiply add
fma fma (.Xs(XSgnE), .Ys(YSgnE), .Zs(ZSgnE), // - fadd/fsub
.Xe(XExpE), .Ye(YExpE), .Ze(ZExpE), // - fmul
.Xm(XManE), .Ym(YManE), .Zm(ZManE), // - fmadd/fnmadd/fmsub/fnmsub
fma fma (.Xs(XsE), .Ys(YsE), .Zs(ZsE),
.Xe(XeE), .Ye(YeE), .Ze(ZeE),
.Xm(XmE), .Ym(YmE), .Zm(ZmE),
.XZero(XZeroE), .YZero(YZeroE), .ZZero(ZZeroE), .XZero(XZeroE), .YZero(YZeroE), .ZZero(ZZeroE),
.FOpCtrl(FOpCtrlE), .Fmt(FmtE), .OpCtrl(OpCtrlE), .Fmt(FmtE),
.As(ZSgnEffE), .Ps(PSgnE), .Ss(SsE), .Se(SeE), .As(AsE), .Ps(PsE), .Ss(SsE), .Se(SeE),
.Sm(SumE), .Pe(ProdExpE), .Sm(SmE), .Pe(PeE),
.NegSum(NegSumE), .InvA(InvAE), .NCnt(FmaNormCntE), .NegSum(NegSumE), .InvA(InvAE), .SCnt(SCntE),
.ZmSticky(AddendStickyE), .KillProd(KillProdE)); .ZmSticky(ZmStickyE), .KillProd(KillProdE));
divsqrt divsqrt(.clk, .reset, .FmtE, .XManE, .YManE, .XExpE, .YExpE, // divide and squareroot
.XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .DivStartE(FDivStartE), // - fdiv
.StallE, .StallM, .DivStickyM, .DivBusy(FDivBusyE), .DivCalcExpM, //***change divbusyE to M signal // - fsqrt
.EarlyTermShiftM, .QuotM, .DivDone(DivDoneM)); // *** add other opperations
// other FP execution units divsqrt divsqrt(.clk, .reset, .FmtE, .XmE, .YmE, .XeE, .YeE,
fcmp fcmp (.FmtE, .FOpCtrlE, .XSgnE, .YSgnE, .XExpE, .YExpE, .XManE, .YManE, .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .DivStartE(DivStartE),
.XZeroE, .YZeroE, .XNaNE, .YNaNE, .XSNaNE, .YSNaNE, .FSrcXE, .FSrcYE, .CmpNVE, .CmpFpResE, .CmpIntResE); .StallE, .StallM, .DivSM, .DivBusy(FDivBusyE), .QeM, //***change divbusyE to M signal
fsgninj fsgninj(.SgnOpCodeE(FOpCtrlE[1:0]), .XSgnE, .YSgnE, .FSrcXE, .FmtE, .SgnResE); .EarlyTermShiftM, .QmM, .DivDone(DivDoneM));
fclassify fclassify (.XSgnE, .XDenormE, .XZeroE, .XNaNE, .XInfE, .XSNaNE, .ClassResE); // compare
// - fmin/fmax
// - flt/fle/feq
fcmp fcmp (.Fmt(FmtE), .OpCtrl(OpCtrlE), .Xs(XsE), .Ys(YsE), .Xe(XeE), .Ye(YeE),
.Xm(XmE), .Ym(YmE), .XZero(XZeroE), .YZero(YZeroE), .XNaN(XNaNE), .YNaN(YNaNE),
.XSNaN(XSNaNE), .YSNaN(YSNaNE), .X(XE), .Y(YE), .CmpNV(CmpNVE),
.CmpFpRes(CmpFpResE), .CmpIntRes(CmpIntResE));
// sign injection
// - fsgnj/fsgnjx/fsgnjn
fsgninj fsgninj(.OpCtrl(OpCtrlE[1:0]), .Xs(XsE), .Ys(YsE), .X(XE), .Fmt(FmtE), .SgnRes(SgnResE));
fcvt fcvt (.Xs(XSgnE), .Xe(XExpE), .Xm(XManE), .Int(ForwardedSrcAE), .FOpCtrl(FOpCtrlE), // classify
.ToInt(FWriteIntE), .XZero(XZeroE), .XDenorm(XDenormE), .Fmt(FmtE), .Ce(CvtCalcExpE), // - fclass
.ShiftAmt(CvtShiftAmtE), .ResDenormUf(CvtResDenormUfE), .Cs(CvtResSgnE), .IntZero(IntZeroE), fclassify fclassify (.Xs(XsE), .XDenorm(XDenormE), .XZero(XZeroE), .XNaN(XNaNE),
.XInf(XInfE), .XSNaN(XSNaNE), .ClassRes(ClassResE));
// convert
// - fcvt.*.*
fcvt fcvt (.Xs(XsE), .Xe(XeE), .Xm(XmE), .Int(ForwardedSrcAE), .OpCtrl(OpCtrlE),
.ToInt(FWriteIntE), .XZero(XZeroE), .XDenorm(XDenormE), .Fmt(FmtE), .Ce(CeE),
.ShiftAmt(CvtShiftAmtE), .ResDenormUf(CvtResDenormUfE), .Cs(CsE), .IntZero(IntZeroE),
.LzcIn(CvtLzcInE)); .LzcIn(CvtLzcInE));
// data to be stored in memory - to IEU // data to be stored in memory - to IEU
// - FP uses NaN-blocking format // - FP uses NaN-blocking format
// - if there are any unsused bits the most significant bits are filled with 1s // - if there are any unsused bits the most significant bits are filled with 1s
if (`LLEN==`XLEN) begin if (`LLEN==`XLEN) begin
assign FWriteDataE = FSrcYE[`XLEN-1:0]; assign FWriteDataE = YE[`XLEN-1:0];
end else begin end else begin
logic [`FLEN-1:0] FWriteDataE; logic [`FLEN-1:0] FWriteDataE;
if(`FMTBITS == 2) assign FStore2 = FmtM == `FMT; if(`FMTBITS == 2) assign FStore2 = FmtM == `FMT;
else assign FStore2 = FmtM; else assign FStore2 = FmtM;
if (`FPSIZES==1) assign FWriteDataE = FSrcYE; if (`FPSIZES==1) assign FWriteDataE = YE;
else if (`FPSIZES==2) assign FWriteDataE = FmtE ? FSrcYE : {2{FSrcYE[`LEN1-1:0]}}; else if (`FPSIZES==2) assign FWriteDataE = FmtE ? YE : {2{YE[`LEN1-1:0]}};
else assign FWriteDataE = FmtE == `FMT ? FSrcYE : {2{FSrcYE[`LEN1-1:0]}}; else assign FWriteDataE = FmtE == `FMT ? YE : {2{YE[`LEN1-1:0]}};
flopenrc #(`FLEN) EMWriteDataReg (clk, reset, FlushM, ~StallM, FWriteDataE, FWriteDataM); flopenrc #(`FLEN) EMWriteDataReg (clk, reset, FlushM, ~StallM, FWriteDataE, FWriteDataM);
end end
@ -306,14 +312,14 @@ module fpu (
{{`FLEN-`XLEN{1'b1}}, ForwardedSrcAE}, FmtE, AlignedSrcAE); // NaN boxing zeroes {{`FLEN-`XLEN{1'b1}}, ForwardedSrcAE}, FmtE, AlignedSrcAE); // NaN boxing zeroes
endgenerate endgenerate
// select a result that may be written to the FP register // select a result that may be written to the FP register
mux3 #(`FLEN) FResMux(SgnResE, AlignedSrcAE, CmpFpResE, {FOpCtrlE[2], &FOpCtrlE[1:0]}, PreFpResE); mux3 #(`FLEN) FResMux(SgnResE, AlignedSrcAE, CmpFpResE, {OpCtrlE[2], &OpCtrlE[1:0]}, PreFpResE);
assign PreNVE = CmpNVE&(FOpCtrlE[2]|FWriteIntE); assign PreNVE = CmpNVE&(OpCtrlE[2]|FWriteIntE);
// select the result that may be written to the integer register - to IEU // select the result that may be written to the integer register - to IEU
if (`FLEN>`XLEN) if (`FLEN>`XLEN)
assign IntSrcXE = FSrcXE[`XLEN-1:0]; assign IntSrcXE = XE[`XLEN-1:0];
else else
assign IntSrcXE = {{`XLEN-`FLEN{FSrcXE[`FLEN-1:0]}}, FSrcXE}; assign IntSrcXE = {{`XLEN-`FLEN{XE[`FLEN-1:0]}}, XE};
mux3 #(`XLEN) IntResMux (ClassResE, IntSrcXE, CmpIntResE, {~FResSelE[1], FResSelE[0]}, FIntResE); mux3 #(`XLEN) IntResMux (ClassResE, IntSrcXE, CmpIntResE, {~FResSelE[1], FResSelE[0]}, FIntResE);
// *** DH 5/25/22: CvtRes will move to mem stage. Premux in execute to save area, then make sure stalls are ok // *** DH 5/25/22: CvtRes will move to mem stage. Premux in execute to save area, then make sure stalls are ok
@ -321,27 +327,24 @@ module fpu (
// E/M pipe registers // E/M pipe registers
// flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, FSrcXE, FSrcXM); // flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, XE, FSrcXM);
flopenrc #(`NF+2) EMFpReg2 (clk, reset, FlushM, ~StallM, {XSgnE,XManE}, {XSgnM,XManM}); flopenrc #(`NF+2) EMFpReg2 (clk, reset, FlushM, ~StallM, {XsE,XmE}, {XsM,XmM});
flopenrc #(`NF+2) EMFpReg3 (clk, reset, FlushM, ~StallM, {YSgnE,YManE}, {YSgnM,YManM}); flopenrc #(`NF+2) EMFpReg3 (clk, reset, FlushM, ~StallM, {YsE,YmE}, {YsM,YmM});
flopenrc #(`FLEN) EMFpReg4 (clk, reset, FlushM, ~StallM, {ZExpE,ZManE}, {ZExpM,ZManM}); flopenrc #(`FLEN) EMFpReg4 (clk, reset, FlushM, ~StallM, {ZeE,ZmE}, {ZeM,ZmM});
flopenrc #(`XLEN) EMFpReg6 (clk, reset, FlushM, ~StallM, FIntResE, FIntResM); flopenrc #(`XLEN) EMFpReg6 (clk, reset, FlushM, ~StallM, FIntResE, FIntResM);
flopenrc #(`FLEN) EMFpReg7 (clk, reset, FlushM, ~StallM, PreFpResE, PreFpResM); flopenrc #(`FLEN) EMFpReg7 (clk, reset, FlushM, ~StallM, PreFpResE, PreFpResM);
flopenrc #(13) EMFpReg5 (clk, reset, FlushM, ~StallM, flopenrc #(13) EMFpReg5 (clk, reset, FlushM, ~StallM,
{XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE, ZDenormE}, {XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE, ZDenormE},
{XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM, ZDenormM}); {XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM, ZDenormM});
flopenrc #(1) EMRegCmpFlg (clk, reset, FlushM, ~StallM, PreNVE, PreNVM); flopenrc #(1) EMRegCmpFlg (clk, reset, FlushM, ~StallM, PreNVE, PreNVM);
flopenrc #(12+int'(`FMTBITS)) EMCtrlReg (clk, reset, FlushM, ~StallM, flopenrc #(3*`NF+6) EMRegFma2(clk, reset, FlushM, ~StallM, SmE, SmM);
{FRegWriteE, FResSelE, PostProcSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE}, flopenrc #(`NE+2) EMRegFma3(clk, reset, FlushM, ~StallM, PeE, PeM);
{FRegWriteM, FResSelM, PostProcSelM, FrmM, FmtM, FOpCtrlM, FWriteIntM});
flopenrc #(3*`NF+6) EMRegFma2(clk, reset, FlushM, ~StallM, SumE, SumM);
flopenrc #(`NE+2) EMRegFma3(clk, reset, FlushM, ~StallM, ProdExpE, ProdExpM);
flopenrc #($clog2(3*`NF+7)+9+`NE) EMRegFma4(clk, reset, FlushM, ~StallM, flopenrc #($clog2(3*`NF+7)+9+`NE) EMRegFma4(clk, reset, FlushM, ~StallM,
{AddendStickyE, KillProdE, InvAE, FmaNormCntE, NegSumE, ZSgnEffE, PSgnE, SsE, SeE}, {ZmStickyE, KillProdE, InvAE, SCntE, NegSumE, AsE, PsE, SsE, SeE},
{AddendStickyM, KillProdM, InvAM, FmaNormCntM, NegSumM, ZSgnEffM, PSgnM, SsM, SeM}); {ZmStickyM, KillProdM, InvAM, SCntM, NegSumM, AsM, PsM, SsM, SeM});
flopenrc #(`NE+`LOGCVTLEN+`CVTLEN+4) EMRegCvt(clk, reset, FlushM, ~StallM, flopenrc #(`NE+`LOGCVTLEN+`CVTLEN+4) EMRegCvt(clk, reset, FlushM, ~StallM,
{CvtCalcExpE, CvtShiftAmtE, CvtResDenormUfE, CvtResSgnE, IntZeroE, CvtLzcInE}, {CeE, CvtShiftAmtE, CvtResDenormUfE, CsE, IntZeroE, CvtLzcInE},
{CvtCalcExpM, CvtShiftAmtM, CvtResDenormUfM, CvtResSgnM, IntZeroM, CvtLzcInM}); {CeM, CvtShiftAmtM, CvtResDenormUfM, CsM, IntZeroM, CvtLzcInM});
// BEGIN MEMORY STAGE // BEGIN MEMORY STAGE
@ -357,11 +360,11 @@ module fpu (
assign FpLoadStoreM = FResSelM[1]; assign FpLoadStoreM = FResSelM[1];
postprocess postprocess(.Xs(XSgnM), .Ys(YSgnM), .Ze(ZExpM), .Xm(XManM), .Ym(YManM), .Zm(ZManM), .Frm(FrmM), .Fmt(FmtM), .FmaPe(ProdExpM), .DivEarlyTermShift(EarlyTermShiftM), postprocess postprocess(.Xs(XsM), .Ys(YsM), .Ze(ZeM), .Xm(XmM), .Ym(YmM), .Zm(ZmM), .Frm(FrmM), .Fmt(FmtM), .FmaPe(PeM), .DivEarlyTermShift(EarlyTermShiftM),
.FmaZmS(AddendStickyM), .FmaKillProd(KillProdM), .XZero(XZeroM), .YZero(YZeroM), .ZZero(ZZeroM), .XInf(XInfM), .YInf(YInfM), .DivQm(QuotM), .FmaSs(SsM), .FmaZmS(ZmStickyM), .FmaKillProd(KillProdM), .XZero(XZeroM), .YZero(YZeroM), .ZZero(ZZeroM), .XInf(XInfM), .YInf(YInfM), .DivQm(QmM), .FmaSs(SsM),
.ZInf(ZInfM), .XNaN(XNaNM), .YNaN(YNaNM), .ZNaN(ZNaNM), .XSNaN(XSNaNM), .YSNaN(YSNaNM), .ZSNaN(ZSNaNM), .FmaSm(SumM), .DivQe(DivCalcExpM), .DivDone(DivDoneM), .ZInf(ZInfM), .XNaN(XNaNM), .YNaN(YNaNM), .ZNaN(ZNaNM), .XSNaN(XSNaNM), .YSNaN(YSNaNM), .ZSNaN(ZSNaNM), .FmaSm(SmM), .DivQe(QeM), .DivDone(DivDoneM),
.FmaNegSum(NegSumM), .FmaInvA(InvAM), .ZDenorm(ZDenormM), .FmaAs(ZSgnEffM), .FmaPs(PSgnM), .FOpCtrl(FOpCtrlM), .FmaNCnt(FmaNormCntM), .FmaSe(SeM), .FmaNegSum(NegSumM), .FmaInvA(InvAM), .ZDenorm(ZDenormM), .FmaAs(AsM), .FmaPs(PsM), .OpCtrl(OpCtrlM), .FmaSCnt(SCntM), .FmaSe(SeM),
.CvtCe(CvtCalcExpM), .CvtResDenormUf(CvtResDenormUfM),.CvtShiftAmt(CvtShiftAmtM), .CvtCs(CvtResSgnM), .ToInt(FWriteIntM), .DivS(DivStickyM), .CvtCe(CeM), .CvtResDenormUf(CvtResDenormUfM),.CvtShiftAmt(CvtShiftAmtM), .CvtCs(CsM), .ToInt(FWriteIntM), .DivS(DivSM),
.CvtLzcIn(CvtLzcInM), .IntZero(IntZeroM), .PostProcSel(PostProcSelM), .PostProcRes(PostProcResM), .PostProcFlg(PostProcFlgM), .FCvtIntRes(FCvtIntResM)); .CvtLzcIn(CvtLzcInM), .IntZero(IntZeroM), .PostProcSel(PostProcSelM), .PostProcRes(PostProcResM), .PostProcFlg(PostProcFlgM), .FCvtIntRes(FCvtIntResM));
// FPU flag selection - to privileged // FPU flag selection - to privileged
@ -371,9 +374,6 @@ module fpu (
// M/W pipe registers // M/W pipe registers
flopenrc #(`FLEN) MWRegFp(clk, reset, FlushW, ~StallW, FpResM, FpResW); flopenrc #(`FLEN) MWRegFp(clk, reset, FlushW, ~StallW, FpResM, FpResW);
flopenrc #(`XLEN) MWRegInt(clk, reset, FlushW, ~StallW, FCvtIntResM, FCvtIntResW); flopenrc #(`XLEN) MWRegInt(clk, reset, FlushW, ~StallW, FCvtIntResM, FCvtIntResW);
flopenrc #(4+int'(`FMTBITS-1)) MWCtrlReg(clk, reset, FlushW, ~StallW,
{FRegWriteM, FResSelM, FmtM},
{FRegWriteW, FResSelW, FmtW});
// BEGIN WRITEBACK STAGE // BEGIN WRITEBACK STAGE

View File

@ -26,60 +26,59 @@
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE // TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
// OR OTHER DEALINGS IN THE SOFTWARE. // OR OTHER DEALINGS IN THE SOFTWARE.
//////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh" `include "wally-config.vh"
module fsgninj ( module fsgninj (
input logic XSgnE, YSgnE, // X and Y sign bits input logic Xs, Ys, // X and Y sign bits
input logic [`FLEN-1:0] FSrcXE, // X input logic [`FLEN-1:0] X, // X
input logic [`FMTBITS-1:0] FmtE, // precision 1 = double 0 = single input logic [`FMTBITS-1:0] Fmt, // format
input logic [1:0] SgnOpCodeE, // operation control input logic [1:0] OpCtrl, // operation control
output logic [`FLEN-1:0] SgnResE // result output logic [`FLEN-1:0] SgnRes // result
); );
logic ResSgn; logic ResSgn;
//op code designation: // OpCtrl:
// // 00 - fsgnj - directly copy over sign value of Y
//00 - fsgnj - directly copy over sign value of FSrcYE // 01 - fsgnjn - negate sign value of Y
//01 - fsgnjn - negate sign value of FSrcYE // 10 - fsgnjx - XOR sign values of X and Y
//10 - fsgnjx - XOR sign values of FSrcXE & FSrcYE
//
// calculate the result's sign // calculate the result's sign
assign ResSgn = (SgnOpCodeE[1] ? XSgnE : SgnOpCodeE[0]) ^ YSgnE; assign ResSgn = (OpCtrl[1] ? Xs : OpCtrl[0]) ^ Ys;
// format final result based on precision // format final result based on precision
// - uses NaN-blocking format // - uses NaN-blocking format
// - if there are any unsused bits the most significant bits are filled with 1s // - if there are any unsused bits the most significant bits are filled with 1s
if (`FPSIZES == 1) if (`FPSIZES == 1)
assign SgnResE = {ResSgn, FSrcXE[`FLEN-2:0]}; assign SgnRes = {ResSgn, X[`FLEN-2:0]};
else if (`FPSIZES == 2) else if (`FPSIZES == 2)
assign SgnResE = {~FmtE|ResSgn, FSrcXE[`FLEN-2:`LEN1], FmtE ? FSrcXE[`LEN1-1] : ResSgn, FSrcXE[`LEN1-2:0]}; assign SgnRes = {~Fmt|ResSgn, X[`FLEN-2:`LEN1], Fmt ? X[`LEN1-1] : ResSgn, X[`LEN1-2:0]};
else if (`FPSIZES == 3) begin else if (`FPSIZES == 3) begin
logic [2:0] SgnBits; logic [2:0] SgnBits;
always_comb always_comb
case (FmtE) case (Fmt)
`FMT: SgnBits = {ResSgn, FSrcXE[`LEN1-1], FSrcXE[`LEN2-1]}; `FMT: SgnBits = {ResSgn, X[`LEN1-1], X[`LEN2-1]};
`FMT1: SgnBits = {1'b1, ResSgn, FSrcXE[`LEN2-1]}; `FMT1: SgnBits = {1'b1, ResSgn, X[`LEN2-1]};
`FMT2: SgnBits = {2'b11, ResSgn}; `FMT2: SgnBits = {2'b11, ResSgn};
default: SgnBits = {3{1'bx}}; default: SgnBits = {3{1'bx}};
endcase endcase
assign SgnResE = {SgnBits[2], FSrcXE[`FLEN-2:`LEN1], SgnBits[1], FSrcXE[`LEN1-2:`LEN2], SgnBits[0], FSrcXE[`LEN2-2:0]}; assign SgnRes = {SgnBits[2], X[`FLEN-2:`LEN1], SgnBits[1], X[`LEN1-2:`LEN2], SgnBits[0], X[`LEN2-2:0]};
end else if (`FPSIZES == 4) begin end else if (`FPSIZES == 4) begin
logic [3:0] SgnBits; logic [3:0] SgnBits;
always_comb always_comb
case (FmtE) case (Fmt)
`Q_FMT: SgnBits = {ResSgn, FSrcXE[`D_LEN-1], FSrcXE[`S_LEN-1], FSrcXE[`H_LEN-1]}; `Q_FMT: SgnBits = {ResSgn, X[`D_LEN-1], X[`S_LEN-1], X[`H_LEN-1]};
`D_FMT: SgnBits = {1'b1, ResSgn, FSrcXE[`S_LEN-1], FSrcXE[`H_LEN-1]}; `D_FMT: SgnBits = {1'b1, ResSgn, X[`S_LEN-1], X[`H_LEN-1]};
`S_FMT: SgnBits = {2'b11, ResSgn, FSrcXE[`H_LEN-1]}; `S_FMT: SgnBits = {2'b11, ResSgn, X[`H_LEN-1]};
`H_FMT: SgnBits = {3'b111, ResSgn}; `H_FMT: SgnBits = {3'b111, ResSgn};
endcase endcase
assign SgnResE = {SgnBits[3], FSrcXE[`Q_LEN-2:`D_LEN], SgnBits[2], FSrcXE[`D_LEN-2:`S_LEN], SgnBits[1], FSrcXE[`S_LEN-2:`H_LEN], SgnBits[0], FSrcXE[`H_LEN-2:0]}; assign SgnRes = {SgnBits[3], X[`Q_LEN-2:`D_LEN], SgnBits[2], X[`D_LEN-2:`S_LEN], SgnBits[1], X[`S_LEN-2:`H_LEN], SgnBits[0], X[`H_LEN-2:0]};
end end
endmodule endmodule

View File

@ -107,6 +107,6 @@ module otfc4 (
QMNext = {QMR, 2'b11}; QMNext = {QMR, 2'b11};
end end
end end
// Final Quoteint is in the range [.5, 2) // Final Qmeint is in the range [.5, 2)
endmodule endmodule

View File

@ -36,7 +36,7 @@ module postprocess (
input logic [`NF:0] Xm, Ym, Zm, // input mantissas input logic [`NF:0] Xm, Ym, Zm, // input mantissas
input logic [2:0] Frm, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude input logic [2:0] Frm, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
input logic [`FMTBITS-1:0] Fmt, // precision 1 = double 0 = single input logic [`FMTBITS-1:0] Fmt, // precision 1 = double 0 = single
input logic [2:0] FOpCtrl, // choose which opperation (look below for values) input logic [2:0] OpCtrl, // choose which opperation (look below for values)
input logic XZero, YZero, ZZero, // inputs are zero input logic XZero, YZero, ZZero, // inputs are zero
input logic XInf, YInf, ZInf, // inputs are infinity input logic XInf, YInf, ZInf, // inputs are infinity
input logic XNaN, YNaN, ZNaN, // inputs are NaN input logic XNaN, YNaN, ZNaN, // inputs are NaN
@ -54,7 +54,7 @@ module postprocess (
input logic FmaNegSum, // was the sum negitive input logic FmaNegSum, // was the sum negitive
input logic FmaInvA, // do you invert Z input logic FmaInvA, // do you invert Z
input logic FmaSs, input logic FmaSs,
input logic [$clog2(3*`NF+7)-1:0] FmaNCnt, // the normalization shift count input logic [$clog2(3*`NF+7)-1:0] FmaSCnt, // the normalization shift count
//divide signals //divide signals
input logic [`DURLEN-1:0] DivEarlyTermShift, input logic [`DURLEN-1:0] DivEarlyTermShift,
input logic DivS, input logic DivS,
@ -125,14 +125,14 @@ module postprocess (
logic Sqrt; logic Sqrt;
// signals to help readability // signals to help readability
assign Signed = FOpCtrl[0]; assign Signed = OpCtrl[0];
assign Int64 = FOpCtrl[1]; assign Int64 = OpCtrl[1];
assign IntToFp = FOpCtrl[2]; assign IntToFp = OpCtrl[2];
assign Mult = FOpCtrl[2]&~FOpCtrl[1]&~FOpCtrl[0]; assign Mult = OpCtrl[2]&~OpCtrl[1]&~OpCtrl[0];
assign CvtOp = (PostProcSel == 2'b00); assign CvtOp = (PostProcSel == 2'b00);
assign FmaOp = (PostProcSel == 2'b10); assign FmaOp = (PostProcSel == 2'b10);
assign DivOp = (PostProcSel == 2'b01)&DivDone; assign DivOp = (PostProcSel == 2'b01)&DivDone;
assign Sqrt = FOpCtrl[0]; assign Sqrt = OpCtrl[0];
// is there an input of infinity or NaN being used // is there an input of infinity or NaN being used
assign InfIn = (XInf&~(IntToFp&CvtOp))|(YInf&~CvtOp)|(ZInf&FmaOp); assign InfIn = (XInf&~(IntToFp&CvtOp))|(YInf&~CvtOp)|(ZInf&FmaOp);
@ -142,9 +142,9 @@ module postprocess (
// - fp -> fp: OpCtrl contains the percision of the output // - fp -> fp: OpCtrl contains the percision of the output
// - otherwise: Fmt contains the percision of the output // - otherwise: Fmt contains the percision of the output
if (`FPSIZES == 2) if (`FPSIZES == 2)
assign OutFmt = IntToFp|~CvtOp ? Fmt : (FOpCtrl[1:0] == `FMT); assign OutFmt = IntToFp|~CvtOp ? Fmt : (OpCtrl[1:0] == `FMT);
else if (`FPSIZES == 3 | `FPSIZES == 4) else if (`FPSIZES == 3 | `FPSIZES == 4)
assign OutFmt = IntToFp|~CvtOp ? Fmt : FOpCtrl[1:0]; assign OutFmt = IntToFp|~CvtOp ? Fmt : OpCtrl[1:0];
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
// Normalization // Normalization
@ -152,7 +152,7 @@ module postprocess (
cvtshiftcalc cvtshiftcalc(.ToInt, .CvtCe, .CvtResDenormUf, .Xm, .CvtLzcIn, cvtshiftcalc cvtshiftcalc(.ToInt, .CvtCe, .CvtResDenormUf, .Xm, .CvtLzcIn,
.XZero, .IntToFp, .OutFmt, .CvtResUf, .CvtShiftIn); .XZero, .IntToFp, .OutFmt, .CvtResUf, .CvtShiftIn);
fmashiftcalc fmashiftcalc(.FmaSm, .Ze, .FmaPe, .FmaNCnt, .Fmt, .FmaKillProd, .NormSumExp, .FmaSe, fmashiftcalc fmashiftcalc(.FmaSm, .Ze, .FmaPe, .FmaSCnt, .Fmt, .FmaKillProd, .NormSumExp, .FmaSe,
.FmaSZero, .FmaPreResultDenorm, .FmaShiftAmt, .FmaShiftIn); .FmaSZero, .FmaPreResultDenorm, .FmaShiftAmt, .FmaShiftIn);
divshiftcalc divshiftcalc(.Fmt, .DivQe, .DivQm, .DivEarlyTermShift, .DivResDenorm, .DivDenormShift, .DivShiftAmt, .DivShiftIn); divshiftcalc divshiftcalc(.Fmt, .DivQe, .DivQm, .DivEarlyTermShift, .DivResDenorm, .DivDenormShift, .DivShiftAmt, .DivShiftIn);

View File

@ -42,7 +42,7 @@ module qsel2 ( // *** eventually just change to 4 bits
// for efficiency. You can probably optimize your logic to // for efficiency. You can probably optimize your logic to
// select the proper divisor with less delay. // select the proper divisor with less delay.
// Quotient equations from EE371 lecture notes 13-20 // Qmient equations from EE371 lecture notes 13-20
assign p = ps ^ pc; assign p = ps ^ pc;
assign g = ps & pc; assign g = ps & pc;

View File

@ -43,7 +43,7 @@ module shiftcorrection(
output logic [`NE+1:0] FmaMe // exponent of the normalized sum output logic [`NE+1:0] FmaMe // exponent of the normalized sum
); );
logic [3*`NF+5:0] CorrSumShifted; // the shifted sum after LZA correction logic [3*`NF+5:0] CorrSumShifted; // the shifted sum after LZA correction
logic [`CORRSHIFTSZ-1:0] CorrQuotShifted; logic [`CORRSHIFTSZ-1:0] CorrQmShifted;
logic ResDenorm; // is the result denormalized logic ResDenorm; // is the result denormalized
logic LZAPlus1, LZAPlus2; // add one or two to the sum's exponent due to LZA correction logic LZAPlus1, LZAPlus2; // add one or two to the sum's exponent due to LZA correction
@ -53,11 +53,11 @@ module shiftcorrection(
// the only possible mantissa for a plus two is all zeroes - a one has to propigate all the way through a sum. so we can leave the bottom statement alone // the only possible mantissa for a plus two is all zeroes - a one has to propigate all the way through a sum. so we can leave the bottom statement alone
assign CorrSumShifted = LZAPlus1 ? Shifted[`NORMSHIFTSZ-3:1] : Shifted[`NORMSHIFTSZ-4:0]; assign CorrSumShifted = LZAPlus1 ? Shifted[`NORMSHIFTSZ-3:1] : Shifted[`NORMSHIFTSZ-4:0];
// if the msb is 1 or the exponent was one, but the shifted quotent was < 1 (Denorm) // if the msb is 1 or the exponent was one, but the shifted quotent was < 1 (Denorm)
assign CorrQuotShifted = (LZAPlus2|(DivQe==1&~LZAPlus2)) ? Shifted[`NORMSHIFTSZ-2:`NORMSHIFTSZ-`CORRSHIFTSZ-1] : Shifted[`NORMSHIFTSZ-3:`NORMSHIFTSZ-`CORRSHIFTSZ-2]; assign CorrQmShifted = (LZAPlus2|(DivQe==1&~LZAPlus2)) ? Shifted[`NORMSHIFTSZ-2:`NORMSHIFTSZ-`CORRSHIFTSZ-1] : Shifted[`NORMSHIFTSZ-3:`NORMSHIFTSZ-`CORRSHIFTSZ-2];
// if the result of the divider was calculated to be denormalized, then the result was correctly normalized, so select the top shifted bits // if the result of the divider was calculated to be denormalized, then the result was correctly normalized, so select the top shifted bits
always_comb always_comb
if(FmaOp) Mf = {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+6){1'b0}}}; if(FmaOp) Mf = {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+6){1'b0}}};
else if (DivOp&~DivResDenorm) Mf = CorrQuotShifted; else if (DivOp&~DivResDenorm) Mf = CorrQmShifted;
else Mf = Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ]; else Mf = Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ];
// Determine sum's exponent // Determine sum's exponent
// if plus1 If plus2 if said denorm but norm plus 1 if said denorm but norm plus 2 // if plus1 If plus2 if said denorm but norm plus 1 if said denorm but norm plus 2

View File

@ -37,15 +37,15 @@ module srt(
input logic [`FMTBITS-1:0] FmtE, input logic [`FMTBITS-1:0] FmtE,
input logic [`NE-1:0] Xe, Ye, input logic [`NE-1:0] Xe, Ye,
input logic XZeroE, YZeroE, input logic XZeroE, YZeroE,
input logic [`DIVLEN-1:0] X, input logic [`DIVLEN-1:0] X,
input logic [`DIVLEN-1:0] Dpreproc, input logic [`DIVLEN-1:0] Dpreproc,
input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt, input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt,
input logic NegSticky, input logic NegSticky,
output logic [`QLEN-1-(`RADIX/4):0] Quot, output logic [`QLEN-1-(`RADIX/4):0] Qm,
output logic [`DIVLEN+3:0] NextWSN, NextWCN, output logic [`DIVLEN+3:0] NextWSN, NextWCN,
output logic [`DIVLEN+3:0] StickyWSA, output logic [`DIVLEN+3:0] StickyWSA,
output logic [`DIVLEN+3:0] FirstWS, FirstWC, output logic [`DIVLEN+3:0] FirstWS, FirstWC,
output logic [`NE+1:0] DivCalcExpM, output logic [`NE+1:0] QeM,
output logic [`XLEN-1:0] Rem output logic [`XLEN-1:0] Rem
); );
@ -62,7 +62,7 @@ module srt(
/* verilator lint_on UNOPTFLAT */ /* verilator lint_on UNOPTFLAT */
logic [`DIVLEN+3:0] WSN, WCN; logic [`DIVLEN+3:0] WSN, WCN;
logic [`DIVLEN+3:0] D, DBar, D2, DBar2; logic [`DIVLEN+3:0] D, DBar, D2, DBar2;
logic [`NE+1:0] DivCalcExp; logic [`NE+1:0] Qe;
logic [$clog2(`XLEN+1)-1:0] intExp; logic [$clog2(`XLEN+1)-1:0] intExp;
logic intSign; logic intSign;
logic [`QLEN-1:0] QMMux; logic [`QLEN-1:0] QMMux;
@ -88,7 +88,7 @@ module srt(
mux2 #(`DIVLEN+4) wcmux(NextWCN, {`DIVLEN+4{1'b0}}, DivStart, WCN); mux2 #(`DIVLEN+4) wcmux(NextWCN, {`DIVLEN+4{1'b0}}, DivStart, WCN);
flopen #(`DIVLEN+4) wcflop(clk, DivStart|DivBusy, WCN, WC[0]); flopen #(`DIVLEN+4) wcflop(clk, DivStart|DivBusy, WCN, WC[0]);
flopen #(`DIVLEN+4) dflop(clk, DivStart, {4'b0001, Dpreproc}, D); flopen #(`DIVLEN+4) dflop(clk, DivStart, {4'b0001, Dpreproc}, D);
flopen #(`NE+2) expflop(clk, DivStart, DivCalcExp, DivCalcExpM); flopen #(`NE+2) expflop(clk, DivStart, Qe, QeM);
// Divisor Selections // Divisor Selections
@ -123,7 +123,7 @@ module srt(
flopenr #(`QLEN) Qreg(clk, DivStart, DivBusy, QNext[`DIVCOPIES-1], Q[0]); flopenr #(`QLEN) Qreg(clk, DivStart, DivBusy, QNext[`DIVCOPIES-1], Q[0]);
flopen #(`QLEN) QMreg(clk, DivBusy, QMMux, QM[0]); flopen #(`QLEN) QMreg(clk, DivBusy, QMMux, QM[0]);
assign Quot = NegSticky ? QM[0][`QLEN-1-(`RADIX/4):0] : Q[0][`QLEN-1-(`RADIX/4):0]; assign Qm = NegSticky ? QM[0][`QLEN-1-(`RADIX/4):0] : Q[0][`QLEN-1-(`RADIX/4):0];
assign FirstWS = WS[0]; assign FirstWS = WS[0];
assign FirstWC = WC[0]; assign FirstWC = WC[0];
if(`RADIX==2) if(`RADIX==2)
@ -132,7 +132,7 @@ module srt(
else else
assign StickyWSA = {WSA[1][`DIVLEN+2:0], 1'b0}; assign StickyWSA = {WSA[1][`DIVLEN+2:0], 1'b0};
expcalc expcalc(.FmtE, .Xe, .Ye, .XZeroE, .XZeroCnt, .YZeroCnt, .DivCalcExp); expcalc expcalc(.FmtE, .Xe, .Ye, .XZeroE, .XZeroCnt, .YZeroCnt, .Qe);
endmodule endmodule
@ -155,7 +155,7 @@ module divinteration (
logic [3:0] q; logic [3:0] q;
logic qp, qz;//, qn; logic qp, qz;//, qn;
// Quotient Selection logic // Qmient Selection logic
// Given partial remainder, select quotient of +1, 0, or -1 (qp, qz, pm) // Given partial remainder, select quotient of +1, 0, or -1 (qp, qz, pm)
// q encoding: // q encoding:
// 1000 = +2 // 1000 = +2
@ -226,7 +226,7 @@ module expcalc(
input logic [`NE-1:0] Xe, Ye, input logic [`NE-1:0] Xe, Ye,
input logic XZeroE, input logic XZeroE,
input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt, input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt,
output logic [`NE+1:0] DivCalcExp output logic [`NE+1:0] Qe
); );
logic [`NE-2:0] Bias; logic [`NE-2:0] Bias;
@ -255,5 +255,5 @@ module expcalc(
endcase endcase
end end
// correct exponent for denormalized input's normalization shifts // correct exponent for denormalized input's normalization shifts
assign DivCalcExp = ({2'b0, Xe} - {{`NE+1-$unsigned($clog2(`NF+2)){1'b0}}, XZeroCnt} - {2'b0, Ye} + {{`NE+1-$unsigned($clog2(`NF+2)){1'b0}}, YZeroCnt} + {3'b0, Bias})&{`NE+2{~XZeroE}}; assign Qe = ({2'b0, Xe} - {{`NE+1-$unsigned($clog2(`NF+2)){1'b0}}, XZeroCnt} - {2'b0, Ye} + {{`NE+1-$unsigned($clog2(`NF+2)){1'b0}}, YZeroCnt} + {3'b0, Bias})&{`NE+2{~XZeroE}};
endmodule endmodule

View File

@ -43,7 +43,7 @@ module srtfsm(
input logic [`DIVLEN+3:0] StickyWSA, input logic [`DIVLEN+3:0] StickyWSA,
input logic [`DURLEN-1:0] Dur, input logic [`DURLEN-1:0] Dur,
output logic [`DURLEN-1:0] EarlyTermShiftE, output logic [`DURLEN-1:0] EarlyTermShiftE,
output logic DivStickyE, output logic DivSE,
output logic DivDone, output logic DivDone,
output logic NegSticky, output logic NegSticky,
output logic DivBusy output logic DivBusy
@ -65,9 +65,9 @@ module srtfsm(
// this is only a problem on radix 2 (and pssibly maximally redundant 4) since minimally redundant // this is only a problem on radix 2 (and pssibly maximally redundant 4) since minimally redundant
// radix-4 division can't create a QM that continually adds 0's // radix-4 division can't create a QM that continually adds 0's
if (`RADIX == 2) if (`RADIX == 2)
assign DivStickyE = |W&~(StickyWSA == WS); assign DivSE = |W&~(StickyWSA == WS);
else else
assign DivStickyE = |W; assign DivSE = |W;
assign DivDone = (state == DONE); assign DivDone = (state == DONE);
assign W = WC+WS; assign W = WC+WS;
assign NegSticky = W[`DIVLEN+3]; //*** is there a better way to do this??? assign NegSticky = W[`DIVLEN+3]; //*** is there a better way to do this???

View File

@ -30,35 +30,34 @@
module unpack ( module unpack (
input logic [`FLEN-1:0] X, Y, Z, // inputs from register file input logic [`FLEN-1:0] X, Y, Z, // inputs from register file
input logic [`FMTBITS-1:0] FmtE, // format signal 00 - single 01 - double 11 - quad 10 - half input logic [`FMTBITS-1:0] Fmt, // format signal 00 - single 01 - double 11 - quad 10 - half
output logic XSgnE, YSgnE, ZSgnE, // sign bits of XYZ output logic Xs, Ys, Zs, // sign bits of XYZ
output logic [`NE-1:0] XExpE, YExpE, ZExpE, // exponents of XYZ (converted to largest supported precision) output logic [`NE-1:0] Xe, Ye, Ze, // exponents of XYZ (converted to largest supported precision)
output logic [`NF:0] XManE, YManE, ZManE, // mantissas of XYZ (converted to largest supported precision) output logic [`NF:0] Xm, Ym, Zm, // mantissas of XYZ (converted to largest supported precision)
output logic XNaNE, YNaNE, ZNaNE, // is XYZ a NaN output logic XNaN, YNaN, ZNaN, // is XYZ a NaN
output logic XSNaNE, YSNaNE, ZSNaNE, // is XYZ a signaling NaN output logic XSNaN, YSNaN, ZSNaN, // is XYZ a signaling NaN
output logic XDenormE, ZDenormE, // is XYZ denormalized output logic XDenorm, ZDenorm, // is XYZ denormalized
output logic XZeroE, YZeroE, ZZeroE, // is XYZ zero output logic XZero, YZero, ZZero, // is XYZ zero
output logic XInfE, YInfE, ZInfE, // is XYZ infinity output logic XInf, YInf, ZInf, // is XYZ infinity
output logic XExpMaxE // does X have the maximum exponent (NaN or Inf) output logic XExpMax // does X have the maximum exponent (NaN or Inf)
); );
logic [`NF-1:0] XFracE, YFracE, ZFracE; //Fraction of XYZ
logic XExpNonZero, YExpNonZero, ZExpNonZero; // is the exponent of XYZ non-zero logic XExpNonZero, YExpNonZero, ZExpNonZero; // is the exponent of XYZ non-zero
logic XFracZero, YFracZero, ZFracZero; // is the fraction zero logic XFracZero, YFracZero, ZFracZero; // is the fraction zero
logic YExpMaxE, ZExpMaxE; // is the exponent all 1s logic YExpMax, ZExpMax; // is the exponent all 1s
unpackinput unpackinputX (.In(X), .FmtE, .Sgn(XSgnE), .Exp(XExpE), .Man(XManE), unpackinput unpackinputX (.In(X), .Fmt, .Sgn(Xs), .Exp(Xe), .Man(Xm),
.NaN(XNaNE), .SNaN(XSNaNE), .ExpNonZero(XExpNonZero), .NaN(XNaN), .SNaN(XSNaN), .ExpNonZero(XExpNonZero),
.Zero(XZeroE), .Inf(XInfE), .ExpMax(XExpMaxE), .FracZero(XFracZero)); .Zero(XZero), .Inf(XInf), .ExpMax(XExpMax), .FracZero(XFracZero));
unpackinput unpackinputY (.In(Y), .FmtE, .Sgn(YSgnE), .Exp(YExpE), .Man(YManE), unpackinput unpackinputY (.In(Y), .Fmt, .Sgn(Ys), .Exp(Ye), .Man(Ym),
.NaN(YNaNE), .SNaN(YSNaNE), .ExpNonZero(YExpNonZero), .NaN(YNaN), .SNaN(YSNaN), .ExpNonZero(YExpNonZero),
.Zero(YZeroE), .Inf(YInfE), .ExpMax(YExpMaxE), .FracZero(YFracZero)); .Zero(YZero), .Inf(YInf), .ExpMax(YExpMax), .FracZero(YFracZero));
unpackinput unpackinputZ (.In(Z), .FmtE, .Sgn(ZSgnE), .Exp(ZExpE), .Man(ZManE), unpackinput unpackinputZ (.In(Z), .Fmt, .Sgn(Zs), .Exp(Ze), .Man(Zm),
.NaN(ZNaNE), .SNaN(ZSNaNE), .ExpNonZero(ZExpNonZero), .NaN(ZNaN), .SNaN(ZSNaN), .ExpNonZero(ZExpNonZero),
.Zero(ZZeroE), .Inf(ZInfE), .ExpMax(ZExpMaxE), .FracZero(ZFracZero)); .Zero(ZZero), .Inf(ZInf), .ExpMax(ZExpMax), .FracZero(ZFracZero));
// is the input denormalized // is the input denormalized
assign XDenormE = ~XExpNonZero & ~XFracZero; assign XDenorm = ~XExpNonZero & ~XFracZero;
assign ZDenormE = ~ZExpNonZero & ~ZFracZero; assign ZDenorm = ~ZExpNonZero & ~ZFracZero;
endmodule endmodule

View File

@ -30,7 +30,7 @@
module unpackinput ( module unpackinput (
input logic [`FLEN-1:0] In, // inputs from register file input logic [`FLEN-1:0] In, // inputs from register file
input logic [`FMTBITS-1:0] FmtE, // format signal 00 - single 01 - double 11 - quad 10 - half input logic [`FMTBITS-1:0] Fmt, // format signal 00 - single 01 - double 11 - quad 10 - half
output logic Sgn, // sign bits of XYZ output logic Sgn, // sign bits of XYZ
output logic [`NE-1:0] Exp, // exponents of XYZ (converted to largest supported precision) output logic [`NE-1:0] Exp, // exponents of XYZ (converted to largest supported precision)
output logic [`NF:0] Man, // mantissas of XYZ (converted to largest supported precision) output logic [`NF:0] Man, // mantissas of XYZ (converted to largest supported precision)
@ -74,16 +74,16 @@ module unpackinput (
// quad and half // quad and half
// double and half // double and half
assign BadNaNBox = ~(FmtE|(&In[`FLEN-1:`LEN1])); // Check NaN boxing assign BadNaNBox = ~(Fmt|(&In[`FLEN-1:`LEN1])); // Check NaN boxing
// choose sign bit depending on format - 1=larger precsion 0=smaller precision // choose sign bit depending on format - 1=larger precsion 0=smaller precision
assign Sgn = FmtE ? In[`FLEN-1] : In[`LEN1-1]; assign Sgn = Fmt ? In[`FLEN-1] : In[`LEN1-1];
// extract the fraction, add trailing zeroes to the mantissa if nessisary // extract the fraction, add trailing zeroes to the mantissa if nessisary
assign Frac = FmtE ? In[`NF-1:0] : {In[`NF1-1:0], (`NF-`NF1)'(0)}; assign Frac = Fmt ? In[`NF-1:0] : {In[`NF1-1:0], (`NF-`NF1)'(0)};
// is the exponent non-zero // is the exponent non-zero
assign ExpNonZero = FmtE ? |In[`FLEN-2:`NF] : |In[`LEN1-2:`NF1]; assign ExpNonZero = Fmt ? |In[`FLEN-2:`NF] : |In[`LEN1-2:`NF1];
// example double to single conversion: // example double to single conversion:
// 1023 = 0011 1111 1111 // 1023 = 0011 1111 1111
@ -95,10 +95,10 @@ module unpackinput (
// extract the exponent, converting the smaller exponent into the larger precision if nessisary // extract the exponent, converting the smaller exponent into the larger precision if nessisary
// - if the original precision had a denormal number convert the exponent value 1 // - if the original precision had a denormal number convert the exponent value 1
assign Exp = FmtE ? {In[`FLEN-2:`NF+1], In[`NF]|~ExpNonZero} : {In[`LEN1-2], {`NE-`NE1{~In[`LEN1-2]}}, In[`LEN1-3:`NF1+1], In[`NF1]|~ExpNonZero}; assign Exp = Fmt ? {In[`FLEN-2:`NF+1], In[`NF]|~ExpNonZero} : {In[`LEN1-2], {`NE-`NE1{~In[`LEN1-2]}}, In[`LEN1-3:`NF1+1], In[`NF1]|~ExpNonZero};
// is the exponent all 1's // is the exponent all 1's
assign ExpMax = FmtE ? &In[`FLEN-2:`NF] : &In[`LEN1-2:`NF1]; assign ExpMax = Fmt ? &In[`FLEN-2:`NF] : &In[`LEN1-2:`NF1];
end else if (`FPSIZES == 3) begin // three floating point precsions supported end else if (`FPSIZES == 3) begin // three floating point precsions supported
@ -122,7 +122,7 @@ module unpackinput (
// Check NaN boxing // Check NaN boxing
always_comb always_comb
case (FmtE) case (Fmt)
`FMT: BadNaNBox = 0; `FMT: BadNaNBox = 0;
`FMT1: BadNaNBox = ~&In[`FLEN-1:`LEN1]; `FMT1: BadNaNBox = ~&In[`FLEN-1:`LEN1];
`FMT2: BadNaNBox = ~&In[`FLEN-1:`LEN2]; `FMT2: BadNaNBox = ~&In[`FLEN-1:`LEN2];
@ -131,7 +131,7 @@ module unpackinput (
// extract the sign bit // extract the sign bit
always_comb always_comb
case (FmtE) case (Fmt)
`FMT: Sgn = In[`FLEN-1]; `FMT: Sgn = In[`FLEN-1];
`FMT1: Sgn = In[`LEN1-1]; `FMT1: Sgn = In[`LEN1-1];
`FMT2: Sgn = In[`LEN2-1]; `FMT2: Sgn = In[`LEN2-1];
@ -140,7 +140,7 @@ module unpackinput (
// extract the fraction // extract the fraction
always_comb always_comb
case (FmtE) case (Fmt)
`FMT: Frac = In[`NF-1:0]; `FMT: Frac = In[`NF-1:0];
`FMT1: Frac = {In[`NF1-1:0], (`NF-`NF1)'(0)}; `FMT1: Frac = {In[`NF1-1:0], (`NF-`NF1)'(0)};
`FMT2: Frac = {In[`NF2-1:0], (`NF-`NF2)'(0)}; `FMT2: Frac = {In[`NF2-1:0], (`NF-`NF2)'(0)};
@ -149,7 +149,7 @@ module unpackinput (
// is the exponent non-zero // is the exponent non-zero
always_comb always_comb
case (FmtE) case (Fmt)
`FMT: ExpNonZero = |In[`FLEN-2:`NF]; // if input is largest precision (`FLEN - ie quad or double) `FMT: ExpNonZero = |In[`FLEN-2:`NF]; // if input is largest precision (`FLEN - ie quad or double)
`FMT1: ExpNonZero = |In[`LEN1-2:`NF1]; // if input is larger precsion (`LEN1 - double or single) `FMT1: ExpNonZero = |In[`LEN1-2:`NF1]; // if input is larger precsion (`LEN1 - double or single)
`FMT2: ExpNonZero = |In[`LEN2-2:`NF2]; // if input is smallest precsion (`LEN2 - single or half) `FMT2: ExpNonZero = |In[`LEN2-2:`NF2]; // if input is smallest precsion (`LEN2 - single or half)
@ -166,7 +166,7 @@ module unpackinput (
// convert the larger precision's exponent to use the largest precision's bias // convert the larger precision's exponent to use the largest precision's bias
always_comb always_comb
case (FmtE) case (Fmt)
`FMT: Exp = {In[`FLEN-2:`NF+1], In[`NF]|~ExpNonZero}; `FMT: Exp = {In[`FLEN-2:`NF+1], In[`NF]|~ExpNonZero};
`FMT1: Exp = {In[`LEN1-2], {`NE-`NE1{~In[`LEN1-2]}}, In[`LEN1-3:`NF1+1], In[`NF1]|~ExpNonZero}; `FMT1: Exp = {In[`LEN1-2], {`NE-`NE1{~In[`LEN1-2]}}, In[`LEN1-3:`NF1+1], In[`NF1]|~ExpNonZero};
`FMT2: Exp = {In[`LEN2-2], {`NE-`NE2{~In[`LEN2-2]}}, In[`LEN2-3:`NF2+1], In[`NF2]|~ExpNonZero}; `FMT2: Exp = {In[`LEN2-2], {`NE-`NE2{~In[`LEN2-2]}}, In[`LEN2-3:`NF2+1], In[`NF2]|~ExpNonZero};
@ -175,7 +175,7 @@ module unpackinput (
// is the exponent all 1's // is the exponent all 1's
always_comb always_comb
case (FmtE) case (Fmt)
`FMT: ExpMax = &In[`FLEN-2:`NF]; `FMT: ExpMax = &In[`FLEN-2:`NF];
`FMT1: ExpMax = &In[`LEN1-2:`NF1]; `FMT1: ExpMax = &In[`LEN1-2:`NF1];
`FMT2: ExpMax = &In[`LEN2-2:`NF2]; `FMT2: ExpMax = &In[`LEN2-2:`NF2];
@ -194,7 +194,7 @@ module unpackinput (
// Check NaN boxing // Check NaN boxing
always_comb always_comb
case (FmtE) case (Fmt)
2'b11: BadNaNBox = 0; 2'b11: BadNaNBox = 0;
2'b01: BadNaNBox = ~&In[`Q_LEN-1:`D_LEN]; 2'b01: BadNaNBox = ~&In[`Q_LEN-1:`D_LEN];
2'b00: BadNaNBox = ~&In[`Q_LEN-1:`S_LEN]; 2'b00: BadNaNBox = ~&In[`Q_LEN-1:`S_LEN];
@ -203,7 +203,7 @@ module unpackinput (
// extract sign bit // extract sign bit
always_comb always_comb
case (FmtE) case (Fmt)
2'b11: Sgn = In[`Q_LEN-1]; 2'b11: Sgn = In[`Q_LEN-1];
2'b01: Sgn = In[`D_LEN-1]; 2'b01: Sgn = In[`D_LEN-1];
2'b00: Sgn = In[`S_LEN-1]; 2'b00: Sgn = In[`S_LEN-1];
@ -213,7 +213,7 @@ module unpackinput (
// extract the fraction // extract the fraction
always_comb always_comb
case (FmtE) case (Fmt)
2'b11: Frac = In[`Q_NF-1:0]; 2'b11: Frac = In[`Q_NF-1:0];
2'b01: Frac = {In[`D_NF-1:0], (`Q_NF-`D_NF)'(0)}; 2'b01: Frac = {In[`D_NF-1:0], (`Q_NF-`D_NF)'(0)};
2'b00: Frac = {In[`S_NF-1:0], (`Q_NF-`S_NF)'(0)}; 2'b00: Frac = {In[`S_NF-1:0], (`Q_NF-`S_NF)'(0)};
@ -222,7 +222,7 @@ module unpackinput (
// is the exponent non-zero // is the exponent non-zero
always_comb always_comb
case (FmtE) case (Fmt)
2'b11: ExpNonZero = |In[`Q_LEN-2:`Q_NF]; 2'b11: ExpNonZero = |In[`Q_LEN-2:`Q_NF];
2'b01: ExpNonZero = |In[`D_LEN-2:`D_NF]; 2'b01: ExpNonZero = |In[`D_LEN-2:`D_NF];
2'b00: ExpNonZero = |In[`S_LEN-2:`S_NF]; 2'b00: ExpNonZero = |In[`S_LEN-2:`S_NF];
@ -240,7 +240,7 @@ module unpackinput (
// convert the double precsion exponent into quad precsion // convert the double precsion exponent into quad precsion
always_comb always_comb
case (FmtE) case (Fmt)
2'b11: Exp = {In[`Q_LEN-2:`Q_NF+1], In[`Q_NF]|~ExpNonZero}; 2'b11: Exp = {In[`Q_LEN-2:`Q_NF+1], In[`Q_NF]|~ExpNonZero};
2'b01: Exp = {In[`D_LEN-2], {`Q_NE-`D_NE{~In[`D_LEN-2]}}, In[`D_LEN-3:`D_NF+1], In[`D_NF]|~ExpNonZero}; 2'b01: Exp = {In[`D_LEN-2], {`Q_NE-`D_NE{~In[`D_LEN-2]}}, In[`D_LEN-3:`D_NF+1], In[`D_NF]|~ExpNonZero};
2'b00: Exp = {In[`S_LEN-2], {`Q_NE-`S_NE{~In[`S_LEN-2]}}, In[`S_LEN-3:`S_NF+1], In[`S_NF]|~ExpNonZero}; 2'b00: Exp = {In[`S_LEN-2], {`Q_NE-`S_NE{~In[`S_LEN-2]}}, In[`S_LEN-3:`S_NF+1], In[`S_NF]|~ExpNonZero};
@ -250,7 +250,7 @@ module unpackinput (
// is the exponent all 1's // is the exponent all 1's
always_comb always_comb
case (FmtE) case (Fmt)
2'b11: ExpMax = &In[`Q_LEN-2:`Q_NF]; 2'b11: ExpMax = &In[`Q_LEN-2:`Q_NF];
2'b01: ExpMax = &In[`D_LEN-2:`D_NF]; 2'b01: ExpMax = &In[`D_LEN-2:`D_NF];
2'b00: ExpMax = &In[`S_LEN-2:`S_NF]; 2'b00: ExpMax = &In[`S_LEN-2:`S_NF];