From ee7932c804d05398d87785b15bfee62c6c273520 Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Fri, 22 Jul 2022 22:02:04 +0000 Subject: [PATCH] divider sizes reworked to match book --- addins/riscv-arch-test | 2 +- pipelined/config/rv64fp/wally-config.vh | 2 +- pipelined/config/shared/wally-shared.vh | 4 +- pipelined/regression/wave-fpu.do | 3 +- pipelined/src/fpu/divshiftcalc.sv | 7 +- pipelined/src/fpu/divsqrt.sv | 22 +-- pipelined/src/fpu/fctrl.sv | 6 +- pipelined/src/fpu/flags.sv | 6 +- pipelined/src/fpu/fpu.sv | 4 +- pipelined/src/fpu/otfc.sv | 20 +-- pipelined/src/fpu/postprocess.sv | 6 +- pipelined/src/fpu/qsel.sv | 12 +- pipelined/src/fpu/roundsign.sv | 3 +- pipelined/src/fpu/srt.sv | 171 +++++++++++----------- pipelined/src/fpu/srtfsm.sv | 14 +- pipelined/src/fpu/srtpreproc.sv | 23 +-- pipelined/srt/inttestgen.c | 2 +- pipelined/testbench/testbench-fp.sv | 186 ++++++++++++++---------- 18 files changed, 269 insertions(+), 224 deletions(-) diff --git a/addins/riscv-arch-test b/addins/riscv-arch-test index 307c77b2..be67c99b 160000 --- a/addins/riscv-arch-test +++ b/addins/riscv-arch-test @@ -1 +1 @@ -Subproject commit 307c77b26e070ae85ffea665ad9b642b40e33c86 +Subproject commit be67c99bd461742aa1c100bcc0732657faae2230 diff --git a/pipelined/config/rv64fp/wally-config.vh b/pipelined/config/rv64fp/wally-config.vh index 8f13b2e3..cc8d1b2b 100644 --- a/pipelined/config/rv64fp/wally-config.vh +++ b/pipelined/config/rv64fp/wally-config.vh @@ -32,7 +32,7 @@ `define DESIGN_COMPILER 0 // RV32 or RV64: XLEN = 32 or 64 -`define XLEN 64 +`define XLEN 32 // IEEE 754 compliance `define IEEE754 0 diff --git a/pipelined/config/shared/wally-shared.vh b/pipelined/config/shared/wally-shared.vh index b2abdff7..ea39ca35 100644 --- a/pipelined/config/shared/wally-shared.vh +++ b/pipelined/config/shared/wally-shared.vh @@ -102,8 +102,9 @@ // division constants `define RADIX 32'h4 -`define DIVCOPIES 32'h1 +`define DIVCOPIES 32'h4 `define DIVLEN ((`NF < `XLEN) ? (`XLEN) : (`NF + 3)) +`define DIVN (`NF < `XLEN ? `XLEN : `NF+1) // length of input `define EXTRAFRACBITS ((`NF<(`XLEN)) ? (`XLEN - `NF) : 3) `define EXTRAINTBITS ((`NF<(`XLEN)) ? 0 : (`NF - `XLEN + 3)) `define DIVRESLEN ((`NF>`XLEN) ? `NF+4 : `XLEN) @@ -113,6 +114,7 @@ `define FPDUR ((`DIVLEN+(`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES)+(`RADIX/4)) `define DURLEN ($clog2(`FPDUR+1)) `define QLEN (`FPDUR*`LOGR*`DIVCOPIES) +`define DIVb (`FPDUR*`LOGR*`DIVCOPIES)-1 `define USE_SRAM 0 diff --git a/pipelined/regression/wave-fpu.do b/pipelined/regression/wave-fpu.do index b71207e0..e16d7b0b 100644 --- a/pipelined/regression/wave-fpu.do +++ b/pipelined/regression/wave-fpu.do @@ -32,8 +32,9 @@ add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/* add wave -group {Divide} -group inter0 -noupdate /testbenchfp/divsqrt/srt/interations[0]/divinteration/* # add wave -group {Divide} -group inter0 -noupdate /testbenchfp/divsqrt/srt/interations[0]/divinteration/otfc/otfc2/* # add wave -group {Divide} -group inter0 -noupdate /testbenchfp/divsqrt/srt/interations[0]/divinteration/qsel/qsel2/* +add wave -group {Divide} -group inter0 -noupdate /testbenchfp/divsqrt/srt/interations[0]/divinteration/genblk1/qsel4/* add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srtpreproc/* -# add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/expcalc/* +add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srtpreproc/expcalc/* add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srtfsm/* add wave -group {Testbench} -noupdate /testbenchfp/* add wave -group {Testbench} -noupdate /testbenchfp/readvectors/* diff --git a/pipelined/src/fpu/divshiftcalc.sv b/pipelined/src/fpu/divshiftcalc.sv index 3fbc9419..8095b517 100644 --- a/pipelined/src/fpu/divshiftcalc.sv +++ b/pipelined/src/fpu/divshiftcalc.sv @@ -1,8 +1,9 @@ `include "wally-config.vh" module divshiftcalc( - input logic [`QLEN-1-(`RADIX/4):0] DivQm, + input logic [`DIVb-(`RADIX/4):0] DivQm, input logic [`FMTBITS-1:0] Fmt, + input logic Sqrt, input logic [`DURLEN-1:0] DivEarlyTermShift, input logic [`NE+1:0] DivQe, output logic [$clog2(`NORMSHIFTSZ)-1:0] DivShiftAmt, @@ -34,8 +35,8 @@ module divshiftcalc( assign NormShift = (`NE+2)'(`NF); // if the shift amount is negitive then dont shift (keep sticky bit) // need to multiply the early termination shift by LOGR*DIVCOPIES = left shift of log2(LOGR*DIVCOPIES) - assign DivShiftAmt = (DivResDenorm ? DivDenormShift[$clog2(`NORMSHIFTSZ)-1:0]&{$clog2(`NORMSHIFTSZ){~DivDenormShift[`NE+1]}} : NormShift[$clog2(`NORMSHIFTSZ)-1:0])+{{$clog2(`NORMSHIFTSZ)-`DURLEN-$clog2(`LOGR*`DIVCOPIES){1'b0}}, DivEarlyTermShift&{`DURLEN{~DivDenormShift[`NE+1]}}, {$clog2(`LOGR*`DIVCOPIES){1'b0}}}; + assign DivShiftAmt = (DivResDenorm ? DivDenormShift[$clog2(`NORMSHIFTSZ)-1:0]&{$clog2(`NORMSHIFTSZ){~DivDenormShift[`NE+1]}} : NormShift[$clog2(`NORMSHIFTSZ)-1:0])+{{$clog2(`NORMSHIFTSZ)-`DURLEN-$clog2(`LOGR*`DIVCOPIES){1'b0}}, DivEarlyTermShift&{`DURLEN{~(DivDenormShift[`NE+1]|Sqrt)}}, {$clog2(`LOGR*`DIVCOPIES){1'b0}}}; - assign DivShiftIn = {{`NF{1'b0}}, DivQm, {`NORMSHIFTSZ-`QLEN+(`RADIX/4)-`NF{1'b0}}}; + assign DivShiftIn = {{`NF{1'b0}}, DivQm, {`NORMSHIFTSZ-`DIVb+1+(`RADIX/4)-`NF{1'b0}}}; endmodule diff --git a/pipelined/src/fpu/divsqrt.sv b/pipelined/src/fpu/divsqrt.sv index 7ba44a95..70610bcd 100644 --- a/pipelined/src/fpu/divsqrt.sv +++ b/pipelined/src/fpu/divsqrt.sv @@ -34,6 +34,7 @@ module divsqrt( input logic clk, input logic reset, input logic [`FMTBITS-1:0] FmtE, + input logic XsE, input logic [`NF:0] XmE, YmE, input logic [`NE-1:0] XeE, YeE, input logic XInfE, YInfE, @@ -48,23 +49,22 @@ module divsqrt( output logic DivDone, output logic [`NE+1:0] QeM, output logic [`DURLEN-1:0] EarlyTermShiftM, - output logic [`QLEN-1-(`RADIX/4):0] QmM + output logic [`DIVb-(`RADIX/4):0] QmM // output logic [`XLEN-1:0] RemM, ); - logic [`DIVLEN+3:0] NextWSN, NextWCN; - logic [`DIVLEN+3:0] WS, WC; - logic [`DIVLEN+3:0] StickyWSA; - logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt; - logic [`DIVLEN+3:0] X; - logic [`DIVLEN+3:0] Dpreproc; + logic [`DIVb+3:0] NextWSN, NextWCN; + logic [`DIVb+3:0] WS, WC; + logic [`DIVb+3:0] StickyWSA; + logic [`DIVb:0] X; + logic [`DIVN-2:0] Dpreproc; logic [`DURLEN-1:0] Dur; logic NegSticky; - srtpreproc srtpreproc(.clk, .DivStart(DivStartE), .Xm(XmE), .QeM, .Xe(XeE), .Fmt(FmtE), .Ye(YeE), .Sqrt(SqrtE), .Dur, .Ym(YmE), .XZero(XZeroE), .X, .Dpreproc, .XZeroCnt, .YZeroCnt); + srtpreproc srtpreproc(.clk, .DivStart(DivStartE), .Xm(XmE), .QeM, .Xe(XeE), .Fmt(FmtE), .Ye(YeE), .Sqrt(SqrtE), .Dur, .Ym(YmE), .XZero(XZeroE), .X, .Dpreproc); - srtfsm srtfsm(.reset, .NextWSN, .NextWCN, .WS, .WC, .Dur, .DivBusy, .clk, .DivStart(DivStartE),.StallE, .StallM, .DivDone, .XZeroE, .YZeroE, .DivSE(DivSM), .XNaNE, .YNaNE, + srtfsm srtfsm(.reset, .XsE, .SqrtE, .NextWSN, .NextWCN, .WS, .WC, .Dur, .DivBusy, .clk, .DivStart(DivStartE),.StallE, .StallM, .DivDone, .XZeroE, .YZeroE, .DivSE(DivSM), .XNaNE, .YNaNE, .StickyWSA, .XInfE, .YInfE, .NegSticky(NegSticky), .EarlyTermShiftE(EarlyTermShiftM)); - srt srt(.clk, .Sqrt(SqrtM), .X,.Dpreproc, .NegSticky, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN, .DivStart(DivStartE), .Xe(XeE), .Ye(YeE), .XZeroE, .YZeroE, - .StickyWSA, .DivBusy, .Qm(QmM), .Rem()); + srt srt(.clk, .Sqrt(SqrtM), .X,.Dpreproc, .NegSticky, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN, .DivStart(DivStartE), .Xe(XeE), .Ye(YeE), .XZeroE, .YZeroE, + .StickyWSA, .DivBusy, .Qm(QmM)); endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/fctrl.sv b/pipelined/src/fpu/fctrl.sv index 20e4a009..934aba2c 100755 --- a/pipelined/src/fpu/fctrl.sv +++ b/pipelined/src/fpu/fctrl.sv @@ -178,14 +178,14 @@ module fctrl ( // enables: // X - all except int->fp, store, load, mv int->fp -// Y - all except cvt, mv, load, class +// Y - all except cvt, mv, load, class, sqrt // Z - fma ops only // load/store mv int->fp cvt int->fp assign XEnE = ~(((FResSelE==2'b10)&~FWriteIntE)|((FResSelE==2'b11)&FRegWriteE)|((FResSelE==2'b01)&(PostProcSelE==2'b00)&OpCtrlE[2])); // load/class mv cvt - assign YEnE = ~(((FResSelE==2'b10)&(FWriteIntE|FRegWriteE))|(FResSelE==2'b11)|((FResSelE==2'b01)&(PostProcSelE==2'b00))); + assign YEnE = ~(((FResSelE==2'b10)&(FWriteIntE|FRegWriteE))|(FResSelE==2'b11)|((FResSelE==2'b01)&((PostProcSelE==2'b00)|((PostProcSelE==2'b01)&OpCtrlE[0])))); assign ZEnE = (PostProcSelE==2'b10)&(FResSelE==2'b01)&(~OpCtrlE[2]|OpCtrlE[1]); - assign YEnForwardE = ~(((FResSelE==2'b10)&(FWriteIntE|FRegWriteE))|(FResSelE==2'b11)|((FResSelE==2'b01)&(PostProcSelE==2'b00))); + assign YEnForwardE = ~(((FResSelE==2'b10)&(FWriteIntE|FRegWriteE))|(FResSelE==2'b11)|((FResSelE==2'b01)&((PostProcSelE==2'b00)|((PostProcSelE==2'b01)&OpCtrlE[0])))); assign ZEnForwardE = (PostProcSelE==2'b10)&(FResSelE==2'b01)&~OpCtrlE[2]; // Final Res Sel: diff --git a/pipelined/src/fpu/flags.sv b/pipelined/src/fpu/flags.sv index c169ab2f..403b65fe 100644 --- a/pipelined/src/fpu/flags.sv +++ b/pipelined/src/fpu/flags.sv @@ -126,11 +126,11 @@ module flags( // | | | | and if the result is not exact // | | | | | and if the input isnt infinity or NaN // | | | | | | - assign Underflow = ((FullRe[`NE+1] | (FullRe == 0) | ((FullRe == 1) & (Me == 0) & ~(UfPlus1&UfL)))&(R|S))&~(InfIn|NaNIn|DivByZero); + assign Underflow = ((FullRe[`NE+1] | (FullRe == 0) | ((FullRe == 1) & (Me == 0) & ~(UfPlus1&UfL)))&(R|S))&~(InfIn|NaNIn|DivByZero|Invalid); // Set Inexact flag if the res is diffrent from what would be outputed given infinite precision // - Don't set the underflow flag if an underflowed res isn't outputed - assign FpInexact = (S|Overflow|R)&~(InfIn|NaNIn|DivByZero); + assign FpInexact = (S|Overflow|R)&~(InfIn|NaNIn|DivByZero|Invalid); // if the res is too small to be represented and not 0 // | and if the res is not invalid (outside the integer bounds) @@ -163,7 +163,7 @@ module flags( // if dividing by zero and not 0/0 // - don't set flag if an input is NaN or Inf(IEEE says has to be a finite numerator) - assign DivByZero = YZero&DivOp&~(XZero|NaNIn|InfIn); + assign DivByZero = YZero&DivOp&~Sqrt&~(XZero|NaNIn|InfIn); // Combine flags // - to integer results do not set the underflow or overflow flags diff --git a/pipelined/src/fpu/fpu.sv b/pipelined/src/fpu/fpu.sv index 3e214b0f..d98079b2 100755 --- a/pipelined/src/fpu/fpu.sv +++ b/pipelined/src/fpu/fpu.sv @@ -125,7 +125,7 @@ module fpu ( logic [`CVTLEN-1:0] CvtLzcInE, CvtLzcInM; // input to the Leading Zero Counter (priority encoder) //divide signals - logic [`QLEN-1-(`RADIX/4):0] QmM; + logic [`DIVb-(`RADIX/4):0] QmM; logic [`NE+1:0] QeE, QeM; logic DivSE, DivSM; logic DivDoneM; @@ -260,7 +260,7 @@ module fpu ( // - fsqrt // *** add other opperations divsqrt divsqrt(.clk, .reset, .FmtE, .XmE, .YmE, .XeE, .YeE, .SqrtE(OpCtrlE[0]), .SqrtM(OpCtrlM[0]), - .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .DivStartE(DivStartE), + .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .DivStartE(DivStartE), .XsE, .StallE, .StallM, .DivSM, .DivBusy(FDivBusyE), .QeM, //***change divbusyE to M signal .EarlyTermShiftM, .QmM, .DivDone(DivDoneM)); // compare diff --git a/pipelined/src/fpu/otfc.sv b/pipelined/src/fpu/otfc.sv index 7ecb823e..1e794391 100644 --- a/pipelined/src/fpu/otfc.sv +++ b/pipelined/src/fpu/otfc.sv @@ -32,16 +32,16 @@ module otfc2 ( input logic qp, qz, - input logic [`QLEN-1:0] Q, QM, - output logic [`QLEN-1:0] QNext, QMNext + input logic [`DIVb:0] Q, QM, + output logic [`DIVb:0] QNext, QMNext ); // The on-the-fly converter transfers the quotient // bits to the quotient as they come. // Use this otfc for division only. - logic [`QLEN-2:0] QR, QMR; + logic [`DIVb-1:0] QR, QMR; - assign QR = Q[`QLEN-2:0]; - assign QMR = QM[`QLEN-2:0]; // Shifted Q and QM + assign QR = Q[`DIVb-1:0]; + assign QMR = QM[`DIVb-1:0]; // Shifted Q and QM always_comb begin if (qp) begin @@ -96,8 +96,8 @@ endmodule module otfc4 ( input logic [3:0] q, - input logic [`QLEN-1:0] Q, QM, - output logic [`QLEN-1:0] QNext, QMNext + input logic [`DIVb:0] Q, QM, + output logic [`DIVb:0] QNext, QMNext ); // The on-the-fly converter transfers the quotient @@ -113,7 +113,7 @@ module otfc4 ( // QR and QMR are the shifted versions of Q and QM. // They are treated as [N-1:r] size signals, and // discard the r most significant bits of Q and QM. - logic [`QLEN-3:0] QR, QMR; + logic [`DIVb-2:0] QR, QMR; // shift Q (quotent) and QM (quotent-1) // if q = 2 Q = {Q, 10} QM = {Q, 01} @@ -122,8 +122,8 @@ module otfc4 ( // else if q = -1 Q = {QM, 11} QM = {QM, 10} // else if q = -2 Q = {QM, 10} QM = {QM, 01} - assign QR = Q[`QLEN-3:0]; - assign QMR = QM[`QLEN-3:0]; // Shifted Q and QM + assign QR = Q[`DIVb-2:0]; + assign QMR = QM[`DIVb-2:0]; // Shifted Q and QM always_comb begin if (q[3]) begin // +2 QNext = {QR, 2'b10}; diff --git a/pipelined/src/fpu/postprocess.sv b/pipelined/src/fpu/postprocess.sv index 4d9dc310..003c23d7 100644 --- a/pipelined/src/fpu/postprocess.sv +++ b/pipelined/src/fpu/postprocess.sv @@ -60,7 +60,7 @@ module postprocess ( input logic DivS, input logic DivDone, input logic [`NE+1:0] DivQe, - input logic [`QLEN-1-(`RADIX/4):0] DivQm, + input logic [`DIVb-(`RADIX/4):0] DivQm, // conversion signals input logic CvtCs, // the result's sign input logic [`NE:0] CvtCe, // the calculated expoent @@ -154,7 +154,7 @@ module postprocess ( .XZero, .IntToFp, .OutFmt, .CvtResUf, .CvtShiftIn); fmashiftcalc fmashiftcalc(.FmaSm, .Ze, .FmaPe, .FmaSCnt, .Fmt, .FmaKillProd, .NormSumExp, .FmaSe, .FmaSZero, .FmaPreResultDenorm, .FmaShiftAmt, .FmaShiftIn); - divshiftcalc divshiftcalc(.Fmt, .DivQe, .DivQm, .DivEarlyTermShift, .DivResDenorm, .DivDenormShift, .DivShiftAmt, .DivShiftIn); + divshiftcalc divshiftcalc(.Fmt, .Sqrt, .DivQe, .DivQm, .DivEarlyTermShift, .DivResDenorm, .DivDenormShift, .DivShiftAmt, .DivShiftIn); always_comb case(PostProcSel) @@ -199,7 +199,7 @@ module postprocess ( roundsign roundsign(.FmaPs, .FmaAs, .FmaInvA, .FmaOp, .DivOp, .CvtOp, .FmaNegSum, - .FmaSs, .Xs, .Ys, .CvtCs, .Ms); + .Sqrt, .FmaSs, .Xs, .Ys, .CvtCs, .Ms); round round(.OutFmt, .Frm, .S, .FmaZmS, .Plus1, .PostProcSel, .CvtCe, .Qe, .Ms, .FmaMe, .FmaOp, .CvtOp, .CvtResDenormUf, .Mf, .ToInt, .CvtResUf, diff --git a/pipelined/src/fpu/qsel.sv b/pipelined/src/fpu/qsel.sv index 87c6a4b2..afb5b1d4 100644 --- a/pipelined/src/fpu/qsel.sv +++ b/pipelined/src/fpu/qsel.sv @@ -89,17 +89,17 @@ module fgen2 ( endmodule module qsel4 ( - input logic [`DIVLEN+3:0] D, - input logic [`DIVLEN+3:0] WS, WC, + input logic [`DIVN-2:0] D, + input logic [`DIVb+3:0] WS, WC, input logic Sqrt, output logic [3:0] q ); logic [6:0] Wmsbs; logic [7:0] PreWmsbs; logic [2:0] Dmsbs; - assign PreWmsbs = WC[`DIVLEN+3:`DIVLEN-4] + WS[`DIVLEN+3:`DIVLEN-4]; + assign PreWmsbs = WC[`DIVb+3:`DIVb-4] + WS[`DIVb+3:`DIVb-4]; assign Wmsbs = PreWmsbs[7:1]; - assign Dmsbs = D[`DIVLEN-1:`DIVLEN-3]; + assign Dmsbs = D[`DIVN-2:`DIVN-4];//|{3{D[`DIVN-2]&Sqrt}}; // D = 0001.xxx... // Dmsbs = | | // W = xxxx.xxx... @@ -177,8 +177,8 @@ module fgen4 ( assign F2 = (~S << 2) & (C << 2); assign F1 = ~(S << 1) & C; assign F0 = '0; - assign FN1 = (SM << 1) | (C & ~(C << 2)); - assign FN2 = (SM << 2) | ((C << 2)&~(C <<4)); + assign FN1 = (SM << 1) | (C & ~(C << 3)); + assign FN2 = (SM << 2) | ((C << 2)&~(C << 4)); // Choose which adder input will be used diff --git a/pipelined/src/fpu/roundsign.sv b/pipelined/src/fpu/roundsign.sv index acecb594..62e882e6 100644 --- a/pipelined/src/fpu/roundsign.sv +++ b/pipelined/src/fpu/roundsign.sv @@ -34,6 +34,7 @@ module roundsign( input logic Xs, input logic Ys, input logic FmaNegSum, + input logic Sqrt, input logic FmaOp, input logic DivOp, input logic CvtOp, @@ -44,7 +45,7 @@ module roundsign( logic Qs; - assign Qs = Xs^Ys; + assign Qs = Xs^(Ys&~Sqrt); // Sign for rounding calulation assign Ms = (FmaSs&FmaOp) | (CvtCs&CvtOp) | (Qs&DivOp); diff --git a/pipelined/src/fpu/srt.sv b/pipelined/src/fpu/srt.sv index 633ac178..55cde36d 100644 --- a/pipelined/src/fpu/srt.sv +++ b/pipelined/src/fpu/srt.sv @@ -37,40 +37,43 @@ module srt( input logic [`NE-1:0] Xe, Ye, input logic XZeroE, YZeroE, input logic Sqrt, - input logic [`DIVLEN+3:0] X, - input logic [`DIVLEN+3:0] Dpreproc, - input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt, + input logic [`DIVb:0] X, + input logic [`DIVN-2:0] Dpreproc, input logic NegSticky, - output logic [`QLEN-1-(`RADIX/4):0] Qm, - output logic [`DIVLEN+3:0] NextWSN, NextWCN, - output logic [`DIVLEN+3:0] StickyWSA, - output logic [`DIVLEN+3:0] FirstWS, FirstWC, - output logic [`XLEN-1:0] Rem + output logic [`DIVb-(`RADIX/4):0] Qm, + output logic [`DIVb+3:0] NextWSN, NextWCN, + output logic [`DIVb+3:0] StickyWSA, + output logic [`DIVb+3:0] FirstWS, FirstWC ); - +//QLEN = 1.(number of bits created for division) +// N is NF+1 or XLEN +// WC/WS is dependent on D so 4.N-1 ie N+3 bits or N+2:0 + one more bit in fraction for possible sqrt right shift +// D is 1.N-1, but the msb is always 1 so 0.N-1 or N-1 bits or N-1:0 +// Dsel should match WC/WS so 4.N-1 ie N+3 bits or N+2:0 +// Q/QM/S/SM should be 1.b so b+1 bits or b:0 +// C needs to be the lenght of the final fraction 0.b so b or b-1:0 /* verilator lint_off UNOPTFLAT */ - logic [`DIVLEN+3:0] WSA[`DIVCOPIES-1:0]; - logic [`DIVLEN+3:0] WCA[`DIVCOPIES-1:0]; - logic [`DIVLEN+3:0] WS[`DIVCOPIES-1:0]; - logic [`DIVLEN+3:0] WC[`DIVCOPIES-1:0]; - logic [`QLEN-1:0] Q[`DIVCOPIES-1:0]; - logic [`QLEN-1:0] QM[`DIVCOPIES-1:0]; - logic [`QLEN-1:0] QNext[`DIVCOPIES-1:0]; - logic [`QLEN-1:0] QMNext[`DIVCOPIES-1:0]; - logic [`DIVLEN+3:0] S[`DIVCOPIES-1:0]; //***change to QLEN??? - logic [`DIVLEN+3:0] SM[`DIVCOPIES-1:0]; - logic [`DIVLEN+3:0] SNext[`DIVCOPIES-1:0]; - logic [`DIVLEN+3:0] SMNext[`DIVCOPIES-1:0]; - logic [`DIVLEN+3:0] C[`DIVCOPIES-1:0]; + logic [`DIVb+3:0] WSA[`DIVCOPIES-1:0]; // Q4.b + logic [`DIVb+3:0] WCA[`DIVCOPIES-1:0]; // Q4.b + logic [`DIVb+3:0] WS[`DIVCOPIES-1:0]; // Q4.b + logic [`DIVb+3:0] WC[`DIVCOPIES-1:0]; // Q4.b + logic [`DIVb:0] Q[`DIVCOPIES-1:0]; // U1.b + logic [`DIVb:0] QM[`DIVCOPIES-1:0];// 1.b + logic [`DIVb:0] QNext[`DIVCOPIES-1:0];// U1.b + logic [`DIVb:0] QMNext[`DIVCOPIES-1:0];// U1.b + logic [`DIVb:0] S[`DIVCOPIES-1:0];// U1.b + logic [`DIVb:0] SM[`DIVCOPIES-1:0];// U1.b + logic [`DIVb:0] SNext[`DIVCOPIES-1:0];// U1.b + logic [`DIVb:0] SMNext[`DIVCOPIES-1:0];// U1.b + logic [`DIVb-1:0] C[`DIVCOPIES-1:0]; // 0.b /* verilator lint_on UNOPTFLAT */ - logic [`DIVLEN+3:0] WSN, WCN; - logic [`DIVLEN+3:0] D, DBar, D2, DBar2; - logic [$clog2(`XLEN+1)-1:0] intExp; - logic intSign; - logic [`QLEN-1:0] QMMux; - logic [`DIVLEN+3:0] CMux; - logic [`DIVLEN+3:0] SMux; + logic [`DIVb+3:0] WSN, WCN; // Q4.N-1 + logic [`DIVN-2:0] D; // U0.N-1 + logic [`DIVb+3:0] DBar, D2, DBar2; // Q4.N-1 + logic [`DIVb:0] QMMux; + logic [`DIVb-1:0] CMux; + logic [`DIVb:0] SMux; // Top Muxes and Registers // When start is asserted, the inputs are loaded into the divider. @@ -81,27 +84,28 @@ module srt( // - the assumed one is added to D since it's always normalized (and X/0 is a special case handeled by result selection) // - XZeroE is used as the assumed one to avoid creating a sticky bit - all other numbers are normalized if (`RADIX == 2) begin : nextw - assign NextWSN = {WSA[`DIVCOPIES-1][`DIVLEN+2:0], 1'b0}; - assign NextWCN = {WCA[`DIVCOPIES-1][`DIVLEN+2:0], 1'b0}; + assign NextWSN = {WSA[`DIVCOPIES-1][`DIVb+2:0], 1'b0}; + assign NextWCN = {WCA[`DIVCOPIES-1][`DIVb+2:0], 1'b0}; end else begin - assign NextWSN = {WSA[`DIVCOPIES-1][`DIVLEN+1:0], 2'b0}; - assign NextWCN = {WCA[`DIVCOPIES-1][`DIVLEN+1:0], 2'b0}; + assign NextWSN = {WSA[`DIVCOPIES-1][`DIVb+1:0], 2'b0}; + assign NextWCN = {WCA[`DIVCOPIES-1][`DIVb+1:0], 2'b0}; end - mux2 #(`DIVLEN+4) wsmux(NextWSN, X, DivStart, WSN); - flopen #(`DIVLEN+4) wsflop(clk, DivStart|DivBusy, WSN, WS[0]); - mux2 #(`DIVLEN+4) wcmux(NextWCN, {`DIVLEN+4{1'b0}}, DivStart, WCN); - flopen #(`DIVLEN+4) wcflop(clk, DivStart|DivBusy, WCN, WC[0]); - flopen #(`DIVLEN+4) dflop(clk, DivStart, Dpreproc, D); - mux2 #(`DIVLEN+4) Cmux({2'b11, C[`DIVCOPIES-1][`DIVLEN+3:2]}, {5'b11111, Sqrt, {(`DIVLEN-2){1'b0}}}, DivStart, CMux); - flop #(`DIVLEN+4) cflop(clk, CMux, C[0]); + mux2 #(`DIVb+4) wsmux(NextWSN, {3'b0, X}, DivStart, WSN); + flopen #(`DIVb+4) wsflop(clk, DivStart|DivBusy, WSN, WS[0]); + mux2 #(`DIVb+4) wcmux(NextWCN, '0, DivStart, WCN); + flopen #(`DIVb+4) wcflop(clk, DivStart|DivBusy, WCN, WC[0]); + flopen #(`DIVN-1) dflop(clk, DivStart, Dpreproc, D); + mux2 #(`DIVb) Cmux({2'b11, C[`DIVCOPIES-1][`DIVb-1:2]}, {Sqrt, {(`DIVb-1){1'b0}}}, DivStart, CMux); + flop #(`DIVb) cflop(clk, CMux, C[0]); // Divisor Selections - // - choose the negitive version of what's being selected - assign DBar = ~D; + // - choose the negitive version of what's being selected + // - D is only the fraction + assign DBar = {3'b111, 1'b0, ~D, {`DIVb-`DIVN+1{1'b1}}}; if(`RADIX == 4) begin : d2 - assign DBar2 = {~D[`DIVLEN+2:0], 1'b1}; - assign D2 = {D[`DIVLEN+2:0], 1'b0}; + assign DBar2 = {2'b11, 1'b0, ~D, {`DIVb+2-`DIVN{1'b1}}}; + assign D2 = {2'b0, 1'b1, D, {`DIVb+2-`DIVN{1'b0}}}; end genvar i; @@ -112,12 +116,13 @@ module srt( .C(C[i]), .S(S[i]), .SM(SM[i]), .SNext(SNext[i]), .SMNext(SMNext[i])); if(i<(`DIVCOPIES-1)) begin if (`RADIX==2)begin - assign WS[i+1] = {WSA[i][`DIVLEN+1:0], 1'b0}; - assign WC[i+1] = {WCA[i][`DIVLEN+1:0], 1'b0}; + assign WS[i+1] = {WSA[i][`DIVb+2:0], 1'b0}; + assign WC[i+1] = {WCA[i][`DIVb+2:0], 1'b0}; + assign C[i+1] = {1'b1, C[i][`DIVb-1:1]}; end else begin - assign WS[i+1] = {WSA[i][`DIVLEN+1:0], 2'b0}; - assign WC[i+1] = {WCA[i][`DIVLEN+1:0], 2'b0}; - assign C[i+1] = {2'b11, C[i][`DIVLEN+3:2]}; + assign WS[i+1] = {WSA[i][`DIVb+1:0], 2'b0}; + assign WC[i+1] = {WCA[i][`DIVb+1:0], 2'b0}; + assign C[i+1] = {2'b11, C[i][`DIVb-1:2]}; end assign Q[i+1] = QNext[i]; assign QM[i+1] = QMNext[i]; @@ -128,30 +133,30 @@ module srt( endgenerate // if starting a new divison set Q to 0 and QM to -1 - mux2 #(`QLEN) QMmux(QMNext[`DIVCOPIES-1], {`QLEN{1'b1}}, DivStart, QMMux); - flopenr #(`QLEN) Qreg(clk, DivStart, DivBusy, QNext[`DIVCOPIES-1], Q[0]); - flopen #(`QLEN) QMreg(clk, DivBusy, QMMux, QM[0]); - - flopr #(`DIVLEN+4) SMreg(clk, DivStart, SMNext[`DIVCOPIES-1], SM[0]); - mux2 #(`DIVLEN+4) Smux(SNext[`DIVCOPIES-1], {3'b000, Sqrt, {(`DIVLEN){1'b0}}}, DivStart, SMux); - flop #(`DIVLEN+4) Sreg(clk, SMux, S[0]); + mux2 #(`DIVb+1) QMmux(QMNext[`DIVCOPIES-1], '1, DivStart, QMMux); + flopenr #(`DIVb+1) Qreg(clk, DivStart, DivBusy, QNext[`DIVCOPIES-1], Q[0]); + flopen #(`DIVb+1) QMreg(clk, DivBusy, QMMux, QM[0]); + flopr #(`DIVb+1) SMreg(clk, DivStart, SMNext[`DIVCOPIES-1], SM[0]); + mux2 #(`DIVb+1) Smux(SNext[`DIVCOPIES-1], {Sqrt, {(`DIVb){1'b0}}}, DivStart, SMux); + flop #(`DIVb+1) Sreg(clk, SMux, S[0]); + // division takes the result from the next cycle, which is shifted to the left one more time so the square root also needs to be shifted always_comb - if(Sqrt) - if(NegSticky) Qm = SM[0][`QLEN-1-(`RADIX/4):0]; - else Qm = S[0][`QLEN-1-(`RADIX/4):0]; + if(Sqrt) // sqrt ouputs in the range (1, .5] + if(NegSticky) Qm = {SM[0][`DIVb-1-(`RADIX/4):0], 1'b0}; + else Qm = {S[0][`DIVb-1-(`RADIX/4):0], 1'b0}; else - if(NegSticky) Qm = QM[0][`QLEN-1-(`RADIX/4):0]; - else Qm = Q[0][`QLEN-1-(`RADIX/4):0]; + if(NegSticky) Qm = QM[0][`DIVb-(`RADIX/4):0]; + else Qm = Q[0][`DIVb-(`RADIX/4):0]; assign FirstWS = WS[0]; assign FirstWC = WC[0]; if(`RADIX==2) if (`DIVCOPIES == 1) - assign StickyWSA = {WSA[0][`DIVLEN+2:0], 1'b0}; + assign StickyWSA = {WSA[0][`DIVb+2:0], 1'b0}; else - assign StickyWSA = {WSA[1][`DIVLEN+2:0], 1'b0}; + assign StickyWSA = {WSA[1][`DIVb+2:0], 1'b0}; endmodule @@ -162,24 +167,24 @@ endmodule /* verilator lint_off UNOPTFLAT */ module divinteration ( - input logic [`DIVLEN+3:0] D, - input logic [`DIVLEN+3:0] DBar, D2, DBar2, - input logic [`QLEN-1:0] Q, QM, - input logic [`DIVLEN+3:0] S, SM, - input logic [`DIVLEN+3:0] WS, WC, - input logic [`DIVLEN+3:0] C, + input logic [`DIVN-2:0] D, + input logic [`DIVb+3:0] DBar, D2, DBar2, + input logic [`DIVb:0] Q, QM, + input logic [`DIVb:0] S, SM, + input logic [`DIVb+3:0] WS, WC, + input logic [`DIVb-1:0] C, input logic Sqrt, - output logic [`QLEN-1:0] QNext, QMNext, - output logic [`DIVLEN+3:0] SNext, SMNext, - output logic [`DIVLEN+3:0] WSA, WCA + output logic [`DIVb:0] QNext, QMNext, + output logic [`DIVb:0] SNext, SMNext, + output logic [`DIVb+3:0] WSA, WCA ); /* verilator lint_on UNOPTFLAT */ - logic [`DIVLEN+3:0] Dsel; + logic [`DIVb+3:0] Dsel; logic [3:0] q; - logic qp, qz;//, qn; - logic [`DIVLEN+3:0] F; - logic [`DIVLEN+3:0] AddIn; + logic qp, qz; + logic [`DIVb+3:0] F; + logic [`DIVb+3:0] AddIn; // Qmient Selection logic // Given partial remainder, select quotient of +1, 0, or -1 (qp, qz, pm) @@ -190,21 +195,21 @@ module divinteration ( // 0010 = -1 // 0001 = -2 if(`RADIX == 2) begin : qsel - qsel2 qsel2(WS[`DIVLEN+3:`DIVLEN], WC[`DIVLEN+3:`DIVLEN], qp, qz);//, qn); + qsel2 qsel2(WS[`DIVb+3:`DIVb], WC[`DIVb+3:`DIVb], qp, qz); end else begin qsel4 qsel4(.D, .WS, .WC, .Sqrt, .q); - fgen4 fgen4(.s(q), .C, .S, .SM, .F); + // fgen4 fgen4(.s(q), .C, .S, .SM, .F); end if(`RADIX == 2) begin : dsel - assign Dsel = {`DIVLEN+4{~qz}}&(qp ? DBar : D); + assign Dsel = {`DIVb+4{~qz}}&(qp ? DBar : {3'b0, 1'b1, D, {`DIVb-`DIVN+1{1'b0}}}); end else begin always_comb case (q) 4'b1000: Dsel = DBar2; 4'b0100: Dsel = DBar; 4'b0000: Dsel = '0; - 4'b0010: Dsel = D; + 4'b0010: Dsel = {3'b0, 1'b1, D, {`DIVb-`DIVN+1{1'b0}}}; 4'b0001: Dsel = D2; default: Dsel = 'x; endcase @@ -213,16 +218,16 @@ module divinteration ( // WSA, WCA = WS + WC - qD assign AddIn = Sqrt ? F : Dsel; if (`RADIX == 2) begin : csa - csa #(`DIVLEN+4) csa(WS, WC, AddIn, qp, WSA, WCA); + csa #(`DIVb+4) csa(WS, WC, AddIn, qp, WSA, WCA); end else begin - csa #(`DIVLEN+4) csa(WS, WC, AddIn, |q[3:2], WSA, WCA); + csa #(`DIVb+4) csa(WS, WC, AddIn, |q[3:2]&~Sqrt, WSA, WCA); end if (`RADIX == 2) begin : otfc otfc2 otfc2(.qp, .qz, .Q, .QM, .QNext, .QMNext); end else begin otfc4 otfc4(.q, .Q, .QM, .QNext, .QMNext); - sotfc4 sotfc4(.s(q), .Sqrt, .C, .S, .SM, .SNext, .SMNext); + // sotfc4 sotfc4(.s(q), .Sqrt, .C, .S, .SM, .SNext, .SMNext); end endmodule diff --git a/pipelined/src/fpu/srtfsm.sv b/pipelined/src/fpu/srtfsm.sv index 597f96cd..7fe6b6b7 100644 --- a/pipelined/src/fpu/srtfsm.sv +++ b/pipelined/src/fpu/srtfsm.sv @@ -33,14 +33,16 @@ module srtfsm( input logic clk, input logic reset, - input logic [`DIVLEN+3:0] NextWSN, NextWCN, WS, WC, + input logic [`DIVb+3:0] NextWSN, NextWCN, WS, WC, input logic XInfE, YInfE, input logic XZeroE, YZeroE, input logic XNaNE, YNaNE, input logic DivStart, + input logic XsE, + input logic SqrtE, input logic StallE, input logic StallM, - input logic [`DIVLEN+3:0] StickyWSA, + input logic [`DIVb+3:0] StickyWSA, input logic [`DURLEN-1:0] Dur, output logic [`DURLEN-1:0] EarlyTermShiftE, output logic DivSE, @@ -55,11 +57,11 @@ module srtfsm( logic [`DURLEN-1:0] step; logic WZero; //logic [$clog2(`DIVLEN/2+3)-1:0] Dur; - logic [`DIVLEN+3:0] W; + logic [`DIVb+3:0] W; //flopen #($clog2(`DIVLEN/2+3)) durflop(clk, DivStart, CalcDur, Dur); assign DivBusy = (state == BUSY); - assign WZero = ((NextWSN^NextWCN)=={NextWSN[`DIVLEN+2:0]|NextWCN[`DIVLEN+2:0], 1'b0}); + assign WZero = ((NextWSN^NextWCN)=={NextWSN[`DIVb+2:0]|NextWCN[`DIVb+2:0], 1'b0}); // calculate sticky bit // - there is a chance that a value is subtracted infinitly, resulting in an exact QM result // this is only a problem on radix 2 (and pssibly maximally redundant 4) since minimally redundant @@ -70,7 +72,7 @@ module srtfsm( assign DivSE = |W; assign DivDone = (state == DONE); assign W = WC+WS; - assign NegSticky = W[`DIVLEN+3]; //*** is there a better way to do this??? + assign NegSticky = W[`DIVb+3]; assign EarlyTermShiftE = step; always_ff @(posedge clk) begin @@ -78,7 +80,7 @@ module srtfsm( state <= #1 IDLE; end else if (DivStart&~StallE) begin step <= Dur; - if (XZeroE|YZeroE|XInfE|YInfE|XNaNE|YNaNE) state <= #1 DONE; + if (XZeroE|YZeroE|XInfE|YInfE|XNaNE|YNaNE|(XsE&SqrtE)) state <= #1 DONE; else state <= #1 BUSY; end else if (state == BUSY) begin if ((~|step[`DURLEN-1:1]&step[0])|WZero) begin diff --git a/pipelined/src/fpu/srtpreproc.sv b/pipelined/src/fpu/srtpreproc.sv index 4d260917..63b2b977 100644 --- a/pipelined/src/fpu/srtpreproc.sv +++ b/pipelined/src/fpu/srtpreproc.sv @@ -39,16 +39,16 @@ module srtpreproc ( input logic Sqrt, input logic XZero, output logic [`NE+1:0] QeM, - output logic [`DIVLEN+3:0] X, - output logic [`DIVLEN+3:0] Dpreproc, - output logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt, + output logic [`DIVb:0] X, + output logic [`DIVN-2:0] Dpreproc, output logic [`DURLEN-1:0] Dur ); // logic [`XLEN-1:0] PosA, PosB; // logic [`DIVLEN-1:0] ExtraA, ExtraB, PreprocA, PreprocB, PreprocX, PreprocY; logic [`NF-1:0] PreprocA, PreprocX; logic [`NF-1:0] PreprocB, PreprocY; - logic [`NF+3:0] SqrtX; + logic [`NF+1:0] SqrtX; + logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt; logic [`NE+1:0] Qe; // assign PosA = (Signed & SrcA[`XLEN - 1]) ? -SrcA : SrcA; @@ -70,9 +70,9 @@ module srtpreproc ( assign PreprocY = Ym[`NF-1:0]< -#include +#include #include /* Constants */ diff --git a/pipelined/testbench/testbench-fp.sv b/pipelined/testbench/testbench-fp.sv index 9be68f50..17383d1f 100644 --- a/pipelined/testbench/testbench-fp.sv +++ b/pipelined/testbench/testbench-fp.sv @@ -80,7 +80,7 @@ module testbenchfp; logic CvtResSgnE; logic [`NE:0] CvtCalcExpE; // the calculated expoent logic [`LOGCVTLEN-1:0] CvtShiftAmtE; // how much to shift by - logic [`QLEN-1-(`RADIX/4):0] Quot; + logic [`DIVb-(`RADIX/4):0] Quot; logic CvtResDenormUfE; logic [`DURLEN-1:0] EarlyTermShift; logic DivStart, DivBusy; @@ -256,16 +256,16 @@ module testbenchfp; Fmt = {Fmt, 2'b11}; end end - // if (TEST === "sqrt" | TEST === "all") begin // if square-root is being tested - // // add the square-root tests/op-ctrls/unit/fmt - // Tests = {Tests, f128sqrt}; - // OpCtrl = {OpCtrl, `SQRT_OPCTRL}; - // WriteInt = {WriteInt, 1'b0}; - // for(int i = 0; i<5; i++) begin - // Unit = {Unit, `DIVUNIT}; - // Fmt = {Fmt, 2'b11}; - // end - // end + if (TEST === "sqrt" | TEST === "all") begin // if square-root is being tested + // add the square-root tests/op-ctrls/unit/fmt + Tests = {Tests, f128sqrt}; + OpCtrl = {OpCtrl, `SQRT_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `DIVUNIT}; + Fmt = {Fmt, 2'b11}; + end + end if (TEST === "fma" | TEST === "all") begin // if fused-mutliply-add is being tested Tests = {Tests, f128fma}; OpCtrl = {OpCtrl, `FMA_OPCTRL}; @@ -383,16 +383,16 @@ module testbenchfp; Fmt = {Fmt, 2'b01}; end end - // if (TEST === "sqrt" | TEST === "all") begin // if square-root is being tessted - // // add the correct tests/op-ctrls/unit/fmt to their lists - // Tests = {Tests, f64sqrt}; - // OpCtrl = {OpCtrl, `SQRT_OPCTRL}; - // WriteInt = {WriteInt, 1'b0}; - // for(int i = 0; i<5; i++) begin - // Unit = {Unit, `DIVUNIT}; - // Fmt = {Fmt, 2'b01}; - // end - // end + if (TEST === "sqrt" | TEST === "all") begin // if square-root is being tessted + // add the correct tests/op-ctrls/unit/fmt to their lists + Tests = {Tests, f64sqrt}; + OpCtrl = {OpCtrl, `SQRT_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `DIVUNIT}; + Fmt = {Fmt, 2'b01}; + end + end if (TEST === "fma" | TEST === "all") begin // if the fused multiply add is being tested Tests = {Tests, f64fma}; OpCtrl = {OpCtrl, `FMA_OPCTRL}; @@ -494,16 +494,16 @@ module testbenchfp; Fmt = {Fmt, 2'b00}; end end - // if (TEST === "sqrt" | TEST === "all") begin // if sqrt is being tested - // // add the correct tests/op-ctrls/unit/fmt to their lists - // Tests = {Tests, f32sqrt}; - // OpCtrl = {OpCtrl, `SQRT_OPCTRL}; - // WriteInt = {WriteInt, 1'b0}; - // for(int i = 0; i<5; i++) begin - // Unit = {Unit, `DIVUNIT}; - // Fmt = {Fmt, 2'b00}; - // end - // end + if (TEST === "sqrt" | TEST === "all") begin // if sqrt is being tested + // add the correct tests/op-ctrls/unit/fmt to their lists + Tests = {Tests, f32sqrt}; + OpCtrl = {OpCtrl, `SQRT_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `DIVUNIT}; + Fmt = {Fmt, 2'b00}; + end + end if (TEST === "fma" | TEST === "all") begin // if fma is being tested Tests = {Tests, f32fma}; OpCtrl = {OpCtrl, `FMA_OPCTRL}; @@ -587,16 +587,16 @@ module testbenchfp; Fmt = {Fmt, 2'b10}; end end - // if (TEST === "sqrt" | TEST === "all") begin // if sqrt is being tested - // // add the correct tests/op-ctrls/unit/fmt to their lists - // Tests = {Tests, f16sqrt}; - // OpCtrl = {OpCtrl, `SQRT_OPCTRL}; - // WriteInt = {WriteInt, 1'b0}; - // for(int i = 0; i<5; i++) begin - // Unit = {Unit, `DIVUNIT}; - // Fmt = {Fmt, 2'b10}; - // end - // end + if (TEST === "sqrt" | TEST === "all") begin // if sqrt is being tested + // add the correct tests/op-ctrls/unit/fmt to their lists + Tests = {Tests, f16sqrt}; + OpCtrl = {OpCtrl, `SQRT_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `DIVUNIT}; + Fmt = {Fmt, 2'b10}; + end + end if (TEST === "fma" | TEST === "all") begin // if fma is being tested Tests = {Tests, f16fma}; OpCtrl = {OpCtrl, `FMA_OPCTRL}; @@ -697,7 +697,7 @@ module testbenchfp; fcmp fcmp (.Fmt(ModFmt), .OpCtrl(OpCtrlVal), .Xs, .Ys, .Xe, .Ye, .Xm, .Ym, .XZero, .YZero, .CmpIntRes(CmpRes), .XNaN, .YNaN, .XSNaN, .YSNaN, .X, .Y, .CmpNV(CmpFlg[4]), .CmpFpRes(FpCmpRes)); - divsqrt divsqrt(.clk, .reset, .FmtE(ModFmt), .XmE(Xm), .YmE(Ym), .XeE(Xe), .YeE(Ye), .SqrtE(1'b0), .SqrtM(1'b0), + divsqrt divsqrt(.clk, .reset, .XsE(Xs), .FmtE(ModFmt), .XmE(Xm), .YmE(Ym), .XeE(Xe), .YeE(Ye), .SqrtE(OpCtrlVal[0]), .SqrtM(OpCtrlVal[0]), .XInfE(XInf), .YInfE(YInf), .XZeroE(XZero), .YZeroE(YZero), .XNaNE(XNaN), .YNaNE(YNaN), .DivStartE(DivStart), .StallE(1'b0), .StallM(1'b0), .DivSM(DivSticky), .DivBusy, .QeM(DivCalcExp), .EarlyTermShiftM(EarlyTermShift), .QmM(Quot), .DivDone); @@ -1007,40 +1007,72 @@ module readvectors ( end endcase `DIVUNIT: - case (Fmt) - 2'b11: begin // quad - X = TestVector[8+3*(`Q_LEN)-1:8+2*(`Q_LEN)]; - Y = TestVector[8+2*(`Q_LEN)-1:8+(`Q_LEN)]; - Ans = TestVector[8+(`Q_LEN-1):8]; - if (~clk) #5; - DivStart = 1'b1; #10 // one clk cycle - DivStart = 1'b0; - end - 2'b01: if (`D_SUPPORTED)begin // double - X = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+3*(`D_LEN)-1:8+2*(`D_LEN)]}; - Y = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+2*(`D_LEN)-1:8+(`D_LEN)]}; - Ans = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+(`D_LEN-1):8]}; - if (~clk) #5; - DivStart = 1'b1; #10 - DivStart = 1'b0; - end - 2'b00: if (`S_SUPPORTED)begin // single - X = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+3*(`S_LEN)-1:8+2*(`S_LEN)]}; - Y = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+2*(`S_LEN)-1:8+1*(`S_LEN)]}; - Ans = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+(`S_LEN-1):8]}; - if (~clk) #5; - DivStart = 1'b1; #10 - DivStart = 1'b0; - end - 2'b10: begin // half - X = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+3*(`H_LEN)-1:8+2*(`H_LEN)]}; - Y = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+2*(`H_LEN)-1:8+(`H_LEN)]}; - Ans = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+(`H_LEN-1):8]}; - if (~clk) #5; - DivStart = 1'b1; #10 - DivStart = 1'b0; - end - endcase + if(OpCtrl[0]) + case (Fmt) + 2'b11: begin // quad + X = TestVector[8+2*(`Q_LEN)-1:8+(`Q_LEN)]; + Ans = TestVector[8+(`Q_LEN-1):8]; + if (~clk) #5; + DivStart = 1'b1; #10 // one clk cycle + DivStart = 1'b0; + end + 2'b01: if (`D_SUPPORTED)begin // double + X = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+2*(`D_LEN)-1:8+(`D_LEN)]}; + Ans = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+(`D_LEN-1):8]}; + if (~clk) #5; + DivStart = 1'b1; #10 + DivStart = 1'b0; + end + 2'b00: if (`S_SUPPORTED)begin // single + X = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+2*(`S_LEN)-1:8+1*(`S_LEN)]}; + Ans = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+(`S_LEN-1):8]}; + if (~clk) #5; + DivStart = 1'b1; #10 + DivStart = 1'b0; + end + 2'b10: begin // half + X = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+2*(`H_LEN)-1:8+(`H_LEN)]}; + Ans = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+(`H_LEN-1):8]}; + if (~clk) #5; + DivStart = 1'b1; #10 + DivStart = 1'b0; + end + endcase + else + case (Fmt) + 2'b11: begin // quad + X = TestVector[8+3*(`Q_LEN)-1:8+2*(`Q_LEN)]; + Y = TestVector[8+2*(`Q_LEN)-1:8+(`Q_LEN)]; + Ans = TestVector[8+(`Q_LEN-1):8]; + if (~clk) #5; + DivStart = 1'b1; #10 // one clk cycle + DivStart = 1'b0; + end + 2'b01: if (`D_SUPPORTED)begin // double + X = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+3*(`D_LEN)-1:8+2*(`D_LEN)]}; + Y = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+2*(`D_LEN)-1:8+(`D_LEN)]}; + Ans = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+(`D_LEN-1):8]}; + if (~clk) #5; + DivStart = 1'b1; #10 + DivStart = 1'b0; + end + 2'b00: if (`S_SUPPORTED)begin // single + X = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+3*(`S_LEN)-1:8+2*(`S_LEN)]}; + Y = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+2*(`S_LEN)-1:8+1*(`S_LEN)]}; + Ans = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+(`S_LEN-1):8]}; + if (~clk) #5; + DivStart = 1'b1; #10 + DivStart = 1'b0; + end + 2'b10: begin // half + X = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+3*(`H_LEN)-1:8+2*(`H_LEN)]}; + Y = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+2*(`H_LEN)-1:8+(`H_LEN)]}; + Ans = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+(`H_LEN-1):8]}; + if (~clk) #5; + DivStart = 1'b1; #10 + DivStart = 1'b0; + end + endcase `CMPUNIT: case (Fmt) 2'b11: begin // quad @@ -1259,7 +1291,7 @@ module readvectors ( end assign XEn = ~((Unit == `CVTINTUNIT)&OpCtrl[2]); - assign YEn = ~((Unit == `CVTINTUNIT)|(Unit == `CVTFPUNIT)); + assign YEn = ~((Unit == `CVTINTUNIT)|(Unit == `CVTFPUNIT)|((Unit == `DIVUNIT)&OpCtrl[0])); assign ZEn = (Unit == `FMAUNIT); unpack unpack(.X, .Y, .Z, .Fmt(ModFmt), .Xs, .Ys, .Zs, .Xe, .Ye, .Ze,