From b728e5054d2fb581321985668efabfca439b61b6 Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Mon, 11 Jul 2022 18:30:21 -0700 Subject: [PATCH] variable interations implemented in radix-4 divider --- pipelined/config/rv64fp/wally-config.vh | 4 +- pipelined/config/shared/wally-shared.vh | 12 +- pipelined/regression/wave-fpu.do | 10 +- pipelined/src/fpu/divshiftcalc.sv | 9 +- pipelined/src/fpu/divsqrt.sv | 10 +- pipelined/src/fpu/flags.sv | 38 ++-- pipelined/src/fpu/fmashiftcalc.sv | 24 +- pipelined/src/fpu/fpu.sv | 10 +- pipelined/src/fpu/lzacorrection.sv | 4 +- pipelined/src/fpu/postprocess.sv | 26 +-- pipelined/src/fpu/resultselect.sv | 290 ------------------------ pipelined/src/fpu/resultsign.sv | 4 +- pipelined/src/fpu/round.sv | 6 +- pipelined/src/fpu/srt-radix4.sv | 140 ++++++++---- pipelined/src/fpu/srtfsm.sv | 10 +- pipelined/src/fpu/srtpreproc.sv | 16 +- pipelined/testbench/testbench-fp.sv | 14 +- 17 files changed, 203 insertions(+), 424 deletions(-) delete mode 100644 pipelined/src/fpu/resultselect.sv diff --git a/pipelined/config/rv64fp/wally-config.vh b/pipelined/config/rv64fp/wally-config.vh index b92bc07a..cc8d1b2b 100644 --- a/pipelined/config/rv64fp/wally-config.vh +++ b/pipelined/config/rv64fp/wally-config.vh @@ -32,14 +32,14 @@ `define DESIGN_COMPILER 0 // RV32 or RV64: XLEN = 32 or 64 -`define XLEN 64 +`define XLEN 32 // IEEE 754 compliance `define IEEE754 0 // MISA RISC-V configuration per specification // ZYXWVUTSRQPONMLKJIHGFEDCBA -`define MISA 32'b0000000000101000001000100100101 +`define MISA 32'b0000000000101000001000100101101 `define ZICSR_SUPPORTED 1 `define ZIFENCEI_SUPPORTED 1 `define COUNTERS 32 diff --git a/pipelined/config/shared/wally-shared.vh b/pipelined/config/shared/wally-shared.vh index 671f7343..c064783c 100644 --- a/pipelined/config/shared/wally-shared.vh +++ b/pipelined/config/shared/wally-shared.vh @@ -95,12 +95,22 @@ // largest length in IEU/FPU `define CVTLEN ((`NF<`XLEN) ? (`XLEN) : (`NF)) -`define DIVLEN ((`NF < `XLEN) ? (`XLEN) : (`NF)) `define LLEN ((`FLEN<`XLEN) ? (`XLEN) : (`FLEN)) `define LOGCVTLEN $unsigned($clog2(`CVTLEN+1)) `define NORMSHIFTSZ ((`DIVLEN+`NF+3) > (3*`NF+8) ? (`DIVLEN+`NF+3) : (3*`NF+9)) `define CORRSHIFTSZ ((`DIVLEN+`NF+3) > (3*`NF+8) ? (`DIVLEN+`NF+3) : (3*`NF+6)) +// division constants +`define RADIX 4 +`define DIVCOPIES 4 +`define DIVLEN ((`NF < `XLEN) ? (`XLEN) : (`NF)) +`define DIVRESLEN ((`NF>`XLEN) ? `DIVLEN+2 : `DIVLEN) +`define LOGR ((`RADIX==2) ? 1 : 2) +`define FPDUR $ceil($itor(`DIVRESLEN)/$itor(`LOGR*`DIVCOPIES)) +`define DURLEN ($clog2($rtoi(`FPDUR)+1)) +`define QLEN ($rtoi(`FPDUR)*`LOGR*`DIVCOPIES) + + `define USE_SRAM 0 // Disable spurious Verilator warnings diff --git a/pipelined/regression/wave-fpu.do b/pipelined/regression/wave-fpu.do index 9e7ba49b..58f782bd 100644 --- a/pipelined/regression/wave-fpu.do +++ b/pipelined/regression/wave-fpu.do @@ -11,7 +11,7 @@ add wave -noupdate /testbenchfp/DivStart add wave -noupdate /testbenchfp/DivBusy add wave -noupdate /testbenchfp/srtfsm/state add wave -group {PostProc} -noupdate /testbenchfp/postprocess/* -add wave -group {PostProc} -noupdate /testbenchfp/postprocess/resultselect/* +add wave -group {PostProc} -noupdate /testbenchfp/postprocess/specialcase/* add wave -group {PostProc} -noupdate /testbenchfp/postprocess/flags/* add wave -group {PostProc} -noupdate /testbenchfp/postprocess/normshift/* add wave -group {PostProc} -noupdate /testbenchfp/postprocess/lzacorrection/* @@ -21,8 +21,12 @@ add wave -group {PostProc} -noupdate /testbenchfp/postprocess/fmashiftcalc/* add wave -group {PostProc} -noupdate /testbenchfp/postprocess/divshiftcalc/* add wave -group {PostProc} -noupdate /testbenchfp/postprocess/cvtshiftcalc/* add wave -group {Divide} -noupdate /testbenchfp/srtradix4/* -add wave -group {Divide} -noupdate /testbenchfp/srtradix4/qsel4/* -add wave -group {Divide} -noupdate /testbenchfp/srtradix4/otfc4/* +add wave -group {Divide} -group inter0 -noupdate /testbenchfp/srtradix4/genblk1[0]/divinteration/* +add wave -group {Divide} -group inter0 -noupdate /testbenchfp/srtradix4/genblk1[0]/divinteration/qsel4/* +add wave -group {Divide} -group inter0 -noupdate /testbenchfp/srtradix4/genblk1[0]/divinteration/otfc4/* +add wave -group {Divide} -group inter1 -noupdate /testbenchfp/srtradix4/genblk1[1]/divinteration/* +add wave -group {Divide} -group inter2 -noupdate /testbenchfp/srtradix4/genblk1[2]/divinteration/* +add wave -group {Divide} -group inter3 -noupdate /testbenchfp/srtradix4/genblk1[3]/divinteration/* add wave -group {Divide} -noupdate /testbenchfp/srtpreproc/* add wave -group {Divide} -noupdate /testbenchfp/srtradix4/expcalc/* add wave -group {Divide} -noupdate /testbenchfp/srtfsm/* diff --git a/pipelined/src/fpu/divshiftcalc.sv b/pipelined/src/fpu/divshiftcalc.sv index 935ed3c1..a4f3feff 100644 --- a/pipelined/src/fpu/divshiftcalc.sv +++ b/pipelined/src/fpu/divshiftcalc.sv @@ -1,9 +1,9 @@ `include "wally-config.vh" module divshiftcalc( - input logic [`DIVLEN+2:0] Quot, + input logic [`QLEN-1:0] Quot, input logic [`FMTBITS-1:0] Fmt, - input logic [$clog2(`DIVLEN/2+3)-1:0] DivEarlyTermShiftDiv2, + input logic [`DURLEN-1:0] DivEarlyTermShift, input logic [`NE+1:0] DivCalcExp, output logic [$clog2(`NORMSHIFTSZ)-1:0] DivShiftAmt, output logic [`NORMSHIFTSZ-1:0] DivShiftIn, @@ -32,9 +32,10 @@ module divshiftcalc( // inital Left shift amount = NF assign NormShift = (`NE+2)'(`NF); // if the shift amount is negitive then dont shift (keep sticky bit) - assign DivShiftAmt = (DivResDenorm ? DivDenormShift[$clog2(`NORMSHIFTSZ)-1:0]&{$clog2(`NORMSHIFTSZ){~DivDenormShift[`NE+1]}} : NormShift[$clog2(`NORMSHIFTSZ)-1:0])+{{$clog2(`NORMSHIFTSZ)-$clog2(`DIVLEN/2+3)-1{1'b0}}, DivEarlyTermShiftDiv2&{$clog2(`DIVLEN/2+3){~DivDenormShift[`NE+1]}}, 1'b0}; + // need to multiply the early termination shift by LOGR*DIVCOPIES = left shift of log2(LOGR*DIVCOPIES) + assign DivShiftAmt = (DivResDenorm ? DivDenormShift[$clog2(`NORMSHIFTSZ)-1:0]&{$clog2(`NORMSHIFTSZ){~DivDenormShift[`NE+1]}} : NormShift[$clog2(`NORMSHIFTSZ)-1:0])+{{$clog2(`NORMSHIFTSZ)-`DURLEN-$clog2(`LOGR*`DIVCOPIES){1'b0}}, DivEarlyTermShift&{`DURLEN{~DivDenormShift[`NE+1]}}, ($clog2(`LOGR*`DIVCOPIES))'(0)}; // *** may be able to reduce shifter size - assign DivShiftIn = {{`NF{1'b0}}, Quot[`DIVLEN+2:0], {`NORMSHIFTSZ-`DIVLEN-3-`NF{1'b0}}}; + assign DivShiftIn = {{`NF-1{1'b0}}, Quot, {`NORMSHIFTSZ-`QLEN+1-`NF{1'b0}}}; endmodule diff --git a/pipelined/src/fpu/divsqrt.sv b/pipelined/src/fpu/divsqrt.sv index 086b97d8..c4f09aea 100644 --- a/pipelined/src/fpu/divsqrt.sv +++ b/pipelined/src/fpu/divsqrt.sv @@ -47,8 +47,8 @@ module divsqrt( output logic DivBusy, output logic DivDone, output logic [`NE+1:0] DivCalcExpM, - output logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2M, - output logic [`DIVLEN+2:0] QuotM + output logic [`DURLEN-1:0] EarlyTermShiftM, + output logic [`QLEN-1:0] QuotM // output logic [`XLEN-1:0] RemM, ); @@ -57,12 +57,12 @@ module divsqrt( logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt; logic [`DIVLEN-1:0] X; logic [`DIVLEN-1:0] Dpreproc; - logic [$clog2(`DIVLEN/2+3)-1:0] Dur; + logic [`DURLEN-1:0] Dur; srtpreproc srtpreproc(.XManE, .Dur, .YManE,.X,.Dpreproc, .XZeroCnt, .YZeroCnt); srtfsm srtfsm(.reset, .WSN, .WCN, .WS, .WC, .Dur, .DivBusy, .clk, .DivStart(DivStartE),.StallE, .StallM, .DivDone, .XZeroE, .YZeroE, .DivStickyE(DivStickyM), .XNaNE, .YNaNE, - .XInfE, .YInfE, .DivNegStickyE(DivNegStickyM), .EarlyTermShiftDiv2E(EarlyTermShiftDiv2M)); - srtradix4 srtradix4(.clk, .FmtE, .X,.Dpreproc, .XZeroCnt, .YZeroCnt, .WS, .WC, .WSN, .WCN, .DivStart(DivStartE), .XExpE, .YExpE, .XZeroE, .YZeroE, + .XInfE, .YInfE, .DivNegStickyE(DivNegStickyM), .EarlyTermShiftE(EarlyTermShiftM)); + srtradix4 srtradix4(.clk, .FmtE, .X,.Dpreproc, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .WSN, .WCN, .DivStart(DivStartE), .XExpE, .YExpE, .XZeroE, .YZeroE, .DivBusy, .Quot(QuotM), .Rem(), .DivCalcExpM); endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/flags.sv b/pipelined/src/fpu/flags.sv index 98250a45..4e16bc96 100644 --- a/pipelined/src/fpu/flags.sv +++ b/pipelined/src/fpu/flags.sv @@ -34,20 +34,20 @@ module flags( input logic XInf, YInf, ZInf, // inputs are infinity input logic Plus1, input logic InfIn, // is a Inf input being used + input logic NaNIn, // is a NaN input being used + input logic [`FMTBITS-1:0] OutFmt, // output format input logic XZero, YZero, // inputs are zero input logic XNaN, YNaN, // inputs are NaN - input logic NaNIn, // is a NaN input being used input logic Sqrt, // Sqrt? input logic ToInt, // convert to integer input logic IntToFp, // convert integer to floating point input logic Int64, // convert to 64 bit integer input logic Signed, // convert to a signed integer - input logic [`FMTBITS-1:0] OutFmt, // output format input logic [`NE:0] CvtCe, // the calculated expoent - Cvt input logic CvtOp, // conversion opperation? input logic DivOp, // conversion opperation? input logic FmaOp, // Fma opperation? - input logic [`NE+1:0] FullResExp, // Re with bits to determine sign and overflow + input logic [`NE+1:0] FullRe, // Re with bits to determine sign and overflow input logic [`NE+1:0] Nexp, // exponent of the normalized sum input logic [1:0] CvtNegResMsbs, // the negitive integer result's most significant bits input logic FmaAs, FmaPs, // the product and modified Z signs @@ -73,30 +73,30 @@ module flags( if (`FPSIZES == 1) begin - assign ResExpGteMax = &FullResExp[`NE-1:0] | FullResExp[`NE]; - assign ShiftGtIntSz = (|FullResExp[`NE:7]|(FullResExp[6]&~Int64)) | ((|FullResExp[4:0]|(FullResExp[5]&Int64))&((FullResExp[5]&~Int64) | FullResExp[6]&Int64)); + assign ResExpGteMax = &FullRe[`NE-1:0] | FullRe[`NE]; + assign ShiftGtIntSz = (|FullRe[`NE:7]|(FullRe[6]&~Int64)) | ((|FullRe[4:0]|(FullRe[5]&Int64))&((FullRe[5]&~Int64) | FullRe[6]&Int64)); end else if (`FPSIZES == 2) begin - assign ResExpGteMax = OutFmt ? &FullResExp[`NE-1:0] | FullResExp[`NE] : &FullResExp[`NE1-1:0] | (|FullResExp[`NE:`NE1]); + assign ResExpGteMax = OutFmt ? &FullRe[`NE-1:0] | FullRe[`NE] : &FullRe[`NE1-1:0] | (|FullRe[`NE:`NE1]); - assign ShiftGtIntSz = (|FullResExp[`NE:7]|(FullResExp[6]&~Int64)) | ((|FullResExp[4:0]|(FullResExp[5]&Int64))&((FullResExp[5]&~Int64) | FullResExp[6]&Int64)); + assign ShiftGtIntSz = (|FullRe[`NE:7]|(FullRe[6]&~Int64)) | ((|FullRe[4:0]|(FullRe[5]&Int64))&((FullRe[5]&~Int64) | FullRe[6]&Int64)); end else if (`FPSIZES == 3) begin always_comb case (OutFmt) - `FMT: ResExpGteMax = &FullResExp[`NE-1:0] | FullResExp[`NE]; - `FMT1: ResExpGteMax = &FullResExp[`NE1-1:0] | (|FullResExp[`NE:`NE1]); - `FMT2: ResExpGteMax = &FullResExp[`NE2-1:0] | (|FullResExp[`NE:`NE2]); + `FMT: ResExpGteMax = &FullRe[`NE-1:0] | FullRe[`NE]; + `FMT1: ResExpGteMax = &FullRe[`NE1-1:0] | (|FullRe[`NE:`NE1]); + `FMT2: ResExpGteMax = &FullRe[`NE2-1:0] | (|FullRe[`NE:`NE2]); default: ResExpGteMax = 1'bx; endcase - assign ShiftGtIntSz = (|FullResExp[`NE:7]|(FullResExp[6]&~Int64)) | ((|FullResExp[4:0]|(FullResExp[5]&Int64))&((FullResExp[5]&~Int64) | FullResExp[6]&Int64)); + assign ShiftGtIntSz = (|FullRe[`NE:7]|(FullRe[6]&~Int64)) | ((|FullRe[4:0]|(FullRe[5]&Int64))&((FullRe[5]&~Int64) | FullRe[6]&Int64)); end else if (`FPSIZES == 4) begin always_comb case (OutFmt) - `Q_FMT: ResExpGteMax = &FullResExp[`Q_NE-1:0] | FullResExp[`Q_NE]; - `D_FMT: ResExpGteMax = &FullResExp[`D_NE-1:0] | (|FullResExp[`Q_NE:`D_NE]); - `S_FMT: ResExpGteMax = &FullResExp[`S_NE-1:0] | (|FullResExp[`Q_NE:`S_NE]); - `H_FMT: ResExpGteMax = &FullResExp[`H_NE-1:0] | (|FullResExp[`Q_NE:`H_NE]); + `Q_FMT: ResExpGteMax = &FullRe[`Q_NE-1:0] | FullRe[`Q_NE]; + `D_FMT: ResExpGteMax = &FullRe[`D_NE-1:0] | (|FullRe[`Q_NE:`D_NE]); + `S_FMT: ResExpGteMax = &FullRe[`S_NE-1:0] | (|FullRe[`Q_NE:`S_NE]); + `H_FMT: ResExpGteMax = &FullRe[`H_NE-1:0] | (|FullRe[`Q_NE:`H_NE]); endcase // a left shift of intlen+1 is still in range but any more than that is an overflow // inital: | 64 0's | XLEN | @@ -110,14 +110,14 @@ module flags( // - any of the bits after the most significan 1 is one // - the most signifcant in 65 or 33 is still a one in the number and // one of the later bits is one - assign ShiftGtIntSz = (|FullResExp[`Q_NE:7]|(FullResExp[6]&~Int64)) | ((|FullResExp[4:0]|(FullResExp[5]&Int64))&((FullResExp[5]&~Int64) | FullResExp[6]&Int64)); + assign ShiftGtIntSz = (|FullRe[`Q_NE:7]|(FullRe[6]&~Int64)) | ((|FullRe[4:0]|(FullRe[5]&Int64))&((FullRe[5]&~Int64) | FullRe[6]&Int64)); end // if the result is greater than or equal to the max exponent(not taking into account sign) // | and the exponent isn't negitive // | | if the input isnt infinity or NaN // | | | - assign Overflow = ResExpGteMax & ~FullResExp[`NE+1]&~(InfIn|NaNIn|DivByZero); + assign Overflow = ResExpGteMax & ~FullRe[`NE+1]&~(InfIn|NaNIn|DivByZero); // detecting tininess after rounding // the exponent is negitive @@ -127,7 +127,7 @@ module flags( // | | | | and if the result is not exact // | | | | | and if the input isnt infinity or NaN // | | | | | | - assign Underflow = ((FullResExp[`NE+1] | (FullResExp == 0) | ((FullResExp == 1) & (Nexp == 0) & ~(UfPlus1&UfLSBRes)))&(R|S))&~(InfIn|NaNIn|DivByZero); + assign Underflow = ((FullRe[`NE+1] | (FullRe == 0) | ((FullRe == 1) & (Nexp == 0) & ~(UfPlus1&UfLSBRes)))&(R|S))&~(InfIn|NaNIn|DivByZero); // Set Inexact flag if the res is diffrent from what would be outputed given infinite precision // - Don't set the underflow flag if an underflowed res isn't outputed @@ -153,7 +153,7 @@ module flags( // | | | | or the res rounds up out of bounds // | | | | and the res didn't underflow // | | | | | - assign IntInvalid = XNaN|XInf|(ShiftGtIntSz&~FullResExp[`NE+1])|((Xs&~Signed)&(~((CvtCe[`NE]|(~|CvtCe))&~Plus1)))|(CvtNegResMsbs[1]^CvtNegResMsbs[0]); + assign IntInvalid = XNaN|XInf|(ShiftGtIntSz&~FullRe[`NE+1])|((Xs&~Signed)&(~((CvtCe[`NE]|(~|CvtCe))&~Plus1)))|(CvtNegResMsbs[1]^CvtNegResMsbs[0]); // | // or when the positive res rounds up out of range assign SigNaN = (XSNaN&~(IntToFp&CvtOp)) | (YSNaN&~CvtOp) | (ZSNaN&FmaOp); diff --git a/pipelined/src/fpu/fmashiftcalc.sv b/pipelined/src/fpu/fmashiftcalc.sv index 5f55e17b..d4898e80 100644 --- a/pipelined/src/fpu/fmashiftcalc.sv +++ b/pipelined/src/fpu/fmashiftcalc.sv @@ -37,7 +37,7 @@ module fmashiftcalc( input logic FmaKillProd, // is the product set to zero input logic ZDenorm, output logic [`NE+1:0] FmaConvNormSumExp, // exponent of the normalized sum not taking into account denormal or zero results - output logic FmaSmZero, // is the result denormalized - calculated before LZA corection + output logic FmaSZero, // is the result denormalized - calculated before LZA corection output logic FmaPreResultDenorm, // is the result denormalized - calculated before LZA corection output logic [$clog2(3*`NF+7)-1:0] FmaShiftAmt, // normalization shift count output logic [3*`NF+8:0] FmaShiftIn // is the sum zero @@ -50,7 +50,7 @@ module fmashiftcalc( /////////////////////////////////////////////////////////////////////////////// //*** insert bias-bias simplification in fcvt.sv/phone pictures // Determine if the sum is zero - assign FmaSmZero = ~(|FmaSm); + assign FmaSZero = ~(|FmaSm); // calculate the sum's exponent assign NormSumExp = FmaKillProd ? {2'b0, Ze[`NE-1:1], Ze[0]&~ZDenorm} : FmaPe + -{{`NE+2-$unsigned($clog2(3*`NF+7)){1'b0}}, FmaNCnt} - 1 + (`NE+2)'(`NF+4); @@ -90,7 +90,7 @@ module fmashiftcalc( logic Sum0LEZ, Sum0GEFL; assign Sum0LEZ = NormSumExp[`NE+1] | ~|NormSumExp; assign Sum0GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF)-(`NE+2)'(2)); - assign FmaPreResultDenorm = Sum0LEZ & Sum0GEFL & ~FmaSmZero; + assign FmaPreResultDenorm = Sum0LEZ & Sum0GEFL & ~FmaSZero; end else if (`FPSIZES == 2) begin logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL; @@ -98,7 +98,7 @@ module fmashiftcalc( assign Sum0GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF)-(`NE+2)'(2)); assign Sum1LEZ = $signed(NormSumExp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1)); assign Sum1GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF1+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1)) | ~|NormSumExp; - assign FmaPreResultDenorm = (Fmt ? Sum0LEZ : Sum1LEZ) & (Fmt ? Sum0GEFL : Sum1GEFL) & ~FmaSmZero; + assign FmaPreResultDenorm = (Fmt ? Sum0LEZ : Sum1LEZ) & (Fmt ? Sum0GEFL : Sum1GEFL) & ~FmaSZero; end else if (`FPSIZES == 3) begin logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL, Sum2LEZ, Sum2GEFL; @@ -110,9 +110,9 @@ module fmashiftcalc( assign Sum2GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF2+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`BIAS2)) | ~|NormSumExp; always_comb begin case (Fmt) - `FMT: FmaPreResultDenorm = Sum0LEZ & Sum0GEFL & ~FmaSmZero; - `FMT1: FmaPreResultDenorm = Sum1LEZ & Sum1GEFL & ~FmaSmZero; - `FMT2: FmaPreResultDenorm = Sum2LEZ & Sum2GEFL & ~FmaSmZero; + `FMT: FmaPreResultDenorm = Sum0LEZ & Sum0GEFL & ~FmaSZero; + `FMT1: FmaPreResultDenorm = Sum1LEZ & Sum1GEFL & ~FmaSZero; + `FMT2: FmaPreResultDenorm = Sum2LEZ & Sum2GEFL & ~FmaSZero; default: FmaPreResultDenorm = 1'bx; endcase end @@ -129,10 +129,10 @@ module fmashiftcalc( assign Sum3GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`H_NF+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`H_BIAS)) | ~|NormSumExp; always_comb begin case (Fmt) - 2'h3: FmaPreResultDenorm = Sum0LEZ & Sum0GEFL & ~FmaSmZero; - 2'h1: FmaPreResultDenorm = Sum1LEZ & Sum1GEFL & ~FmaSmZero; - 2'h0: FmaPreResultDenorm = Sum2LEZ & Sum2GEFL & ~FmaSmZero; - 2'h2: FmaPreResultDenorm = Sum3LEZ & Sum3GEFL & ~FmaSmZero; + 2'h3: FmaPreResultDenorm = Sum0LEZ & Sum0GEFL & ~FmaSZero; + 2'h1: FmaPreResultDenorm = Sum1LEZ & Sum1GEFL & ~FmaSZero; + 2'h0: FmaPreResultDenorm = Sum2LEZ & Sum2GEFL & ~FmaSZero; + 2'h2: FmaPreResultDenorm = Sum3LEZ & Sum3GEFL & ~FmaSZero; endcase // *** remove checking to see if it's underflowed and only check for less than zero for denorm checking end @@ -144,7 +144,7 @@ module fmashiftcalc( // - if kill prod dont add to exp // Determine if the result is denormal - // assign FmaPreResultDenorm = $signed(FmaConvNormSumExp)<=0 & ($signed(FmaConvNormSumExp)>=$signed(-FracLen)) & ~FmaSmZero; + // assign FmaPreResultDenorm = $signed(FmaConvNormSumExp)<=0 & ($signed(FmaConvNormSumExp)>=$signed(-FracLen)) & ~FmaSZero; // Determine the shift needed for denormal results // - if not denorm add 1 to shift out the leading 1 diff --git a/pipelined/src/fpu/fpu.sv b/pipelined/src/fpu/fpu.sv index 8336c39c..e1c9e5fa 100755 --- a/pipelined/src/fpu/fpu.sv +++ b/pipelined/src/fpu/fpu.sv @@ -125,12 +125,12 @@ module fpu ( logic [`CVTLEN-1:0] CvtLzcInE, CvtLzcInM; // input to the Leading Zero Counter (priority encoder) //divide signals - logic [`DIVLEN+2:0] QuotE, QuotM; + logic [`QLEN-1:0] QuotM; logic [`NE+1:0] DivCalcExpE, DivCalcExpM; logic DivNegStickyE, DivNegStickyM; logic DivStickyE, DivStickyM; logic DivDoneM; - logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2E, EarlyTermShiftDiv2M; + logic [`DURLEN-1:0] EarlyTermShiftM; // result and flag signals logic [63:0] FDivResM, FDivResW; // divide/squareroot result @@ -289,7 +289,7 @@ module fpu ( divsqrt divsqrt(.clk, .reset, .FmtE, .XManE, .YManE, .XExpE, .YExpE, .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .DivStartE(FDivStartE), .StallE, .StallM, .DivStickyM, .DivNegStickyM, .DivBusy(FDivBusyE), .DivCalcExpM, //***change divbusyE to M signal - .EarlyTermShiftDiv2M, .QuotM, .DivDone(DivDoneM)); + .EarlyTermShiftM, .QuotM, .DivDone(DivDoneM)); // other FP execution units fcmp fcmp (.FmtE, .FOpCtrlE, .XSgnE, .YSgnE, .XExpE, .YExpE, .XManE, .YManE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .XSNaNE, .YSNaNE, .FSrcXE, .FSrcYE, .CmpNVE, .CmpFpResE, .CmpIntResE); @@ -381,12 +381,12 @@ module fpu ( assign FpLoadStoreM = FResSelM[1]; - postprocess postprocess(.Xs(XSgnM), .Ys(YSgnM), .Ze(ZExpM), .Xm(XManM), .Ym(YManM), .Zm(ZManM), .Frm(FrmM), .Fmt(FmtM), .FmaPe(ProdExpM), .DivEarlyTermShiftDiv2(EarlyTermShiftDiv2M), + postprocess postprocess(.Xs(XSgnM), .Ys(YSgnM), .Ze(ZExpM), .Xm(XManM), .Ym(YManM), .Zm(ZManM), .Frm(FrmM), .Fmt(FmtM), .FmaPe(ProdExpM), .DivEarlyTermShift(EarlyTermShiftM), .FmaZmSticky(AddendStickyM), .FmaKillProd(KillProdM), .XZero(XZeroM), .YZero(YZeroM), .ZZero(ZZeroM), .XInf(XInfM), .YInf(YInfM), .Quot(QuotM), .ZInf(ZInfM), .XNaN(XNaNM), .YNaN(YNaNM), .ZNaN(ZNaNM), .XSNaN(XSNaNM), .YSNaN(YSNaNM), .ZSNaN(ZSNaNM), .FmaSm(SumM), .DivCalcExp(DivCalcExpM), .DivDone(DivDoneM), .FmaNegSum(NegSumM), .FmaInvA(InvAM), .ZDenorm(ZDenormM), .FmaAs(ZSgnEffM), .FmaPs(PSgnM), .FOpCtrl(FOpCtrlM), .FmaNCnt(FmaNormCntM), .DivNegSticky(DivNegStickyM), .CvtCe(CvtCalcExpM), .CvtResDenormUf(CvtResDenormUfM),.CvtShiftAmt(CvtShiftAmtM), .CvtCs(CvtResSgnM), .ToInt(FWriteIntM), .DivSticky(DivStickyM), - .CvtLzcIn(CvtLzcInM), .IntZero(IntZeroM), .PostProcSel(PostProcSelM), .W(PostProcResM), .PostProcFlg(PostProcFlgM), .FCvtIntRes(FCvtIntResM)); + .CvtLzcIn(CvtLzcInM), .IntZero(IntZeroM), .PostProcSel(PostProcSelM), .PostProcRes(PostProcResM), .PostProcFlg(PostProcFlgM), .FCvtIntRes(FCvtIntResM)); // FPU flag selection - to privileged mux2 #(5) FPUFlgMux ({PreNVM&~FResSelM[1], 4'b0}, PostProcFlgM, ~FResSelM[1]&FResSelM[0], SetFflagsM); diff --git a/pipelined/src/fpu/lzacorrection.sv b/pipelined/src/fpu/lzacorrection.sv index 03b36f4f..17db0c0b 100644 --- a/pipelined/src/fpu/lzacorrection.sv +++ b/pipelined/src/fpu/lzacorrection.sv @@ -38,7 +38,7 @@ module lzacorrection( input logic [`NE+1:0] FmaConvNormSumExp, // exponent of the normalized sum not taking into account denormal or zero results input logic FmaPreResultDenorm, // is the result denormalized - calculated before LZA corection input logic FmaKillProd, // is the product set to zero - input logic FmaSmZero, + input logic FmaSZero, output logic [`CORRSHIFTSZ-1:0] Nfrac, // the shifted sum before LZA correction output logic [`NE+1:0] DivCorrExp, output logic [`NE+1:0] FmaSe // exponent of the normalized sum @@ -59,7 +59,7 @@ module lzacorrection( assign Nfrac = FmaOp ? {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+6){1'b0}}} : DivOp&~DivResDenorm ? CorrQuotShifted[`CORRSHIFTSZ-1:0] : Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ]; // Determine sum's exponent // if plus1 If plus2 if said denorm but norm plus 1 if said denorm but norm plus 2 - assign FmaSe = (FmaConvNormSumExp+{{`NE+1{1'b0}}, LZAPlus1&~FmaKillProd}+{{`NE{1'b0}}, LZAPlus2&~FmaKillProd, 1'b0}+{{`NE+1{1'b0}}, ~ResDenorm&FmaPreResultDenorm&~FmaKillProd}+{{`NE+1{1'b0}}, &FmaConvNormSumExp&Shifted[3*`NF+6]&~FmaKillProd}) & {`NE+2{~(FmaSmZero|ResDenorm)}}; + assign FmaSe = (FmaConvNormSumExp+{{`NE+1{1'b0}}, LZAPlus1&~FmaKillProd}+{{`NE{1'b0}}, LZAPlus2&~FmaKillProd, 1'b0}+{{`NE+1{1'b0}}, ~ResDenorm&FmaPreResultDenorm&~FmaKillProd}+{{`NE+1{1'b0}}, &FmaConvNormSumExp&Shifted[3*`NF+6]&~FmaKillProd}) & {`NE+2{~(FmaSZero|ResDenorm)}}; // recalculate if the result is denormalized assign ResDenorm = FmaPreResultDenorm&~Shifted[`NORMSHIFTSZ-3]&~Shifted[`NORMSHIFTSZ-2]; diff --git a/pipelined/src/fpu/postprocess.sv b/pipelined/src/fpu/postprocess.sv index e165e7e1..18452abd 100644 --- a/pipelined/src/fpu/postprocess.sv +++ b/pipelined/src/fpu/postprocess.sv @@ -54,12 +54,12 @@ module postprocess( input logic FmaInvA, // do you invert Z input logic [$clog2(3*`NF+7)-1:0] FmaNCnt, // the normalization shift count //divide signals - input logic [$clog2(`DIVLEN/2+3)-1:0] DivEarlyTermShiftDiv2, + input logic [`DURLEN-1:0] DivEarlyTermShift, input logic DivSticky, input logic DivNegSticky, input logic DivDone, input logic [`NE+1:0] DivCalcExp, - input logic [`DIVLEN+2:0] Quot, + input logic [`QLEN-1:0] Quot, // conversion signals input logic CvtCs, // the result's sign input logic [`NE:0] CvtCe, // the calculated expoent @@ -69,7 +69,7 @@ module postprocess( input logic [`CVTLEN-1:0] CvtLzcIn, // input to the Leading Zero Counter (priority encoder) input logic IntZero, // is the input zero // final results - output logic [`FLEN-1:0] W, // FMA final result + output logic [`FLEN-1:0] PostProcRes, // FMA final result output logic [4:0] PostProcFlg, output logic [`XLEN-1:0] FCvtIntRes // the int conversion result ); @@ -81,7 +81,7 @@ module postprocess( logic Nsgn; logic [`NE+1:0] Nexp; logic [`CORRSHIFTSZ-1:0] Nfrac; // corectly shifted fraction - logic [`NE+1:0] FullResExp; // Re with bits to determine sign and overflow + logic [`NE+1:0] FullRe; // Re with bits to determine sign and overflow logic S; // S bit logic UfPlus1; // do you add one (for determining underflow flag) logic R; // bits needed to determine rounding @@ -95,7 +95,7 @@ module postprocess( logic [`FMTBITS-1:0] OutFmt; // fma signals logic [`NE+1:0] FmaSe; // exponent of the normalized sum - logic FmaSmZero; // is the sum zero + logic FmaSZero; // is the sum zero logic [3*`NF+8:0] FmaShiftIn; // shift input logic [`NE+1:0] FmaConvNormSumExp; // exponent of the normalized sum not taking into account denormal or zero results logic FmaPreResultDenorm; // is the result denormalized - calculated before LZA corection @@ -153,8 +153,8 @@ module postprocess( cvtshiftcalc cvtshiftcalc(.ToInt, .CvtCe, .CvtResDenormUf, .Xm, .CvtLzcIn, .XZero, .IntToFp, .OutFmt, .CvtResUf, .CvtShiftIn); fmashiftcalc fmashiftcalc(.FmaSm, .Ze, .FmaPe, .FmaNCnt, .Fmt, .FmaKillProd, .FmaConvNormSumExp, - .ZDenorm, .FmaSmZero, .FmaPreResultDenorm, .FmaShiftAmt, .FmaShiftIn); - divshiftcalc divshiftcalc(.Fmt, .DivCalcExp, .Quot, .DivEarlyTermShiftDiv2, .DivResDenorm, .DivDenormShift, .DivShiftAmt, .DivShiftIn); + .ZDenorm, .FmaSZero, .FmaPreResultDenorm, .FmaShiftAmt, .FmaShiftIn); + divshiftcalc divshiftcalc(.Fmt, .DivCalcExp, .Quot, .DivEarlyTermShift, .DivResDenorm, .DivDenormShift, .DivShiftAmt, .DivShiftIn); always_comb case(PostProcSel) @@ -185,7 +185,7 @@ module postprocess( lzacorrection lzacorrection(.FmaOp, .FmaKillProd, .FmaPreResultDenorm, .FmaConvNormSumExp, .DivResDenorm, .DivDenormShift, .DivOp, .DivCalcExp, - .DivCorrExp, .FmaSmZero, .Shifted, .FmaSe, .Nfrac); + .DivCorrExp, .FmaSZero, .Shifted, .FmaSe, .Nfrac); /////////////////////////////////////////////////////////////////////////////// // Rounding @@ -204,14 +204,14 @@ module postprocess( round round(.OutFmt, .Frm, .S, .FmaZmSticky, .ZZero, .Plus1, .PostProcSel, .CvtCe, .DivCorrExp, .FmaInvA, .Nsgn, .FmaSe, .FmaOp, .CvtOp, .CvtResDenormUf, .Nfrac, .ToInt, .CvtResUf, .DivSticky, .DivNegSticky, .DivDone, - .DivOp, .UfPlus1, .FullResExp, .Rf, .Re, .R, .RoundAdd, .UfLSBRes, .Nexp); + .DivOp, .UfPlus1, .FullRe, .Rf, .Re, .R, .RoundAdd, .UfLSBRes, .Nexp); /////////////////////////////////////////////////////////////////////////////// // Sign calculation /////////////////////////////////////////////////////////////////////////////// resultsign resultsign(.Frm, .FmaPs, .FmaAs, .FmaSe, .R, .S, - .FmaOp, .ZInf, .InfIn, .FmaSmZero, .Mult, .Nsgn, .Ws); + .FmaOp, .ZInf, .InfIn, .FmaSZero, .Mult, .Nsgn, .Ws); /////////////////////////////////////////////////////////////////////////////// // Flags @@ -220,7 +220,7 @@ module postprocess( flags flags(.XSNaN, .YSNaN, .ZSNaN, .XInf, .YInf, .ZInf, .InfIn, .XZero, .YZero, .Xs, .Sqrt, .ToInt, .IntToFp, .Int64, .Signed, .OutFmt, .CvtCe, .XNaN, .YNaN, .NaNIn, .FmaAs, .FmaPs, .R, .IntInvalid, .DivByZero, - .UfLSBRes, .S, .UfPlus1, .CvtOp, .DivOp, .FmaOp, .FullResExp, .Plus1, + .UfLSBRes, .S, .UfPlus1, .CvtOp, .DivOp, .FmaOp, .FullRe, .Plus1, .Nexp, .CvtNegResMsbs, .Invalid, .Overflow, .PostProcFlg); /////////////////////////////////////////////////////////////////////////////// @@ -228,10 +228,10 @@ module postprocess( /////////////////////////////////////////////////////////////////////////////// negateintres negateintres(.Xs, .Shifted, .Signed, .Int64, .Plus1, .CvtNegResMsbs, .CvtNegRes); - resultselect resultselect(.Xs, .Xm, .Ym, .Zm, .XZero, .IntInvalid, + specialcase specialcase(.Xs, .Xm, .Ym, .Zm, .XZero, .IntInvalid, .IntZero, .Frm, .OutFmt, .XNaN, .YNaN, .ZNaN, .CvtResUf, .NaNIn, .IntToFp, .Int64, .Signed, .CvtOp, .FmaOp, .Plus1, .Invalid, .Overflow, .InfIn, .CvtNegRes, .XInf, .YInf, .DivOp, - .DivByZero, .FullResExp, .CvtCe, .Ws, .Re, .Rf, .W, .FCvtIntRes); + .DivByZero, .FullRe, .CvtCe, .Ws, .Re, .Rf, .PostProcRes, .FCvtIntRes); endmodule diff --git a/pipelined/src/fpu/resultselect.sv b/pipelined/src/fpu/resultselect.sv deleted file mode 100644 index 4389056f..00000000 --- a/pipelined/src/fpu/resultselect.sv +++ /dev/null @@ -1,290 +0,0 @@ -/////////////////////////////////////////// -// -// Written: me@KatherineParry.com -// Modified: 7/5/2022 -// -// Purpose: special case selection -// -// A component of the Wally configurable RISC-V project. -// -// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University -// -// MIT LICENSE -// Permission is hereby granted, free of charge, to any person obtaining a copy of this -// software and associated documentation files (the "Software"), to deal in the Software -// without restriction, including without limitation the rights to use, copy, modify, merge, -// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons -// to whom the Software is furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all copies or -// substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, -// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR -// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE -// OR OTHER DEALINGS IN THE SOFTWARE. -//////////////////////////////////////////////////////////////////////////////////////////////// - -`include "wally-config.vh" - -module resultselect( - input logic Xs, // input signs - input logic [`NF:0] Xm, Ym, Zm, // input mantissas - input logic XNaN, YNaN, ZNaN, // inputs are NaN - input logic [2:0] Frm, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude - input logic [`FMTBITS-1:0] OutFmt, // output format - input logic InfIn, - input logic XInf, YInf, - input logic XZero, - input logic IntZero, - input logic NaNIn, - input logic IntToFp, - input logic Int64, - input logic Signed, - input logic CvtOp, - input logic DivOp, - input logic FmaOp, - input logic Plus1, - input logic DivByZero, - input logic [`NE:0] CvtCe, // the calculated expoent - input logic Ws, // the res's sign - input logic IntInvalid, Invalid, Overflow, // flags - input logic CvtResUf, - input logic [`NE-1:0] Re, // Res exponent - input logic [`NE+1:0] FullResExp, // Res exponent - input logic [`NF-1:0] Rf, // Res fraction - input logic [`XLEN+1:0] CvtNegRes, // the negation of the result - output logic [`FLEN-1:0] W, // final res - output logic [`XLEN-1:0] FCvtIntRes // final res -); - logic [`FLEN-1:0] XNaNRes, YNaNRes, ZNaNRes, InvalidRes, OfRes, UfRes, NormRes; // possible results - logic OfResMax; - logic [`XLEN-1:0] OfIntRes; // the overflow result for integer output - logic KillRes; - logic SelOfRes; - - - // does the overflow result output the maximum normalized floating point number - // output infinity if the input is infinity - assign OfResMax = (~InfIn|(IntToFp&CvtOp))&~DivByZero&((Frm[1:0]==2'b01) | (Frm[1:0]==2'b10&~Ws) | (Frm[1:0]==2'b11&Ws)); - - if (`FPSIZES == 1) begin - - //NaN res selection depending on standard - if(`IEEE754) begin - assign XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]}; - assign YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Ym[`NF-2:0]}; - assign ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Zm[`NF-2:0]}; - assign InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}}; - end else begin - assign InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}}; - end - - assign OfRes = OfResMax ? {Ws, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {Ws, {`NE{1'b1}}, {`NF{1'b0}}}; - assign UfRes = {Ws, {`FLEN-2{1'b0}}, Plus1&Frm[1]&~(DivOp&YInf)}; - assign NormRes = {Ws, Re, Rf}; - - end else if (`FPSIZES == 2) begin //will the format conversion in killprod work in other conversions? - if(`IEEE754) begin - assign XNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF1]}; - assign YNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, Ym[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Ym[`NF-2:`NF-`NF1]}; - assign ZNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, Zm[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Zm[`NF-2:`NF-`NF1]}; - assign InvalidRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)}; - end else begin - assign InvalidRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)}; - end - - assign OfRes = OutFmt ? OfResMax ? {Ws, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {Ws, {`NE{1'b1}}, {`NF{1'b0}}} : - OfResMax ? {{`FLEN-`LEN1{1'b1}}, Ws, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} : {{`FLEN-`LEN1{1'b1}}, Ws, {`NE1{1'b1}}, (`NF1)'(0)}; - assign UfRes = OutFmt ? {Ws, (`FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)} : {{`FLEN-`LEN1{1'b1}}, Ws, (`LEN1-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; - assign NormRes = OutFmt ? {Ws, Re, Rf} : {{`FLEN-`LEN1{1'b1}}, Ws, Re[`NE1-1:0], Rf[`NF-1:`NF-`NF1]}; - - end else if (`FPSIZES == 3) begin - always_comb - case (OutFmt) - `FMT: begin - if(`IEEE754) begin - XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]}; - YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Ym[`NF-2:0]}; - ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Zm[`NF-2:0]}; - InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}}; - end else begin - InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}}; - end - - OfRes = OfResMax ? {Ws, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {Ws, {`NE{1'b1}}, {`NF{1'b0}}}; - UfRes = {Ws, (`FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; - NormRes = {Ws, Re, Rf}; - end - `FMT1: begin - if(`IEEE754) begin - XNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF1]}; - YNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Ym[`NF-2:`NF-`NF1]}; - ZNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Zm[`NF-2:`NF-`NF1]}; - InvalidRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)}; - end else begin - InvalidRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)}; - end - OfRes = OfResMax ? {{`FLEN-`LEN1{1'b1}}, Ws, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} : {{`FLEN-`LEN1{1'b1}}, Ws, {`NE1{1'b1}}, (`NF1)'(0)}; - UfRes = {{`FLEN-`LEN1{1'b1}}, Ws, (`LEN1-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; - NormRes = {{`FLEN-`LEN1{1'b1}}, Ws, Re[`NE1-1:0], Rf[`NF-1:`NF-`NF1]}; - end - `FMT2: begin - if(`IEEE754) begin - XNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF2]}; - YNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, Ym[`NF-2:`NF-`NF2]}; - ZNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, Zm[`NF-2:`NF-`NF2]}; - InvalidRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)}; - end else begin - InvalidRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)}; - end - - OfRes = OfResMax ? {{`FLEN-`LEN2{1'b1}}, Ws, {`NE2-1{1'b1}}, 1'b0, {`NF2{1'b1}}} : {{`FLEN-`LEN2{1'b1}}, Ws, {`NE2{1'b1}}, (`NF2)'(0)}; - UfRes = {{`FLEN-`LEN2{1'b1}}, Ws, (`LEN2-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; - NormRes = {{`FLEN-`LEN2{1'b1}}, Ws, Re[`NE2-1:0], Rf[`NF-1:`NF-`NF2]}; - end - default: begin - if(`IEEE754) begin - XNaNRes = (`FLEN)'(0); - YNaNRes = (`FLEN)'(0); - ZNaNRes = (`FLEN)'(0); - InvalidRes = (`FLEN)'(0); - end else begin - InvalidRes = (`FLEN)'(0); - end - OfRes = (`FLEN)'(0); - UfRes = (`FLEN)'(0); - NormRes = (`FLEN)'(0); - end - endcase - - end else if (`FPSIZES == 4) begin - always_comb - case (OutFmt) - 2'h3: begin - if(`IEEE754) begin - XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]}; - YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Ym[`NF-2:0]}; - ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Zm[`NF-2:0]}; - InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}}; - end else begin - InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}}; - end - - OfRes = OfResMax ? {Ws, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {Ws, {`NE{1'b1}}, {`NF{1'b0}}}; - UfRes = {Ws, (`FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; - NormRes = {Ws, Re, Rf}; - end - 2'h1: begin - if(`IEEE754) begin - XNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`D_NF]}; - YNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, Ym[`NF-2:`NF-`D_NF]}; - ZNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, Zm[`NF-2:`NF-`D_NF]}; - InvalidRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)}; - end else begin - InvalidRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)}; - end - OfRes = OfResMax ? {{`FLEN-`D_LEN{1'b1}}, Ws, {`D_NE-1{1'b1}}, 1'b0, {`D_NF{1'b1}}} : {{`FLEN-`D_LEN{1'b1}}, Ws, {`D_NE{1'b1}}, (`D_NF)'(0)}; - UfRes = {{`FLEN-`D_LEN{1'b1}}, Ws, (`D_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; - NormRes = {{`FLEN-`D_LEN{1'b1}}, Ws, Re[`D_NE-1:0], Rf[`NF-1:`NF-`D_NF]}; - end - 2'h0: begin - if(`IEEE754) begin - XNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`S_NF]}; - YNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, Ym[`NF-2:`NF-`S_NF]}; - ZNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, Zm[`NF-2:`NF-`S_NF]}; - InvalidRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)}; - end else begin - InvalidRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)}; - end - - OfRes = OfResMax ? {{`FLEN-`S_LEN{1'b1}}, Ws, {`S_NE-1{1'b1}}, 1'b0, {`S_NF{1'b1}}} : {{`FLEN-`S_LEN{1'b1}}, Ws, {`S_NE{1'b1}}, (`S_NF)'(0)}; - UfRes = {{`FLEN-`S_LEN{1'b1}}, Ws, (`S_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; - NormRes = {{`FLEN-`S_LEN{1'b1}}, Ws, Re[`S_NE-1:0], Rf[`NF-1:`NF-`S_NF]}; - end - 2'h2: begin - if(`IEEE754) begin - XNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`H_NF]}; - YNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, Ym[`NF-2:`NF-`H_NF]}; - ZNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, Zm[`NF-2:`NF-`H_NF]}; - InvalidRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)}; - end else begin - InvalidRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)}; - end - - OfRes = OfResMax ? {{`FLEN-`H_LEN{1'b1}}, Ws, {`H_NE-1{1'b1}}, 1'b0, {`H_NF{1'b1}}} : {{`FLEN-`H_LEN{1'b1}}, Ws, {`H_NE{1'b1}}, (`H_NF)'(0)}; - // zero is exact fi dividing by infinity so don't add 1 - UfRes = {{`FLEN-`H_LEN{1'b1}}, Ws, (`H_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; - NormRes = {{`FLEN-`H_LEN{1'b1}}, Ws, Re[`H_NE-1:0], Rf[`NF-1:`NF-`H_NF]}; - end - endcase - - end - - - - - - // determine if you shoould kill the res - Cvt - // - do so if the res underflows, is zero (the exp doesnt calculate correctly). or the integer input is 0 - // - dont set to zero if fp input is zero but not using the fp input - // - dont set to zero if int input is zero but not using the int input - assign KillRes = CvtOp ? (CvtResUf|(XZero&~IntToFp)|(IntZero&IntToFp)) : FullResExp[`NE+1] | (((YInf&~XInf)|XZero)&DivOp);//Underflow & ~ResDenorm & (Re!=1); - assign SelOfRes = Overflow|DivByZero|(InfIn&~(YInf&DivOp)); - // output infinity with result sign if divide by zero - if(`IEEE754) begin - assign W = XNaN&~(IntToFp&CvtOp) ? XNaNRes : - YNaN&~CvtOp ? YNaNRes : - ZNaN&FmaOp ? ZNaNRes : - Invalid ? InvalidRes : - SelOfRes ? OfRes : - KillRes ? UfRes : - NormRes; - end else begin - assign W = NaNIn|Invalid ? InvalidRes : - SelOfRes ? OfRes : - KillRes ? UfRes : - NormRes; - end - - /////////////////////////////////////////////////////////////////////////////////////// - // - // ||||||||||| ||| ||| ||||||||||||| - // ||| |||||| ||| ||| - // ||| ||| ||| ||| ||| - // ||| ||| |||||| ||| - // ||||||||||| ||| ||| ||| - // - /////////////////////////////////////////////////////////////////////////////////////// - - // *** probably can optimize the negation - // select the overflow integer res - // - negitive infinity and out of range negitive input - // | int | long | - // signed | -2^31 | -2^63 | - // unsigned | 0 | 0 | - // - // - positive infinity and out of range positive input and NaNs - // | int | long | - // signed | 2^31-1 | 2^63-1 | - // unsigned | 2^32-1 | 2^64-1 | - // - // other: 32 bit unsinged res should be sign extended as if it were a signed number - assign OfIntRes = Signed ? Xs&~XNaN ? Int64 ? {1'b1, {`XLEN-1{1'b0}}} : {{`XLEN-32{1'b1}}, 1'b1, {31{1'b0}}} : // signed negitive - Int64 ? {1'b0, {`XLEN-1{1'b1}}} : {{`XLEN-32{1'b0}}, 1'b0, {31{1'b1}}} : // signed positive - Xs&~XNaN ? {`XLEN{1'b0}} : // unsigned negitive - {`XLEN{1'b1}};// unsigned positive - - - // select the integer output - // - if the input is invalid (out of bounds NaN or Inf) then output overflow res - // - if the input underflows - // - if rounding and signed opperation and negitive input, output -1 - // - otherwise output a rounded 0 - // - otherwise output the normal res (trmined and sign extended if nessisary) - assign FCvtIntRes = IntInvalid ? OfIntRes : - CvtCe[`NE] ? Xs&Signed&Plus1 ? {{`XLEN{1'b1}}} : {{`XLEN-1{1'b0}}, Plus1} : //CalcExp has to come after invalid ***swap to actual mux at some point?? - Int64 ? CvtNegRes[`XLEN-1:0] : {{`XLEN-32{CvtNegRes[31]}}, CvtNegRes[31:0]}; -endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/resultsign.sv b/pipelined/src/fpu/resultsign.sv index 05c3b461..e6de0c18 100644 --- a/pipelined/src/fpu/resultsign.sv +++ b/pipelined/src/fpu/resultsign.sv @@ -35,7 +35,7 @@ module resultsign( input logic InfIn, input logic FmaOp, input logic [`NE+1:0] FmaSe, - input logic FmaSmZero, + input logic FmaSZero, input logic Mult, input logic R, input logic S, @@ -61,6 +61,6 @@ module resultsign( // if -p + z is the Sum positive // if -p - z then the Sum is negitive assign InfSgn = ZInf ? FmaAs : FmaPs; - assign Ws = InfIn&FmaOp ? InfSgn : FmaSmZero&FmaOp ? ZeroSgn : Nsgn; + assign Ws = InfIn&FmaOp ? InfSgn : FmaSZero&FmaOp ? ZeroSgn : Nsgn; endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/round.sv b/pipelined/src/fpu/round.sv index 4c185ff3..c73edc08 100644 --- a/pipelined/src/fpu/round.sv +++ b/pipelined/src/fpu/round.sv @@ -57,7 +57,7 @@ module round( input logic DivSticky, // sticky bit input logic DivNegSticky, output logic UfPlus1, // do you add or subtract on from the result - output logic [`NE+1:0] FullResExp, // Re with bits to determine sign and overflow + output logic [`NE+1:0] FullRe, // Re with bits to determine sign and overflow output logic [`NF-1:0] Rf, // Result fraction output logic [`NE-1:0] Re, // Result exponent output logic S, // sticky bit @@ -344,8 +344,8 @@ module round( // round the result // - if the fraction overflows one should be added to the exponent - assign {FullResExp, Rf} = {Nexp, RoundFrac} + RoundAdd; - assign Re = FullResExp[`NE-1:0]; + assign {FullRe, Rf} = {Nexp, RoundFrac} + RoundAdd; + assign Re = FullRe[`NE-1:0]; endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/srt-radix4.sv b/pipelined/src/fpu/srt-radix4.sv index 741d4e83..1c7b9648 100644 --- a/pipelined/src/fpu/srt-radix4.sv +++ b/pipelined/src/fpu/srt-radix4.sv @@ -30,7 +30,7 @@ `include "wally-config.vh" -module srtradix4 ( +module srtradix4( input logic clk, input logic DivStart, input logic DivBusy, @@ -40,20 +40,29 @@ module srtradix4 ( input logic [`DIVLEN-1:0] X, input logic [`DIVLEN-1:0] Dpreproc, input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt, - output logic [`DIVLEN+2:0] Quot, + output logic [`QLEN-1:0] Quot, output logic [`DIVLEN+3:0] WSN, WCN, - output logic [`DIVLEN+3:0] WS, WC, + output logic [`DIVLEN+3:0] FirstWS, FirstWC, output logic [`NE+1:0] DivCalcExpM, output logic [`XLEN-1:0] Rem ); - logic [3:0] q; - logic [`DIVLEN+3:0] WSA; - logic [`DIVLEN+3:0] WCA; - logic [`DIVLEN+3:0] D, DBar, D2, DBar2, Dsel; + + /* verilator lint_off UNOPTFLAT */ + logic [`DIVLEN+3:0] WSA[`DIVCOPIES-1:0]; + logic [`DIVLEN+3:0] WCA[`DIVCOPIES-1:0]; + logic [`DIVLEN+3:0] WS[`DIVCOPIES-1:0]; + logic [`DIVLEN+3:0] WC[`DIVCOPIES-1:0]; + logic [`QLEN-1:0] Q[`DIVCOPIES-1:0]; + logic [`QLEN-1:0] QM[`DIVCOPIES-1:0]; + logic [`QLEN-1:0] QNext[`DIVCOPIES-1:0]; + logic [`QLEN-1:0] QMNext[`DIVCOPIES-1:0]; + /* verilator lint_on UNOPTFLAT */ + logic [`DIVLEN+3:0] D, DBar, D2, DBar2; logic [`NE+1:0] DivCalcExp; logic [$clog2(`XLEN+1)-1:0] intExp; logic intSign; + logic [`QLEN-1:0] QMux, QMMux; // Top Muxes and Registers // When start is asserted, the inputs are loaded into the divider. @@ -63,47 +72,43 @@ module srtradix4 ( // - otherwise load WSA into the flipflop // - the assumed one is added to D since it's always normalized (and X/0 is a special case handeled by result selection) // - XZeroE is used as the assumed one to avoid creating a sticky bit - all other numbers are normalized - mux2 #(`DIVLEN+4) wsmux({WSA[`DIVLEN+1:0], 2'b0}, {3'b000, ~XZeroE, X}, DivStart, WSN); - flop #(`DIVLEN+4) wsflop(clk, WSN, WS); - mux2 #(`DIVLEN+4) wcmux({WCA[`DIVLEN+1:0], 2'b0}, {`DIVLEN+4{1'b0}}, DivStart, WCN); - flop #(`DIVLEN+4) wcflop(clk, WCN, WC); + mux2 #(`DIVLEN+4) wsmux({WSA[`DIVCOPIES-1][`DIVLEN+1:0], 2'b0}, {3'b000, ~XZeroE, X}, DivStart, WSN); + flop #(`DIVLEN+4) wsflop(clk, WSN, WS[0]); + mux2 #(`DIVLEN+4) wcmux({WCA[`DIVCOPIES-1][`DIVLEN+1:0], 2'b0}, {`DIVLEN+4{1'b0}}, DivStart, WCN); + flop #(`DIVLEN+4) wcflop(clk, WCN, WC[0]); flopen #(`DIVLEN+4) dflop(clk, DivStart, {4'b0001, Dpreproc}, D); flopen #(`NE+2) expflop(clk, DivStart, DivCalcExp, DivCalcExpM); - // Quotient Selection logic - // Given partial remainder, select quotient of +1, 0, or -1 (qp, qz, pm) - // *** change this for radix 4 - generate w/ stine code - // q encoding: - // 1000 = +2 - // 0100 = +1 - // 0000 = 0 - // 0010 = -1 - // 0001 = -2 - qsel4 qsel4(.D, .WS, .WC, .q); - // Divisor Selection logic - // *** radix 4 change to choose -2 to 2 + // Divisor Selections // - choose the negitive version of what's being selected assign DBar = ~D; assign DBar2 = {~D[`DIVLEN+2:0], 1'b1}; assign D2 = {D[`DIVLEN+2:0], 1'b0}; - always_comb - case (q) - 4'b1000: Dsel = DBar2; - 4'b0100: Dsel = DBar; - 4'b0000: Dsel = {(`DIVLEN+4){1'b0}}; - 4'b0010: Dsel = D; - 4'b0001: Dsel = D2; - default: Dsel = {`DIVLEN+4{1'bx}}; - endcase + genvar i; + generate + for(i=0; i<`DIVCOPIES; i++) begin + divinteration divinteration(.clk, .DivStart, .DivBusy, .D, .DBar, .D2, .DBar2, + .WS(WS[i]), .WC(WC[i]), .WSA(WSA[i]), .WCA(WCA[i]), .Q(Q[i]), .QM(QM[i]), .QNext(QNext[i]), .QMNext(QMNext[i])); + if(i<3) begin + assign WS[i+1] = {WSA[i][`DIVLEN+1:0], 2'b0}; + assign WC[i+1] = {WCA[i][`DIVLEN+1:0], 2'b0}; + assign Q[i+1] = QNext[i]; + assign QM[i+1] = QMNext[i]; + end + end + endgenerate - // Partial Product Generation - // WSA, WCA = WS + WC - qD - csa #(`DIVLEN+4) csa(WS, WC, Dsel, |q[3:2], WSA, WCA); - - //*** change for radix 4 - otfc4 otfc4(.clk, .DivStart, .DivBusy, .q, .Quot); + // if starting a new divison set Q to 0 and QM to -1 + mux2 #(`QLEN) Qmux(QNext[`DIVCOPIES-1], {`QLEN{1'b0}}, DivStart, QMux); + mux2 #(`QLEN) QMmux(QMNext[`DIVCOPIES-1], {`QLEN{1'b1}}, DivStart, QMMux); + flopen #(`QLEN) Qreg(clk, DivBusy|DivStart, QMux, Q[0]); // *** have to connect Quot directly to M stage + flop #(`QLEN) QMreg(clk, QMMux, QM[0]); + + assign Quot = Q[0]; + assign FirstWS = WS[0]; + assign FirstWC = WC[0]; expcalc expcalc(.FmtE, .XExpE, .YExpE, .XZeroE, .XZeroCnt, .YZeroCnt, .DivCalcExp); @@ -113,7 +118,50 @@ endmodule // Submodules // //////////////// + /* verilator lint_off UNOPTFLAT */ +module divinteration ( + input logic clk, + input logic DivStart, + input logic DivBusy, + input logic [`DIVLEN+3:0] D, + input logic [`DIVLEN+3:0] DBar, D2, DBar2, + input logic [`QLEN-1:0] Q, QM, + input logic [`DIVLEN+3:0] WS, WC, + output logic [`QLEN-1:0] QNext, QMNext, + output logic [`DIVLEN+3:0] WSA, WCA +); + /* verilator lint_on UNOPTFLAT */ + logic [`DIVLEN+3:0] Dsel; + logic [3:0] q; + + // Quotient Selection logic + // Given partial remainder, select quotient of +1, 0, or -1 (qp, qz, pm) + // q encoding: + // 1000 = +2 + // 0100 = +1 + // 0000 = 0 + // 0010 = -1 + // 0001 = -2 + qsel4 qsel4(.D, .WS, .WC, .q); + + always_comb + case (q) + 4'b1000: Dsel = DBar2; + 4'b0100: Dsel = DBar; + 4'b0000: Dsel = {`DIVLEN+4{1'b0}}; + 4'b0010: Dsel = D; + 4'b0001: Dsel = D2; + default: Dsel = {`DIVLEN+4{1'bx}}; + endcase + + // Partial Product Generation + // WSA, WCA = WS + WC - qD + csa #(`DIVLEN+4) csa(WS, WC, Dsel, |q[3:2], WSA, WCA); + + otfc4 otfc4(.clk, .DivStart, .DivBusy, .q, .Q, .QM, .QNext, .QMNext); + +endmodule module qsel4 ( input logic [`DIVLEN+3:0] D, @@ -195,7 +243,8 @@ module otfc4 ( input logic DivStart, input logic DivBusy, input logic [3:0] q, - output logic [`DIVLEN+2:0] Quot + input logic [`QLEN-1:0] Q, QM, + output logic [`QLEN-1:0] QNext, QMNext ); // The on-the-fly converter transfers the quotient @@ -207,16 +256,11 @@ module otfc4 ( // // QM is Q-1. It allows us to write negative bits // without using a costly CPA. - logic [`DIVLEN+2:0] QM, QNext, QMNext, QMux, QMMux; + // QR and QMR are the shifted versions of Q and QM. // They are treated as [N-1:r] size signals, and // discard the r most significant bits of Q and QM. - logic [`DIVLEN:0] QR, QMR; - // if starting a new divison set Q to 0 and QM to -1 - mux2 #(`DIVLEN+3) Qmux(QNext, {`DIVLEN+3{1'b0}}, DivStart, QMux); - mux2 #(`DIVLEN+3) QMmux(QMNext, {`DIVLEN+3{1'b1}}, DivStart, QMMux); - flopen #(`DIVLEN+3) Qreg(clk, DivBusy|DivStart, QMux, Quot); // *** have to connect Quot directly to M stage - flop #(`DIVLEN+3) QMreg(clk, QMMux, QM); + logic [`QLEN-3:0] QR, QMR; // shift Q (quotent) and QM (quotent-1) // if q = 2 Q = {Q, 10} QM = {Q, 01} @@ -227,8 +271,8 @@ module otfc4 ( // *** how does the 0 concatination numbers work? always_comb begin - QR = Quot[`DIVLEN:0]; - QMR = QM[`DIVLEN:0]; // Shift Q and QM + QR = Q[`QLEN-3:0]; + QMR = QM[`QLEN-3:0]; // Shift Q and QM if (q[3]) begin // +2 QNext = {QR, 2'b10}; QMNext = {QR, 2'b01}; diff --git a/pipelined/src/fpu/srtfsm.sv b/pipelined/src/fpu/srtfsm.sv index 008b234d..fc73cf71 100644 --- a/pipelined/src/fpu/srtfsm.sv +++ b/pipelined/src/fpu/srtfsm.sv @@ -40,8 +40,8 @@ module srtfsm( input logic DivStart, input logic StallE, input logic StallM, - input logic [$clog2(`DIVLEN/2+3)-1:0] Dur, - output logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2E, + input logic [`DURLEN-1:0] Dur, + output logic [`DURLEN-1:0] EarlyTermShiftE, output logic DivStickyE, output logic DivDone, output logic DivNegStickyE, @@ -51,7 +51,7 @@ module srtfsm( typedef enum logic [1:0] {IDLE, BUSY, DONE} statetype; statetype state; - logic [$clog2(`DIVLEN/2+3)-1:0] step; + logic [`DURLEN-1:0] step; logic WZero; //logic [$clog2(`DIVLEN/2+3)-1:0] Dur; logic [`DIVLEN+3:0] W; @@ -63,7 +63,7 @@ module srtfsm( assign DivDone = (state == DONE); assign W = WC+WS; assign DivNegStickyE = W[`DIVLEN+3]; //*** is there a better way to do this??? - assign EarlyTermShiftDiv2E = step; + assign EarlyTermShiftE = step; always_ff @(posedge clk) begin if (reset) begin @@ -73,7 +73,7 @@ module srtfsm( if (XZeroE|YZeroE|XInfE|YInfE|XNaNE|YNaNE) state <= #1 DONE; else state <= #1 BUSY; end else if (state == BUSY) begin - if ((~|step[$clog2(`DIVLEN/2+3)-1:1]&step[0])|WZero) begin + if ((~|step[`DURLEN-1:1]&step[0])|WZero) begin state <= #1 DONE; end step <= step - 1; diff --git a/pipelined/src/fpu/srtpreproc.sv b/pipelined/src/fpu/srtpreproc.sv index d17d2abd..fa76c051 100644 --- a/pipelined/src/fpu/srtpreproc.sv +++ b/pipelined/src/fpu/srtpreproc.sv @@ -35,7 +35,7 @@ module srtpreproc ( output logic [`DIVLEN-1:0] X, output logic [`DIVLEN-1:0] Dpreproc, output logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt, - output logic [$clog2(`DIVLEN/2+3)-1:0] Dur + output logic [`DURLEN-1:0] Dur ); // logic [`XLEN-1:0] PosA, PosB; // logic [`DIVLEN-1:0] ExtraA, ExtraB, PreprocA, PreprocB, PreprocX, PreprocY; @@ -63,10 +63,20 @@ module srtpreproc ( assign X = PreprocX; assign Dpreproc = PreprocY; - - assign Dur = ($clog2(`DIVLEN/2+3))'(`DIVLEN/2+2); + + assign Dur = (`DURLEN)'($rtoi(`FPDUR)); // assign intExp = zeroCntB - zeroCntA + 1; // assign intSign = Signed & (SrcA[`XLEN - 1] ^ SrcB[`XLEN - 1]); + // radix 2 radix 4 + // 1 copies DIVLEN+2 DIVLEN+2/2 + // 2 copies DIVLEN+2/2 DIVLEN+2/2*2 + // 4 copies DIVLEN+2/4 DIVLEN+2/2*4 + // 8 copies DIVLEN+2/8 DIVLEN+2/2*8 + + // DIVRESLEN = DIVLEN or DIVLEN+2 + // r = 1 or 2 + // DIVRESLEN/(r*`DIVCOPIES) + endmodule \ No newline at end of file diff --git a/pipelined/testbench/testbench-fp.sv b/pipelined/testbench/testbench-fp.sv index fa46a060..ba14499e 100644 --- a/pipelined/testbench/testbench-fp.sv +++ b/pipelined/testbench/testbench-fp.sv @@ -80,9 +80,9 @@ module testbenchfp; logic CvtResSgnE; logic [`NE:0] CvtCalcExpE; // the calculated expoent logic [`LOGCVTLEN-1:0] CvtShiftAmtE; // how much to shift by - logic [`DIVLEN+2:0] Quot; + logic [`QLEN-1:0] Quot; logic CvtResDenormUfE; - logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2; + logic [`DURLEN-1:0] EarlyTermShift; logic DivStart, DivBusy; logic reset = 1'b0; logic [`DIVLEN-1:0] DivX; @@ -90,7 +90,7 @@ module testbenchfp; logic [`DIVLEN+3:0] WSN, WS; logic [`DIVLEN+3:0] WCN, WC; logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt; - logic [$clog2(`DIVLEN/2+3)-1:0] Dur; + logic [`DURLEN-1:0] Dur; // in-between FMA signals logic Mult; @@ -686,8 +686,8 @@ module testbenchfp; .XInf(XInf), .YInf(YInf), .ZInf(ZInf), .CvtCs(CvtResSgnE), .ToInt(WriteIntVal), .XSNaN(XSNaN), .YSNaN(YSNaN), .ZSNaN(ZSNaN), .CvtLzcIn(CvtLzcInE), .IntZero, .FmaKillProd(KillProd), .FmaZmSticky(ZmSticky), .FmaPe(Pe), .DivDone, - .FmaSm(Sm), .FmaNegSum(NegSum), .FmaInvA(InvA), .FmaNCnt(NCnt), .DivEarlyTermShiftDiv2(EarlyTermShiftDiv2), .FmaAs(As), .FmaPs(Ps), .Fmt(ModFmt), .Frm(FrmVal), - .PostProcFlg(Flg), .W(FpRes), .FCvtIntRes(IntRes)); + .FmaSm(Sm), .FmaNegSum(NegSum), .FmaInvA(InvA), .FmaNCnt(NCnt), .DivEarlyTermShift(EarlyTermShift), .FmaAs(As), .FmaPs(Ps), .Fmt(ModFmt), .Frm(FrmVal), + .PostProcFlg(Flg), .PostProcRes(FpRes), .FCvtIntRes(IntRes)); fcvt fcvt (.Xs(XSgn), .Xe(XExp), .Xm(XMan), .Int(SrcA), .ToInt(WriteIntVal), .XZero(XZero), .XDenorm(XDenorm), .FOpCtrl(OpCtrlVal), .IntZero, @@ -697,8 +697,8 @@ module testbenchfp; .XNaNE(XNaN), .YNaNE(YNaN), .XSNaNE(XSNaN), .YSNaNE(YSNaN), .FSrcXE(X), .FSrcYE(Y), .CmpNVE(CmpFlg[4]), .CmpFpResE(FpCmpRes)); srtpreproc srtpreproc(.XManE(XMan), .Dur, .YManE(YMan),.X(DivX),.Dpreproc, .XZeroCnt, .YZeroCnt); srtfsm srtfsm(.reset, .WSN, .WCN, .WS, .WC, .Dur, .DivBusy, .DivDone, .clk, .DivStart, .StallM(1'b0), .StallE(1'b0), .XZeroE(XZero), .YZeroE(YZero), .DivStickyE(DivSticky), .XNaNE(XNaN), .YNaNE(YNaN), - .XInfE(XInf), .YInfE(YInf), .DivNegStickyE(DivNegSticky), .EarlyTermShiftDiv2E(EarlyTermShiftDiv2)); - srtradix4 srtradix4(.clk, .FmtE(ModFmt), .X(DivX),.Dpreproc, .DivBusy, .XZeroCnt, .YZeroCnt, .WS, .WC, .WSN, .WCN, .DivStart, .XExpE(XExp), .YExpE(YExp), .XZeroE(XZero), .YZeroE(YZero), + .XInfE(XInf), .YInfE(YInf), .DivNegStickyE(DivNegSticky), .EarlyTermShiftE(EarlyTermShift)); + srtradix4 srtradix4(.clk, .FmtE(ModFmt), .X(DivX),.Dpreproc, .DivBusy, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .WSN, .WCN, .DivStart, .XExpE(XExp), .YExpE(YExp), .XZeroE(XZero), .YZeroE(YZero), .Quot, .Rem(), .DivCalcExpM(DivCalcExp)); assign CmpFlg[3:0] = 0;