From b469831b5354f750a83b8de2f7e882c456ef9736 Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Wed, 28 Dec 2022 17:46:53 -0600 Subject: [PATCH 01/15] one bit removed from initial alignment shift --- pipelined/config/shared/wally-shared.vh | 4 ++-- pipelined/src/fpu/fma/fma.sv | 11 +++++----- pipelined/src/fpu/fma/fmaadd.sv | 12 +++++------ pipelined/src/fpu/fma/fmaalign.sv | 20 +++++++++---------- pipelined/src/fpu/fma/fmalza.sv | 2 +- pipelined/src/fpu/fpu.sv | 8 ++++---- pipelined/src/fpu/postproc/fmashiftcalc.sv | 14 ++++++------- pipelined/src/fpu/postproc/postprocess.sv | 12 +++++------ pipelined/src/fpu/postproc/shiftcorrection.sv | 4 ++-- pipelined/testbench/testbench-fp.sv | 4 ++-- 10 files changed, 46 insertions(+), 45 deletions(-) diff --git a/pipelined/config/shared/wally-shared.vh b/pipelined/config/shared/wally-shared.vh index cc24c42f6..cb2930a7a 100644 --- a/pipelined/config/shared/wally-shared.vh +++ b/pipelined/config/shared/wally-shared.vh @@ -104,9 +104,9 @@ `define CVTLEN ((`NF<`XLEN) ? (`XLEN) : (`NF)) `define LLEN ((`FLEN<`XLEN) ? (`XLEN) : (`FLEN)) `define LOGCVTLEN $unsigned($clog2(`CVTLEN+1)) -`define NORMSHIFTSZ ((`QLEN+`NF+1) > (3*`NF+8) ? (`QLEN+`NF+1) : (3*`NF+8)) +`define NORMSHIFTSZ ((`QLEN+`NF+1) > (3*`NF+7) ? (`QLEN+`NF+1) : (3*`NF+7))//change `define LOGNORMSHIFTSZ ($clog2(`NORMSHIFTSZ)) -`define CORRSHIFTSZ ((`DIVRESLEN+`NF) > (3*`NF+8) ? (`DIVRESLEN+`NF) : (3*`NF+6)) +`define CORRSHIFTSZ ((`DIVRESLEN+`NF) > (3*`NF+7) ? 
(`DIVRESLEN+`NF) : (3*`NF+5))//change // division constants `define RADIX 32'h2 diff --git a/pipelined/src/fpu/fma/fma.sv b/pipelined/src/fpu/fma/fma.sv index c1a69e6fc..eb2213da5 100644 --- a/pipelined/src/fpu/fma/fma.sv +++ b/pipelined/src/fpu/fma/fma.sv @@ -37,18 +37,18 @@ module fma( input logic XZero, YZero, ZZero, // is the input zero input logic [2:0] OpCtrl, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y) output logic ZmSticky, // sticky bit that is calculated during alignment - output logic [3*`NF+5:0] Sm, // the positive sum's significand + output logic [3*`NF+4:0] Sm,//change // the positive sum's significand output logic InvA, // Was A inverted for effective subtraction (P-A or -P+A) output logic As, // the aligned addend's sign (modified Z sign for other opperations) output logic Ps, // the product's sign output logic Ss, // the sum's sign output logic [`NE+1:0] Se, - output logic [$clog2(3*`NF+7)-1:0] SCnt // normalization shift count + output logic [$clog2(3*`NF+6)-1:0] SCnt//change // normalization shift count ); logic [2*`NF+1:0] Pm; // the product's significand in U(2.2Nf) format - logic [3*`NF+5:0] Am; // addend aligned's mantissa for addition in U(NF+5.2NF+1) - logic [3*`NF+5:0] AmInv; // aligned addend's mantissa possibly inverted + logic [3*`NF+4:0] Am;//change // addend aligned's mantissa for addition in U(NF+5.2NF+1) + logic [3*`NF+4:0] AmInv; //change // aligned addend's mantissa possibly inverted logic [2*`NF+1:0] PmKilled; // the product's mantissa possibly killed logic KillProd; // set the product to zero before addition if the product is too small to matter logic [`NE+1:0] Pe; // the product's exponent B(NE+2.0) format; adds 2 bits to allow for size of number and negative sign @@ -85,7 +85,8 @@ module fma( fmaadd add(.Am, .Pm, .Ze, .Pe, .Ps, .KillProd, .ZmSticky, .AmInv, .PmKilled, .InvA, .Sm, .Se, .Ss); - fmalza #(3*`NF+6) lza(.A(AmInv), .Pm({PmKilled, 1'b0, 
InvA&Ps&ZmSticky&KillProd}), .Cin(InvA & ~(ZmSticky & ~KillProd)), .sub(InvA), .SCnt); + //change + fmalza #(3*`NF+5) lza(.A(AmInv), .Pm({PmKilled, 1'b0, InvA&Ps&ZmSticky&KillProd}), .Cin(InvA & ~(ZmSticky & ~KillProd)), .sub(InvA), .SCnt); endmodule diff --git a/pipelined/src/fpu/fma/fmaadd.sv b/pipelined/src/fpu/fma/fmaadd.sv index adb8f4504..0991e44b0 100644 --- a/pipelined/src/fpu/fma/fmaadd.sv +++ b/pipelined/src/fpu/fma/fmaadd.sv @@ -31,7 +31,7 @@ `include "wally-config.vh" module fmaadd( - input logic [3*`NF+5:0] Am, // aligned addend's mantissa for addition in U(NF+5.2NF+1) + input logic [3*`NF+4:0] Am, //change // aligned addend's mantissa for addition in U(NF+5.2NF+1) input logic [2*`NF+1:0] Pm, // the product's mantissa input logic Ps, // the product sign and the alligend addeded's sign (Modified Z sign for other opperations) input logic InvA, // invert the aligned addend @@ -39,13 +39,13 @@ module fmaadd( input logic ZmSticky, input logic [`NE-1:0] Ze, input logic [`NE+1:0] Pe, - output logic [3*`NF+5:0] AmInv, // aligned addend possibly inverted + output logic [3*`NF+4:0] AmInv,//change // aligned addend possibly inverted output logic [2*`NF+1:0] PmKilled, // the product's mantissa possibly killed output logic Ss, output logic [`NE+1:0] Se, - output logic [3*`NF+5:0] Sm // the positive sum + output logic [3*`NF+4:0] Sm//change // the positive sum ); - logic [3*`NF+5:0] PreSum, NegPreSum; // possibly negitive sum + logic [3*`NF+4:0] PreSum, NegPreSum;//change // possibly negitive sum logic [3*`NF+5:0] PreSumdebug, NegPreSumdebug; // possibly negitive sum logic NegSum; // was the sum negitive logic NegSumdebug; // was the sum negitive @@ -65,8 +65,8 @@ module fmaadd( // ie ~(InvA&ZmSticky&~KillProd)&InvA = (~ZmSticky|KillProd)&InvA // addend - prod where product is killed (and not exactly zero) then don't add +1 from negation // ie ~(InvA&ZmSticky&KillProd)&InvA = (~ZmSticky|~KillProd)&InvA - assign {NegSum, PreSum} = {{`NF+3{1'b0}}, PmKilled, 2'b0} + 
{InvA, AmInv} + {{3*`NF+6{1'b0}}, (~ZmSticky|KillProd)&InvA}; - assign NegPreSum = Am + {{`NF+2{1'b1}}, ~PmKilled, 2'b0} + {(3*`NF+3)'(0), (~ZmSticky|~KillProd)&InvA, 2'b0}; + assign {NegSum, PreSum} = {{`NF+2{1'b0}}, PmKilled, 2'b0} + {InvA, AmInv} + {{3*`NF+5{1'b0}}, (~ZmSticky|KillProd)&InvA};//change + assign NegPreSum = Am + {{`NF+1{1'b1}}, ~PmKilled, 2'b0} + {(3*`NF+2)'(0), (~ZmSticky|~KillProd)&InvA, 2'b0};//change // Choose the positive sum and accompanying LZA result. assign Sm = NegSum ? NegPreSum : PreSum; diff --git a/pipelined/src/fpu/fma/fmaalign.sv b/pipelined/src/fpu/fma/fmaalign.sv index e423c19dd..85b28c7b5 100644 --- a/pipelined/src/fpu/fma/fmaalign.sv +++ b/pipelined/src/fpu/fma/fmaalign.sv @@ -35,14 +35,14 @@ module fmaalign( input logic [`NE-1:0] Xe, Ye, Ze, // biased exponents in B(NE.0) format input logic [`NF:0] Zm, // significand in U(0.NF) format] input logic XZero, YZero, ZZero, // is the input zero - output logic [3*`NF+5:0] Am, // addend aligned for addition in U(NF+5.2NF+1) + output logic [3*`NF+4:0] Am,//change // addend aligned for addition in U(NF+5.2NF+1) output logic ZmSticky, // Sticky bit calculated from the aliged addend output logic KillProd // should the product be set to zero ); logic [`NE+1:0] ACnt; // how far to shift the addend to align with the product in Q(NE+2.0) format - logic [4*`NF+5:0] ZmShifted; // output of the alignment shifter including sticky bits U(NF+5.3NF+1) - logic [4*`NF+5:0] ZmPreshifted; // input to the alignment shifter U(NF+5.3NF+1) + logic [4*`NF+4:0] ZmShifted;//change // output of the alignment shifter including sticky bits U(NF+5.3NF+1) + logic [4*`NF+4:0] ZmPreshifted;//change // input to the alignment shifter U(NF+5.3NF+1) logic KillZ; /////////////////////////////////////////////////////////////////////////////// @@ -53,16 +53,16 @@ module fmaalign( // - negitive means Z is larger, so shift Z left // - positive means the product is larger, so shift Z right // This could have been done using 
Pe, but ACnt is on the critical path so we replicate logic for speed - assign ACnt = {2'b0, Xe} + {2'b0, Ye} - {2'b0, (`NE)'(`BIAS)} + (`NE+2)'(`NF+3) - {2'b0, Ze}; + assign ACnt = {2'b0, Xe} + {2'b0, Ye} - {2'b0, (`NE)'(`BIAS)} + (`NE+2)'(`NF+2) - {2'b0, Ze}; // Defualt Addition with only inital left shift - // | 54'b0 | 106'b(product) | 2'b0 | + // | 53'b0 | 106'b(product) | 2'b0 | // | addnend | - assign ZmPreshifted = {Zm,(3*`NF+5)'(0)}; + assign ZmPreshifted = {Zm,(3*`NF+4)'(0)}; //change assign KillProd = (ACnt[`NE+1]&~ZZero)|XZero|YZero; - assign KillZ = $signed(ACnt)>$signed((`NE+2)'(3)*(`NE+2)'(`NF)+(`NE+2)'(5)); + assign KillZ = $signed(ACnt)>$signed((`NE+2)'(3)*(`NE+2)'(`NF)+(`NE+2)'(4));//change always_comb begin @@ -72,7 +72,7 @@ module fmaalign( // | 54'b0 | 106'b(product) | 2'b0 | // | addnend | if (KillProd) begin - ZmShifted = {(`NF+3)'(0), Zm, (2*`NF+2)'(0)}; + ZmShifted = {(`NF+2)'(0), Zm, (2*`NF+2)'(0)};//change ZmSticky = ~(XZero|YZero); // If the addend is too small to effect the addition @@ -90,12 +90,12 @@ module fmaalign( // | addnend | end else begin ZmShifted = ZmPreshifted >> ACnt; - ZmSticky = |(ZmShifted[`NF-1:0]); + ZmSticky = |(ZmShifted[`NF-1:0]); end end - assign Am = ZmShifted[4*`NF+5:`NF]; + assign Am = ZmShifted[4*`NF+4:`NF];//change endmodule diff --git a/pipelined/src/fpu/fma/fmalza.sv b/pipelined/src/fpu/fma/fmalza.sv index 1f6677ddc..182075bde 100644 --- a/pipelined/src/fpu/fma/fmalza.sv +++ b/pipelined/src/fpu/fma/fmalza.sv @@ -42,7 +42,7 @@ module fmalza #(WIDTH) ( // [Schmookler & Nowka, Leading zero anticipation and d logic [WIDTH-1:0] B, P, G, K; logic [WIDTH-1:0] Pp1, Gm1, Km1; - assign B = {{(`NF+2){1'b0}}, Pm}; // Zero extend product + assign B = {{(`NF+1){1'b0}}, Pm};//change // Zero extend product assign P = A^B; assign G = A&B; diff --git a/pipelined/src/fpu/fpu.sv b/pipelined/src/fpu/fpu.sv index 4ae12462d..1ebd391c0 100755 --- a/pipelined/src/fpu/fpu.sv +++ b/pipelined/src/fpu/fpu.sv @@ -109,14 +109,14 @@ module 
fpu ( logic XExpMaxE; // is the exponent all ones (max value) // Fma Signals - logic [3*`NF+5:0] SmE, SmM; + logic [3*`NF+4:0] SmE, SmM;//change logic ZmStickyE, ZmStickyM; logic [`NE+1:0] SeE,SeM; logic InvAE, InvAM; logic AsE, AsM; logic PsE, PsM; logic SsE, SsM; - logic [$clog2(3*`NF+7)-1:0] SCntE, SCntM; + logic [$clog2(3*`NF+6)-1:0] SCntE, SCntM;//change // Cvt Signals logic [`NE:0] CeE, CeM; // the calculated expoent @@ -352,8 +352,8 @@ module fpu ( {XsE, YsE, XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE, ZDenormE}, {XsM, YsM, XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM, ZDenormM}); flopenrc #(1) EMRegCmpFlg (clk, reset, FlushM, ~StallM, PreNVE, PreNVM); - flopenrc #(3*`NF+6) EMRegFma2(clk, reset, FlushM, ~StallM, SmE, SmM); - flopenrc #($clog2(3*`NF+7)+7+`NE) EMRegFma4(clk, reset, FlushM, ~StallM, + flopenrc #(3*`NF+5) EMRegFma2(clk, reset, FlushM, ~StallM, SmE, SmM);//change + flopenrc #($clog2(3*`NF+6)+7+`NE) EMRegFma4(clk, reset, FlushM, ~StallM, //change {ZmStickyE, InvAE, SCntE, AsE, PsE, SsE, SeE}, {ZmStickyM, InvAM, SCntM, AsM, PsM, SsM, SeM}); flopenrc #(`NE+`LOGCVTLEN+`CVTLEN+4) EMRegCvt(clk, reset, FlushM, ~StallM, diff --git a/pipelined/src/fpu/postproc/fmashiftcalc.sv b/pipelined/src/fpu/postproc/fmashiftcalc.sv index 1e8012784..5c301da7d 100644 --- a/pipelined/src/fpu/postproc/fmashiftcalc.sv +++ b/pipelined/src/fpu/postproc/fmashiftcalc.sv @@ -30,15 +30,15 @@ `include "wally-config.vh" module fmashiftcalc( - input logic [3*`NF+5:0] FmaSm, // the positive sum - input logic [$clog2(3*`NF+7)-1:0] FmaSCnt, // normalization shift count + input logic [3*`NF+4:0] FmaSm,//change // the positive sum + input logic [$clog2(3*`NF+6)-1:0] FmaSCnt,//change // normalization shift count input logic [`FMTBITS-1:0] Fmt, // precision 1 = double 0 = single input logic [`NE+1:0] FmaSe, output logic [`NE+1:0] NormSumExp, // exponent of the normalized sum not taking into account 
denormal or zero results output logic FmaSZero, // is the result denormalized - calculated before LZA corection output logic FmaPreResultDenorm, // is the result denormalized - calculated before LZA corection - output logic [$clog2(3*`NF+7)-1:0] FmaShiftAmt, // normalization shift count - output logic [3*`NF+7:0] FmaShiftIn // is the sum zero + output logic [$clog2(3*`NF+6)-1:0] FmaShiftAmt,//change // normalization shift count + output logic [3*`NF+6:0] FmaShiftIn//change // is the sum zero ); logic [`NE+1:0] PreNormSumExp; // the exponent of the normalized sum with the `FLEN bias logic [`NE+1:0] BiasCorr; @@ -50,7 +50,7 @@ module fmashiftcalc( // Determine if the sum is zero assign FmaSZero = ~(|FmaSm); // calculate the sum's exponent - assign PreNormSumExp = FmaSe + {{`NE+2-$unsigned($clog2(3*`NF+7)){1'b1}}, ~FmaSCnt} + (`NE+2)'(`NF+4); + assign PreNormSumExp = FmaSe + {{`NE+2-$unsigned($clog2(3*`NF+6)){1'b1}}, ~FmaSCnt} + (`NE+2)'(`NF+3);//change //convert the sum's exponent into the proper percision if (`FPSIZES == 1) begin @@ -150,7 +150,7 @@ module fmashiftcalc( // - shift once if killing a product and the result is denormalized assign FmaShiftIn = {2'b0, FmaSm}; if (`FPSIZES == 1) - assign FmaShiftAmt = FmaPreResultDenorm ? FmaSe[$clog2(3*`NF+7)-1:0]+($clog2(3*`NF+7))'(`NF+3): FmaSCnt+1; + assign FmaShiftAmt = FmaPreResultDenorm ? FmaSe[$clog2(3*`NF+6)-1:0]+($clog2(3*`NF+6))'(`NF+2): FmaSCnt+1;//change else - assign FmaShiftAmt = FmaPreResultDenorm ? FmaSe[$clog2(3*`NF+7)-1:0]+($clog2(3*`NF+7))'(`NF+3)+BiasCorr[$clog2(3*`NF+7)-1:0]: FmaSCnt+1; + assign FmaShiftAmt = FmaPreResultDenorm ? 
FmaSe[$clog2(3*`NF+6)-1:0]+($clog2(3*`NF+6))'(`NF+2)+BiasCorr[$clog2(3*`NF+6)-1:0]: FmaSCnt+1;//change endmodule diff --git a/pipelined/src/fpu/postproc/postprocess.sv b/pipelined/src/fpu/postproc/postprocess.sv index 368f3ef77..7c758b28c 100644 --- a/pipelined/src/fpu/postproc/postprocess.sv +++ b/pipelined/src/fpu/postproc/postprocess.sv @@ -47,10 +47,10 @@ module postprocess ( input logic FmaAs, // the modified Z sign - depends on instruction input logic FmaPs, // the product's sign input logic [`NE+1:0] FmaSe, - input logic [3*`NF+5:0] FmaSm, // the positive sum + input logic [3*`NF+4:0] FmaSm,//change // the positive sum input logic FmaZmS, // sticky bit that is calculated during alignment input logic FmaSs, - input logic [$clog2(3*`NF+7)-1:0] FmaSCnt, // the normalization shift count + input logic [$clog2(3*`NF+6)-1:0] FmaSCnt,//change // the normalization shift count //divide signals input logic DivS, // input logic DivDone, @@ -89,10 +89,10 @@ module postprocess ( // fma signals logic [`NE+1:0] FmaMe; // exponent of the normalized sum logic FmaSZero; // is the sum zero - logic [3*`NF+7:0] FmaShiftIn; // shift input + logic [3*`NF+6:0] FmaShiftIn;//change // shift input logic [`NE+1:0] NormSumExp; // exponent of the normalized sum not taking into account denormal or zero results logic FmaPreResultDenorm; // is the result denormalized - calculated before LZA corection - logic [$clog2(3*`NF+7)-1:0] FmaShiftAmt; // normalization shift count + logic [$clog2(3*`NF+6)-1:0] FmaShiftAmt;//change // normalization shift count // division singals logic [`LOGNORMSHIFTSZ-1:0] DivShiftAmt; logic [`NORMSHIFTSZ-1:0] DivShiftIn; @@ -152,8 +152,8 @@ module postprocess ( always_comb case(PostProcSel) 2'b10: begin // fma - ShiftAmt = {{`LOGNORMSHIFTSZ-$clog2(3*`NF+7){1'b0}}, FmaShiftAmt}; - ShiftIn = {FmaShiftIn, {`NORMSHIFTSZ-(3*`NF+8){1'b0}}}; + ShiftAmt = {{`LOGNORMSHIFTSZ-$clog2(3*`NF+6){1'b0}}, FmaShiftAmt};//change + ShiftIn = {FmaShiftIn, 
{`NORMSHIFTSZ-(3*`NF+7){1'b0}}};//change end 2'b00: begin // cvt ShiftAmt = {{`LOGNORMSHIFTSZ-$clog2(`CVTLEN+1){1'b0}}, CvtShiftAmt}; diff --git a/pipelined/src/fpu/postproc/shiftcorrection.sv b/pipelined/src/fpu/postproc/shiftcorrection.sv index eca97bcf9..588daa945 100644 --- a/pipelined/src/fpu/postproc/shiftcorrection.sv +++ b/pipelined/src/fpu/postproc/shiftcorrection.sv @@ -43,7 +43,7 @@ module shiftcorrection( output logic [`NE+1:0] Qe, output logic [`NE+1:0] FmaMe // exponent of the normalized sum ); - logic [3*`NF+5:0] CorrSumShifted; // the shifted sum after LZA correction + logic [3*`NF+4:0] CorrSumShifted;//change // the shifted sum after LZA correction logic [`CORRSHIFTSZ-1:0] CorrQmShifted; logic ResDenorm; // is the result denormalized logic LZAPlus1; // add one or two to the sum's exponent due to LZA correction @@ -56,7 +56,7 @@ module shiftcorrection( assign CorrQmShifted = (LZAPlus1|(DivQe==1&~LZAPlus1)) ? Shifted[`NORMSHIFTSZ-2:`NORMSHIFTSZ-`CORRSHIFTSZ-1] : Shifted[`NORMSHIFTSZ-3:`NORMSHIFTSZ-`CORRSHIFTSZ-2]; // if the result of the divider was calculated to be denormalized, then the result was correctly normalized, so select the top shifted bits always_comb - if(FmaOp) Mf = {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+6){1'b0}}}; + if(FmaOp) Mf = {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+5){1'b0}}};//change else if (DivOp&~DivResDenorm) Mf = CorrQmShifted; else Mf = Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ]; // Determine sum's exponent diff --git a/pipelined/testbench/testbench-fp.sv b/pipelined/testbench/testbench-fp.sv index c20dd3ad7..d09534829 100644 --- a/pipelined/testbench/testbench-fp.sv +++ b/pipelined/testbench/testbench-fp.sv @@ -94,8 +94,8 @@ module testbenchfp; logic [`NE+1:0] Se; logic ZmSticky; logic KillProd; - logic [$clog2(3*`NF+7)-1:0] SCnt; - logic [3*`NF+5:0] Sm; + logic [$clog2(3*`NF+6)-1:0] SCnt; + logic [3*`NF+4:0] Sm; logic InvA; logic NegSum; logic As; From e5a76817df697311ea71462ce6220c5a3108127b Mon Sep 17 00:00:00 2001 
From: Katherine Parry Date: Thu, 29 Dec 2022 15:54:17 -0600 Subject: [PATCH 02/15] minor optimizations and renaming --- pipelined/src/fpu/fma/fma.sv | 8 ++++---- pipelined/src/fpu/fma/fmaadd.sv | 11 ++++++----- pipelined/src/fpu/fma/fmaalign.sv | 9 +++++---- pipelined/src/fpu/fpu.sv | 2 +- pipelined/testbench/testbench-fp.sv | 6 +++--- 5 files changed, 19 insertions(+), 17 deletions(-) diff --git a/pipelined/src/fpu/fma/fma.sv b/pipelined/src/fpu/fma/fma.sv index eb2213da5..4d60f477f 100644 --- a/pipelined/src/fpu/fma/fma.sv +++ b/pipelined/src/fpu/fma/fma.sv @@ -36,7 +36,7 @@ module fma( input logic [`NF:0] Xm, Ym, Zm, // input's significands in U(0.NF) format input logic XZero, YZero, ZZero, // is the input zero input logic [2:0] OpCtrl, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y) - output logic ZmSticky, // sticky bit that is calculated during alignment + output logic ASticky, // sticky bit that is calculated during alignment output logic [3*`NF+4:0] Sm,//change // the positive sum's significand output logic InvA, // Was A inverted for effective subtraction (P-A or -P+A) output logic As, // the aligned addend's sign (modified Z sign for other opperations) @@ -75,7 +75,7 @@ module fma( fmasign sign(.OpCtrl, .Xs, .Ys, .Zs, .Ps, .As, .InvA); fmaalign align(.Ze, .Zm, .XZero, .YZero, .ZZero, .Xe, .Ye, - .Am, .ZmSticky, .KillProd); + .Am, .ASticky, .KillProd); @@ -83,10 +83,10 @@ module fma( // // Addition/LZA // /////////////////////////////////////////////////////////////////////////////// - fmaadd add(.Am, .Pm, .Ze, .Pe, .Ps, .KillProd, .ZmSticky, .AmInv, .PmKilled, .InvA, .Sm, .Se, .Ss); + fmaadd add(.Am, .Pm, .Ze, .Pe, .Ps, .KillProd, .ASticky, .AmInv, .PmKilled, .InvA, .Sm, .Se, .Ss); //change - fmalza #(3*`NF+5) lza(.A(AmInv), .Pm({PmKilled, 1'b0, InvA&Ps&ZmSticky&KillProd}), .Cin(InvA & ~(ZmSticky & ~KillProd)), .sub(InvA), .SCnt); + fmalza #(3*`NF+5) lza(.A(AmInv), .Pm({PmKilled, 1'b0, 
InvA&Ps&ASticky&KillProd}), .Cin(InvA & ~(ASticky & ~KillProd)), .sub(InvA), .SCnt); endmodule diff --git a/pipelined/src/fpu/fma/fmaadd.sv b/pipelined/src/fpu/fma/fmaadd.sv index 0991e44b0..509adb674 100644 --- a/pipelined/src/fpu/fma/fmaadd.sv +++ b/pipelined/src/fpu/fma/fmaadd.sv @@ -36,7 +36,7 @@ module fmaadd( input logic Ps, // the product sign and the alligend addeded's sign (Modified Z sign for other opperations) input logic InvA, // invert the aligned addend input logic KillProd, // should the product be set to 0 - input logic ZmSticky, + input logic ASticky, input logic [`NE-1:0] Ze, input logic [`NE+1:0] Pe, output logic [3*`NF+4:0] AmInv,//change // aligned addend possibly inverted @@ -62,11 +62,12 @@ module fmaadd( // - calculate a positive and negitive sum in parallel // if there was a small negitive number killed in the alignment stage one needs to be subtracted from the sum // prod - addend where some of the addend is put into the sticky bit then don't add +1 from negation - // ie ~(InvA&ZmSticky&~KillProd)&InvA = (~ZmSticky|KillProd)&InvA + // ie ~(InvA&ASticky&~KillProd)&InvA = (~ASticky|KillProd)&InvA // addend - prod where product is killed (and not exactly zero) then don't add +1 from negation - // ie ~(InvA&ZmSticky&KillProd)&InvA = (~ZmSticky|~KillProd)&InvA - assign {NegSum, PreSum} = {{`NF+2{1'b0}}, PmKilled, 2'b0} + {InvA, AmInv} + {{3*`NF+5{1'b0}}, (~ZmSticky|KillProd)&InvA};//change - assign NegPreSum = Am + {{`NF+1{1'b1}}, ~PmKilled, 2'b0} + {(3*`NF+2)'(0), (~ZmSticky|~KillProd)&InvA, 2'b0};//change + // ie ~(InvA&ASticky&KillProd)&InvA = (~ASticky|~KillProd)&InvA + // in this case this result is only ever selected when InvA=1 so we can remove &InvA + assign {NegSum, PreSum} = {{`NF+2{1'b0}}, PmKilled, 2'b0} + {InvA, AmInv} + {{3*`NF+5{1'b0}}, (~ASticky|KillProd)&InvA};//change + assign NegPreSum = Am + {{`NF+1{1'b1}}, ~PmKilled, 2'b0} + {(3*`NF+2)'(0), ~ASticky|~KillProd, 2'b0};//change // Choose the positive sum and accompanying LZA 
result. assign Sm = NegSum ? NegPreSum : PreSum; diff --git a/pipelined/src/fpu/fma/fmaalign.sv b/pipelined/src/fpu/fma/fmaalign.sv index 85b28c7b5..67dc0b824 100644 --- a/pipelined/src/fpu/fma/fmaalign.sv +++ b/pipelined/src/fpu/fma/fmaalign.sv @@ -36,7 +36,7 @@ module fmaalign( input logic [`NF:0] Zm, // significand in U(0.NF) format] input logic XZero, YZero, ZZero, // is the input zero output logic [3*`NF+4:0] Am,//change // addend aligned for addition in U(NF+5.2NF+1) - output logic ZmSticky, // Sticky bit calculated from the aliged addend + output logic ASticky, // Sticky bit calculated from the aliged addend output logic KillProd // should the product be set to zero ); @@ -44,6 +44,7 @@ module fmaalign( logic [4*`NF+4:0] ZmShifted;//change // output of the alignment shifter including sticky bits U(NF+5.3NF+1) logic [4*`NF+4:0] ZmPreshifted;//change // input to the alignment shifter U(NF+5.3NF+1) logic KillZ; + logic PmSticky, tmpZmSticky; /////////////////////////////////////////////////////////////////////////////// // Alignment shifter @@ -73,7 +74,7 @@ module fmaalign( // | addnend | if (KillProd) begin ZmShifted = {(`NF+2)'(0), Zm, (2*`NF+2)'(0)};//change - ZmSticky = ~(XZero|YZero); + ASticky = ~(XZero|YZero); // If the addend is too small to effect the addition // - The addend has to shift two past the end of the product to be considered too small @@ -83,14 +84,14 @@ module fmaalign( // | addnend | end else if (KillZ) begin ZmShifted = 0; - ZmSticky = ~ZZero; + ASticky = ~ZZero; // If the Addend is shifted right // | 54'b0 | 106'b(product) | 2'b0 | // | addnend | end else begin ZmShifted = ZmPreshifted >> ACnt; - ZmSticky = |(ZmShifted[`NF-1:0]); + ASticky = |(ZmShifted[`NF-1:0]); end end diff --git a/pipelined/src/fpu/fpu.sv b/pipelined/src/fpu/fpu.sv index 1ebd391c0..507cd9086 100755 --- a/pipelined/src/fpu/fpu.sv +++ b/pipelined/src/fpu/fpu.sv @@ -258,7 +258,7 @@ module fpu ( .As(AsE), .Ps(PsE), .Ss(SsE), .Se(SeE), .Sm(SmE), .InvA(InvAE), 
.SCnt(SCntE), - .ZmSticky(ZmStickyE)); + .ASticky(ZmStickyE)); // divide and squareroot // - fdiv diff --git a/pipelined/testbench/testbench-fp.sv b/pipelined/testbench/testbench-fp.sv index d09534829..f5986c839 100644 --- a/pipelined/testbench/testbench-fp.sv +++ b/pipelined/testbench/testbench-fp.sv @@ -92,7 +92,7 @@ module testbenchfp; logic Ss; logic [`NE+1:0] Pe; logic [`NE+1:0] Se; - logic ZmSticky; + logic ASticky; logic KillProd; logic [$clog2(3*`NF+6)-1:0] SCnt; logic [3*`NF+4:0] Sm; @@ -690,7 +690,7 @@ module testbenchfp; .Xm(Xm), .Ym(Ym), .Zm(Zm), .XZero, .YZero, .ZZero, .Ss, .Se, .OpCtrl(OpCtrlVal), .Sm, .InvA, .SCnt, .As, .Ps, - .ZmSticky); + .ASticky); end postprocess postprocess(.Xs(Xs), .Ys(Ys), .PostProcSel(UnitVal[1:0]), @@ -700,7 +700,7 @@ module testbenchfp; .XZero(XZero), .YZero(YZero), .ZZero(ZZero), .CvtShiftAmt(CvtShiftAmtE), .XInf(XInf), .YInf(YInf), .ZInf(ZInf), .CvtCs(CvtResSgnE), .ToInt(WriteIntVal), .XSNaN(XSNaN), .YSNaN(YSNaN), .ZSNaN(ZSNaN), .CvtLzcIn(CvtLzcInE), .IntZero, - .FmaZmS(ZmSticky), .FmaSe(Se), + .FmaZmS(ASticky), .FmaSe(Se), .FmaSm(Sm), .FmaSCnt(SCnt), .FmaAs(As), .FmaPs(Ps), .Fmt(ModFmt), .Frm(FrmVal), .PostProcFlg(Flg), .PostProcRes(FpRes), .FCvtIntRes(IntRes)); From 0ecbb45b785ae5c5a8f3128a3bfe13b592640e7b Mon Sep 17 00:00:00 2001 From: David Harris Date: Thu, 29 Dec 2022 21:09:23 -0800 Subject: [PATCH 03/15] Fixed register timing failure on SpecialCaseM in fdivsqrt --- pipelined/src/fpu/fdivsqrt/fdivsqrtfsm.sv | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtfsm.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtfsm.sv index a5735ba3b..c16abd9b9 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtfsm.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtfsm.sv @@ -69,7 +69,8 @@ module fdivsqrtfsm( assign ISpecialCaseE = AZeroE | BZeroE; // *** why is AZeroE part of this. Should other special cases be considered? assign SpecialCaseE = MDUE ? 
ISpecialCaseE : FSpecialCaseE; end else assign SpecialCaseE = FSpecialCaseE; - flopenr #(1) SpecialCaseReg(clk, reset, ~StallM, SpecialCaseE, SpecialCaseM); // save SpecialCase for checking in fdivsqrtpostproc + //flopenr #(1) SpecialCaseReg(clk, reset, ~StallM, SpecialCaseE, SpecialCaseM); // save SpecialCase for checking in fdivsqrtpostproc + flopenr #(1) SpecialCaseReg(clk, reset, IFDivStartE, SpecialCaseE, SpecialCaseM); // save SpecialCase for checking in fdivsqrtpostproc // DIVN = `NF+3 // NS = NF + 1 From 18f19ce44d4e1cd9cbed6500aadc139178e452e1 Mon Sep 17 00:00:00 2001 From: David Harris Date: Fri, 30 Dec 2022 06:40:25 -0800 Subject: [PATCH 04/15] fdiv cleanup, reduce number of rv32f fma_b15 tests being run to speed up regression --- pipelined/src/fpu/fdivsqrt/fdivsqrtfsm.sv | 1 - pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv | 16 +++++++--------- pipelined/testbench/tests.vh | 4 ++-- 3 files changed, 9 insertions(+), 12 deletions(-) diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtfsm.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtfsm.sv index c16abd9b9..a950ea7b3 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtfsm.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtfsm.sv @@ -69,7 +69,6 @@ module fdivsqrtfsm( assign ISpecialCaseE = AZeroE | BZeroE; // *** why is AZeroE part of this. Should other special cases be considered? assign SpecialCaseE = MDUE ? 
ISpecialCaseE : FSpecialCaseE; end else assign SpecialCaseE = FSpecialCaseE; - //flopenr #(1) SpecialCaseReg(clk, reset, ~StallM, SpecialCaseE, SpecialCaseM); // save SpecialCase for checking in fdivsqrtpostproc flopenr #(1) SpecialCaseReg(clk, reset, IFDivStartE, SpecialCaseE, SpecialCaseM); // save SpecialCase for checking in fdivsqrtpostproc // DIVN = `NF+3 diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv index 8bedd3841..6d955d611 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv @@ -157,13 +157,6 @@ module fdivsqrtpostproc( end else begin NormShiftM = ((`DIVBLEN+1)'(`DIVb) - (nM * (`DIVBLEN+1)'(`LOGR))); PreResultM = IntQuotM; - /* - if (~ALTBM & NegQuotM) begin - PreResultM = {3'b111, -IntQuotM}; - end else begin - PreResultM = {3'b000, IntQuotM}; - end*/ - //PreResultM = {IntQuotM[`DIVb], IntQuotM[`DIVb], IntQuotM[`DIVb], IntQuotM}; // Suspicious Sign Extender end @@ -171,7 +164,12 @@ module fdivsqrtpostproc( assign PreFPIntDivResultM = $signed(PreResultM >>> NormShiftM); assign SpecialFPIntDivResultM = BZeroM ? (RemOpM ? AM : {(`XLEN){1'b1}}) : PreFPIntDivResultM[`XLEN-1:0]; // special cases - // *** conditional on RV64 - assign FPIntDivResultM = (W64M ? {{(`XLEN-32){SpecialFPIntDivResultM[31]}}, SpecialFPIntDivResultM[31:0]} : SpecialFPIntDivResultM[`XLEN-1:0]); // Sign extending in case of W64 + + // sign extend result for W64 + if (`XLEN==64) + assign FPIntDivResultM = (W64M ? 
{{(`XLEN-32){SpecialFPIntDivResultM[31]}}, SpecialFPIntDivResultM[31:0]} : + SpecialFPIntDivResultM[`XLEN-1:0]); // Sign extending in case of W64 + else + assign FPIntDivResultM = SpecialFPIntDivResultM[`XLEN-1:0]; end endmodule \ No newline at end of file diff --git a/pipelined/testbench/tests.vh b/pipelined/testbench/tests.vh index 61e45d9e4..48a29303b 100644 --- a/pipelined/testbench/tests.vh +++ b/pipelined/testbench/tests.vh @@ -1098,7 +1098,7 @@ string imperas32f[] = '{ "rv64i_m/F/src/flw-align-01.S", "rv64i_m/F/src/fmadd_b1-01.S", "rv64i_m/F/src/fmadd_b14-01.S", - "rv64i_m/F/src/fmadd_b15-01.S", + //"rv64i_m/F/src/fmadd_b15-01.S", "rv64i_m/F/src/fmadd_b16-01.S", "rv64i_m/F/src/fmadd_b17-01.S", "rv64i_m/F/src/fmadd_b18-01.S", @@ -1473,7 +1473,7 @@ string imperas32f[] = '{ "rv32i_m/F/src/fmin_b19-01.S", "rv32i_m/F/src/fmsub_b1-01.S", "rv32i_m/F/src/fmsub_b14-01.S", - "rv32i_m/F/src/fmsub_b15-01.S", + //"rv32i_m/F/src/fmsub_b15-01.S", "rv32i_m/F/src/fmsub_b16-01.S", "rv32i_m/F/src/fmsub_b17-01.S", "rv32i_m/F/src/fmsub_b18-01.S", From d2273e7037d37738e459b83d2792034b40cdbbc0 Mon Sep 17 00:00:00 2001 From: David Harris Date: Fri, 30 Dec 2022 06:45:51 -0800 Subject: [PATCH 05/15] fdivsqrtpreproc shift simplification --- pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv index b3f42a7c4..cb8833658 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv @@ -57,7 +57,6 @@ module fdivsqrtpreproc ( // Intdiv signals logic [`DIVb-1:0] IFNormLenX, IFNormLenD; logic [`DIVBLEN:0] mE; - logic [`DIVBLEN:0] ZeroDiff, IntBits, RightShiftX; logic [`DIVBLEN:0] pPlusr, pPrCeil, p, ell; logic [`LOGRK:0] pPrTrunc; logic [`DIVb+3:0] PreShiftX; @@ -71,6 +70,7 @@ module fdivsqrtpreproc ( logic AsE, BsE, ALTBE, NegQuotE; logic [`XLEN-1:0] AE, BE; logic [`XLEN-1:0] 
PosA, PosB; + logic [`DIVBLEN:0] ZeroDiff, IntBits, RightShiftX; // Extract inputs, signs, zero, depending on W64 mode if applicable assign signedDiv = ~Funct3E[0]; @@ -108,12 +108,12 @@ module fdivsqrtpreproc ( /* verilator lint_off WIDTH */ // right shift amount to complete in discrete number of steps - assign pPlusr = (`DIVBLEN)'(`LOGR) + p; + assign pPlusr = `LOGR + p; assign pPrTrunc = pPlusr % `RK; - assign pPrCeil = (pPlusr >> `LOGRK) + {{`DIVBLEN{1'b0}}, |(pPrTrunc)}; - assign nE = (pPrCeil * (`DIVBLEN+1)'(`DIVCOPIES)) - {{(`DIVBLEN){1'b0}}, 1'b1}; - assign IntBits = (`DIVBLEN)'(`LOGR) + p - {{(`DIVBLEN){1'b0}}, 1'b1}; - assign RightShiftX = ((`DIVBLEN)'(`RK) - 1) - (IntBits % `RK); + assign pPrCeil = (pPlusr >> `LOGRK) + |pPrTrunc; + assign nE = (pPrCeil * `DIVCOPIES) - 1; + assign IntBits = `LOGR + p - 1; + assign RightShiftX = `RK - 1 - IntBits % `RK; /* verilator lint_on WIDTH */ // Selet integer or floating-point operands From 802c440254facdf781ff4df1f93c461887788d6e Mon Sep 17 00:00:00 2001 From: David Harris Date: Fri, 30 Dec 2022 06:47:40 -0800 Subject: [PATCH 06/15] Reduced size of preproc right shift --- pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv index cb8833658..d0a060795 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv @@ -70,7 +70,8 @@ module fdivsqrtpreproc ( logic AsE, BsE, ALTBE, NegQuotE; logic [`XLEN-1:0] AE, BE; logic [`XLEN-1:0] PosA, PosB; - logic [`DIVBLEN:0] ZeroDiff, IntBits, RightShiftX; + logic [`DIVBLEN:0] ZeroDiff, IntBits; + logic [`LOGRK-1:0] RightShiftX; // Extract inputs, signs, zero, depending on W64 mode if applicable assign signedDiv = ~Funct3E[0]; From 27588af00e753db11e2503b9e76174a871783fc1 Mon Sep 17 00:00:00 2001 From: David Harris Date: Fri, 30 Dec 2022 06:55:20 -0800 Subject: [PATCH 07/15] 
Clean up sqrt initialization mux --- pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv | 29 ++++++++----------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv index d0a060795..e56456742 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv @@ -51,20 +51,14 @@ module fdivsqrtpreproc ( ); logic [`DIVb-1:0] XPreproc; - logic [`DIVb:0] SqrtX; - logic [`DIVb+3:0] DivX; + logic [`DIVb:0] PreSqrtX; + logic [`DIVb+3:0] DivX, SqrtX; logic [`NE+1:0] QeE; - // Intdiv signals logic [`DIVb-1:0] IFNormLenX, IFNormLenD; - logic [`DIVBLEN:0] mE; - logic [`DIVBLEN:0] pPlusr, pPrCeil, p, ell; - logic [`LOGRK:0] pPrTrunc; + logic [`DIVBLEN:0] mE, ell; logic [`DIVb+3:0] PreShiftX; logic NumZeroE; - // ***can probably merge X LZC with conversion - // cout the number of leading zeros - if (`IDIV_ON_FPU) begin logic signedDiv; logic AsE, BsE, ALTBE, NegQuotE; @@ -72,6 +66,8 @@ module fdivsqrtpreproc ( logic [`XLEN-1:0] PosA, PosB; logic [`DIVBLEN:0] ZeroDiff, IntBits; logic [`LOGRK-1:0] RightShiftX; + logic [`DIVBLEN:0] pPlusr, pPrCeil, p; + logic [`LOGRK-1:0] pPrTrunc; // Extract inputs, signs, zero, depending on W64 mode if applicable assign signedDiv = ~Funct3E[0]; @@ -149,16 +145,15 @@ module fdivsqrtpreproc ( assign DPreproc = IFNormLenD << (mE + {{`DIVBLEN{1'b0}}, 1'b1}); // append leading 1 (for nonzero inputs) and zero-extend - assign SqrtX = (Xe[0]^ell[0]) ? {1'b0, ~NumZeroE, XPreproc[`DIVb-1:1]} : {~NumZeroE, XPreproc}; // Bottom bit of XPreproc is always zero because DIVb is larger than XLEN and NF + assign PreSqrtX = (Xe[0]^ell[0]) ? 
{1'b0, ~NumZeroE, XPreproc[`DIVb-1:1]} : {~NumZeroE, XPreproc}; // Bottom bit of XPreproc is always zero because DIVb is larger than XLEN and NF assign DivX = {3'b000, ~NumZeroE, XPreproc}; - - // *** explain why X is shifted between radices (initial assignment of WS=RX) - if (`RADIX == 2) assign PreShiftX = Sqrt ? {3'b111, SqrtX} : DivX; - else assign PreShiftX = Sqrt ? {2'b11, SqrtX, 1'b0} : DivX; - + // Sqrt is initialized after a first step of R(X-1), which depends on Radix + if (`RADIX == 2) assign SqrtX = {3'b111, PreSqrtX}; + else assign SqrtX = {2'b11, PreSqrtX, 1'b0}; + assign PreShiftX = Sqrt ? SqrtX : DivX; + // Floating-point exponent fdivsqrtexpcalc expcalc(.Fmt, .Xe, .Ye, .Sqrt, .XZero(XZeroE), .ell, .m(mE), .Qe(QeE)); - - flopen #(`NE+2) expreg(clk, IFDivStartE, QeE, QeM); + flopen #(`NE+2) expreg(clk, IFDivStartE, QeE, QeM); endmodule From 2c6c3e799d3282dc4624039897da5a43230f8b13 Mon Sep 17 00:00:00 2001 From: David Harris Date: Fri, 30 Dec 2022 07:00:48 -0800 Subject: [PATCH 08/15] Clean up sqrt preproc --- pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv index e56456742..63d391ae9 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv @@ -104,7 +104,7 @@ module fdivsqrtpreproc ( assign p = ALTBE ? 
'0 : ZeroDiff; /* verilator lint_off WIDTH */ - // right shift amount to complete in discrete number of steps + // calculate number of cycles nE right shift amount RightShiftX to complete in discrete number of steps assign pPlusr = `LOGR + p; assign pPrTrunc = pPlusr % `RK; assign pPrCeil = (pPlusr >> `LOGRK) + |pPrTrunc; @@ -145,6 +145,7 @@ module fdivsqrtpreproc ( assign DPreproc = IFNormLenD << (mE + {{`DIVBLEN{1'b0}}, 1'b1}); // append leading 1 (for nonzero inputs) and zero-extend + // *** explain this next line assign PreSqrtX = (Xe[0]^ell[0]) ? {1'b0, ~NumZeroE, XPreproc[`DIVb-1:1]} : {~NumZeroE, XPreproc}; // Bottom bit of XPreproc is always zero because DIVb is larger than XLEN and NF assign DivX = {3'b000, ~NumZeroE, XPreproc}; // Sqrt is initialized after a first step of R(X-1), which depends on Radix From 55f25457c91fcf80b7a9e31ce1fd5032e2c12efd Mon Sep 17 00:00:00 2001 From: David Harris Date: Fri, 30 Dec 2022 07:01:44 -0800 Subject: [PATCH 09/15] Radix 4 divsqrt --- pipelined/config/shared/wally-shared.vh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelined/config/shared/wally-shared.vh b/pipelined/config/shared/wally-shared.vh index cc24c42f6..044bd7d7e 100644 --- a/pipelined/config/shared/wally-shared.vh +++ b/pipelined/config/shared/wally-shared.vh @@ -109,7 +109,7 @@ `define CORRSHIFTSZ ((`DIVRESLEN+`NF) > (3*`NF+8) ? (`DIVRESLEN+`NF) : (3*`NF+6)) // division constants -`define RADIX 32'h2 +`define RADIX 32'h4 `define DIVCOPIES 32'h4 `define DIVLEN ((`NF < `XLEN) ? (`XLEN) : `NF+3) // `define DIVN (`NF < `XLEN ? 
`XLEN : `NF+1) // length of input From 1e65bfd0585c744d78c0120502fc9abb2224a961 Mon Sep 17 00:00:00 2001 From: David Harris Date: Fri, 30 Dec 2022 07:10:47 -0800 Subject: [PATCH 10/15] simplified sign handling mux --- .../src/fpu/fdivsqrt/fdivsqrtpostproc.sv | 27 +++++-------------- 1 file changed, 7 insertions(+), 20 deletions(-) diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv index 6d955d611..1dd11b3fc 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv @@ -52,9 +52,6 @@ module fdivsqrtpostproc( logic [`DIVb:0] PreQmM; logic NegStickyM; logic weq0E, weq0M, WZeroM; - logic [`DIVBLEN:0] NormShiftM; - logic [`DIVb:0] NormQuotM; - logic [`DIVb+3:0] IntQuotM, IntRemM, NormRemM; logic signed [`DIVb+3:0] PreResultM, PreFPIntDivResultM; logic [`XLEN-1:0] SpecialFPIntDivResultM; @@ -104,27 +101,17 @@ module fdivsqrtpostproc( assign QmM = SqrtM ? (PreQmM << 1) : PreQmM; if (`IDIV_ON_FPU) begin + logic [`DIVBLEN:0] NormShiftM; + logic [`DIVb:0] NormQuotM; + logic [`DIVb+3:0] IntQuotM, IntRemM, NormRemM, NormRemDM; + assign W = $signed(Sum) >>> `LOGR; assign DM = {4'b0001, D}; // Integer division: sign handling for div and rem - always_comb - if (~AsM) - if (NegStickyM) begin - NormQuotM = FirstUM; - NormRemM = W + DM; - end else begin - NormQuotM = FirstU; - NormRemM = W; - end - else - if (NegStickyM) begin - NormQuotM = FirstUM; - NormRemM = -(W + DM); - end else begin - NormQuotM = FirstU; - NormRemM = -W; - end + mux2 #(`DIVb+1) normquotmux(FirstU, FirstUM, NegStickyM, NormQuotM); + mux2 #(`DIVb+4) normremdmux(W, W+DM, NegStickyM, NormRemDM); + mux2 #(`DIVb+4) normremsmux(NormRemDM, -NormRemDM, AsM, NormRemM); // Integer division: Special cases always_comb From 6ae25537ea9ca626c8f00f05499eda2dc4ffe194 Mon Sep 17 00:00:00 2001 From: David Harris Date: Fri, 30 Dec 2022 07:17:38 -0800 Subject: [PATCH 11/15] removed duplicate quotient mux --- 
pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv index 1dd11b3fc..258f0eb57 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv @@ -102,14 +102,12 @@ module fdivsqrtpostproc( if (`IDIV_ON_FPU) begin logic [`DIVBLEN:0] NormShiftM; - logic [`DIVb:0] NormQuotM; logic [`DIVb+3:0] IntQuotM, IntRemM, NormRemM, NormRemDM; assign W = $signed(Sum) >>> `LOGR; assign DM = {4'b0001, D}; - // Integer division: sign handling for div and rem - mux2 #(`DIVb+1) normquotmux(FirstU, FirstUM, NegStickyM, NormQuotM); + // Integer remainder: sticky and sign correction muxes mux2 #(`DIVb+4) normremdmux(W, W+DM, NegStickyM, NormRemDM); mux2 #(`DIVb+4) normremsmux(NormRemDM, -NormRemDM, AsM, NormRemM); @@ -129,7 +127,7 @@ module fdivsqrtpostproc( IntRemM = '0; end end else begin - PreIntQuotM = {3'b000, NormQuotM}; + PreIntQuotM = {3'b000, PreQmM}; IntRemM = NormRemM; end // flip sign if necessary @@ -147,7 +145,7 @@ module fdivsqrtpostproc( end - // division takes the result from the next cycle, which is shifted to the left one more time so the square root also needs to be shifted + // integer division takes the result from the next cycle, which is shifted to the left one more time so the square root also needs to be shifted assign PreFPIntDivResultM = $signed(PreResultM >>> NormShiftM); assign SpecialFPIntDivResultM = BZeroM ? (RemOpM ? 
AM : {(`XLEN){1'b1}}) : PreFPIntDivResultM[`XLEN-1:0]; // special cases From 1006305d759f45aefb6f7cfc1a63a0e4ab224dea Mon Sep 17 00:00:00 2001 From: David Harris Date: Fri, 30 Dec 2022 07:34:26 -0800 Subject: [PATCH 12/15] started simplifying integer division special cases --- .../src/fpu/fdivsqrt/fdivsqrtpostproc.sv | 38 +++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv index 258f0eb57..80d9e4b01 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv @@ -111,6 +111,43 @@ module fdivsqrtpostproc( mux2 #(`DIVb+4) normremdmux(W, W+DM, NegStickyM, NormRemDM); mux2 #(`DIVb+4) normremsmux(NormRemDM, -NormRemDM, AsM, NormRemM); + // special case logic + always_comb + if (ALTBM) begin + if (RemOpM) PreFPIntDivResultM = {{(`DIVb-`XLEN+4){1'b0}}, AM}; + else PreFPIntDivResultM = '0; + // IntQuotM = '0; + // IntRemM = {{(`DIVb-`XLEN+4){1'b0}}, AM}; + end else begin + logic [`DIVb+3:0] PreIntQuotM; + if (WZeroM) begin + if (weq0M) begin + PreIntQuotM = {3'b000, FirstU}; + IntRemM = '0; + end else begin + PreIntQuotM = {3'b000, FirstUM}; + IntRemM = '0; + end + end else begin + PreIntQuotM = {3'b000, PreQmM}; + IntRemM = NormRemM; + end + // flip sign if necessary + if (NegQuotM) IntQuotM = -PreIntQuotM; + else IntQuotM = PreIntQuotM; + if (RemOpM) begin + NormShiftM = ALTBM ? '0 : (mM + (`DIVBLEN+1)'(`DIVa)); // no postshift if forwarding input A to remainder + PreResultM = IntRemM; + end else begin + NormShiftM = ((`DIVBLEN+1)'(`DIVb) - (nM * (`DIVBLEN+1)'(`LOGR))); + PreResultM = IntQuotM; + end + PreFPIntDivResultM = $signed(PreResultM >>> NormShiftM); + end + + assign SpecialFPIntDivResultM = BZeroM ? (RemOpM ? 
AM : {(`XLEN){1'b1}}) : PreFPIntDivResultM[`XLEN-1:0]; // special cases + +/* // Integer division: Special cases always_comb if (ALTBM) begin @@ -149,6 +186,7 @@ module fdivsqrtpostproc( assign PreFPIntDivResultM = $signed(PreResultM >>> NormShiftM); assign SpecialFPIntDivResultM = BZeroM ? (RemOpM ? AM : {(`XLEN){1'b1}}) : PreFPIntDivResultM[`XLEN-1:0]; // special cases +*/ // sign extend result for W64 if (`XLEN==64) From 919525ca17e62942bf50eaf9411fe386b9e0fc98 Mon Sep 17 00:00:00 2001 From: David Harris Date: Fri, 30 Dec 2022 07:40:28 -0800 Subject: [PATCH 13/15] continued simplifying integer division special cases --- .../src/fpu/fdivsqrt/fdivsqrtpostproc.sv | 53 +++---------------- 1 file changed, 7 insertions(+), 46 deletions(-) diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv index 80d9e4b01..c78738a4a 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv @@ -113,9 +113,12 @@ module fdivsqrtpostproc( // special case logic always_comb - if (ALTBM) begin - if (RemOpM) PreFPIntDivResultM = {{(`DIVb-`XLEN+4){1'b0}}, AM}; - else PreFPIntDivResultM = '0; + if (BZeroM) begin + if (RemOpM) SpecialFPIntDivResultM = AM; + else SpecialFPIntDivResultM = {(`XLEN){1'b1}}; + end else if (ALTBM) begin + if (RemOpM) SpecialFPIntDivResultM = AM; + else SpecialFPIntDivResultM = '0; // IntQuotM = '0; // IntRemM = {{(`DIVb-`XLEN+4){1'b0}}, AM}; end else begin @@ -143,51 +146,9 @@ module fdivsqrtpostproc( PreResultM = IntQuotM; end PreFPIntDivResultM = $signed(PreResultM >>> NormShiftM); + SpecialFPIntDivResultM = PreFPIntDivResultM[`XLEN-1:0]; end - assign SpecialFPIntDivResultM = BZeroM ? (RemOpM ? 
AM : {(`XLEN){1'b1}}) : PreFPIntDivResultM[`XLEN-1:0]; // special cases - -/* - // Integer division: Special cases - always_comb - if (ALTBM) begin - IntQuotM = '0; - IntRemM = {{(`DIVb-`XLEN+4){1'b0}}, AM}; - end else begin - logic [`DIVb+3:0] PreIntQuotM; - if (WZeroM) begin - if (weq0M) begin - PreIntQuotM = {3'b000, FirstU}; - IntRemM = '0; - end else begin - PreIntQuotM = {3'b000, FirstUM}; - IntRemM = '0; - end - end else begin - PreIntQuotM = {3'b000, PreQmM}; - IntRemM = NormRemM; - end - // flip sign if necessary - if (NegQuotM) IntQuotM = -PreIntQuotM; - else IntQuotM = PreIntQuotM; - end - - always_comb - if (RemOpM) begin - NormShiftM = ALTBM ? '0 : (mM + (`DIVBLEN+1)'(`DIVa)); // no postshift if forwarding input A to remainder - PreResultM = IntRemM; - end else begin - NormShiftM = ((`DIVBLEN+1)'(`DIVb) - (nM * (`DIVBLEN+1)'(`LOGR))); - PreResultM = IntQuotM; - end - - - // integer division takes the result from the next cycle, which is shifted to the left one more time so the square root also needs to be shifted - - assign PreFPIntDivResultM = $signed(PreResultM >>> NormShiftM); - assign SpecialFPIntDivResultM = BZeroM ? (RemOpM ? AM : {(`XLEN){1'b1}}) : PreFPIntDivResultM[`XLEN-1:0]; // special cases -*/ - // sign extend result for W64 if (`XLEN==64) assign FPIntDivResultM = (W64M ? {{(`XLEN-32){SpecialFPIntDivResultM[31]}}, SpecialFPIntDivResultM[31:0]} : From e0ec45489a7f200db4a58f5f20c3acda90c3170b Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Fri, 30 Dec 2022 10:51:35 -0600 Subject: [PATCH 14/15] Updated constraints to remove DivBusyE. 
--- fpga/constraints/debug2.xdc | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/fpga/constraints/debug2.xdc b/fpga/constraints/debug2.xdc index eed201843..bdc073ee1 100644 --- a/fpga/constraints/debug2.xdc +++ b/fpga/constraints/debug2.xdc @@ -324,9 +324,9 @@ set_property PROBE_TYPE DATA_AND_TRIGGER [get_debug_ports u_ila_0/probe62] connect_debug_port u_ila_0/probe62 [get_nets [list wallypipelinedsoc/core/hzu/FCvtIntStallD ]] create_debug_port u_ila_0 probe -set_property port_width 1 [get_debug_ports u_ila_0/probe63] +set_property port_width 7 [get_debug_ports u_ila_0/probe63] set_property PROBE_TYPE DATA_AND_TRIGGER [get_debug_ports u_ila_0/probe63] -connect_debug_port u_ila_0/probe63 [get_nets [list wallypipelinedsoc/core/hzu/DivBusyE ]] +connect_debug_port u_ila_0/probe63 [get_nets [list {wallypipelinedsoc/uncore.uncore/plic.plic/threshMask[0][1]} {wallypipelinedsoc/uncore.uncore/plic.plic/threshMask[0][2]} {wallypipelinedsoc/uncore.uncore/plic.plic/threshMask[0][3]} {wallypipelinedsoc/uncore.uncore/plic.plic/threshMask[0][4]} {wallypipelinedsoc/uncore.uncore/plic.plic/threshMask[0][5]} {wallypipelinedsoc/uncore.uncore/plic.plic/threshMask[0][6]} {wallypipelinedsoc/uncore.uncore/plic.plic/threshMask[0][7]} ]] create_debug_port u_ila_0 probe set_property port_width 1 [get_debug_ports u_ila_0/probe64] @@ -1148,7 +1148,3 @@ set_property port_width 53 [get_debug_ports u_ila_0/probe224] set_property PROBE_TYPE DATA_AND_TRIGGER [get_debug_ports u_ila_0/probe224] connect_debug_port u_ila_0/probe224 [get_nets [list {wallypipelinedsoc/uncore.uncore/plic.plic/irqs_at_max_priority[0][1]} {wallypipelinedsoc/uncore.uncore/plic.plic/irqs_at_max_priority[0][2]} {wallypipelinedsoc/uncore.uncore/plic.plic/irqs_at_max_priority[0][3]} {wallypipelinedsoc/uncore.uncore/plic.plic/irqs_at_max_priority[0][4]} {wallypipelinedsoc/uncore.uncore/plic.plic/irqs_at_max_priority[0][5]} {wallypipelinedsoc/uncore.uncore/plic.plic/irqs_at_max_priority[0][6]} 
{wallypipelinedsoc/uncore.uncore/plic.plic/irqs_at_max_priority[0][7]} {wallypipelinedsoc/uncore.uncore/plic.plic/irqs_at_max_priority[0][8]} {wallypipelinedsoc/uncore.uncore/plic.plic/irqs_at_max_priority[0][9]} {wallypipelinedsoc/uncore.uncore/plic.plic/irqs_at_max_priority[0][10]} {wallypipelinedsoc/uncore.uncore/plic.plic/irqs_at_max_priority[0][11]} {wallypipelinedsoc/uncore.uncore/plic.plic/irqs_at_max_priority[0][12]} {wallypipelinedsoc/uncore.uncore/plic.plic/irqs_at_max_priority[0][13]} {wallypipelinedsoc/uncore.uncore/plic.plic/irqs_at_max_priority[0][14]} {wallypipelinedsoc/uncore.uncore/plic.plic/irqs_at_max_priority[0][15]} {wallypipelinedsoc/uncore.uncore/plic.plic/irqs_at_max_priority[0][16]} {wallypipelinedsoc/uncore.uncore/plic.plic/irqs_at_max_priority[0][17]} {wallypipelinedsoc/uncore.uncore/plic.plic/irqs_at_max_priority[0][18]} {wallypipelinedsoc/uncore.uncore/plic.plic/irqs_at_max_priority[0][19]} {wallypipelinedsoc/uncore.uncore/plic.plic/irqs_at_max_priority[0][20]} {wallypipelinedsoc/uncore.uncore/plic.plic/irqs_at_max_priority[0][21]} {wallypipelinedsoc/uncore.uncore/plic.plic/irqs_at_max_priority[0][22]} {wallypipelinedsoc/uncore.uncore/plic.plic/irqs_at_max_priority[0][23]} {wallypipelinedsoc/uncore.uncore/plic.plic/irqs_at_max_priority[0][24]} {wallypipelinedsoc/uncore.uncore/plic.plic/irqs_at_max_priority[0][25]} {wallypipelinedsoc/uncore.uncore/plic.plic/irqs_at_max_priority[0][26]} {wallypipelinedsoc/uncore.uncore/plic.plic/irqs_at_max_priority[0][27]} {wallypipelinedsoc/uncore.uncore/plic.plic/irqs_at_max_priority[0][28]} {wallypipelinedsoc/uncore.uncore/plic.plic/irqs_at_max_priority[0][29]} {wallypipelinedsoc/uncore.uncore/plic.plic/irqs_at_max_priority[0][30]} {wallypipelinedsoc/uncore.uncore/plic.plic/irqs_at_max_priority[0][31]} {wallypipelinedsoc/uncore.uncore/plic.plic/irqs_at_max_priority[0][32]} {wallypipelinedsoc/uncore.uncore/plic.plic/irqs_at_max_priority[0][33]} 
{wallypipelinedsoc/uncore.uncore/plic.plic/irqs_at_max_priority[0][34]} {wallypipelinedsoc/uncore.uncore/plic.plic/irqs_at_max_priority[0][35]} {wallypipelinedsoc/uncore.uncore/plic.plic/irqs_at_max_priority[0][36]} {wallypipelinedsoc/uncore.uncore/plic.plic/irqs_at_max_priority[0][37]} {wallypipelinedsoc/uncore.uncore/plic.plic/irqs_at_max_priority[0][38]} {wallypipelinedsoc/uncore.uncore/plic.plic/irqs_at_max_priority[0][39]} {wallypipelinedsoc/uncore.uncore/plic.plic/irqs_at_max_priority[0][40]} {wallypipelinedsoc/uncore.uncore/plic.plic/irqs_at_max_priority[0][41]} {wallypipelinedsoc/uncore.uncore/plic.plic/irqs_at_max_priority[0][42]} {wallypipelinedsoc/uncore.uncore/plic.plic/irqs_at_max_priority[0][43]} {wallypipelinedsoc/uncore.uncore/plic.plic/irqs_at_max_priority[0][44]} {wallypipelinedsoc/uncore.uncore/plic.plic/irqs_at_max_priority[0][45]} {wallypipelinedsoc/uncore.uncore/plic.plic/irqs_at_max_priority[0][46]} {wallypipelinedsoc/uncore.uncore/plic.plic/irqs_at_max_priority[0][47]} {wallypipelinedsoc/uncore.uncore/plic.plic/irqs_at_max_priority[0][48]} {wallypipelinedsoc/uncore.uncore/plic.plic/irqs_at_max_priority[0][49]} {wallypipelinedsoc/uncore.uncore/plic.plic/irqs_at_max_priority[0][50]} {wallypipelinedsoc/uncore.uncore/plic.plic/irqs_at_max_priority[0][51]} {wallypipelinedsoc/uncore.uncore/plic.plic/irqs_at_max_priority[0][52]} {wallypipelinedsoc/uncore.uncore/plic.plic/irqs_at_max_priority[0][53]} ]] -create_debug_port u_ila_0 probe -set_property port_width 7 [get_debug_ports u_ila_0/probe225] -set_property PROBE_TYPE DATA_AND_TRIGGER [get_debug_ports u_ila_0/probe225] -connect_debug_port u_ila_0/probe225 [get_nets [list {wallypipelinedsoc/uncore.uncore/plic.plic/threshMask[0][1]} {wallypipelinedsoc/uncore.uncore/plic.plic/threshMask[0][2]} {wallypipelinedsoc/uncore.uncore/plic.plic/threshMask[0][3]} {wallypipelinedsoc/uncore.uncore/plic.plic/threshMask[0][4]} {wallypipelinedsoc/uncore.uncore/plic.plic/threshMask[0][5]} 
{wallypipelinedsoc/uncore.uncore/plic.plic/threshMask[0][6]} {wallypipelinedsoc/uncore.uncore/plic.plic/threshMask[0][7]} ]] From 668c698bb491dc3e1e1310d55ac69ada994976f5 Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Fri, 30 Dec 2022 12:07:44 -0600 Subject: [PATCH 15/15] removed ethe second bit from fma alignment shift --- pipelined/config/shared/wally-shared.vh | 4 +- pipelined/src/fpu/fma/fma.sv | 60 +++++++++-------- pipelined/src/fpu/fma/fmaadd.sv | 12 ++-- pipelined/src/fpu/fma/fmaalign.sv | 25 ++++---- pipelined/src/fpu/fma/fmalza.sv | 12 ++-- pipelined/src/fpu/fpu.sv | 18 +++--- pipelined/src/fpu/postproc/fmashiftcalc.sv | 28 ++++---- pipelined/src/fpu/postproc/postprocess.sv | 37 ++++++----- pipelined/src/fpu/postproc/round.sv | 4 +- pipelined/src/fpu/postproc/shiftcorrection.sv | 4 +- pipelined/testbench/testbench-fp.sv | 64 +++++++++---------- 11 files changed, 137 insertions(+), 131 deletions(-) diff --git a/pipelined/config/shared/wally-shared.vh b/pipelined/config/shared/wally-shared.vh index e047d947a..5c210ebdc 100644 --- a/pipelined/config/shared/wally-shared.vh +++ b/pipelined/config/shared/wally-shared.vh @@ -104,9 +104,9 @@ `define CVTLEN ((`NF<`XLEN) ? (`XLEN) : (`NF)) `define LLEN ((`FLEN<`XLEN) ? (`XLEN) : (`FLEN)) `define LOGCVTLEN $unsigned($clog2(`CVTLEN+1)) -`define NORMSHIFTSZ ((`QLEN+`NF+1) > (3*`NF+7) ? (`QLEN+`NF+1) : (3*`NF+7))//change +`define NORMSHIFTSZ ((`QLEN+`NF+1) > (3*`NF+6) ? (`QLEN+`NF+1) : (3*`NF+6)) `define LOGNORMSHIFTSZ ($clog2(`NORMSHIFTSZ)) -`define CORRSHIFTSZ ((`DIVRESLEN+`NF) > (3*`NF+7) ? (`DIVRESLEN+`NF) : (3*`NF+5))//change +`define CORRSHIFTSZ ((`DIVRESLEN+`NF) > (3*`NF+6) ? 
(`DIVRESLEN+`NF) : (3*`NF+4)) // division constants `define RADIX 32'h4 diff --git a/pipelined/src/fpu/fma/fma.sv b/pipelined/src/fpu/fma/fma.sv index 4d60f477f..ed854c0a1 100644 --- a/pipelined/src/fpu/fma/fma.sv +++ b/pipelined/src/fpu/fma/fma.sv @@ -31,27 +31,37 @@ `include "wally-config.vh" module fma( - input logic Xs, Ys, Zs, // input's signs - input logic [`NE-1:0] Xe, Ye, Ze, // input's biased exponents in B(NE.0) format - input logic [`NF:0] Xm, Ym, Zm, // input's significands in U(0.NF) format - input logic XZero, YZero, ZZero, // is the input zero - input logic [2:0] OpCtrl, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y) - output logic ASticky, // sticky bit that is calculated during alignment - output logic [3*`NF+4:0] Sm,//change // the positive sum's significand - output logic InvA, // Was A inverted for effective subtraction (P-A or -P+A) - output logic As, // the aligned addend's sign (modified Z sign for other opperations) - output logic Ps, // the product's sign - output logic Ss, // the sum's sign - output logic [`NE+1:0] Se, - output logic [$clog2(3*`NF+6)-1:0] SCnt//change // normalization shift count + input logic Xs, Ys, Zs, // input's signs + input logic [`NE-1:0] Xe, Ye, Ze, // input's biased exponents in B(NE.0) format + input logic [`NF:0] Xm, Ym, Zm, // input's significands in U(0.NF) format + input logic XZero, YZero, ZZero, // is the input zero + input logic [2:0] OpCtrl, // operation control + output logic ASticky, // sticky bit that is calculated during alignment + output logic [3*`NF+3:0] Sm, // the positive sum's significand + output logic InvA, // Was A inverted for effective subtraction (P-A or -P+A) + output logic As, // the aligned addend's sign (modified Z sign for other opperations) + output logic Ps, // the product's sign + output logic Ss, // the sum's sign + output logic [`NE+1:0] Se, // the sum's exponent + output logic [$clog2(3*`NF+5)-1:0] SCnt // 
normalization shift count ); - logic [2*`NF+1:0] Pm; // the product's significand in U(2.2Nf) format - logic [3*`NF+4:0] Am;//change // addend aligned's mantissa for addition in U(NF+5.2NF+1) - logic [3*`NF+4:0] AmInv; //change // aligned addend's mantissa possibly inverted - logic [2*`NF+1:0] PmKilled; // the product's mantissa possibly killed - logic KillProd; // set the product to zero before addition if the product is too small to matter - logic [`NE+1:0] Pe; // the product's exponent B(NE+2.0) format; adds 2 bits to allow for size of number and negative sign + // OpCtrl: + // Fma: {not multiply-add?, negate prod?, negate Z?} + // 000 - fmadd + // 001 - fmsub + // 010 - fnmsub + // 011 - fnmadd + // 100 - mul + // 110 - add + // 111 - sub + + logic [2*`NF+1:0] Pm; // the product's significand in U(2.2Nf) format + logic [3*`NF+3:0] Am; // addend aligned's mantissa for addition in U(NF+4.2NF) + logic [3*`NF+3:0] AmInv; // aligned addend's mantissa possibly inverted + logic [2*`NF+1:0] PmKilled; // the product's mantissa possibly killed U(2.2Nf) + logic KillProd; // set the product to zero before addition if the product is too small to matter + logic [`NE+1:0] Pe; // the product's exponent B(NE+2.0) format; adds 2 bits to allow for size of number and negative sign /////////////////////////////////////////////////////////////////////////////// // Calculate the product @@ -68,25 +78,23 @@ module fma( // multiplication of the mantissa's fmamult mult(.Xm, .Ym, .Pm); - /////////////////////////////////////////////////////////////////////////////// - // Alignment shifter - /////////////////////////////////////////////////////////////////////////////// // calculate the signs and take the opperation into account fmasign sign(.OpCtrl, .Xs, .Ys, .Zs, .Ps, .As, .InvA); + /////////////////////////////////////////////////////////////////////////////// + // Alignment shifter + /////////////////////////////////////////////////////////////////////////////// fmaalign align(.Ze, 
.Zm, .XZero, .YZero, .ZZero, .Xe, .Ye, .Am, .ASticky, .KillProd); - - // /////////////////////////////////////////////////////////////////////////////// // // Addition/LZA // /////////////////////////////////////////////////////////////////////////////// fmaadd add(.Am, .Pm, .Ze, .Pe, .Ps, .KillProd, .ASticky, .AmInv, .PmKilled, .InvA, .Sm, .Se, .Ss); - //change - fmalza #(3*`NF+5) lza(.A(AmInv), .Pm({PmKilled, 1'b0, InvA&Ps&ASticky&KillProd}), .Cin(InvA & ~(ASticky & ~KillProd)), .sub(InvA), .SCnt); + fmalza #(3*`NF+4) lza(.A(AmInv), .Pm({PmKilled, InvA&Ps&ASticky&KillProd}), .Cin(InvA & ~(ASticky & ~KillProd)), .sub(InvA), .SCnt); + endmodule diff --git a/pipelined/src/fpu/fma/fmaadd.sv b/pipelined/src/fpu/fma/fmaadd.sv index 509adb674..7ce641dbe 100644 --- a/pipelined/src/fpu/fma/fmaadd.sv +++ b/pipelined/src/fpu/fma/fmaadd.sv @@ -31,7 +31,7 @@ `include "wally-config.vh" module fmaadd( - input logic [3*`NF+4:0] Am, //change // aligned addend's mantissa for addition in U(NF+5.2NF+1) + input logic [3*`NF+3:0] Am, // aligned addend's mantissa for addition in U(NF+5.2NF+1) input logic [2*`NF+1:0] Pm, // the product's mantissa input logic Ps, // the product sign and the alligend addeded's sign (Modified Z sign for other opperations) input logic InvA, // invert the aligned addend @@ -39,13 +39,13 @@ module fmaadd( input logic ASticky, input logic [`NE-1:0] Ze, input logic [`NE+1:0] Pe, - output logic [3*`NF+4:0] AmInv,//change // aligned addend possibly inverted + output logic [3*`NF+3:0] AmInv, // aligned addend possibly inverted output logic [2*`NF+1:0] PmKilled, // the product's mantissa possibly killed output logic Ss, output logic [`NE+1:0] Se, - output logic [3*`NF+4:0] Sm//change // the positive sum + output logic [3*`NF+3:0] Sm // the positive sum ); - logic [3*`NF+4:0] PreSum, NegPreSum;//change // possibly negitive sum + logic [3*`NF+3:0] PreSum, NegPreSum; // possibly negitive sum logic [3*`NF+5:0] PreSumdebug, NegPreSumdebug; // possibly negitive sum logic 
NegSum; // was the sum negitive logic NegSumdebug; // was the sum negitive @@ -66,8 +66,8 @@ module fmaadd( // addend - prod where product is killed (and not exactly zero) then don't add +1 from negation // ie ~(InvA&ASticky&KillProd)&InvA = (~ASticky|~KillProd)&InvA // in this case this result is only ever selected when InvA=1 so we can remove &InvA - assign {NegSum, PreSum} = {{`NF+2{1'b0}}, PmKilled, 2'b0} + {InvA, AmInv} + {{3*`NF+5{1'b0}}, (~ASticky|KillProd)&InvA};//change - assign NegPreSum = Am + {{`NF+1{1'b1}}, ~PmKilled, 2'b0} + {(3*`NF+2)'(0), ~ASticky|~KillProd, 2'b0};//change + assign {NegSum, PreSum} = {{`NF+2{1'b0}}, PmKilled, 1'b0} + {InvA, AmInv} + {{3*`NF+4{1'b0}}, (~ASticky|KillProd)&InvA}; + assign NegPreSum = Am + {{`NF+1{1'b1}}, ~PmKilled, 1'b0} + {(3*`NF+2)'(0), ~ASticky|~KillProd, 1'b0}; // Choose the positive sum and accompanying LZA result. assign Sm = NegSum ? NegPreSum : PreSum; diff --git a/pipelined/src/fpu/fma/fmaalign.sv b/pipelined/src/fpu/fma/fmaalign.sv index 67dc0b824..fc4d9c614 100644 --- a/pipelined/src/fpu/fma/fmaalign.sv +++ b/pipelined/src/fpu/fma/fmaalign.sv @@ -35,16 +35,15 @@ module fmaalign( input logic [`NE-1:0] Xe, Ye, Ze, // biased exponents in B(NE.0) format input logic [`NF:0] Zm, // significand in U(0.NF) format] input logic XZero, YZero, ZZero, // is the input zero - output logic [3*`NF+4:0] Am,//change // addend aligned for addition in U(NF+5.2NF+1) + output logic [3*`NF+3:0] Am, // addend aligned for addition in U(NF+5.2NF+1) output logic ASticky, // Sticky bit calculated from the aliged addend output logic KillProd // should the product be set to zero ); logic [`NE+1:0] ACnt; // how far to shift the addend to align with the product in Q(NE+2.0) format - logic [4*`NF+4:0] ZmShifted;//change // output of the alignment shifter including sticky bits U(NF+5.3NF+1) - logic [4*`NF+4:0] ZmPreshifted;//change // input to the alignment shifter U(NF+5.3NF+1) + logic [4*`NF+3:0] ZmShifted; // output of the alignment 
shifter including sticky bits U(NF+5.3NF+1) + logic [4*`NF+3:0] ZmPreshifted; // input to the alignment shifter U(NF+5.3NF+1) logic KillZ; - logic PmSticky, tmpZmSticky; /////////////////////////////////////////////////////////////////////////////// // Alignment shifter @@ -57,38 +56,38 @@ module fmaalign( assign ACnt = {2'b0, Xe} + {2'b0, Ye} - {2'b0, (`NE)'(`BIAS)} + (`NE+2)'(`NF+2) - {2'b0, Ze}; // Defualt Addition with only inital left shift - // | 53'b0 | 106'b(product) | 2'b0 | + // | 53'b0 | 106'b(product) | 1'b0 | // | addnend | - assign ZmPreshifted = {Zm,(3*`NF+4)'(0)}; //change + assign ZmPreshifted = {Zm,(3*`NF+3)'(0)}; assign KillProd = (ACnt[`NE+1]&~ZZero)|XZero|YZero; - assign KillZ = $signed(ACnt)>$signed((`NE+2)'(3)*(`NE+2)'(`NF)+(`NE+2)'(4));//change + assign KillZ = $signed(ACnt)>$signed((`NE+2)'(3)*(`NE+2)'(`NF)+(`NE+2)'(3)); always_comb begin // If the product is too small to effect the sum, kill the product - // | 54'b0 | 106'b(product) | 2'b0 | + // | 53'b0 | 106'b(product) | 1'b0 | // | addnend | if (KillProd) begin - ZmShifted = {(`NF+2)'(0), Zm, (2*`NF+2)'(0)};//change + ZmShifted = {(`NF+2)'(0), Zm, (2*`NF+1)'(0)}; ASticky = ~(XZero|YZero); // If the addend is too small to effect the addition // - The addend has to shift two past the end of the product to be considered too small // - The 2 extra bits are needed for rounding - // | 54'b0 | 106'b(product) | 2'b0 | + // | 53'b0 | 106'b(product) | 1'b0 | // | addnend | end else if (KillZ) begin ZmShifted = 0; ASticky = ~ZZero; // If the Addend is shifted right - // | 54'b0 | 106'b(product) | 2'b0 | - // | addnend | + // | 53'b0 | 106'b(product) | 1'b0 | + // | addnend | end else begin ZmShifted = ZmPreshifted >> ACnt; ASticky = |(ZmShifted[`NF-1:0]); @@ -96,7 +95,7 @@ module fmaalign( end end - assign Am = ZmShifted[4*`NF+4:`NF];//change + assign Am = ZmShifted[4*`NF+3:`NF]; endmodule diff --git a/pipelined/src/fpu/fma/fmalza.sv b/pipelined/src/fpu/fma/fmalza.sv index 182075bde..9a0de74c8 
100644 --- a/pipelined/src/fpu/fma/fmalza.sv +++ b/pipelined/src/fpu/fma/fmalza.sv @@ -31,18 +31,18 @@ `include "wally-config.vh" module fmalza #(WIDTH) ( // [Schmookler & Nowka, Leading zero anticipation and detection, IEEE Sym. Computer Arithmetic, 2001] - input logic [WIDTH-1:0] A, // addend - input logic [2*`NF+3:0] Pm, // product - input logic Cin, // carry in - input logic sub, - output logic [$clog2(WIDTH+1)-1:0] SCnt // normalization shift count for the positive result + input logic [WIDTH-1:0] A, // addend + input logic [2*`NF+2:0] Pm, // product + input logic Cin, // carry in + input logic sub, + output logic [$clog2(WIDTH+1)-1:0] SCnt // normalization shift count for the positive result ); logic [WIDTH:0] F; logic [WIDTH-1:0] B, P, G, K; logic [WIDTH-1:0] Pp1, Gm1, Km1; - assign B = {{(`NF+1){1'b0}}, Pm};//change // Zero extend product + assign B = {{(`NF+1){1'b0}}, Pm}; // Zero extend product assign P = A^B; assign G = A&B; diff --git a/pipelined/src/fpu/fpu.sv b/pipelined/src/fpu/fpu.sv index 507cd9086..1f749e9e0 100755 --- a/pipelined/src/fpu/fpu.sv +++ b/pipelined/src/fpu/fpu.sv @@ -109,14 +109,14 @@ module fpu ( logic XExpMaxE; // is the exponent all ones (max value) // Fma Signals - logic [3*`NF+4:0] SmE, SmM;//change - logic ZmStickyE, ZmStickyM; + logic [3*`NF+3:0] SmE, SmM; + logic FmaAStickyE, FmaAStickyM; logic [`NE+1:0] SeE,SeM; logic InvAE, InvAM; logic AsE, AsM; logic PsE, PsM; logic SsE, SsM; - logic [$clog2(3*`NF+6)-1:0] SCntE, SCntM;//change + logic [$clog2(3*`NF+5)-1:0] SCntE, SCntM; // Cvt Signals logic [`NE:0] CeE, CeM; // the calculated expoent @@ -258,7 +258,7 @@ module fpu ( .As(AsE), .Ps(PsE), .Ss(SsE), .Se(SeE), .Sm(SmE), .InvA(InvAE), .SCnt(SCntE), - .ASticky(ZmStickyE)); + .ASticky(FmaAStickyE)); // divide and squareroot // - fdiv @@ -352,10 +352,10 @@ module fpu ( {XsE, YsE, XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE, ZDenormE}, {XsM, YsM, XZeroM, YZeroM, ZZeroM, XInfM, YInfM, 
ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM, ZDenormM}); flopenrc #(1) EMRegCmpFlg (clk, reset, FlushM, ~StallM, PreNVE, PreNVM); - flopenrc #(3*`NF+5) EMRegFma2(clk, reset, FlushM, ~StallM, SmE, SmM);//change - flopenrc #($clog2(3*`NF+6)+7+`NE) EMRegFma4(clk, reset, FlushM, ~StallM, //change - {ZmStickyE, InvAE, SCntE, AsE, PsE, SsE, SeE}, - {ZmStickyM, InvAM, SCntM, AsM, PsM, SsM, SeM}); + flopenrc #(3*`NF+4) EMRegFma2(clk, reset, FlushM, ~StallM, SmE, SmM); + flopenrc #($clog2(3*`NF+5)+7+`NE) EMRegFma4(clk, reset, FlushM, ~StallM, + {FmaAStickyE, InvAE, SCntE, AsE, PsE, SsE, SeE}, + {FmaAStickyM, InvAM, SCntM, AsM, PsM, SsM, SeM}); flopenrc #(`NE+`LOGCVTLEN+`CVTLEN+4) EMRegCvt(clk, reset, FlushM, ~StallM, {CeE, CvtShiftAmtE, CvtResDenormUfE, CsE, IntZeroE, CvtLzcInE}, {CeM, CvtShiftAmtM, CvtResDenormUfM, CsM, IntZeroM, CvtLzcInM}); @@ -375,7 +375,7 @@ module fpu ( assign FpLoadStoreM = FResSelM[1]; postprocess postprocess(.Xs(XsM), .Ys(YsM), .Xm(XmM), .Ym(YmM), .Zm(ZmM), .Frm(FrmM), .Fmt(FmtM), - .FmaZmS(ZmStickyM), .XZero(XZeroM), .YZero(YZeroM), .ZZero(ZZeroM), .XInf(XInfM), .YInf(YInfM), .DivQm(QmM), .FmaSs(SsM), + .FmaASticky(FmaAStickyM), .XZero(XZeroM), .YZero(YZeroM), .ZZero(ZZeroM), .XInf(XInfM), .YInf(YInfM), .DivQm(QmM), .FmaSs(SsM), .ZInf(ZInfM), .XNaN(XNaNM), .YNaN(YNaNM), .ZNaN(ZNaNM), .XSNaN(XSNaNM), .YSNaN(YSNaNM), .ZSNaN(ZSNaNM), .FmaSm(SmM), .DivQe(QeM), /*.DivDone(DivDoneM), */ .ZDenorm(ZDenormM), .FmaAs(AsM), .FmaPs(PsM), .OpCtrl(OpCtrlM), .FmaSCnt(SCntM), .FmaSe(SeM), .CvtCe(CeM), .CvtResDenormUf(CvtResDenormUfM),.CvtShiftAmt(CvtShiftAmtM), .CvtCs(CsM), .ToInt(FWriteIntM), .DivS(DivSM), diff --git a/pipelined/src/fpu/postproc/fmashiftcalc.sv b/pipelined/src/fpu/postproc/fmashiftcalc.sv index 5c301da7d..1110b70f0 100644 --- a/pipelined/src/fpu/postproc/fmashiftcalc.sv +++ b/pipelined/src/fpu/postproc/fmashiftcalc.sv @@ -30,18 +30,18 @@ `include "wally-config.vh" module fmashiftcalc( - input logic [3*`NF+4:0] FmaSm,//change // the 
positive sum - input logic [$clog2(3*`NF+6)-1:0] FmaSCnt,//change // normalization shift count - input logic [`FMTBITS-1:0] Fmt, // precision 1 = double 0 = single - input logic [`NE+1:0] FmaSe, - output logic [`NE+1:0] NormSumExp, // exponent of the normalized sum not taking into account denormal or zero results - output logic FmaSZero, // is the result denormalized - calculated before LZA corection - output logic FmaPreResultDenorm, // is the result denormalized - calculated before LZA corection - output logic [$clog2(3*`NF+6)-1:0] FmaShiftAmt,//change // normalization shift count - output logic [3*`NF+6:0] FmaShiftIn//change // is the sum zero + input logic [3*`NF+3:0] FmaSm, // the positive sum + input logic [$clog2(3*`NF+5)-1:0] FmaSCnt, // normalization shift count + input logic [`FMTBITS-1:0] Fmt, // precision 1 = double 0 = single + input logic [`NE+1:0] FmaSe, // sum's exponent + output logic [`NE+1:0] NormSumExp, // exponent of the normalized sum not taking into account denormal or zero results + output logic FmaSZero, // is the result denormalized - calculated before LZA corection + output logic FmaPreResultDenorm, // is the result denormalized - calculated before LZA corection + output logic [$clog2(3*`NF+5)-1:0] FmaShiftAmt, // normalization shift count + output logic [3*`NF+5:0] FmaShiftIn // is the sum zero ); - logic [`NE+1:0] PreNormSumExp; // the exponent of the normalized sum with the `FLEN bias - logic [`NE+1:0] BiasCorr; + logic [`NE+1:0] PreNormSumExp; // the exponent of the normalized sum with the `FLEN bias + logic [`NE+1:0] BiasCorr; // correction for bias /////////////////////////////////////////////////////////////////////////////// // Normalization @@ -50,7 +50,7 @@ module fmashiftcalc( // Determine if the sum is zero assign FmaSZero = ~(|FmaSm); // calculate the sum's exponent - assign PreNormSumExp = FmaSe + {{`NE+2-$unsigned($clog2(3*`NF+6)){1'b1}}, ~FmaSCnt} + (`NE+2)'(`NF+3);//change + assign PreNormSumExp = FmaSe + 
{{`NE+2-$unsigned($clog2(3*`NF+5)){1'b1}}, ~FmaSCnt} + (`NE+2)'(`NF+3); //convert the sum's exponent into the proper percision if (`FPSIZES == 1) begin @@ -150,7 +150,7 @@ module fmashiftcalc( // - shift once if killing a product and the result is denormalized assign FmaShiftIn = {2'b0, FmaSm}; if (`FPSIZES == 1) - assign FmaShiftAmt = FmaPreResultDenorm ? FmaSe[$clog2(3*`NF+6)-1:0]+($clog2(3*`NF+6))'(`NF+2): FmaSCnt+1;//change + assign FmaShiftAmt = FmaPreResultDenorm ? FmaSe[$clog2(3*`NF+5)-1:0]+($clog2(3*`NF+5))'(`NF+2): FmaSCnt+1; else - assign FmaShiftAmt = FmaPreResultDenorm ? FmaSe[$clog2(3*`NF+6)-1:0]+($clog2(3*`NF+6))'(`NF+2)+BiasCorr[$clog2(3*`NF+6)-1:0]: FmaSCnt+1;//change + assign FmaShiftAmt = FmaPreResultDenorm ? FmaSe[$clog2(3*`NF+5)-1:0]+($clog2(3*`NF+5))'(`NF+2)+BiasCorr[$clog2(3*`NF+5)-1:0]: FmaSCnt+1; endmodule diff --git a/pipelined/src/fpu/postproc/postprocess.sv b/pipelined/src/fpu/postproc/postprocess.sv index 7c758b28c..4637f3707 100644 --- a/pipelined/src/fpu/postproc/postprocess.sv +++ b/pipelined/src/fpu/postproc/postprocess.sv @@ -32,28 +32,27 @@ module postprocess ( // general signals - input logic Xs, Ys, // input signs + input logic Xs, Ys, // input signs input logic [`NF:0] Xm, Ym, Zm, // input mantissas - input logic [2:0] Frm, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude - input logic [`FMTBITS-1:0] Fmt, // precision 1 = double 0 = single - input logic [2:0] OpCtrl, // choose which opperation (look below for values) + input logic [2:0] Frm, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude + input logic [`FMTBITS-1:0] Fmt, // precision 1 = double 0 = single + input logic [2:0] OpCtrl, // choose which opperation (look below for values) input logic XZero, YZero, ZZero, // inputs are zero input logic XInf, YInf, ZInf, // 
inputs are infinity input logic XNaN, YNaN, ZNaN, // inputs are NaN input logic XSNaN, YSNaN, ZSNaN, // inputs are signaling NaNs - input logic ZDenorm, // is the original precision denormalized - input logic [1:0] PostProcSel, // select result to be written to fp register + input logic ZDenorm, // is the original precision denormalized + input logic [1:0] PostProcSel, // select result to be written to fp register //fma signals - input logic FmaAs, // the modified Z sign - depends on instruction - input logic FmaPs, // the product's sign - input logic [`NE+1:0] FmaSe, - input logic [3*`NF+4:0] FmaSm,//change // the positive sum - input logic FmaZmS, // sticky bit that is calculated during alignment - input logic FmaSs, - input logic [$clog2(3*`NF+6)-1:0] FmaSCnt,//change // the normalization shift count + input logic FmaAs, // the modified Z sign - depends on instruction + input logic FmaPs, // the product's sign + input logic [`NE+1:0] FmaSe, // the sum's exponent + input logic [3*`NF+3:0] FmaSm, // the positive sum + input logic FmaASticky, // sticky bit that is calculated during alignment + input logic FmaSs, // + input logic [$clog2(3*`NF+5)-1:0] FmaSCnt, // the normalization shift count //divide signals input logic DivS, -// input logic DivDone, input logic [`NE+1:0] DivQe, input logic [`DIVb:0] DivQm, // conversion signals @@ -89,10 +88,10 @@ module postprocess ( // fma signals logic [`NE+1:0] FmaMe; // exponent of the normalized sum logic FmaSZero; // is the sum zero - logic [3*`NF+6:0] FmaShiftIn;//change // shift input + logic [3*`NF+5:0] FmaShiftIn; // shift input logic [`NE+1:0] NormSumExp; // exponent of the normalized sum not taking into account denormal or zero results logic FmaPreResultDenorm; // is the result denormalized - calculated before LZA corection - logic [$clog2(3*`NF+6)-1:0] FmaShiftAmt;//change // normalization shift count + logic [$clog2(3*`NF+5)-1:0] FmaShiftAmt; // normalization shift count // division singals logic 
[`LOGNORMSHIFTSZ-1:0] DivShiftAmt; logic [`NORMSHIFTSZ-1:0] DivShiftIn; @@ -152,8 +151,8 @@ module postprocess ( always_comb case(PostProcSel) 2'b10: begin // fma - ShiftAmt = {{`LOGNORMSHIFTSZ-$clog2(3*`NF+6){1'b0}}, FmaShiftAmt};//change - ShiftIn = {FmaShiftIn, {`NORMSHIFTSZ-(3*`NF+7){1'b0}}};//change + ShiftAmt = {{`LOGNORMSHIFTSZ-$clog2(3*`NF+5){1'b0}}, FmaShiftAmt}; + ShiftIn = {FmaShiftIn, {`NORMSHIFTSZ-(3*`NF+6){1'b0}}}; end 2'b00: begin // cvt ShiftAmt = {{`LOGNORMSHIFTSZ-$clog2(`CVTLEN+1){1'b0}}, CvtShiftAmt}; @@ -193,7 +192,7 @@ module postprocess ( roundsign roundsign(.FmaOp, .DivOp, .CvtOp, .Sqrt, .FmaSs, .Xs, .Ys, .CvtCs, .Ms); - round round(.OutFmt, .Frm, .FmaZmS, .Plus1, .PostProcSel, .CvtCe, .Qe, + round round(.OutFmt, .Frm, .FmaASticky, .Plus1, .PostProcSel, .CvtCe, .Qe, .Ms, .FmaMe, .FmaOp, .CvtOp, .CvtResDenormUf, .Mf, .ToInt, .CvtResUf, .DivS, //.DivDone, .DivOp, .UfPlus1, .FullRe, .Rf, .Re, .S, .R, .G, .Me); diff --git a/pipelined/src/fpu/postproc/round.sv b/pipelined/src/fpu/postproc/round.sv index c9e2b94e4..b24884dbf 100644 --- a/pipelined/src/fpu/postproc/round.sv +++ b/pipelined/src/fpu/postproc/round.sv @@ -48,7 +48,7 @@ module round( input logic CvtResDenormUf, input logic CvtResUf, input logic [`CORRSHIFTSZ-1:0] Mf, - input logic FmaZmS, // addend's sticky bit + input logic FmaASticky, // addend's sticky bit input logic [`NE+1:0] FmaMe, // exponent of the normalized sum input logic Ms, // the result's sign input logic [`NE:0] CvtCe, // the calculated expoent @@ -175,7 +175,7 @@ module round( // only add the Addend sticky if doing an FMA opperation // - the shifter shifts too far left when there's an underflow (shifting out all possible sticky bits) - assign S = FmaZmS&FmaOp | NormS | CvtResUf&CvtOp | FmaMe[`NE+1]&FmaOp | DivS&DivOp; + assign S = FmaASticky&FmaOp | NormS | CvtResUf&CvtOp | FmaMe[`NE+1]&FmaOp | DivS&DivOp; // determine round and LSB of the rounded value // - underflow round bit is used to determint the underflow flag 
diff --git a/pipelined/src/fpu/postproc/shiftcorrection.sv b/pipelined/src/fpu/postproc/shiftcorrection.sv index 588daa945..172180465 100644 --- a/pipelined/src/fpu/postproc/shiftcorrection.sv +++ b/pipelined/src/fpu/postproc/shiftcorrection.sv @@ -43,7 +43,7 @@ module shiftcorrection( output logic [`NE+1:0] Qe, output logic [`NE+1:0] FmaMe // exponent of the normalized sum ); - logic [3*`NF+4:0] CorrSumShifted;//change // the shifted sum after LZA correction + logic [3*`NF+3:0] CorrSumShifted; // the shifted sum after LZA correction logic [`CORRSHIFTSZ-1:0] CorrQmShifted; logic ResDenorm; // is the result denormalized logic LZAPlus1; // add one or two to the sum's exponent due to LZA correction @@ -56,7 +56,7 @@ module shiftcorrection( assign CorrQmShifted = (LZAPlus1|(DivQe==1&~LZAPlus1)) ? Shifted[`NORMSHIFTSZ-2:`NORMSHIFTSZ-`CORRSHIFTSZ-1] : Shifted[`NORMSHIFTSZ-3:`NORMSHIFTSZ-`CORRSHIFTSZ-2]; // if the result of the divider was calculated to be denormalized, then the result was correctly normalized, so select the top shifted bits always_comb - if(FmaOp) Mf = {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+5){1'b0}}};//change + if(FmaOp) Mf = {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+4){1'b0}}}; else if (DivOp&~DivResDenorm) Mf = CorrQmShifted; else Mf = Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ]; // Determine sum's exponent diff --git a/pipelined/testbench/testbench-fp.sv b/pipelined/testbench/testbench-fp.sv index f5986c839..ac81a1376 100644 --- a/pipelined/testbench/testbench-fp.sv +++ b/pipelined/testbench/testbench-fp.sv @@ -53,39 +53,39 @@ module testbenchfp; logic [`FLEN*4+7:0] TestVectors[8388609:0]; // list of test vectors logic [1:0] FmtVal; // value of the current Fmt - logic [2:0] UnitVal, OpCtrlVal, FrmVal; // vlaue of the currnet Unit/OpCtrl/FrmVal + logic [2:0] UnitVal, OpCtrlVal, FrmVal; // value of the currnet Unit/OpCtrl/FrmVal logic WriteIntVal; // value of the current WriteInt logic [`FLEN-1:0] X, Y, Z; // inputs read from TestFloat logic 
[`XLEN-1:0] SrcA; // integer input logic [`FLEN-1:0] Ans; // correct answer from TestFloat - logic [`FLEN-1:0] Res; // result from other units - logic [4:0] AnsFlg; // correct flags read from testfloat - logic [4:0] ResFlg, Flg; // Result flags - logic [`FMTBITS-1:0] ModFmt; // format - 10 = half, 00 = single, 01 = double, 11 = quad - logic [`FLEN-1:0] FpRes, FpCmpRes; // Results from each unit - logic [`XLEN-1:0] IntRes, CmpRes; // Results from each unit + logic [`FLEN-1:0] Res; // result from other units + logic [4:0] AnsFlg; // correct flags read from testfloat + logic [4:0] ResFlg, Flg; // Result flags + logic [`FMTBITS-1:0] ModFmt; // format - 10 = half, 00 = single, 01 = double, 11 = quad + logic [`FLEN-1:0] FpRes, FpCmpRes; // Results from each unit + logic [`XLEN-1:0] IntRes, CmpRes; // Results from each unit logic [4:0] FmaFlg, CvtFlg, DivFlg, CmpFlg; // Outputed flags logic AnsNaN, ResNaN, NaNGood; - logic Xs, Ys, Zs; // sign of the inputs - logic [`NE-1:0] Xe, Ye, Ze; // exponent of the inputs - logic [`NF:0] Xm, Ym, Zm; // mantissas of the inputs - logic XNaN, YNaN, ZNaN; // is the input NaN - logic XSNaN, YSNaN, ZSNaN; // is the input a signaling NaN - logic XDenorm, ZDenorm; // is the input denormalized - logic XInf, YInf, ZInf; // is the input infinity - logic XZero, YZero, ZZero; // is the input zero - logic XExpMax, YExpMax, ZExpMax; // is the input's exponent all ones - logic [`CVTLEN-1:0] CvtLzcInE; // input to the Leading Zero Counter (priority encoder) - logic IntZero; - logic CvtResSgnE; - logic [`NE:0] CvtCalcExpE; // the calculated expoent + logic Xs, Ys, Zs; // sign of the inputs + logic [`NE-1:0] Xe, Ye, Ze; // exponent of the inputs + logic [`NF:0] Xm, Ym, Zm; // mantissas of the inputs + logic XNaN, YNaN, ZNaN; // is the input NaN + logic XSNaN, YSNaN, ZSNaN; // is the input a signaling NaN + logic XDenorm, ZDenorm; // is the input denormalized + logic XInf, YInf, ZInf; // is the input infinity + logic XZero, YZero, ZZero; // is the 
input zero + logic XExpMax, YExpMax, ZExpMax; // is the input's exponent all ones + logic [`CVTLEN-1:0] CvtLzcInE; // input to the Leading Zero Counter (priority encoder) + logic IntZero; + logic CvtResSgnE; + logic [`NE:0] CvtCalcExpE; // the calculated expoent logic [`LOGCVTLEN-1:0] CvtShiftAmtE; // how much to shift by - logic [`DIVb:0] Quot; - logic CvtResDenormUfE; - logic DivStart, FDivBusyE, OldFDivBusyE; - logic reset = 1'b0; + logic [`DIVb:0] Quot; + logic CvtResDenormUfE; + logic DivStart, FDivBusyE, OldFDivBusyE; + logic reset = 1'b0; logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt; - logic [`DURLEN-1:0] Dur; + logic [`DURLEN-1:0] Dur; // in-between FMA signals logic Mult; @@ -94,17 +94,17 @@ module testbenchfp; logic [`NE+1:0] Se; logic ASticky; logic KillProd; - logic [$clog2(3*`NF+6)-1:0] SCnt; - logic [3*`NF+4:0] Sm; + logic [$clog2(3*`NF+5)-1:0] SCnt; + logic [3*`NF+3:0] Sm; logic InvA; logic NegSum; logic As; logic Ps; - logic DivSticky; - logic DivDone; - logic DivNegSticky; - logic [`NE+1:0] DivCalcExp; - logic divsqrtop; + logic DivSticky; + logic DivDone; + logic DivNegSticky; + logic [`NE+1:0] DivCalcExp; + logic divsqrtop; ///////////////////////////////////////////////////////////////////////////////////////////////