From 1bd6351e1fd4b0f2ad0e248cd7999d2e098ba189 Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Fri, 29 Jul 2022 22:54:49 +0000 Subject: [PATCH 01/16] re-added FStore2 in Cache --- pipelined/src/cache/cache.sv | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/pipelined/src/cache/cache.sv b/pipelined/src/cache/cache.sv index d28697e21..609810e8c 100644 --- a/pipelined/src/cache/cache.sv +++ b/pipelined/src/cache/cache.sv @@ -162,12 +162,18 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGWPL, WORDLEN, MUXINTER logic [LINELEN-1:0] FinalWriteDataDup; assign FinalWriteDataDup = {WORDSPERLINE{FinalWriteData}}; - onehotdecoder #(LOGWPL) adrdec( - .bin(PAdr[LOGWPL+LOGXLENBYTES-1:LOGXLENBYTES]), .decoded(MemPAdrDecoded)); + if(`LLEN>`XLEN)begin + logic [2**LOGWPL-1:0] MemPAdrDecodedtmp; + onehotdecoder #(LOGWPL) adrdec( + .bin(PAdr[LOGWPL+LOGXLENBYTES-1:LOGXLENBYTES]), .decoded(MemPAdrDecodedtmp)); + assign MemPAdrDecoded = MemPAdrDecodedtmp|{MemPAdrDecodedtmp[2**LOGWPL-2:0]&{2**LOGWPL-1{FStore2}}, 1'b0}; + end else + onehotdecoder #(LOGWPL) adrdec( + .bin(PAdr[LOGWPL+LOGXLENBYTES-1:LOGXLENBYTES]), .decoded(MemPAdrDecoded)); for(index = 0; index < 2**LOGWPL; index++) begin assign DemuxedByteMask[(index+1)*(`XLEN/8)-1:index*(`XLEN/8)] = MemPAdrDecoded[index] ? ByteMask : '0; end - // *** have to add back in fstore2 + assign LineByteMux = SetValid & ~SetDirty ? '1 : ~DemuxedByteMask; // If load miss set all muxes to 1. assign LineByteMask = ~SetValid & ~SetDirty ? '0 : ~SetValid & SetDirty ? DemuxedByteMask : '1; // if store hit only enable the word and subword bytes, else write all bytes. 
From 257107f908519683bdadd5c9faf8e9b26c0957ee Mon Sep 17 00:00:00 2001 From: David Harris Date: Mon, 1 Aug 2022 18:07:38 +0000 Subject: [PATCH 02/16] Partitioned fma into separate files --- pipelined/regression/sim-wally-batch | 2 +- pipelined/src/fpu/fma.sv | 231 +-------------------------- pipelined/src/fpu/fmaadd.sv | 83 ++++++++++ pipelined/src/fpu/fmaalign.sv | 101 ++++++++++++ pipelined/src/fpu/fmaexpadd.sv | 42 +++++ pipelined/src/fpu/fmalza.sv | 62 +++++++ pipelined/src/fpu/fmamult.sv | 38 +++++ pipelined/src/fpu/fmasign.sv | 47 ++++++ pipelined/testbench/tests.vh | 3 +- 9 files changed, 384 insertions(+), 225 deletions(-) create mode 100644 pipelined/src/fpu/fmaadd.sv create mode 100644 pipelined/src/fpu/fmaalign.sv create mode 100644 pipelined/src/fpu/fmaexpadd.sv create mode 100644 pipelined/src/fpu/fmalza.sv create mode 100644 pipelined/src/fpu/fmamult.sv create mode 100644 pipelined/src/fpu/fmasign.sv diff --git a/pipelined/regression/sim-wally-batch b/pipelined/regression/sim-wally-batch index 8b5b5d628..7afcadb2e 100755 --- a/pipelined/regression/sim-wally-batch +++ b/pipelined/regression/sim-wally-batch @@ -1 +1 @@ -vsim -c -do "do wally-pipelined-batch.do rv32gc wally32periph" +vsim -c -do "do wally-pipelined-batch.do rv64gc arch64d" diff --git a/pipelined/src/fpu/fma.sv b/pipelined/src/fpu/fma.sv index 067147ee6..fcf209f6b 100644 --- a/pipelined/src/fpu/fma.sv +++ b/pipelined/src/fpu/fma.sv @@ -1,7 +1,7 @@ /////////////////////////////////////////// // -// Written: me@KatherineParry.com, David Harris -// Modified: 6/23/2021 +// Written: 6/23/2021 me@KatherineParry.com, David_Harris@hmc.edu +// Modified: // // Purpose: Floating point multiply-accumulate of configurable size // @@ -63,18 +63,18 @@ module fma( // calculate the product's exponent - expadd expadd(.Fmt, .Xe, .Ye, .XZero, .YZero, .Pe); + fmaexpadd expadd(.Fmt, .Xe, .Ye, .XZero, .YZero, .Pe); // multiplication of the mantissa's - mult mult(.Xm, .Ym, .Pm); + fmamult mult(.Xm, .Ym, .Pm); 
/////////////////////////////////////////////////////////////////////////////// // Alignment shifter /////////////////////////////////////////////////////////////////////////////// // calculate the signs and take the opperation into account - sign sign(.OpCtrl, .Xs, .Ys, .Zs, .Ps, .As); + fmasign sign(.OpCtrl, .Xs, .Ys, .Zs, .Ps, .As); - align align(.Ze, .Zm, .XZero, .YZero, .ZZero, .Xe, .Ye, + fmaalign align(.Ze, .Zm, .XZero, .YZero, .ZZero, .Xe, .Ye, .Am, .ZmSticky, .KillProd); @@ -83,223 +83,8 @@ module fma( // // Addition/LZA // /////////////////////////////////////////////////////////////////////////////// - add add(.Am, .Pm, .Ze, .Pe, .Ps, .As, .KillProd, .ZmSticky, .AmInv, .PmKilled, .NegSum, .InvA, .Sm, .Se, .Ss); + fmaadd add(.Am, .Pm, .Ze, .Pe, .Ps, .As, .KillProd, .ZmSticky, .AmInv, .PmKilled, .NegSum, .InvA, .Sm, .Se, .Ss); - loa loa(.A(AmInv+{(3*`NF+6)'(0),InvA&~((ZmSticky&~KillProd))}), .P({PmKilled, 1'b0, InvA&Ps&ZmSticky&KillProd}), .SCnt); + fmalza lza(.A(AmInv+{(3*`NF+6)'(0),InvA&~((ZmSticky&~KillProd))}), .P({PmKilled, 1'b0, InvA&Ps&ZmSticky&KillProd}), .SCnt); endmodule - -module expadd( - input logic [`FMTBITS-1:0] Fmt, // format of the output: single double half quad - input logic [`NE-1:0] Xe, Ye, // input's exponents - input logic XZero, YZero, // are the inputs zero - output logic [`NE+1:0] Pe // product's exponent B^(1023)NE+2 -); - - // kill the exponent if the product is zero - either X or Y is 0 - assign Pe = ({2'b0, Xe} + {2'b0, Ye} - {2'b0, (`NE)'(`BIAS)})&{`NE+2{~(XZero|YZero)}}; - -endmodule - - - - - -module mult( - input logic [`NF:0] Xm, Ym, - output logic [2*`NF+1:0] Pm -); - assign Pm = Xm * Ym; -endmodule - - - - - - - - -module sign( - input logic [2:0] OpCtrl, // opperation contol - input logic Xs, Ys, Zs, // sign of the inputs - output logic Ps, // the product's sign - takes opperation into account - output logic As // aligned addend sign used in fma - takes opperation into account -); - - // Calculate the product's sign - 
// Negate product's sign if FNMADD or FNMSUB - - // flip is negation opperation - assign Ps = Xs ^ Ys ^ (OpCtrl[1]&~OpCtrl[2]); - // flip if subtraction - assign As = Zs^OpCtrl[0]; - -endmodule - - - - - - - - -module align( - input logic [`NE-1:0] Xe, Ye, Ze, // biased exponents in B(NE.0) format - input logic [`NF:0] Zm, // significand in U(0.NF) format] - input logic XZero, YZero, ZZero, // is the input zero - output logic [3*`NF+5:0] Am, // addend aligned for addition in U(NF+5.2NF+1) - output logic ZmSticky, // Sticky bit calculated from the aliged addend - output logic KillProd // should the product be set to zero -); - - logic [`NE+1:0] ACnt; // how far to shift the addend to align with the product in Q(NE+2.0) format - logic [4*`NF+5:0] ZmShifted; // output of the alignment shifter including sticky bits U(NF+5.3NF+1) - logic [4*`NF+5:0] ZmPreshifted; // input to the alignment shifter U(NF+5.3NF+1) - logic KillZ; - - /////////////////////////////////////////////////////////////////////////////// - // Alignment shifter - /////////////////////////////////////////////////////////////////////////////// - - // determine the shift count for alignment - // - negitive means Z is larger, so shift Z left - // - positive means the product is larger, so shift Z right - // This could have been done using Pe, but ACnt is on the critical path so we replicate logic for speed - assign ACnt = {2'b0, Xe} + {2'b0, Ye} - {2'b0, (`NE)'(`BIAS)} + (`NE+2)'(`NF+3) - {2'b0, Ze}; - - // Defualt Addition without shifting - // | 54'b0 | 106'b(product) | 2'b0 | - // | addnend | - - // the 1'b0 before the added is because the product's mantissa has two bits before the binary point (xx.xxxxxxxxxx...) 
- assign ZmPreshifted = {Zm,(3*`NF+5)'(0)}; - - assign KillProd = (ACnt[`NE+1]&~ZZero)|XZero|YZero; - assign KillZ = $signed(ACnt)>$signed((`NE+2)'(3)*(`NE+2)'(`NF)+(`NE+2)'(5)); - - always_comb - begin - - // If the product is too small to effect the sum, kill the product - - // | 54'b0 | 106'b(product) | 2'b0 | - // | addnend | - if (KillProd) begin - ZmShifted = {(`NF+3)'(0), Zm, (2*`NF+2)'(0)}; - ZmSticky = ~(XZero|YZero); - - // If the addend is too small to effect the addition - // - The addend has to shift two past the end of the addend to be considered too small - // - The 2 extra bits are needed for rounding - - // | 54'b0 | 106'b(product) | 2'b0 | - // | addnend | - end else if (KillZ) begin - ZmShifted = 0; - ZmSticky = ~ZZero; - - // If the Addend is shifted right - // | 54'b0 | 106'b(product) | 2'b0 | - // | addnend | - end else begin - ZmShifted = ZmPreshifted >> ACnt; - ZmSticky = |(ZmShifted[`NF-1:0]); - - end - end - - assign Am = ZmShifted[4*`NF+5:`NF]; - -endmodule - - - - - - - -module add( - input logic [3*`NF+5:0] Am, // aligned addend's mantissa for addition in U(NF+5.2NF+1) - input logic [2*`NF+1:0] Pm, // the product's mantissa - input logic Ps, As,// the product sign and the alligend addeded's sign (Modified Z sign for other opperations) - input logic KillProd, // should the product be set to 0 - input logic ZmSticky, - input logic [`NE-1:0] Ze, - input logic [`NE+1:0] Pe, - output logic [3*`NF+6:0] AmInv, // aligned addend possibly inverted - output logic [2*`NF+1:0] PmKilled, // the product's mantissa possibly killed - output logic NegSum, // was the sum negitive - output logic InvA, // do you invert the aligned addend - output logic Ss, - output logic [`NE+1:0] Se, - output logic [3*`NF+5:0] Sm // the positive sum -); - logic [3*`NF+6:0] PreSum, NegPreSum; // possibly negitive sum - - /////////////////////////////////////////////////////////////////////////////// - // Addition - 
/////////////////////////////////////////////////////////////////////////////// - - // Negate Z when doing one of the following opperations: - // -prod + Z - // prod - Z - assign InvA = As ^ Ps; - - // Choose an inverted or non-inverted addend - the one has to be added now for the LZA - assign AmInv = InvA ? {1'b1, ~Am} : {1'b0, Am}; - // Kill the product if the product is too small to effect the addition (determined in fma1.sv) - assign PmKilled = Pm&{2*`NF+2{~KillProd}}; - // Do the addition - // - calculate a positive and negitive sum in parallel - // Zsticky Psticky - // PreSum -1 = don't add 1 +1 = add 2 - // NegPreSum +1 = add 2 -1 = don't add 1 - // for NegPreSum the product is set to -1 whenever the product is killed, therefore add 1, 2 or 0 - assign PreSum = {{`NF+3{1'b0}}, PmKilled, 1'b0, InvA&ZmSticky&KillProd} + AmInv + {{3*`NF+6{1'b0}}, InvA&~((ZmSticky&~KillProd))}; - assign NegPreSum = {1'b0, Am} + {{`NF+3{1'b1}}, ~PmKilled, 2'b11} + {(3*`NF+5)'(0), ZmSticky&~KillProd, ~(ZmSticky)}; - - // Is the sum negitive - assign NegSum = PreSum[3*`NF+6]; - - // Choose the positive sum and accompanying LZA result. - assign Sm = NegSum ? NegPreSum[3*`NF+5:0] : PreSum[3*`NF+5:0]; - // is the result negitive - // if p - z is the Sum negitive - // if -p + z is the Sum positive - // if -p - z then the Sum is negitive - assign Ss = NegSum^Ps; //*** move to execute stage - assign Se = KillProd ? {2'b0, Ze} : Pe; -endmodule - - -module loa( // [Schmookler & Nowka, Leading zero anticipation and detection, IEEE Sym. 
Computer Arithmetic, 2001] - input logic [3*`NF+6:0] A, // addend - input logic [2*`NF+3:0] P, // product - output logic [$clog2(3*`NF+7)-1:0] SCnt // normalization shift count for the positive result - ); - - logic [3*`NF+6:0] T; - logic [3*`NF+6:0] G; - logic [3*`NF+6:0] Z; - logic [3*`NF+6:0] f; - - assign T[3*`NF+6:2*`NF+4] = A[3*`NF+6:2*`NF+4]; - assign G[3*`NF+6:2*`NF+4] = 0; - assign Z[3*`NF+6:2*`NF+4] = ~A[3*`NF+6:2*`NF+4]; - assign T[2*`NF+3:0] = A[2*`NF+3:0]^P; - assign G[2*`NF+3:0] = A[2*`NF+3:0]&P; - assign Z[2*`NF+3:0] = ~A[2*`NF+3:0]&~P; - - - // Apply function to determine Leading pattern - // - note: the paper linked above uses the numbering system where 0 is the most significant bit - //f[n] = ~T[n]&T[n-1] note: n is the MSB - //f[i] = (T[i+1]&(G[i]&~Z[i-1] | Z[i]&~G[i-1])) | (~T[i+1]&(Z[i]&~Z[i-1] | G[i]&~G[i-1])) - assign f[3*`NF+6] = ~T[3*`NF+6]&T[3*`NF+5]; - assign f[3*`NF+5:0] = (T[3*`NF+6:1]&(G[3*`NF+5:0]&{~Z[3*`NF+4:0], 1'b0} | Z[3*`NF+5:0]&{~G[3*`NF+4:0], 1'b1})) | (~T[3*`NF+6:1]&(Z[3*`NF+5:0]&{~Z[3*`NF+4:0], 1'b0} | G[3*`NF+5:0]&{~G[3*`NF+4:0], 1'b1})); - - - - lzc #(3*`NF+7) lzc (.num(f), .ZeroCnt(SCnt)); - -endmodule diff --git a/pipelined/src/fpu/fmaadd.sv b/pipelined/src/fpu/fmaadd.sv new file mode 100644 index 000000000..4b52208c6 --- /dev/null +++ b/pipelined/src/fpu/fmaadd.sv @@ -0,0 +1,83 @@ +/////////////////////////////////////////// +// +// Written: 6/23/2021 me@KatherineParry.com, David_Harris@hmc.edu +// Modified: +// +// Purpose: FMA significand adder +// +// A component of the Wally configurable RISC-V project. 
+// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// MIT LICENSE +// Permission is hereby granted, free of charge, to any person obtaining a copy of this +// software and associated documentation files (the "Software"), to deal in the Software +// without restriction, including without limitation the rights to use, copy, modify, merge, +// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons +// to whom the Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or +// substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. 
+//////////////////////////////////////////////////////////////////////////////////////////////// + +`include "wally-config.vh" + +module fmaadd( + input logic [3*`NF+5:0] Am, // aligned addend's mantissa for addition in U(NF+5.2NF+1) + input logic [2*`NF+1:0] Pm, // the product's mantissa + input logic Ps, As,// the product sign and the alligend addeded's sign (Modified Z sign for other opperations) + input logic KillProd, // should the product be set to 0 + input logic ZmSticky, + input logic [`NE-1:0] Ze, + input logic [`NE+1:0] Pe, + output logic [3*`NF+6:0] AmInv, // aligned addend possibly inverted + output logic [2*`NF+1:0] PmKilled, // the product's mantissa possibly killed + output logic NegSum, // was the sum negitive + output logic InvA, // do you invert the aligned addend + output logic Ss, + output logic [`NE+1:0] Se, + output logic [3*`NF+5:0] Sm // the positive sum +); + logic [3*`NF+6:0] PreSum, NegPreSum; // possibly negitive sum + + /////////////////////////////////////////////////////////////////////////////// + // Addition + /////////////////////////////////////////////////////////////////////////////// + + // Negate Z when doing one of the following opperations: + // -prod + Z + // prod - Z + assign InvA = As ^ Ps; + + // Choose an inverted or non-inverted addend - the one has to be added now for the LZA + assign AmInv = InvA ? 
{1'b1, ~Am} : {1'b0, Am}; + // Kill the product if the product is too small to effect the addition (determined in fma1.sv) + assign PmKilled = Pm&{2*`NF+2{~KillProd}}; + // Do the addition + // - calculate a positive and negitive sum in parallel + // Zsticky Psticky + // PreSum -1 = don't add 1 +1 = add 2 + // NegPreSum +1 = add 2 -1 = don't add 1 + // for NegPreSum the product is set to -1 whenever the product is killed, therefore add 1, 2 or 0 + assign PreSum = {{`NF+3{1'b0}}, PmKilled, 1'b0, InvA&ZmSticky&KillProd} + AmInv + {{3*`NF+6{1'b0}}, InvA&~((ZmSticky&~KillProd))}; + assign NegPreSum = {1'b0, Am} + {{`NF+3{1'b1}}, ~PmKilled, 2'b11} + {(3*`NF+5)'(0), ZmSticky&~KillProd, ~(ZmSticky)}; + + // Is the sum negitive + assign NegSum = PreSum[3*`NF+6]; + + // Choose the positive sum and accompanying LZA result. + assign Sm = NegSum ? NegPreSum[3*`NF+5:0] : PreSum[3*`NF+5:0]; + // is the result negitive + // if p - z is the Sum negitive + // if -p + z is the Sum positive + // if -p - z then the Sum is negitive + assign Ss = NegSum^Ps; //*** move to execute stage + assign Se = KillProd ? {2'b0, Ze} : Pe; +endmodule diff --git a/pipelined/src/fpu/fmaalign.sv b/pipelined/src/fpu/fmaalign.sv new file mode 100644 index 000000000..f7c849993 --- /dev/null +++ b/pipelined/src/fpu/fmaalign.sv @@ -0,0 +1,101 @@ + +/////////////////////////////////////////// +// +// Written: 6/23/2021 me@KatherineParry.com, David_Harris@hmc.edu +// Modified: +// +// Purpose: FMA alginment shift +// +// A component of the Wally configurable RISC-V project. 
+// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// MIT LICENSE +// Permission is hereby granted, free of charge, to any person obtaining a copy of this +// software and associated documentation files (the "Software"), to deal in the Software +// without restriction, including without limitation the rights to use, copy, modify, merge, +// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons +// to whom the Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or +// substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. 
+//////////////////////////////////////////////////////////////////////////////////////////////// + +`include "wally-config.vh" + +module fmaalign( + input logic [`NE-1:0] Xe, Ye, Ze, // biased exponents in B(NE.0) format + input logic [`NF:0] Zm, // significand in U(0.NF) format] + input logic XZero, YZero, ZZero, // is the input zero + output logic [3*`NF+5:0] Am, // addend aligned for addition in U(NF+5.2NF+1) + output logic ZmSticky, // Sticky bit calculated from the aliged addend + output logic KillProd // should the product be set to zero +); + + logic [`NE+1:0] ACnt; // how far to shift the addend to align with the product in Q(NE+2.0) format + logic [4*`NF+5:0] ZmShifted; // output of the alignment shifter including sticky bits U(NF+5.3NF+1) + logic [4*`NF+5:0] ZmPreshifted; // input to the alignment shifter U(NF+5.3NF+1) + logic KillZ; + + /////////////////////////////////////////////////////////////////////////////// + // Alignment shifter + /////////////////////////////////////////////////////////////////////////////// + + // determine the shift count for alignment + // - negitive means Z is larger, so shift Z left + // - positive means the product is larger, so shift Z right + // This could have been done using Pe, but ACnt is on the critical path so we replicate logic for speed + assign ACnt = {2'b0, Xe} + {2'b0, Ye} - {2'b0, (`NE)'(`BIAS)} + (`NE+2)'(`NF+3) - {2'b0, Ze}; + + // Defualt Addition without shifting + // | 54'b0 | 106'b(product) | 2'b0 | + // | addnend | + + // the 1'b0 before the added is because the product's mantissa has two bits before the binary point (xx.xxxxxxxxxx...) 
+ assign ZmPreshifted = {Zm,(3*`NF+5)'(0)}; + + assign KillProd = (ACnt[`NE+1]&~ZZero)|XZero|YZero; + assign KillZ = $signed(ACnt)>$signed((`NE+2)'(3)*(`NE+2)'(`NF)+(`NE+2)'(5)); + + always_comb + begin + + // If the product is too small to effect the sum, kill the product + + // | 54'b0 | 106'b(product) | 2'b0 | + // | addnend | + if (KillProd) begin + ZmShifted = {(`NF+3)'(0), Zm, (2*`NF+2)'(0)}; + ZmSticky = ~(XZero|YZero); + + // If the addend is too small to effect the addition + // - The addend has to shift two past the end of the addend to be considered too small + // - The 2 extra bits are needed for rounding + + // | 54'b0 | 106'b(product) | 2'b0 | + // | addnend | + end else if (KillZ) begin + ZmShifted = 0; + ZmSticky = ~ZZero; + + // If the Addend is shifted right + // | 54'b0 | 106'b(product) | 2'b0 | + // | addnend | + end else begin + ZmShifted = ZmPreshifted >> ACnt; + ZmSticky = |(ZmShifted[`NF-1:0]); + + end + end + + assign Am = ZmShifted[4*`NF+5:`NF]; + +endmodule + diff --git a/pipelined/src/fpu/fmaexpadd.sv b/pipelined/src/fpu/fmaexpadd.sv new file mode 100644 index 000000000..1d208327b --- /dev/null +++ b/pipelined/src/fpu/fmaexpadd.sv @@ -0,0 +1,42 @@ +/////////////////////////////////////////// +// +// Written: 6/23/2021 me@KatherineParry.com, David_Harris@hmc.edu +// Modified: +// +// Purpose: FMA exponent addition +// +// A component of the Wally configurable RISC-V project. 
+// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// MIT LICENSE +// Permission is hereby granted, free of charge, to any person obtaining a copy of this +// software and associated documentation files (the "Software"), to deal in the Software +// without restriction, including without limitation the rights to use, copy, modify, merge, +// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons +// to whom the Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or +// substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. 
+//////////////////////////////////////////////////////////////////////////////////////////////// + +`include "wally-config.vh" + +module fmaexpadd( + input logic [`FMTBITS-1:0] Fmt, // format of the output: single double half quad + input logic [`NE-1:0] Xe, Ye, // input's exponents + input logic XZero, YZero, // are the inputs zero + output logic [`NE+1:0] Pe // product's exponent B^(1023)NE+2 +); + + // kill the exponent if the product is zero - either X or Y is 0 + assign Pe = ({2'b0, Xe} + {2'b0, Ye} - {2'b0, (`NE)'(`BIAS)})&{`NE+2{~(XZero|YZero)}}; + +endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/fmalza.sv b/pipelined/src/fpu/fmalza.sv new file mode 100644 index 000000000..3baaf2a08 --- /dev/null +++ b/pipelined/src/fpu/fmalza.sv @@ -0,0 +1,62 @@ +/////////////////////////////////////////// +// +// Written: 6/23/2021 me@KatherineParry.com, David_Harris@hmc.edu +// Modified: +// +// Purpose: Leading Zero Anticipator +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// MIT LICENSE +// Permission is hereby granted, free of charge, to any person obtaining a copy of this +// software and associated documentation files (the "Software"), to deal in the Software +// without restriction, including without limitation the rights to use, copy, modify, merge, +// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons +// to whom the Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or +// substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +// PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. +//////////////////////////////////////////////////////////////////////////////////////////////// + +`include "wally-config.vh" + +module fmalza( // [Schmookler & Nowka, Leading zero anticipation and detection, IEEE Sym. Computer Arithmetic, 2001] + input logic [3*`NF+6:0] A, // addend + input logic [2*`NF+3:0] P, // product + output logic [$clog2(3*`NF+7)-1:0] SCnt // normalization shift count for the positive result + ); + + logic [3*`NF+6:0] T; + logic [3*`NF+6:0] G; + logic [3*`NF+6:0] Z; + logic [3*`NF+6:0] f; + + assign T[3*`NF+6:2*`NF+4] = A[3*`NF+6:2*`NF+4]; + assign G[3*`NF+6:2*`NF+4] = 0; + assign Z[3*`NF+6:2*`NF+4] = ~A[3*`NF+6:2*`NF+4]; + assign T[2*`NF+3:0] = A[2*`NF+3:0]^P; + assign G[2*`NF+3:0] = A[2*`NF+3:0]&P; + assign Z[2*`NF+3:0] = ~A[2*`NF+3:0]&~P; + + + // Apply function to determine Leading pattern + // - note: the paper linked above uses the numbering system where 0 is the most significant bit + //f[n] = ~T[n]&T[n-1] note: n is the MSB + //f[i] = (T[i+1]&(G[i]&~Z[i-1] | Z[i]&~G[i-1])) | (~T[i+1]&(Z[i]&~Z[i-1] | G[i]&~G[i-1])) + assign f[3*`NF+6] = ~T[3*`NF+6]&T[3*`NF+5]; + assign f[3*`NF+5:0] = (T[3*`NF+6:1]&(G[3*`NF+5:0]&{~Z[3*`NF+4:0], 1'b0} | Z[3*`NF+5:0]&{~G[3*`NF+4:0], 1'b1})) | (~T[3*`NF+6:1]&(Z[3*`NF+5:0]&{~Z[3*`NF+4:0], 1'b0} | G[3*`NF+5:0]&{~G[3*`NF+4:0], 1'b1})); + + + + lzc #(3*`NF+7) lzc (.num(f), .ZeroCnt(SCnt)); + +endmodule diff --git a/pipelined/src/fpu/fmamult.sv b/pipelined/src/fpu/fmamult.sv new file mode 100644 index 000000000..1e1b0981e --- /dev/null +++ b/pipelined/src/fpu/fmamult.sv @@ -0,0 +1,38 @@ +/////////////////////////////////////////// +// +// Written: 6/23/2021 me@KatherineParry.com, David_Harris@hmc.edu +// Modified: +// +// Purpose: FMA 
Significand Multiplier +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// MIT LICENSE +// Permission is hereby granted, free of charge, to any person obtaining a copy of this +// software and associated documentation files (the "Software"), to deal in the Software +// without restriction, including without limitation the rights to use, copy, modify, merge, +// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons +// to whom the Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or +// substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. +//////////////////////////////////////////////////////////////////////////////////////////////// + +`include "wally-config.vh" + +module fmamult( + input logic [`NF:0] Xm, Ym, + output logic [2*`NF+1:0] Pm +); + assign Pm = Xm * Ym; +endmodule + diff --git a/pipelined/src/fpu/fmasign.sv b/pipelined/src/fpu/fmasign.sv new file mode 100644 index 000000000..66c1af83a --- /dev/null +++ b/pipelined/src/fpu/fmasign.sv @@ -0,0 +1,47 @@ +/////////////////////////////////////////// +// +// Written: 6/23/2021 me@KatherineParry.com, David_Harris@hmc.edu +// Modified: +// +// Purpose: FMA Sign Logic +// +// A component of the Wally configurable RISC-V project. 
+// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// MIT LICENSE +// Permission is hereby granted, free of charge, to any person obtaining a copy of this +// software and associated documentation files (the "Software"), to deal in the Software +// without restriction, including without limitation the rights to use, copy, modify, merge, +// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons +// to whom the Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or +// substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. 
+//////////////////////////////////////////////////////////////////////////////////////////////// + +`include "wally-config.vh" + +module fmasign( + input logic [2:0] OpCtrl, // opperation contol + input logic Xs, Ys, Zs, // sign of the inputs + output logic Ps, // the product's sign - takes opperation into account + output logic As // aligned addend sign used in fma - takes opperation into account +); + + // Calculate the product's sign + // Negate product's sign if FNMADD or FNMSUB + + // flip is negation opperation + assign Ps = Xs ^ Ys ^ (OpCtrl[1]&~OpCtrl[2]); + // flip if subtraction + assign As = Zs^OpCtrl[0]; + +endmodule diff --git a/pipelined/testbench/tests.vh b/pipelined/testbench/tests.vh index 587733c39..fe3bd62f4 100644 --- a/pipelined/testbench/tests.vh +++ b/pipelined/testbench/tests.vh @@ -1902,7 +1902,8 @@ string imperas32f[] = '{ "rv32i_m/privilege/src/WALLY-gpio-01.S", "rv32i_m/privilege/src/WALLY-clint-01.S", "rv32i_m/privilege/src/WALLY-uart-01.S", - "rv32i_m/privilege/src/WALLY-plic-01.S" + "rv32i_m/privilege/src/WALLY-plic-01.S", + "rv32i_m/privilege/src/WALLY-plic-s-01.S" }; From 7f9b6014670f4f866c1f83e6c9d43a52fe22a239 Mon Sep 17 00:00:00 2001 From: David Harris Date: Mon, 1 Aug 2022 18:23:39 +0000 Subject: [PATCH 03/16] fmalza edits to match textbook --- pipelined/src/fpu/fma.sv | 2 +- pipelined/src/fpu/fmalza.sv | 33 ++++++++++++++------------------- 2 files changed, 15 insertions(+), 20 deletions(-) diff --git a/pipelined/src/fpu/fma.sv b/pipelined/src/fpu/fma.sv index fcf209f6b..5f595b1fc 100644 --- a/pipelined/src/fpu/fma.sv +++ b/pipelined/src/fpu/fma.sv @@ -85,6 +85,6 @@ module fma( fmaadd add(.Am, .Pm, .Ze, .Pe, .Ps, .As, .KillProd, .ZmSticky, .AmInv, .PmKilled, .NegSum, .InvA, .Sm, .Se, .Ss); - fmalza lza(.A(AmInv+{(3*`NF+6)'(0),InvA&~((ZmSticky&~KillProd))}), .P({PmKilled, 1'b0, InvA&Ps&ZmSticky&KillProd}), .SCnt); + fmalza lza(.A(AmInv+{(3*`NF+6)'(0),InvA&~((ZmSticky&~KillProd))}), .Pm({PmKilled, 1'b0, 
InvA&Ps&ZmSticky&KillProd}), .SCnt); endmodule diff --git a/pipelined/src/fpu/fmalza.sv b/pipelined/src/fpu/fmalza.sv index 3baaf2a08..a05084e2d 100644 --- a/pipelined/src/fpu/fmalza.sv +++ b/pipelined/src/fpu/fmalza.sv @@ -31,32 +31,27 @@ module fmalza( // [Schmookler & Nowka, Leading zero anticipation and detection, IEEE Sym. Computer Arithmetic, 2001] input logic [3*`NF+6:0] A, // addend - input logic [2*`NF+3:0] P, // product + input logic [2*`NF+3:0] Pm, // product output logic [$clog2(3*`NF+7)-1:0] SCnt // normalization shift count for the positive result ); + + localparam WIDTH = 3*`NF+7; - logic [3*`NF+6:0] T; - logic [3*`NF+6:0] G; - logic [3*`NF+6:0] Z; - logic [3*`NF+6:0] f; + logic [WIDTH-1:0] B, P, G, K, F; + logic [WIDTH-1:0] Pp1, Gm1, Km1; - assign T[3*`NF+6:2*`NF+4] = A[3*`NF+6:2*`NF+4]; - assign G[3*`NF+6:2*`NF+4] = 0; - assign Z[3*`NF+6:2*`NF+4] = ~A[3*`NF+6:2*`NF+4]; - assign T[2*`NF+3:0] = A[2*`NF+3:0]^P; - assign G[2*`NF+3:0] = A[2*`NF+3:0]&P; - assign Z[2*`NF+3:0] = ~A[2*`NF+3:0]&~P; + assign B = {{(`NF+3){1'b0}}, Pm}; // Zero extend product + assign P = A^B; + assign G = A&B; + assign K= ~A&~B; // Apply function to determine Leading pattern // - note: the paper linked above uses the numbering system where 0 is the most significant bit - //f[n] = ~T[n]&T[n-1] note: n is the MSB - //f[i] = (T[i+1]&(G[i]&~Z[i-1] | Z[i]&~G[i-1])) | (~T[i+1]&(Z[i]&~Z[i-1] | G[i]&~G[i-1])) - assign f[3*`NF+6] = ~T[3*`NF+6]&T[3*`NF+5]; - assign f[3*`NF+5:0] = (T[3*`NF+6:1]&(G[3*`NF+5:0]&{~Z[3*`NF+4:0], 1'b0} | Z[3*`NF+5:0]&{~G[3*`NF+4:0], 1'b1})) | (~T[3*`NF+6:1]&(Z[3*`NF+5:0]&{~Z[3*`NF+4:0], 1'b0} | G[3*`NF+5:0]&{~G[3*`NF+4:0], 1'b1})); + //f[n] = ~P[n]&P[n-1] note: n is the MSB + //f[i] = (P[i+1]&(G[i]&~K[i-1] | K[i]&~G[i-1])) | (~P[i+1]&(K[i]&~K[i-1] | G[i]&~G[i-1])) + assign F[WIDTH-1] = ~P[WIDTH-1]&P[WIDTH-2]; + assign F[WIDTH-2:0] = (P[3*`NF+6:1]&(G[3*`NF+5:0]&{~K[3*`NF+4:0], 1'b0} | K[3*`NF+5:0]&{~G[3*`NF+4:0], 1'b1})) | 
(~P[3*`NF+6:1]&(K[3*`NF+5:0]&{~K[3*`NF+4:0], 1'b0} | G[3*`NF+5:0]&{~G[3*`NF+4:0], 1'b1})); - - - lzc #(3*`NF+7) lzc (.num(f), .ZeroCnt(SCnt)); - + lzc #(3*`NF+7) lzc (.num(F), .ZeroCnt(SCnt)); endmodule From 3c08aabcd355cce36a39bad253e1f57a3bfc0464 Mon Sep 17 00:00:00 2001 From: David Harris Date: Mon, 1 Aug 2022 11:36:21 -0700 Subject: [PATCH 04/16] LZA refactoring --- pipelined/src/fpu/fma.sv | 2 +- pipelined/src/fpu/fmalza.sv | 26 ++++++++++++++++---------- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/pipelined/src/fpu/fma.sv b/pipelined/src/fpu/fma.sv index 5f595b1fc..e698cdaf2 100644 --- a/pipelined/src/fpu/fma.sv +++ b/pipelined/src/fpu/fma.sv @@ -85,6 +85,6 @@ module fma( fmaadd add(.Am, .Pm, .Ze, .Pe, .Ps, .As, .KillProd, .ZmSticky, .AmInv, .PmKilled, .NegSum, .InvA, .Sm, .Se, .Ss); - fmalza lza(.A(AmInv+{(3*`NF+6)'(0),InvA&~((ZmSticky&~KillProd))}), .Pm({PmKilled, 1'b0, InvA&Ps&ZmSticky&KillProd}), .SCnt); + fmalza lza(.A(AmInv), .Pm({PmKilled, 1'b0, InvA&Ps&ZmSticky&KillProd}), .Cin(InvA & ~(ZmSticky & ~KillProd)), .SCnt); endmodule diff --git a/pipelined/src/fpu/fmalza.sv b/pipelined/src/fpu/fmalza.sv index a05084e2d..e69ba73f4 100644 --- a/pipelined/src/fpu/fmalza.sv +++ b/pipelined/src/fpu/fmalza.sv @@ -30,28 +30,34 @@ `include "wally-config.vh" module fmalza( // [Schmookler & Nowka, Leading zero anticipation and detection, IEEE Sym. 
Computer Arithmetic, 2001] - input logic [3*`NF+6:0] A, // addend - input logic [2*`NF+3:0] Pm, // product - output logic [$clog2(3*`NF+7)-1:0] SCnt // normalization shift count for the positive result + input logic [3*`NF+6:0] A, // addend + input logic [2*`NF+3:0] Pm, // product + input logic Cin, // carry in + output logic [$clog2(3*`NF+7)-1:0] SCnt // normalization shift count for the positive result ); localparam WIDTH = 3*`NF+7; - logic [WIDTH-1:0] B, P, G, K, F; - logic [WIDTH-1:0] Pp1, Gm1, Km1; + logic [WIDTH-1:0] AA, B, P, G, K, F; + logic [WIDTH-2:0] Pp1, Gm1, Km1; assign B = {{(`NF+3){1'b0}}, Pm}; // Zero extend product + assign AA = A + Cin; - assign P = A^B; - assign G = A&B; - assign K= ~A&~B; + assign P = AA^B; + assign G = AA&B; + assign K= ~AA&~B; + assign Pp1 = P[WIDTH-1:1]; + assign Gm1 = {G[WIDTH-3:0], Cin}; + assign Km1 = {K[WIDTH-3:0], ~Cin}; + // Apply function to determine Leading pattern // - note: the paper linked above uses the numbering system where 0 is the most significant bit //f[n] = ~P[n]&P[n-1] note: n is the MSB //f[i] = (P[i+1]&(G[i]&~K[i-1] | K[i]&~G[i-1])) | (~P[i+1]&(K[i]&~K[i-1] | G[i]&~G[i-1])) assign F[WIDTH-1] = ~P[WIDTH-1]&P[WIDTH-2]; - assign F[WIDTH-2:0] = (P[3*`NF+6:1]&(G[3*`NF+5:0]&{~K[3*`NF+4:0], 1'b0} | K[3*`NF+5:0]&{~G[3*`NF+4:0], 1'b1})) | (~P[3*`NF+6:1]&(K[3*`NF+5:0]&{~K[3*`NF+4:0], 1'b0} | G[3*`NF+5:0]&{~G[3*`NF+4:0], 1'b1})); + assign F[WIDTH-2:0] = (Pp1&(G[3*`NF+5:0]&{~K[3*`NF+4:0], 1'b0} | K[3*`NF+5:0]&{~G[3*`NF+4:0], 1'b1})) | (~P[3*`NF+6:1]&(K[3*`NF+5:0]&{~K[3*`NF+4:0], 1'b0} | G[3*`NF+5:0]&{~G[3*`NF+4:0], 1'b1})); - lzc #(3*`NF+7) lzc (.num(F), .ZeroCnt(SCnt)); + lzc #(WIDTH) lzc (.num(F), .ZeroCnt(SCnt)); endmodule From 99462049e7998190cad96ff49234ee99c69634e8 Mon Sep 17 00:00:00 2001 From: David Harris Date: Mon, 1 Aug 2022 12:20:23 -0700 Subject: [PATCH 05/16] LZA refactoring switched to Pp1, Gm1, Km1 --- pipelined/src/fpu/fmalza.sv | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff 
--git a/pipelined/src/fpu/fmalza.sv b/pipelined/src/fpu/fmalza.sv index e69ba73f4..afffca472 100644 --- a/pipelined/src/fpu/fmalza.sv +++ b/pipelined/src/fpu/fmalza.sv @@ -42,11 +42,11 @@ module fmalza( // [Schmookler & Nowka, Leading zero anticipation and detection, logic [WIDTH-2:0] Pp1, Gm1, Km1; assign B = {{(`NF+3){1'b0}}, Pm}; // Zero extend product - assign AA = A + Cin; +// assign AA = A + Cin; - assign P = AA^B; - assign G = AA&B; - assign K= ~AA&~B; + assign P = A^B; + assign G = A&B; + assign K= ~A&~B; assign Pp1 = P[WIDTH-1:1]; assign Gm1 = {G[WIDTH-3:0], Cin}; @@ -57,7 +57,7 @@ module fmalza( // [Schmookler & Nowka, Leading zero anticipation and detection, //f[n] = ~P[n]&P[n-1] note: n is the MSB //f[i] = (P[i+1]&(G[i]&~K[i-1] | K[i]&~G[i-1])) | (~P[i+1]&(K[i]&~K[i-1] | G[i]&~G[i-1])) assign F[WIDTH-1] = ~P[WIDTH-1]&P[WIDTH-2]; - assign F[WIDTH-2:0] = (Pp1&(G[3*`NF+5:0]&{~K[3*`NF+4:0], 1'b0} | K[3*`NF+5:0]&{~G[3*`NF+4:0], 1'b1})) | (~P[3*`NF+6:1]&(K[3*`NF+5:0]&{~K[3*`NF+4:0], 1'b0} | G[3*`NF+5:0]&{~G[3*`NF+4:0], 1'b1})); + assign F[WIDTH-2:0] = (Pp1&(G[WIDTH-2:0]&~Km1 | K[WIDTH-2:0]&~Gm1)) | (~Pp1&(K[WIDTH-2:0]&~Km1 | G[WIDTH-2:0]&~Gm1)); lzc #(WIDTH) lzc (.num(F), .ZeroCnt(SCnt)); endmodule From b34d2065c392d60cc4b80bb2d5e4adba5b582fff Mon Sep 17 00:00:00 2001 From: David Harris Date: Mon, 1 Aug 2022 12:30:42 -0700 Subject: [PATCH 06/16] LZA cleanup --- pipelined/src/fpu/fmalza.sv | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pipelined/src/fpu/fmalza.sv b/pipelined/src/fpu/fmalza.sv index afffca472..b7b40091d 100644 --- a/pipelined/src/fpu/fmalza.sv +++ b/pipelined/src/fpu/fmalza.sv @@ -38,7 +38,8 @@ module fmalza( // [Schmookler & Nowka, Leading zero anticipation and detection, localparam WIDTH = 3*`NF+7; - logic [WIDTH-1:0] AA, B, P, G, K, F; + logic [WIDTH-1:0] B,F; + logic [WIDTH-1:0] P, G, K; logic [WIDTH-2:0] Pp1, Gm1, Km1; assign B = {{(`NF+3){1'b0}}, Pm}; // Zero extend product @@ -48,7 +49,7 @@ module fmalza( // 
[Schmookler & Nowka, Leading zero anticipation and detection, assign G = A&B; assign K= ~A&~B; - assign Pp1 = P[WIDTH-1:1]; + assign Pp1 = {A[WIDTH-1], P[WIDTH-2:1]}; assign Gm1 = {G[WIDTH-3:0], Cin}; assign Km1 = {K[WIDTH-3:0], ~Cin}; From 2869d67e50c06e8746d90c10a3b65e401141467a Mon Sep 17 00:00:00 2001 From: David Harris Date: Mon, 1 Aug 2022 12:34:00 -0700 Subject: [PATCH 07/16] more lza cleanup --- pipelined/src/fpu/fmalza.sv | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/pipelined/src/fpu/fmalza.sv b/pipelined/src/fpu/fmalza.sv index b7b40091d..f70b1bc93 100644 --- a/pipelined/src/fpu/fmalza.sv +++ b/pipelined/src/fpu/fmalza.sv @@ -39,15 +39,17 @@ module fmalza( // [Schmookler & Nowka, Leading zero anticipation and detection, localparam WIDTH = 3*`NF+7; logic [WIDTH-1:0] B,F; - logic [WIDTH-1:0] P, G, K; + logic [WIDTH-1:0] P, G; + logic [WIDTH-2:0] K; logic [WIDTH-2:0] Pp1, Gm1, Km1; assign B = {{(`NF+3){1'b0}}, Pm}; // Zero extend product -// assign AA = A + Cin; + // next steps***replace P[WIDTH-1] with sub, then remove one bit from A + assign P = A^B; - assign G = A&B; - assign K= ~A&~B; + assign G = A[WIDTH-2:0]&B[WIDTH-2:0]; + assign K= ~A[WIDTH-2:0]&~B[WIDTH-2:0]; assign Pp1 = {A[WIDTH-1], P[WIDTH-2:1]}; assign Gm1 = {G[WIDTH-3:0], Cin}; @@ -58,7 +60,7 @@ module fmalza( // [Schmookler & Nowka, Leading zero anticipation and detection, //f[n] = ~P[n]&P[n-1] note: n is the MSB //f[i] = (P[i+1]&(G[i]&~K[i-1] | K[i]&~G[i-1])) | (~P[i+1]&(K[i]&~K[i-1] | G[i]&~G[i-1])) assign F[WIDTH-1] = ~P[WIDTH-1]&P[WIDTH-2]; - assign F[WIDTH-2:0] = (Pp1&(G[WIDTH-2:0]&~Km1 | K[WIDTH-2:0]&~Gm1)) | (~Pp1&(K[WIDTH-2:0]&~Km1 | G[WIDTH-2:0]&~Gm1)); + assign F[WIDTH-2:0] = (Pp1&(G&~Km1 | K&~Gm1)) | (~Pp1&(K&~Km1 | G&~Gm1)); lzc #(WIDTH) lzc (.num(F), .ZeroCnt(SCnt)); endmodule From 8ff3a693af891c99945ebac02205a2da7bb92ba5 Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Mon, 1 Aug 2022 19:56:25 +0000 Subject: [PATCH 08/16] regression passes 
fpu tests --- pipelined/src/fpu/fpu.sv | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/pipelined/src/fpu/fpu.sv b/pipelined/src/fpu/fpu.sv index d98079b2e..4b7a1ffea 100755 --- a/pipelined/src/fpu/fpu.sv +++ b/pipelined/src/fpu/fpu.sv @@ -319,10 +319,23 @@ module fpu ( assign PreNVE = CmpNVE&(OpCtrlE[2]|FWriteIntE); // select the result that may be written to the integer register - to IEU + + logic [`FLEN-1:0] SgnExtXE; + generate + if(`FPSIZES == 1) + assign SgnExtXE = XE; + else if(`FPSIZES == 2) + mux2 #(`FLEN) sgnextmux ({{`FLEN-`LEN1{XsE}}, XE[`LEN1-1:0]}, XE, FmtE, SgnExtXE); + else if(`FPSIZES == 3 | `FPSIZES == 4) + mux4 #(`FLEN) fmulzeromux ({{`FLEN-`H_LEN{XsE}}, XE[`H_LEN-1:0]}, + {{`FLEN-`S_LEN{XsE}}, XE[`S_LEN-1:0]}, + {{`FLEN-`D_LEN{XsE}}, XE[`D_LEN-1:0]}, + XE, FmtE, SgnExtXE); // extend X with XsE from the selected format width up to FLEN + endgenerate if (`FLEN>`XLEN) - assign IntSrcXE = XE[`XLEN-1:0]; + assign IntSrcXE = SgnExtXE[`XLEN-1:0]; else - assign IntSrcXE = {{`XLEN-`FLEN{XE[`FLEN-1:0]}}, XE}; + assign IntSrcXE = {{`XLEN-`FLEN{XsE}}, SgnExtXE}; mux3 #(`XLEN) IntResMux (ClassResE, IntSrcXE, CmpIntResE, {~FResSelE[1], FResSelE[0]}, FIntResE); // *** DH 5/25/22: CvtRes will move to mem stage.
Premux in execute to save area, then make sure stalls are ok From d6b5e7a6ef60f3d1a45554319b932a904ebfd46f Mon Sep 17 00:00:00 2001 From: David Harris Date: Mon, 1 Aug 2022 15:37:09 -0700 Subject: [PATCH 09/16] lza cleanup --- pipelined/src/fpu/fma.sv | 3 ++- pipelined/src/fpu/fmalza.sv | 13 +++++++------ 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/pipelined/src/fpu/fma.sv b/pipelined/src/fpu/fma.sv index e698cdaf2..68a509677 100644 --- a/pipelined/src/fpu/fma.sv +++ b/pipelined/src/fpu/fma.sv @@ -85,6 +85,7 @@ module fma( fmaadd add(.Am, .Pm, .Ze, .Pe, .Ps, .As, .KillProd, .ZmSticky, .AmInv, .PmKilled, .NegSum, .InvA, .Sm, .Se, .Ss); - fmalza lza(.A(AmInv), .Pm({PmKilled, 1'b0, InvA&Ps&ZmSticky&KillProd}), .Cin(InvA & ~(ZmSticky & ~KillProd)), .SCnt); + fmalza lza(.A(AmInv), .Pm({PmKilled, 1'b0, InvA&Ps&ZmSticky&KillProd}), .Cin(InvA & ~(ZmSticky & ~KillProd)), .sub(InvA), .SCnt); endmodule + diff --git a/pipelined/src/fpu/fmalza.sv b/pipelined/src/fpu/fmalza.sv index f70b1bc93..9de1d745e 100644 --- a/pipelined/src/fpu/fmalza.sv +++ b/pipelined/src/fpu/fmalza.sv @@ -32,26 +32,27 @@ module fmalza( // [Schmookler & Nowka, Leading zero anticipation and detection, IEEE Sym. 
Computer Arithmetic, 2001] input logic [3*`NF+6:0] A, // addend input logic [2*`NF+3:0] Pm, // product - input logic Cin, // carry in + input logic Cin, // carry in + input logic sub, output logic [$clog2(3*`NF+7)-1:0] SCnt // normalization shift count for the positive result ); localparam WIDTH = 3*`NF+7; logic [WIDTH-1:0] B,F; - logic [WIDTH-1:0] P, G; - logic [WIDTH-2:0] K; + logic [WIDTH-2:0] P, G, K; logic [WIDTH-2:0] Pp1, Gm1, Km1; assign B = {{(`NF+3){1'b0}}, Pm}; // Zero extend product // next steps***replace P[WIDTH-1] with sub, then remove one bit from A - assign P = A^B; + assign P = A[WIDTH-2:0]^B[WIDTH-2:0]; assign G = A[WIDTH-2:0]&B[WIDTH-2:0]; assign K= ~A[WIDTH-2:0]&~B[WIDTH-2:0]; - assign Pp1 = {A[WIDTH-1], P[WIDTH-2:1]}; + assign Pp1 = {sub, P[WIDTH-2:1]}; +// assign Pp1 = {A[WIDTH-1], P[WIDTH-2:1]}; assign Gm1 = {G[WIDTH-3:0], Cin}; assign Km1 = {K[WIDTH-3:0], ~Cin}; @@ -59,7 +60,7 @@ module fmalza( // [Schmookler & Nowka, Leading zero anticipation and detection, // - note: the paper linked above uses the numbering system where 0 is the most significant bit //f[n] = ~P[n]&P[n-1] note: n is the MSB //f[i] = (P[i+1]&(G[i]&~K[i-1] | K[i]&~G[i-1])) | (~P[i+1]&(K[i]&~K[i-1] | G[i]&~G[i-1])) - assign F[WIDTH-1] = ~P[WIDTH-1]&P[WIDTH-2]; + assign F[WIDTH-1] = ~sub&P[WIDTH-2]; assign F[WIDTH-2:0] = (Pp1&(G&~Km1 | K&~Gm1)) | (~Pp1&(K&~Km1 | G&~Gm1)); lzc #(WIDTH) lzc (.num(F), .ZeroCnt(SCnt)); From c3e9719c991d2da019341cd46801decbcf8f8467 Mon Sep 17 00:00:00 2001 From: David Harris Date: Mon, 1 Aug 2022 15:40:12 -0700 Subject: [PATCH 10/16] lza cleanup --- pipelined/src/fpu/fmalza.sv | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/pipelined/src/fpu/fmalza.sv b/pipelined/src/fpu/fmalza.sv index 9de1d745e..c86459edb 100644 --- a/pipelined/src/fpu/fmalza.sv +++ b/pipelined/src/fpu/fmalza.sv @@ -39,14 +39,12 @@ module fmalza( // [Schmookler & Nowka, Leading zero anticipation and detection, localparam WIDTH = 3*`NF+7; - logic 
[WIDTH-1:0] B,F; - logic [WIDTH-2:0] P, G, K; + logic [WIDTH-1:0] F; + logic [WIDTH-2:0] B, P, G, K; logic [WIDTH-2:0] Pp1, Gm1, Km1; - assign B = {{(`NF+3){1'b0}}, Pm}; // Zero extend product + assign B = {{(`NF+2){1'b0}}, Pm}; // Zero extend product - // next steps***replace P[WIDTH-1] with sub, then remove one bit from A - assign P = A[WIDTH-2:0]^B[WIDTH-2:0]; assign G = A[WIDTH-2:0]&B[WIDTH-2:0]; assign K= ~A[WIDTH-2:0]&~B[WIDTH-2:0]; From f56b26ec400add075116f44f61c2e8b0f1399d05 Mon Sep 17 00:00:00 2001 From: David Harris Date: Mon, 1 Aug 2022 15:43:48 -0700 Subject: [PATCH 11/16] lza cleanup --- pipelined/src/fpu/fma.sv | 5 +++-- pipelined/src/fpu/fmalza.sv | 9 ++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/pipelined/src/fpu/fma.sv b/pipelined/src/fpu/fma.sv index 68a509677..dec492eba 100644 --- a/pipelined/src/fpu/fma.sv +++ b/pipelined/src/fpu/fma.sv @@ -84,8 +84,9 @@ module fma( // /////////////////////////////////////////////////////////////////////////////// fmaadd add(.Am, .Pm, .Ze, .Pe, .Ps, .As, .KillProd, .ZmSticky, .AmInv, .PmKilled, .NegSum, .InvA, .Sm, .Se, .Ss); - - fmalza lza(.A(AmInv), .Pm({PmKilled, 1'b0, InvA&Ps&ZmSticky&KillProd}), .Cin(InvA & ~(ZmSticky & ~KillProd)), .sub(InvA), .SCnt); + + + fmalza lza(.A(AmInv[3*`NF+5:0]), .Pm({PmKilled, 1'b0, InvA&Ps&ZmSticky&KillProd}), .Cin(InvA & ~(ZmSticky & ~KillProd)), .sub(InvA), .SCnt); endmodule diff --git a/pipelined/src/fpu/fmalza.sv b/pipelined/src/fpu/fmalza.sv index c86459edb..d70f0267c 100644 --- a/pipelined/src/fpu/fmalza.sv +++ b/pipelined/src/fpu/fmalza.sv @@ -30,7 +30,7 @@ `include "wally-config.vh" module fmalza( // [Schmookler & Nowka, Leading zero anticipation and detection, IEEE Sym. 
Computer Arithmetic, 2001] - input logic [3*`NF+6:0] A, // addend + input logic [3*`NF+5:0] A, // addend input logic [2*`NF+3:0] Pm, // product input logic Cin, // carry in input logic sub, @@ -45,12 +45,11 @@ module fmalza( // [Schmookler & Nowka, Leading zero anticipation and detection, assign B = {{(`NF+2){1'b0}}, Pm}; // Zero extend product - assign P = A[WIDTH-2:0]^B[WIDTH-2:0]; - assign G = A[WIDTH-2:0]&B[WIDTH-2:0]; - assign K= ~A[WIDTH-2:0]&~B[WIDTH-2:0]; + assign P = A[WIDTH-2:0]^B; + assign G = A[WIDTH-2:0]&B; + assign K= ~A[WIDTH-2:0]&~B; assign Pp1 = {sub, P[WIDTH-2:1]}; -// assign Pp1 = {A[WIDTH-1], P[WIDTH-2:1]}; assign Gm1 = {G[WIDTH-3:0], Cin}; assign Km1 = {K[WIDTH-3:0], ~Cin}; From 91597bba87da84e99c683edfbd30937db56720ed Mon Sep 17 00:00:00 2001 From: David Harris Date: Mon, 1 Aug 2022 15:47:03 -0700 Subject: [PATCH 12/16] lza cleanup --- pipelined/src/fpu/fmalza.sv | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/pipelined/src/fpu/fmalza.sv b/pipelined/src/fpu/fmalza.sv index d70f0267c..d71b398e7 100644 --- a/pipelined/src/fpu/fmalza.sv +++ b/pipelined/src/fpu/fmalza.sv @@ -37,28 +37,28 @@ module fmalza( // [Schmookler & Nowka, Leading zero anticipation and detection, output logic [$clog2(3*`NF+7)-1:0] SCnt // normalization shift count for the positive result ); - localparam WIDTH = 3*`NF+7; + localparam WIDTH = 3*`NF+6; - logic [WIDTH-1:0] F; - logic [WIDTH-2:0] B, P, G, K; - logic [WIDTH-2:0] Pp1, Gm1, Km1; + logic [WIDTH:0] F; + logic [WIDTH-1:0] B, P, G, K; + logic [WIDTH-1:0] Pp1, Gm1, Km1; assign B = {{(`NF+2){1'b0}}, Pm}; // Zero extend product - assign P = A[WIDTH-2:0]^B; - assign G = A[WIDTH-2:0]&B; - assign K= ~A[WIDTH-2:0]&~B; + assign P = A^B; + assign G = A&B; + assign K= ~A&~B; - assign Pp1 = {sub, P[WIDTH-2:1]}; - assign Gm1 = {G[WIDTH-3:0], Cin}; - assign Km1 = {K[WIDTH-3:0], ~Cin}; + assign Pp1 = {sub, P[WIDTH-1:1]}; + assign Gm1 = {G[WIDTH-2:0], Cin}; + assign Km1 = {K[WIDTH-2:0], 
~Cin}; // Apply function to determine Leading pattern // - note: the paper linked above uses the numbering system where 0 is the most significant bit //f[n] = ~P[n]&P[n-1] note: n is the MSB //f[i] = (P[i+1]&(G[i]&~K[i-1] | K[i]&~G[i-1])) | (~P[i+1]&(K[i]&~K[i-1] | G[i]&~G[i-1])) - assign F[WIDTH-1] = ~sub&P[WIDTH-2]; - assign F[WIDTH-2:0] = (Pp1&(G&~Km1 | K&~Gm1)) | (~Pp1&(K&~Km1 | G&~Gm1)); + assign F[WIDTH] = ~sub&P[WIDTH-1]; + assign F[WIDTH-1:0] = (Pp1&(G&~Km1 | K&~Gm1)) | (~Pp1&(K&~Km1 | G&~Gm1)); - lzc #(WIDTH) lzc (.num(F), .ZeroCnt(SCnt)); + lzc #(WIDTH+1) lzc (.num(F), .ZeroCnt(SCnt)); endmodule From 3b937b73fdade1a6b1bf2a36233aa912db2f4414 Mon Sep 17 00:00:00 2001 From: David Harris Date: Mon, 1 Aug 2022 16:01:02 -0700 Subject: [PATCH 13/16] lza cleanup --- pipelined/src/fpu/fmalza.sv | 2 -- 1 file changed, 2 deletions(-) diff --git a/pipelined/src/fpu/fmalza.sv b/pipelined/src/fpu/fmalza.sv index d71b398e7..fd180fbb6 100644 --- a/pipelined/src/fpu/fmalza.sv +++ b/pipelined/src/fpu/fmalza.sv @@ -55,8 +55,6 @@ module fmalza( // [Schmookler & Nowka, Leading zero anticipation and detection, // Apply function to determine Leading pattern // - note: the paper linked above uses the numbering system where 0 is the most significant bit - //f[n] = ~P[n]&P[n-1] note: n is the MSB - //f[i] = (P[i+1]&(G[i]&~K[i-1] | K[i]&~G[i-1])) | (~P[i+1]&(K[i]&~K[i-1] | G[i]&~G[i-1])) assign F[WIDTH] = ~sub&P[WIDTH-1]; assign F[WIDTH-1:0] = (Pp1&(G&~Km1 | K&~Gm1)) | (~Pp1&(K&~Km1 | G&~Gm1)); From 94fa7a00e7b28c1d9f32dd3d98e1579fe23917fe Mon Sep 17 00:00:00 2001 From: David Harris Date: Mon, 1 Aug 2022 16:13:16 -0700 Subject: [PATCH 14/16] Completed LZA simplificaiton --- pipelined/src/fpu/fma.sv | 4 ++-- pipelined/src/fpu/fmaadd.sv | 12 ++++++------ pipelined/src/fpu/fmalza.sv | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/pipelined/src/fpu/fma.sv b/pipelined/src/fpu/fma.sv index dec492eba..950b55ff1 100644 --- a/pipelined/src/fpu/fma.sv +++ 
b/pipelined/src/fpu/fma.sv @@ -51,7 +51,7 @@ module fma( logic [2*`NF+1:0] Pm; // the product's significand in U(2.2Nf) format logic [3*`NF+5:0] Am; // addend aligned's mantissa for addition in U(NF+5.2NF+1) - logic [3*`NF+6:0] AmInv; // aligned addend's mantissa possibly inverted + logic [3*`NF+5:0] AmInv; // aligned addend's mantissa possibly inverted logic [2*`NF+1:0] PmKilled; // the product's mantissa possibly killed /////////////////////////////////////////////////////////////////////////////// // Calculate the product @@ -86,7 +86,7 @@ module fma( fmaadd add(.Am, .Pm, .Ze, .Pe, .Ps, .As, .KillProd, .ZmSticky, .AmInv, .PmKilled, .NegSum, .InvA, .Sm, .Se, .Ss); - fmalza lza(.A(AmInv[3*`NF+5:0]), .Pm({PmKilled, 1'b0, InvA&Ps&ZmSticky&KillProd}), .Cin(InvA & ~(ZmSticky & ~KillProd)), .sub(InvA), .SCnt); + fmalza lza(.A(AmInv), .Pm({PmKilled, 1'b0, InvA&Ps&ZmSticky&KillProd}), .Cin(InvA & ~(ZmSticky & ~KillProd)), .sub(InvA), .SCnt); endmodule diff --git a/pipelined/src/fpu/fmaadd.sv b/pipelined/src/fpu/fmaadd.sv index 4b52208c6..56ce5a74e 100644 --- a/pipelined/src/fpu/fmaadd.sv +++ b/pipelined/src/fpu/fmaadd.sv @@ -37,7 +37,7 @@ module fmaadd( input logic ZmSticky, input logic [`NE-1:0] Ze, input logic [`NE+1:0] Pe, - output logic [3*`NF+6:0] AmInv, // aligned addend possibly inverted + output logic [3*`NF+5:0] AmInv, // aligned addend possibly inverted output logic [2*`NF+1:0] PmKilled, // the product's mantissa possibly killed output logic NegSum, // was the sum negitive output logic InvA, // do you invert the aligned addend @@ -45,7 +45,7 @@ module fmaadd( output logic [`NE+1:0] Se, output logic [3*`NF+5:0] Sm // the positive sum ); - logic [3*`NF+6:0] PreSum, NegPreSum; // possibly negitive sum + logic [3*`NF+5:0] PreSum, NegPreSum; // possibly negitive sum /////////////////////////////////////////////////////////////////////////////// // Addition @@ -57,7 +57,7 @@ module fmaadd( assign InvA = As ^ Ps; // Choose an inverted or non-inverted addend - the one 
has to be added now for the LZA - assign AmInv = InvA ? {1'b1, ~Am} : {1'b0, Am}; + assign AmInv = InvA ? ~Am : Am; // Kill the product if the product is too small to effect the addition (determined in fma1.sv) assign PmKilled = Pm&{2*`NF+2{~KillProd}}; // Do the addition @@ -66,11 +66,11 @@ module fmaadd( // PreSum -1 = don't add 1 +1 = add 2 // NegPreSum +1 = add 2 -1 = don't add 1 // for NegPreSum the product is set to -1 whenever the product is killed, therefore add 1, 2 or 0 - assign PreSum = {{`NF+3{1'b0}}, PmKilled, 1'b0, InvA&ZmSticky&KillProd} + AmInv + {{3*`NF+6{1'b0}}, InvA&~((ZmSticky&~KillProd))}; - assign NegPreSum = {1'b0, Am} + {{`NF+3{1'b1}}, ~PmKilled, 2'b11} + {(3*`NF+5)'(0), ZmSticky&~KillProd, ~(ZmSticky)}; + assign PreSum = {{`NF+2{1'b0}}, PmKilled, 1'b0, InvA&ZmSticky&KillProd} + AmInv + {{3*`NF+5{1'b0}}, InvA&~((ZmSticky&~KillProd))}; + assign NegPreSum = Am + {{`NF+2{1'b1}}, ~PmKilled, 2'b11} + {(3*`NF+4)'(0), ZmSticky&~KillProd, ~(ZmSticky)}; // Is the sum negitive - assign NegSum = PreSum[3*`NF+6]; + assign NegSum = PreSum[3*`NF+5]; // Choose the positive sum and accompanying LZA result. assign Sm = NegSum ? 
NegPreSum[3*`NF+5:0] : PreSum[3*`NF+5:0]; diff --git a/pipelined/src/fpu/fmalza.sv b/pipelined/src/fpu/fmalza.sv index fd180fbb6..65fe94266 100644 --- a/pipelined/src/fpu/fmalza.sv +++ b/pipelined/src/fpu/fmalza.sv @@ -39,7 +39,7 @@ module fmalza( // [Schmookler & Nowka, Leading zero anticipation and detection, localparam WIDTH = 3*`NF+6; - logic [WIDTH:0] F; + logic [WIDTH:0] F; logic [WIDTH-1:0] B, P, G, K; logic [WIDTH-1:0] Pp1, Gm1, Km1; From 7e4b04ff643624de43cdf64ea2144da46750324c Mon Sep 17 00:00:00 2001 From: David Harris Date: Mon, 1 Aug 2022 16:18:02 -0700 Subject: [PATCH 15/16] Parameterized fmalza --- pipelined/src/fpu/fma.sv | 4 ++-- pipelined/src/fpu/fmalza.sv | 8 +++----- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/pipelined/src/fpu/fma.sv b/pipelined/src/fpu/fma.sv index 950b55ff1..0106af7d5 100644 --- a/pipelined/src/fpu/fma.sv +++ b/pipelined/src/fpu/fma.sv @@ -85,8 +85,8 @@ module fma( fmaadd add(.Am, .Pm, .Ze, .Pe, .Ps, .As, .KillProd, .ZmSticky, .AmInv, .PmKilled, .NegSum, .InvA, .Sm, .Se, .Ss); - - fmalza lza(.A(AmInv), .Pm({PmKilled, 1'b0, InvA&Ps&ZmSticky&KillProd}), .Cin(InvA & ~(ZmSticky & ~KillProd)), .sub(InvA), .SCnt); + + fmalza #(3*`NF+6) lza(.A(AmInv), .Pm({PmKilled, 1'b0, InvA&Ps&ZmSticky&KillProd}), .Cin(InvA & ~(ZmSticky & ~KillProd)), .sub(InvA), .SCnt); endmodule diff --git a/pipelined/src/fpu/fmalza.sv b/pipelined/src/fpu/fmalza.sv index 65fe94266..8e92a5dc4 100644 --- a/pipelined/src/fpu/fmalza.sv +++ b/pipelined/src/fpu/fmalza.sv @@ -29,16 +29,14 @@ `include "wally-config.vh" -module fmalza( // [Schmookler & Nowka, Leading zero anticipation and detection, IEEE Sym. Computer Arithmetic, 2001] - input logic [3*`NF+5:0] A, // addend +module fmalza #(WIDTH) ( // [Schmookler & Nowka, Leading zero anticipation and detection, IEEE Sym. 
Computer Arithmetic, 2001] + input logic [WIDTH-1:0] A, // addend input logic [2*`NF+3:0] Pm, // product input logic Cin, // carry in input logic sub, - output logic [$clog2(3*`NF+7)-1:0] SCnt // normalization shift count for the positive result + output logic [$clog2(WIDTH+1)-1:0] SCnt // normalization shift count for the positive result ); - localparam WIDTH = 3*`NF+6; - logic [WIDTH:0] F; logic [WIDTH-1:0] B, P, G, K; logic [WIDTH-1:0] Pp1, Gm1, Km1; From 8147f7539952bcc3866af219c7e718f58a6bee26 Mon Sep 17 00:00:00 2001 From: David Harris Date: Mon, 1 Aug 2022 19:40:55 -0700 Subject: [PATCH 16/16] Fixed fmaadd to work with new LZA --- pipelined/src/fpu/fma.sv | 4 ++-- pipelined/src/fpu/fmaadd.sv | 14 +++++++------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/pipelined/src/fpu/fma.sv b/pipelined/src/fpu/fma.sv index dec492eba..950b55ff1 100644 --- a/pipelined/src/fpu/fma.sv +++ b/pipelined/src/fpu/fma.sv @@ -51,7 +51,7 @@ module fma( logic [2*`NF+1:0] Pm; // the product's significand in U(2.2Nf) format logic [3*`NF+5:0] Am; // addend aligned's mantissa for addition in U(NF+5.2NF+1) - logic [3*`NF+6:0] AmInv; // aligned addend's mantissa possibly inverted + logic [3*`NF+5:0] AmInv; // aligned addend's mantissa possibly inverted logic [2*`NF+1:0] PmKilled; // the product's mantissa possibly killed /////////////////////////////////////////////////////////////////////////////// // Calculate the product @@ -86,7 +86,7 @@ module fma( fmaadd add(.Am, .Pm, .Ze, .Pe, .Ps, .As, .KillProd, .ZmSticky, .AmInv, .PmKilled, .NegSum, .InvA, .Sm, .Se, .Ss); - fmalza lza(.A(AmInv[3*`NF+5:0]), .Pm({PmKilled, 1'b0, InvA&Ps&ZmSticky&KillProd}), .Cin(InvA & ~(ZmSticky & ~KillProd)), .sub(InvA), .SCnt); + fmalza lza(.A(AmInv), .Pm({PmKilled, 1'b0, InvA&Ps&ZmSticky&KillProd}), .Cin(InvA & ~(ZmSticky & ~KillProd)), .sub(InvA), .SCnt); endmodule diff --git a/pipelined/src/fpu/fmaadd.sv b/pipelined/src/fpu/fmaadd.sv index 4b52208c6..53ed023f8 100644 --- 
a/pipelined/src/fpu/fmaadd.sv +++ b/pipelined/src/fpu/fmaadd.sv @@ -37,7 +37,7 @@ module fmaadd( input logic ZmSticky, input logic [`NE-1:0] Ze, input logic [`NE+1:0] Pe, - output logic [3*`NF+6:0] AmInv, // aligned addend possibly inverted + output logic [3*`NF+5:0] AmInv, // aligned addend possibly inverted output logic [2*`NF+1:0] PmKilled, // the product's mantissa possibly killed output logic NegSum, // was the sum negitive output logic InvA, // do you invert the aligned addend @@ -45,7 +45,7 @@ module fmaadd( output logic [`NE+1:0] Se, output logic [3*`NF+5:0] Sm // the positive sum ); - logic [3*`NF+6:0] PreSum, NegPreSum; // possibly negitive sum + logic [3*`NF+5:0] PreSum, NegPreSum; // possibly negitive sum /////////////////////////////////////////////////////////////////////////////// // Addition @@ -57,7 +57,7 @@ module fmaadd( assign InvA = As ^ Ps; // Choose an inverted or non-inverted addend - the one has to be added now for the LZA - assign AmInv = InvA ? {1'b1, ~Am} : {1'b0, Am}; + assign AmInv = InvA ? 
~Am : Am; // Kill the product if the product is too small to effect the addition (determined in fma1.sv) assign PmKilled = Pm&{2*`NF+2{~KillProd}}; // Do the addition @@ -66,14 +66,14 @@ module fmaadd( // PreSum -1 = don't add 1 +1 = add 2 // NegPreSum +1 = add 2 -1 = don't add 1 // for NegPreSum the product is set to -1 whenever the product is killed, therefore add 1, 2 or 0 - assign PreSum = {{`NF+3{1'b0}}, PmKilled, 1'b0, InvA&ZmSticky&KillProd} + AmInv + {{3*`NF+6{1'b0}}, InvA&~((ZmSticky&~KillProd))}; - assign NegPreSum = {1'b0, Am} + {{`NF+3{1'b1}}, ~PmKilled, 2'b11} + {(3*`NF+5)'(0), ZmSticky&~KillProd, ~(ZmSticky)}; + assign {NegSum, PreSum} = {{`NF+3{1'b0}}, PmKilled, 1'b0, InvA&ZmSticky&KillProd} + {InvA, AmInv} + {{3*`NF+6{1'b0}}, InvA&~((ZmSticky&~KillProd))}; + assign NegPreSum = Am + {{`NF+2{1'b1}}, ~PmKilled, 2'b11} + {(3*`NF+4)'(0), ZmSticky&~KillProd, ~(ZmSticky)}; // Is the sum negitive - assign NegSum = PreSum[3*`NF+6]; +// assign NegSum = PreSum[3*`NF+6]; // Choose the positive sum and accompanying LZA result. - assign Sm = NegSum ? NegPreSum[3*`NF+5:0] : PreSum[3*`NF+5:0]; + assign Sm = NegSum ? NegPreSum : PreSum; // is the result negitive // if p - z is the Sum negitive // if -p + z is the Sum positive