diff --git a/pipelined/config/rv64fp/wally-config.vh b/pipelined/config/rv64fp/wally-config.vh index 8f13b2e3..b92bc07a 100644 --- a/pipelined/config/rv64fp/wally-config.vh +++ b/pipelined/config/rv64fp/wally-config.vh @@ -39,7 +39,7 @@ // MISA RISC-V configuration per specification // ZYXWVUTSRQPONMLKJIHGFEDCBA -`define MISA 32'b0000000000101000001000100101101 +`define MISA 32'b0000000000101000001000100100101 `define ZICSR_SUPPORTED 1 `define ZIFENCEI_SUPPORTED 1 `define COUNTERS 32 diff --git a/pipelined/regression/testfloat.do b/pipelined/regression/testfloat.do index db694869..4f7812a8 100644 --- a/pipelined/regression/testfloat.do +++ b/pipelined/regression/testfloat.do @@ -32,7 +32,7 @@ vlib work # start and run simulation # remove +acc flag for faster sim during regressions if there is no need to access internal signals # $num = the added words after the call -vlog +incdir+../config/$1 +incdir+../config/shared ../testbench/testbench-fp.sv ../src/fpu/*.sv ../srt/srt-radix4.sv ../src/generic/*.sv ../src/generic/flop/*.sv -suppress 2583,7063,8607,2697 +vlog +incdir+../config/$1 +incdir+../config/shared ../testbench/testbench-fp.sv ../src/fpu/*.sv ../src/generic/*.sv ../src/generic/flop/*.sv -suppress 2583,7063,8607,2697 vsim -voptargs=+acc work.testbenchfp -G TEST=$2 diff --git a/pipelined/regression/wave-fpu.do b/pipelined/regression/wave-fpu.do index 7dfec7e2..9e7ba49b 100644 --- a/pipelined/regression/wave-fpu.do +++ b/pipelined/regression/wave-fpu.do @@ -8,7 +8,8 @@ add wave -noupdate /testbenchfp/Z add wave -noupdate /testbenchfp/Res add wave -noupdate /testbenchfp/Ans add wave -noupdate /testbenchfp/DivStart -add wave -noupdate /testbenchfp/DivDone +add wave -noupdate /testbenchfp/DivBusy +add wave -noupdate /testbenchfp/srtfsm/state add wave -group {PostProc} -noupdate /testbenchfp/postprocess/* add wave -group {PostProc} -noupdate /testbenchfp/postprocess/resultselect/* add wave -group {PostProc} -noupdate /testbenchfp/postprocess/flags/* @@ -17,12 +18,13 @@ add wave -group {PostProc} -noupdate /testbenchfp/postprocess/lzacorrection/* add wave -group {PostProc} -noupdate /testbenchfp/postprocess/resultsign/* add wave -group {PostProc} -noupdate /testbenchfp/postprocess/round/* add wave -group {PostProc} -noupdate /testbenchfp/postprocess/fmashiftcalc/* +add wave -group {PostProc} -noupdate /testbenchfp/postprocess/divshiftcalc/* add wave -group {PostProc} -noupdate /testbenchfp/postprocess/cvtshiftcalc/* add wave -group {Divide} -noupdate /testbenchfp/srtradix4/* add wave -group {Divide} -noupdate /testbenchfp/srtradix4/qsel4/* add wave -group {Divide} -noupdate /testbenchfp/srtradix4/otfc4/* -add wave -group {Divide} -noupdate /testbenchfp/srtradix4/preproc/* -add wave -group {Divide} -noupdate /testbenchfp/srtradix4/earlytermination/* +add wave -group {Divide} -noupdate /testbenchfp/srtpreproc/* add wave -group {Divide} -noupdate /testbenchfp/srtradix4/expcalc/* +add wave -group {Divide} -noupdate /testbenchfp/srtfsm/* add wave -group {Testbench} -noupdate /testbenchfp/* add wave -group {Testbench} -noupdate /testbenchfp/readvectors/* diff --git a/pipelined/src/fpu/cvtshiftcalc.sv b/pipelined/src/fpu/cvtshiftcalc.sv index 1c7fbd03..9895c75b 100644 --- a/pipelined/src/fpu/cvtshiftcalc.sv +++ b/pipelined/src/fpu/cvtshiftcalc.sv @@ -1,11 +1,39 @@ +/////////////////////////////////////////// +// +// Written: me@KatherineParry.com +// Modified: 7/5/2022 +// +// Purpose: Conversion shift calculation +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// MIT LICENSE +// Permission is hereby granted, free of charge, to any person obtaining a copy of this +// software and associated documentation files (the "Software"), to deal in the Software +// without restriction, including without limitation the rights to use, copy, modify, merge, +// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons +// to whom the Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or +// substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. +//////////////////////////////////////////////////////////////////////////////////////////////// `include "wally-config.vh" module cvtshiftcalc( - input logic XZeroM, + input logic XZero, input logic ToInt, input logic IntToFp, input logic [`NE:0] CvtCalcExpM, // the calculated expoent - input logic [`NF:0] XManM, // input mantissas + input logic [`NF:0] Xm, // input mantissas input logic [`FMTBITS-1:0] OutFmt, // output format input logic [`CVTLEN-1:0] CvtLzcInM, // input to the Leading Zero Counter (priority encoder) input logic CvtResDenormUfM, @@ -32,8 +60,8 @@ module cvtshiftcalc( // - otherwise: // | LzcInM | 0's if nessisary | // change to int shift to the left one - assign CvtShiftIn = ToInt ? {{`XLEN{1'b0}}, XManM[`NF]&~CvtCalcExpM[`NE], XManM[`NF-1]|(CvtCalcExpM[`NE]&XManM[`NF]), XManM[`NF-2:0], {`CVTLEN-`XLEN{1'b0}}} : - CvtResDenormUfM ? {{`NF-1{1'b0}}, XManM, {`CVTLEN-`NF+1{1'b0}}} : + assign CvtShiftIn = ToInt ? {{`XLEN{1'b0}}, Xm[`NF]&~CvtCalcExpM[`NE], Xm[`NF-1]|(CvtCalcExpM[`NE]&Xm[`NF]), Xm[`NF-2:0], {`CVTLEN-`XLEN{1'b0}}} : + CvtResDenormUfM ? {{`NF-1{1'b0}}, Xm, {`CVTLEN-`NF+1{1'b0}}} : {CvtLzcInM, {`NF+1{1'b0}}}; @@ -65,6 +93,6 @@ module cvtshiftcalc( // determine if the result underflows ??? -> fp // - if the first 1 is shifted out of the result then the result underflows // - can't underflow an integer to fp conversions - assign CvtResUf = ($signed(CvtCalcExpM) < $signed({{`NE-$clog2(`NF){1'b1}}, ResNegNF}))&~XZeroM&~IntToFp; + assign CvtResUf = ($signed(CvtCalcExpM) < $signed({{`NE-$clog2(`NF){1'b1}}, ResNegNF}))&~XZero&~IntToFp; endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/divconv_pipe.sv b/pipelined/src/fpu/divconv_pipe.sv index 3562ad3d..80dc8c7e 100755 --- a/pipelined/src/fpu/divconv_pipe.sv +++ b/pipelined/src/fpu/divconv_pipe.sv @@ -97,7 +97,7 @@ module divconv_pipe (q1, qm1, qp1, q0, qm0, qp0, rega_out, regb_out, regc_out, r // R4 Booth TDM multiplier (carry/save) redundantmul #(60) bigmul(.a(mcand), .b(mplier), .out0(Sum), .out1(Carry)); // Q*D - N (reversed but changed in rounder.v to account for sign reversal) - csa #(120) csa1 (Sum, Carry, constant, Sum2, Carry2); + csa #(120) csa1 (Sum, Carry, constant, 1'b0, Sum2, Carry2); // Add ulp for subtraction in remainder mux2 #(1) mx7 (1'b0, 1'b1, sel_muxr, muxr_out); @@ -181,18 +181,18 @@ module divconv_pipe (q1, qm1, qp1, q0, qm0, qp0, rega_out, regb_out, regc_out, r endmodule // divconv // *** rewrote behaviorally dh 5 Jan 2021 for speed -module csa #(parameter WIDTH=8) ( - input logic [WIDTH-1:0] a, b, c, - output logic [WIDTH-1:0] sum, carry); +// module csa #(parameter WIDTH=8) ( +// input logic [WIDTH-1:0] a, b, c, +// output logic [WIDTH-1:0] sum, carry); - assign sum = a ^ b ^ c; - assign carry = (a & (b | c)) | (b & c); -/* - logic [WIDTH:0] carry_temp; - genvar i; - for (i=0;i> NF+1 Exp = DivCalcExp+NF+1 - // .00xxxxxxxxxxxxx... << DivCalcExp+NF+1 Exp = +1 + // 00000000x.xxxxxx... Exp = DivCalcExpM + // .00000000xxxxxxx... >> NF+1 Exp = DivCalcExpM+NF+1 + // .00xxxxxxxxxxxxx... << DivCalcExpM+NF+1 Exp = +1 // .0000xxxxxxxxxxx... >> 1 Exp = 1 - // Left shift amount = DivCalcExp+NF+1-1 - assign DivDenormShift = Nf+DivCalcExpM; + // Left shift amount = DivCalcExpM+NF+1-1 + assign DivDenormShift = (`NE+2)'(`NF)+DivCalcExpM; // if the result is normalized - // 00000000x.xxxxxx... Exp = DivCalcExp - // .00000000xxxxxxx... >> NF+1 Exp = DivCalcExp+NF+1 - // 00000000.xxxxxxx... << NF Exp = DivCalcExp+1 - // 00000000x.xxxxxx... << NF Exp = DivCalcExp (extra shift done afterwards) - // 00000000xx.xxxxx... << 1? Exp = DivCalcExp-1 (determined after) + // 00000000x.xxxxxx... Exp = DivCalcExpM + // .00000000xxxxxxx... >> NF+1 Exp = DivCalcExpM+NF+1 + // 00000000.xxxxxxx... << NF Exp = DivCalcExpM+1 + // 00000000x.xxxxxx... << NF Exp = DivCalcExpM (extra shift done afterwards) + // 00000000xx.xxxxx... << 1? Exp = DivCalcExpM-1 (determined after) // inital Left shift amount = NF - assign NormShift = Nf; + assign NormShift = (`NE+2)'(`NF); // if the shift amount is negitive then dont shift (keep sticky bit) assign DivShiftAmt = (DivResDenorm ? DivDenormShift[$clog2(`NORMSHIFTSZ)-1:0]&{$clog2(`NORMSHIFTSZ){~DivDenormShift[`NE+1]}} : NormShift[$clog2(`NORMSHIFTSZ)-1:0])+{{$clog2(`NORMSHIFTSZ)-$clog2(`DIVLEN/2+3)-1{1'b0}}, EarlyTermShiftDiv2M&{$clog2(`DIVLEN/2+3){~DivDenormShift[`NE+1]}}, 1'b0}; diff --git a/pipelined/src/fpu/divsqrt.sv b/pipelined/src/fpu/divsqrt.sv new file mode 100644 index 00000000..086b97d8 --- /dev/null +++ b/pipelined/src/fpu/divsqrt.sv @@ -0,0 +1,68 @@ +/////////////////////////////////////////// +// srt.sv +// +// Written: David_Harris@hmc.edu, me@KatherineParry.com, Cedar Turek +// Modified:13 January 2022 +// +// Purpose: Combined Divide and Square Root Floating Point and Integer Unit +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// MIT LICENSE +// Permission is hereby granted, free of charge, to any person obtaining a copy of this +// software and associated documentation files (the "Software"), to deal in the Software +// without restriction, including without limitation the rights to use, copy, modify, merge, +// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons +// to whom the Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or +// substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. +//////////////////////////////////////////////////////////////////////////////////////////////// + +`include "wally-config.vh" + +module divsqrt( + input logic clk, + input logic reset, + input logic [`FMTBITS-1:0] FmtE, + input logic [`NF:0] XManE, YManE, + input logic [`NE-1:0] XExpE, YExpE, + input logic XInfE, YInfE, + input logic XZeroE, YZeroE, + input logic XNaNE, YNaNE, + input logic DivStartE, + input logic StallM, + input logic StallE, + output logic DivStickyM, + output logic DivNegStickyM, + output logic DivBusy, + output logic DivDone, + output logic [`NE+1:0] DivCalcExpM, + output logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2M, + output logic [`DIVLEN+2:0] QuotM +// output logic [`XLEN-1:0] RemM, +); + + logic [`DIVLEN+3:0] WSN, WCN; + logic [`DIVLEN+3:0] WS, WC; + logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt; + logic [`DIVLEN-1:0] X; + logic [`DIVLEN-1:0] Dpreproc; + logic [$clog2(`DIVLEN/2+3)-1:0] Dur; + + srtpreproc srtpreproc(.XManE, .Dur, .YManE,.X,.Dpreproc, .XZeroCnt, .YZeroCnt); + + srtfsm srtfsm(.reset, .WSN, .WCN, .WS, .WC, .Dur, .DivBusy, .clk, .DivStart(DivStartE),.StallE, .StallM, .DivDone, .XZeroE, .YZeroE, .DivStickyE(DivStickyM), .XNaNE, .YNaNE, + .XInfE, .YInfE, .DivNegStickyE(DivNegStickyM), .EarlyTermShiftDiv2E(EarlyTermShiftDiv2M)); + srtradix4 srtradix4(.clk, .FmtE, .X,.Dpreproc, .XZeroCnt, .YZeroCnt, .WS, .WC, .WSN, .WCN, .DivStart(DivStartE), .XExpE, .YExpE, .XZeroE, .YZeroE, + .DivBusy, .Quot(QuotM), .Rem(), .DivCalcExpM); +endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/fclassify.sv b/pipelined/src/fpu/fclassify.sv index a1a934ff..6c7ab451 100644 --- a/pipelined/src/fpu/fclassify.sv +++ b/pipelined/src/fpu/fclassify.sv @@ -1,4 +1,31 @@ - +/////////////////////////////////////////// +// +// Written: me@KatherineParry.com +// Modified: 7/5/2022 +// +// Purpose: classify unit +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// MIT LICENSE +// Permission is hereby granted, free of charge, to any person obtaining a copy of this +// software and associated documentation files (the "Software"), to deal in the Software +// without restriction, including without limitation the rights to use, copy, modify, merge, +// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons +// to whom the Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or +// substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. +//////////////////////////////////////////////////////////////////////////////////////////////// `include "wally-config.vh" module fclassify ( diff --git a/pipelined/src/fpu/fcmp.sv b/pipelined/src/fpu/fcmp.sv index 3d8383ce..9c675784 100755 --- a/pipelined/src/fpu/fcmp.sv +++ b/pipelined/src/fpu/fcmp.sv @@ -1,4 +1,32 @@ +/////////////////////////////////////////// +// +// Written: me@KatherineParry.com +// Modified: 7/5/2022 +// +// Purpose: Comparison unit +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// MIT LICENSE +// Permission is hereby granted, free of charge, to any person obtaining a copy of this +// software and associated documentation files (the "Software"), to deal in the Software +// without restriction, including without limitation the rights to use, copy, modify, merge, +// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons +// to whom the Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or +// substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. +//////////////////////////////////////////////////////////////////////////////////////////////// `include "wally-config.vh" // FOpCtrlE values diff --git a/pipelined/src/fpu/fctrl.sv b/pipelined/src/fpu/fctrl.sv index f6ed650a..5c553e86 100755 --- a/pipelined/src/fpu/fctrl.sv +++ b/pipelined/src/fpu/fctrl.sv @@ -1,3 +1,31 @@ +/////////////////////////////////////////// +// +// Written: me@KatherineParry.com +// Modified: 7/5/2022 +// +// Purpose: control unit +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// MIT LICENSE +// Permission is hereby granted, free of charge, to any person obtaining a copy of this +// software and associated documentation files (the "Software"), to deal in the Software +// without restriction, including without limitation the rights to use, copy, modify, merge, +// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons +// to whom the Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or +// substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. +//////////////////////////////////////////////////////////////////////////////////////////////// `include "wally-config.vh" module fctrl ( diff --git a/pipelined/src/fpu/fcvt.sv b/pipelined/src/fpu/fcvt.sv index 20ae1de1..8c29d5ab 100644 --- a/pipelined/src/fpu/fcvt.sv +++ b/pipelined/src/fpu/fcvt.sv @@ -1,4 +1,33 @@ +/////////////////////////////////////////// +// +// Written: me@KatherineParry.com +// Modified: 7/5/2022 +// +// Purpose: Floating point conversions of configurable size +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// MIT LICENSE +// Permission is hereby granted, free of charge, to any person obtaining a copy of this +// software and associated documentation files (the "Software"), to deal in the Software +// without restriction, including without limitation the rights to use, copy, modify, merge, +// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons +// to whom the Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or +// substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. +//////////////////////////////////////////////////////////////////////////////////////////////// + `include "wally-config.vh" module fcvt ( diff --git a/pipelined/src/fpu/flags.sv b/pipelined/src/fpu/flags.sv index a425ad9b..fc080b18 100644 --- a/pipelined/src/fpu/flags.sv +++ b/pipelined/src/fpu/flags.sv @@ -1,12 +1,40 @@ +/////////////////////////////////////////// +// +// Written: me@KatherineParry.com +// Modified: 7/5/2022 +// +// Purpose: Post-Processing flag calculation +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// MIT LICENSE +// Permission is hereby granted, free of charge, to any person obtaining a copy of this +// software and associated documentation files (the "Software"), to deal in the Software +// without restriction, including without limitation the rights to use, copy, modify, merge, +// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons +// to whom the Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or +// substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. +//////////////////////////////////////////////////////////////////////////////////////////////// `include "wally-config.vh" module flags( - input logic XSgnM, + input logic Xs, input logic XSNaNM, YSNaNM, ZSNaNM, // inputs are signaling NaNs input logic XInfM, YInfM, ZInfM, // inputs are infinity input logic Plus1, input logic InfIn, // is a Inf input being used - input logic XZeroM, YZeroM, // inputs are zero + input logic XZero, YZero, // inputs are zero input logic XNaNM, YNaNM, // inputs are NaN input logic NaNIn, // is a NaN input being used input logic Sqrt, // Sqrt? @@ -108,7 +136,7 @@ module flags( // if the res is too small to be represented and not 0 // | and if the res is not invalid (outside the integer bounds) // | | - assign IntInexact = ((CvtCalcExpM[`NE]&~XZeroM)|Sticky|Round)&~IntInvalid; + assign IntInexact = ((CvtCalcExpM[`NE]&~XZero)|Sticky|Round)&~IntInvalid; // select the inexact flag to output assign Inexact = ToInt ? IntInexact : FpInexact; @@ -125,18 +153,18 @@ module flags( // | | | | or the res rounds up out of bounds // | | | | and the res didn't underflow // | | | | | - assign IntInvalid = XNaNM|XInfM|(ShiftGtIntSz&~FullResExp[`NE+1])|((XSgnM&~Signed)&(~((CvtCalcExpM[`NE]|(~|CvtCalcExpM))&~Plus1)))|(NegResMSBS[1]^NegResMSBS[0]); + assign IntInvalid = XNaNM|XInfM|(ShiftGtIntSz&~FullResExp[`NE+1])|((Xs&~Signed)&(~((CvtCalcExpM[`NE]|(~|CvtCalcExpM))&~Plus1)))|(NegResMSBS[1]^NegResMSBS[0]); // | // or when the positive res rounds up out of range assign SigNaN = (XSNaNM&~(IntToFp&CvtOp)) | (YSNaNM&~CvtOp) | (ZSNaNM&FmaOp); - assign FmaInvalid = ((XInfM | YInfM) & ZInfM & (PSgnM ^ ZSgnEffM) & ~XNaNM & ~YNaNM) | (XZeroM & YInfM) | (YZeroM & XInfM); - assign DivInvalid = ((XInfM & YInfM) | (XZeroM & YZeroM))&~Sqrt | (XSgnM&Sqrt); + assign FmaInvalid = ((XInfM | YInfM) & ZInfM & (PSgnM ^ ZSgnEffM) & ~XNaNM & ~YNaNM) | (XZero & YInfM) | (YZero & XInfM); + assign DivInvalid = ((XInfM & YInfM) | (XZero & YZero))&~Sqrt | (Xs&Sqrt); assign Invalid = SigNaN | (FmaInvalid&FmaOp) | (DivInvalid&DivOp); // if dividing by zero and not 0/0 // - don't set flag if an input is NaN or Inf(IEEE says has to be a finite numerator) - assign DivByZero = YZeroM&DivOp&~(XZeroM|NaNIn|InfIn); + assign DivByZero = YZero&DivOp&~(XZero|NaNIn|InfIn); // Combine flags // - to integer results do not set the underflow or overflow flags diff --git a/pipelined/src/fpu/fma.sv b/pipelined/src/fpu/fma.sv index 18b5a30d..57b053da 100644 --- a/pipelined/src/fpu/fma.sv +++ b/pipelined/src/fpu/fma.sv @@ -1,6 +1,6 @@ /////////////////////////////////////////// // -// Written: Katherine Parry, David Harris +// Written: me@KatherineParry.com, David Harris // Modified: 6/23/2021 // // Purpose: Floating point multiply-accumulate of configurable size @@ -33,23 +33,23 @@ module fma( input logic Xs, Ys, Zs, // input's signs input logic [`NE-1:0] Xe, Ye, Ze, // input's biased exponents in B(NE.0) format input logic [`NF:0] Xm, Ym, Zm, // input's significands in U(0.NF) format - input logic XZeroE, YZeroE, ZZeroE, // is the input zero - input logic [2:0] FOpCtrlE, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y) - input logic [`FMTBITS-1:0] FmtE, // precision 1 = double 0 = single + input logic XZero, YZero, ZZero, // is the input zero + input logic [2:0] FOpCtrl, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y) + input logic [`FMTBITS-1:0] Fmt, // format of the result single double half or quad output logic [`NE+1:0] Pe, // the product's exponent B(NE+2.0) format; adds 2 bits to allow for size of number and negative sign - output logic AddendStickyE, // sticky bit that is calculated during alignment - output logic KillProdE, // set the product to zero before addition if the product is too small to matter - output logic [3*`NF+5:0] Sm, // the positive sum - output logic NegSumE, // was the sum negitive - output logic InvA, // intert Z - output logic ZSgnEffE, // the modified Z sign + output logic ZmSticky, // sticky bit that is calculated during alignment + output logic KillProd, // set the product to zero before addition if the product is too small to matter + output logic [3*`NF+5:0] Sm, // the positive sum's significand + output logic NegSum, // was the sum negitive + output logic InvA, // Was A inverted for effective subtraction (P-A or -P+A) + output logic As, // the aligned addend's sign (modified Z sign for other opperations) output logic Ps, // the product's sign - output logic [$clog2(3*`NF+7)-1:0] FmaNormCntE // normalization shift cnt + output logic [$clog2(3*`NF+7)-1:0] NCnt // normalization shift count ); logic [2*`NF+1:0] Pm; // the product's significand in U(2.2Nf) format - logic [3*`NF+5:0] Am; // Z aligned for addition in U(NF+5.2NF+1) - logic [3*`NF+6:0] AmInv; // aligned addend possibly inverted + logic [3*`NF+5:0] Am; // addend aligned's mantissa for addition in U(NF+5.2NF+1) + logic [3*`NF+6:0] AmInv; // aligned addend's mantissa possibly inverted logic [2*`NF+1:0] PmKilled; // the product's mantissa possibly killed logic [3*`NF+6:0] PreSum, NegPreSum; // positive and negitve versions of the sum /////////////////////////////////////////////////////////////////////////////// @@ -62,7 +62,7 @@ module fma( // calculate the product's exponent - expadd expadd(.FmtE, .Xe, .Ye, .XZeroE, .YZeroE, .Pe); + expadd expadd(.Fmt, .Xe, .Ye, .XZero, .YZero, .Pe); // multiplication of the mantissa's mult mult(.Xm, .Ym, .Pm); @@ -71,31 +71,31 @@ module fma( // Alignment shifter /////////////////////////////////////////////////////////////////////////////// - align align(.Ze, .Zm, .XZeroE, .YZeroE, .ZZeroE, .Xe, .Ye, - .Am, .AddendStickyE, .KillProdE); + align align(.Ze, .Zm, .XZero, .YZero, .ZZero, .Xe, .Ye, + .Am, .ZmSticky, .KillProd); // calculate the signs and take the opperation into account - sign sign(.FOpCtrlE, .Xs, .Ys, .Zs, .Ps, .ZSgnEffE); + sign sign(.FOpCtrl, .Xs, .Ys, .Zs, .Ps, .As); // /////////////////////////////////////////////////////////////////////////////// // // Addition/LZA // /////////////////////////////////////////////////////////////////////////////// - add add(.Am, .Pm, .Ps, .ZSgnEffE, .KillProdE, .AmInv, .PmKilled, .NegSumE, .PreSum, .NegPreSum, .InvA, .XZeroE, .YZeroE, .Sm); + add add(.Am, .Pm, .Ps, .As, .KillProd, .AmInv, .PmKilled, .NegSum, .PreSum, .NegPreSum, .InvA, .XZero, .YZero, .Sm); - loa loa(.A(AmInv+{(3*`NF+6)'(0),InvA}), .P(PmKilled), .FmaNormCntE); + loa loa(.A(AmInv+{(3*`NF+6)'(0),InvA}), .P(PmKilled), .NCnt); endmodule module expadd( - input logic [`FMTBITS-1:0] FmtE, // precision - input logic [`NE-1:0] Xe, Ye, // input exponents - input logic XZeroE, YZeroE, // are the inputs zero + input logic [`FMTBITS-1:0] Fmt, // format of the output: single double half quad + input logic [`NE-1:0] Xe, Ye, // input's exponents + input logic XZero, YZero, // are the inputs zero output logic [`NE+1:0] Pe // product's exponent B^(1023)NE+2 ); // kill the exponent if the product is zero - either X or Y is 0 - assign Pe = ({2'b0, Xe} + {2'b0, Ye} - {2'b0, (`NE)'(`BIAS)})&{`NE+2{~(XZeroE|YZeroE)}}; + assign Pe = ({2'b0, Xe} + {2'b0, Ye} - {2'b0, (`NE)'(`BIAS)})&{`NE+2{~(XZero|YZero)}}; endmodule @@ -118,19 +118,19 @@ endmodule module sign( - input logic [2:0] FOpCtrlE, // precision - input logic Xs, Ys, Zs, // are the inputs denormalized + input logic [2:0] FOpCtrl, // opperation contol + input logic Xs, Ys, Zs, // sign of the inputs output logic Ps, // the product's sign - takes opperation into account - output logic ZSgnEffE // Z sign used in fma - takes opperation into account + output logic As // aligned addend sign used in fma - takes opperation into account ); // Calculate the product's sign // Negate product's sign if FNMADD or FNMSUB // flip is negation opperation - assign Ps = Xs ^ Ys ^ (FOpCtrlE[1]&~FOpCtrlE[2]); + assign Ps = Xs ^ Ys ^ (FOpCtrl[1]&~FOpCtrl[2]); // flip if subtraction - assign ZSgnEffE = Zs^FOpCtrlE[0]; + assign As = Zs^FOpCtrl[0]; endmodule @@ -143,16 +143,16 @@ endmodule module align( input logic [`NE-1:0] Xe, Ye, Ze, // biased exponents in B(NE.0) format - input logic [`NF:0] Zm, // fractions in U(0.NF) format] - input logic XZeroE, YZeroE, ZZeroE, // is the input zero - output logic [3*`NF+5:0] Am, // Z aligned for addition in U(NF+5.2NF+1) - output logic AddendStickyE, // Sticky bit calculated from the aliged addend - output logic KillProdE // should the product be set to zero + input logic [`NF:0] Zm, // significand in U(0.NF) format] + input logic XZero, YZero, ZZero, // is the input zero + output logic [3*`NF+5:0] Am, // addend aligned for addition in U(NF+5.2NF+1) + output logic ZmSticky, // Sticky bit calculated from the aliged addend + output logic KillProd // should the product be set to zero ); - logic [`NE+1:0] AlignCnt; // how far to shift the addend to align with the product in Q(NE+2.0) format - logic [4*`NF+5:0] ZManShifted; // output of the alignment shifter including sticky bits U(NF+5.3NF+1) - logic [4*`NF+5:0] ZManPreShifted; // input to the alignment shifter U(NF+5.3NF+1) + logic [`NE+1:0] ACnt; // how far to shift the addend to align with the product in Q(NE+2.0) format + logic [4*`NF+5:0] ZmShifted; // output of the alignment shifter including sticky bits U(NF+5.3NF+1) + logic [4*`NF+5:0] ZmPreshifted; // input to the alignment shifter U(NF+5.3NF+1) logic KillZ; /////////////////////////////////////////////////////////////////////////////// @@ -162,18 +162,18 @@ module align( // determine the shift count for alignment // - negitive means Z is larger, so shift Z left // - positive means the product is larger, so shift Z right - // This could have been done using Pe, but AlignCnt is on the critical path so we replicate logic for speed - assign AlignCnt = {2'b0, Xe} + {2'b0, Ye} - {2'b0, (`NE)'(`BIAS)} + (`NE+2)'(`NF+3) - {2'b0, Ze}; + // This could have been done using Pe, but ACnt is on the critical path so we replicate logic for speed + assign ACnt = {2'b0, Xe} + {2'b0, Ye} - {2'b0, (`NE)'(`BIAS)} + (`NE+2)'(`NF+3) - {2'b0, Ze}; // Defualt Addition without shifting // | 54'b0 | 106'b(product) | 2'b0 | // | addnend | // the 1'b0 before the added is because the product's mantissa has two bits before the binary point (xx.xxxxxxxxxx...) - assign ZManPreShifted = {Zm,(3*`NF+5)'(0)}; + assign ZmPreshifted = {Zm,(3*`NF+5)'(0)}; - assign KillProdE = AlignCnt[`NE+1]|XZeroE|YZeroE; - assign KillZ = $signed(AlignCnt)>$signed((`NE+2)'(3)*(`NE+2)'(`NF)+(`NE+2)'(5)); + assign KillProd = ACnt[`NE+1]|XZero|YZero; + assign KillZ = $signed(ACnt)>$signed((`NE+2)'(3)*(`NE+2)'(`NF)+(`NE+2)'(5)); always_comb begin @@ -182,9 +182,9 @@ module align( // | 54'b0 | 106'b(product) | 2'b0 | // | addnend | - if (KillProdE) begin - ZManShifted = ZManPreShifted; - AddendStickyE = ~(XZeroE|YZeroE); + if (KillProd) begin + ZmShifted = ZmPreshifted; + ZmSticky = ~(XZero|YZero); // If the addend is too small to effect the addition // - The addend has to shift two past the end of the addend to be considered too small @@ -193,20 +193,20 @@ module align( // | 54'b0 | 106'b(product) | 2'b0 | // | addnend | end else if (KillZ) begin - ZManShifted = 0; - AddendStickyE = ~ZZeroE; + ZmShifted = 0; + ZmSticky = ~ZZero; // If the Addend is shifted right // | 54'b0 | 106'b(product) | 2'b0 | // | addnend | end else begin - ZManShifted = ZManPreShifted >> AlignCnt; - AddendStickyE = |(ZManShifted[`NF-1:0]); + ZmShifted = ZmPreshifted >> ACnt; + ZmSticky = |(ZmShifted[`NF-1:0]); end end - assign Am = ZManShifted[4*`NF+5:`NF]; + assign Am = ZmShifted[4*`NF+5:`NF]; endmodule @@ -217,15 +217,15 @@ endmodule module add( - input logic [3*`NF+5:0] Am, // Z aligned for addition in U(NF+5.2NF+1) + input logic [3*`NF+5:0] Am, // aligned addend's mantissa for addition in U(NF+5.2NF+1) input logic [2*`NF+1:0] Pm, // the product's mantissa - input logic Ps, ZSgnEffE,// the product and modified Z signs - input logic KillProdE, // should the product be set to 0 - input logic XZeroE, YZeroE, // is the input zero + input logic Ps, As,// the product sign and the alligend addeded's sign (Modified Z sign for other opperations) + input logic KillProd, // should the product be set to 0 + input logic XZero, YZero, // is the input zero output logic [3*`NF+6:0] AmInv, // aligned addend possibly inverted output logic [2*`NF+1:0] PmKilled, // the product's mantissa possibly killed - output logic NegSumE, // was the sum negitive - output logic InvA, // do you invert Z + output logic NegSum, // was the sum negitive + output logic InvA, // do you invert the aligned addend output logic [3*`NF+5:0] Sm, // the positive sum output logic [3*`NF+6:0] PreSum, NegPreSum// possibly negitive sum ); @@ -237,12 +237,12 @@ module add( // Negate Z when doing one of the following opperations: // -prod + Z // prod - Z - assign InvA = ZSgnEffE ^ Ps; + assign InvA = As ^ Ps; // Choose an inverted or non-inverted addend - the one has to be added now for the LZA assign AmInv = InvA ? {1'b1, ~Am} : {1'b0, Am}; // Kill the product if the product is too small to effect the addition (determined in fma1.sv) - assign PmKilled = Pm&{2*`NF+2{~KillProdE}}; + assign PmKilled = Pm&{2*`NF+2{~KillProd}}; @@ -252,17 +252,17 @@ module add( assign NegPreSum = {1'b0, Am} + {{`NF+3{1'b1}}, ~PmKilled, 2'b0} + {(3*`NF+7)'(4)}; // Is the sum negitive - assign NegSumE = PreSum[3*`NF+6]; + assign NegSum = PreSum[3*`NF+6]; // Choose the positive sum and accompanying LZA result. - assign Sm = NegSumE ? NegPreSum[3*`NF+5:0] : PreSum[3*`NF+5:0]; + assign Sm = NegSum ? NegPreSum[3*`NF+5:0] : PreSum[3*`NF+5:0]; endmodule module loa( // [Schmookler & Nowka, Leading zero anticipation and detection, IEEE Sym. Computer Arithmetic, 2001] input logic [3*`NF+6:0] A, // addend input logic [2*`NF+1:0] P, // product - output logic [$clog2(3*`NF+7)-1:0] FmaNormCntE // normalization shift count for the positive result + output logic [$clog2(3*`NF+7)-1:0] NCnt // normalization shift count for the positive result ); logic [3*`NF+6:0] T; @@ -290,6 +290,6 @@ module loa( // [Schmookler & Nowka, Leading zero anticipation and detection, IEE - lzc #(3*`NF+7) lzc (.num(f), .ZeroCnt(FmaNormCntE)); + lzc #(3*`NF+7) lzc (.num(f), .ZeroCnt(NCnt)); endmodule diff --git a/pipelined/src/fpu/fmashiftcalc.sv b/pipelined/src/fpu/fmashiftcalc.sv index 727d26e0..8601297c 100644 --- a/pipelined/src/fpu/fmashiftcalc.sv +++ b/pipelined/src/fpu/fmashiftcalc.sv @@ -1,11 +1,39 @@ +/////////////////////////////////////////// +// +// Written: me@KatherineParry.com +// Modified: 7/5/2022 +// +// Purpose: Fma shift calculation +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// MIT LICENSE +// Permission is hereby granted, free of charge, to any person obtaining a copy of this +// software and associated documentation files (the "Software"), to deal in the Software +// without restriction, including without limitation the rights to use, copy, modify, merge, +// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons +// to whom the Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or +// substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. +//////////////////////////////////////////////////////////////////////////////////////////////// `include "wally-config.vh" module fmashiftcalc( input logic [3*`NF+5:0] SumM, // the positive sum - input logic [`NE-1:0] ZExpM, // exponent of Z + input logic [`NE-1:0] Ze, // exponent of Z input logic [`NE+1:0] ProdExpM, // X exponent + Y exponent - bias input logic [$clog2(3*`NF+7)-1:0] FmaNormCntM, // normalization shift count - input logic [`FMTBITS-1:0] FmtM, // precision 1 = double 0 = single + input logic [`FMTBITS-1:0] Fmt, // precision 1 = double 0 = single input logic KillProdM, // is the product set to zero input logic ZDenormM, output logic [`NE+1:0] ConvNormSumExp, // exponent of the normalized sum not taking into account denormal or zero results @@ -25,18 +53,18 @@ module fmashiftcalc( assign SumZero = ~(|SumM); // calculate the sum's exponent - assign NormSumExp = KillProdM ? {2'b0, ZExpM[`NE-1:1], ZExpM[0]&~ZDenormM} : ProdExpM + -{{`NE+2-$unsigned($clog2(3*`NF+7)){1'b0}}, FmaNormCntM} - 1 + (`NE+2)'(`NF+4); + assign NormSumExp = KillProdM ? {2'b0, Ze[`NE-1:1], Ze[0]&~ZDenormM} : ProdExpM + -{{`NE+2-$unsigned($clog2(3*`NF+7)){1'b0}}, FmaNormCntM} - 1 + (`NE+2)'(`NF+4); //convert the sum's exponent into the proper percision if (`FPSIZES == 1) begin assign ConvNormSumExp = NormSumExp; end else if (`FPSIZES == 2) begin - assign ConvNormSumExp = FmtM ? NormSumExp : (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS1))&{`NE+2{|NormSumExp}}; + assign ConvNormSumExp = Fmt ? NormSumExp : (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS1))&{`NE+2{|NormSumExp}}; end else if (`FPSIZES == 3) begin always_comb begin - case (FmtM) + case (Fmt) `FMT: ConvNormSumExp = NormSumExp; `FMT1: ConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS1))&{`NE+2{|NormSumExp}}; `FMT2: ConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS2))&{`NE+2{|NormSumExp}}; @@ -46,7 +74,7 @@ module fmashiftcalc( end else if (`FPSIZES == 4) begin always_comb begin - case (FmtM) + case (Fmt) 2'h3: ConvNormSumExp = NormSumExp; 2'h1: ConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`D_BIAS))&{`NE+2{|NormSumExp}}; 2'h0: ConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`S_BIAS))&{`NE+2{|NormSumExp}}; @@ -70,7 +98,7 @@ module fmashiftcalc( assign Sum0GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF)-(`NE+2)'(2)); assign Sum1LEZ = $signed(NormSumExp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1)); assign Sum1GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF1+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1)) | ~|NormSumExp; - assign PreResultDenorm = (FmtM ? Sum0LEZ : Sum1LEZ) & (FmtM ? Sum0GEFL : Sum1GEFL) & ~SumZero; + assign PreResultDenorm = (Fmt ? Sum0LEZ : Sum1LEZ) & (Fmt ? Sum0GEFL : Sum1GEFL) & ~SumZero; end else if (`FPSIZES == 3) begin logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL, Sum2LEZ, Sum2GEFL; @@ -81,7 +109,7 @@ module fmashiftcalc( assign Sum2LEZ = $signed(NormSumExp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`BIAS2)); assign Sum2GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF2+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`BIAS2)) | ~|NormSumExp; always_comb begin - case (FmtM) + case (Fmt) `FMT: PreResultDenorm = Sum0LEZ & Sum0GEFL & ~SumZero; `FMT1: PreResultDenorm = Sum1LEZ & Sum1GEFL & ~SumZero; `FMT2: PreResultDenorm = Sum2LEZ & Sum2GEFL & ~SumZero; @@ -100,7 +128,7 @@ module fmashiftcalc( assign Sum3LEZ = $signed(NormSumExp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`H_BIAS)); assign Sum3GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`H_NF+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`H_BIAS)) | ~|NormSumExp; always_comb begin - case (FmtM) + case (Fmt) 2'h3: PreResultDenorm = Sum0LEZ & Sum0GEFL & ~SumZero; 2'h1: PreResultDenorm = Sum1LEZ & Sum1GEFL & ~SumZero; 2'h0: PreResultDenorm = Sum2LEZ & Sum2GEFL & ~SumZero; diff --git a/pipelined/src/fpu/fpu.sv b/pipelined/src/fpu/fpu.sv index 7cf10901..99c9e305 100755 --- a/pipelined/src/fpu/fpu.sv +++ b/pipelined/src/fpu/fpu.sv @@ -1,6 +1,6 @@ /////////////////////////////////////////// // -// Written: Katherine Parry, James Stine, Brett Mathis +// Written: me@KatherineParry.com, James Stine, Brett Mathis // Modified: 6/23/2021 // // Purpose: FPU @@ -125,11 +125,12 @@ module fpu ( logic [`CVTLEN-1:0] CvtLzcInE, CvtLzcInM; // input to the Leading Zero Counter (priority encoder) //divide signals - logic [`DIVLEN+2:0] Quot; - logic [`NE+1:0] DivCalcExpM; - logic DivNegStickyM; - logic DivStickyM; - logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2M; + logic [`DIVLEN+2:0] QuotE, QuotM; + logic [`NE+1:0] DivCalcExpE, DivCalcExpM; + logic DivNegStickyE, DivNegStickyM; + logic DivStickyE, DivStickyM; + logic DivDoneM; + logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2E, EarlyTermShiftDiv2M; // result and flag signals logic [63:0] FDivResM, FDivResW; // divide/squareroot result @@ -185,9 +186,10 @@ module fpu ( flopenrc #(`FLEN) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E); flopenrc #(15) DEAdrReg(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]}, {Adr1E, Adr2E, Adr3E}); - flopenrc #(13+int'(`FMTBITS)) DECtrlReg3(clk, reset, FlushE, ~StallE, - {FRegWriteD, PostProcSelD, FResSelD, FrmD, FmtD, FOpCtrlD, FWriteIntD, FDivStartD}, - {FRegWriteE, PostProcSelE, FResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE, FDivStartE}); + flopenrc #(12+`FMTBITS) DECtrlReg3(clk, reset, FlushE, ~StallE, + {FRegWriteD, PostProcSelD, FResSelD, FrmD, FmtD, FOpCtrlD, FWriteIntD}, + {FRegWriteE, PostProcSelE, FResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE}); + flopenrc #(1) DEDivStartReg(clk, reset, FlushE, ~StallE|FDivBusyE, FDivStartD, FDivStartE); // EXECUTION STAGE @@ -249,37 +251,45 @@ module fpu ( .XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE, .XExpMaxE); // fma - does multiply, add, and multiply-add instructions - fma fma (.Xs(XSgnE), .Ys(YSgnE), .Zs(ZSgnE), .Xe(XExpE), .Ye(YExpE), .Ze(ZExpE), - .Xm(XManE), .Ym(YManE), .Zm(ZManE), .XZeroE, .YZeroE, .ZZeroE, - .FOpCtrlE, .FmtE, .Sm(SumE), .NegSumE, .InvA(InvAE), .FmaNormCntE, - .ZSgnEffE, .Ps(PSgnE), .Pe(ProdExpE), .AddendStickyE, .KillProdE); - - // fpdivsqrt using Goldschmidt's iteration - if(`FLEN == 64) begin - flopenrc #(64) reg_input1 (.d({FSrcXE[63:0]}), .q(DivInput1E), - .clear(FDivSqrtDoneE), .en(load_preload), - .reset(reset), .clk(clk)); - flopenrc #(64) reg_input2 (.d({FSrcYE[63:0]}), .q(DivInput2E), - .clear(FDivSqrtDoneE), .en(load_preload), - .reset(reset), .clk(clk)); - end - else if (`FLEN == 32) begin - flopenrc #(64) reg_input1 (.d({32'b0, FSrcXE[31:0]}), .q(DivInput1E), - .clear(FDivSqrtDoneE), .en(load_preload), - .reset(reset), .clk(clk)); - flopenrc #(64) reg_input2 (.d({32'b0, FSrcYE[31:0]}), .q(DivInput2E), - .clear(FDivSqrtDoneE), .en(load_preload), - .reset(reset), .clk(clk)); - end - flopenrc #(8) reg_input3 (.d({XNaNE, YNaNE, XInfE, YInfE, XZeroE, YZeroE, FmtE[0], FOpCtrlE[0]}), - .q({XNaNQ, YNaNQ, XInfQ, YInfQ, XZeroQ, YZeroQ, FmtQ, FOpCtrlQ}), - .clear(FDivSqrtDoneE), .en(load_preload), - .reset(reset), .clk(clk)); - fpdiv_pipe fdivsqrt (.op1(DivInput1E[63:0]), .op2(DivInput2E[63:0]), .rm(FrmE[1:0]), .op_type(FOpCtrlQ), - .reset, .clk(clk), .start(FDivStartE), .P(~FmtQ), .OvEn(1'b1), .UnEn(1'b1), - .XNaNQ, .YNaNQ, .XInfQ, .YInfQ, .XZeroQ, .YZeroQ, .load_preload, - .FDivBusyE, .done(FDivSqrtDoneE), .AS_Result(FDivResM), .Flags(FDivFlgM)); + fma fma (.Xs(XSgnE), .Ys(YSgnE), .Zs(ZSgnE), + .Xe(XExpE), .Ye(YExpE), .Ze(ZExpE), + .Xm(XManE), .Ym(YManE), .Zm(ZManE), + .XZero(XZeroE), .YZero(YZeroE), .ZZero(ZZeroE), + .FOpCtrl(FOpCtrlE), .Fmt(FmtE), + .As(ZSgnEffE), .Ps(PSgnE), + .Sm(SumE), .Pe(ProdExpE), + .NegSum(NegSumE), .InvA(InvAE), .NCnt(FmaNormCntE), + .ZmSticky(AddendStickyE), .KillProd(KillProdE)); + // // fpdivsqrt using Goldschmidt's iteration + // if(`FLEN == 64) begin + // flopenrc #(64) reg_input1 (.d({FSrcXE[63:0]}), .q(DivInput1E), + // .clear(FDivSqrtDoneE), .en(load_preload), + // .reset(reset), .clk(clk)); + // flopenrc #(64) reg_input2 (.d({FSrcYE[63:0]}), .q(DivInput2E), + // .clear(FDivSqrtDoneE), .en(load_preload), + // .reset(reset), .clk(clk)); + // end + // else if (`FLEN == 32) begin + // flopenrc #(64) reg_input1 (.d({32'b0, FSrcXE[31:0]}), .q(DivInput1E), + // .clear(FDivSqrtDoneE), .en(load_preload), + // .reset(reset), .clk(clk)); + // flopenrc #(64) reg_input2 (.d({32'b0, FSrcYE[31:0]}), .q(DivInput2E), + // .clear(FDivSqrtDoneE), .en(load_preload), + // .reset(reset), .clk(clk)); + // end + // flopenrc #(8) reg_input3 (.d({XNaNE, YNaNE, XInfE, YInfE, XZeroE, YZeroE, FmtE[0], FOpCtrlE[0]}), + // .q({XNaNQ, YNaNQ, XInfQ, YInfQ, XZeroQ, YZeroQ, FmtQ, FOpCtrlQ}), + // .clear(FDivSqrtDoneE), .en(load_preload), + // .reset(reset), .clk(clk)); + // fpdiv_pipe fdivsqrt (.op1(DivInput1E[63:0]), .op2(DivInput2E[63:0]), .rm(FrmE[1:0]), .op_type(FOpCtrlQ), + // .reset, .clk(clk), .start(FDivStartE), .P(~FmtQ), .OvEn(1'b1), .UnEn(1'b1), + // .XNaNQ, .YNaNQ, .XInfQ, .YInfQ, .XZeroQ, .YZeroQ, .load_preload, + // .FDivBusyE, .done(FDivSqrtDoneE), .AS_Result(FDivResM), .Flags(FDivFlgM)); + divsqrt divsqrt(.clk, .reset, .FmtE, .XManE, .YManE, .XExpE, .YExpE, + .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .DivStartE(FDivStartE), + .StallE, .StallM, .DivStickyM, .DivNegStickyM, .DivBusy(FDivBusyE), .DivCalcExpM, //***change divbusyE to M signal + .EarlyTermShiftDiv2M, .QuotM, .DivDone(DivDoneM)); // other FP execution units fcmp fcmp (.FmtE, .FOpCtrlE, .XSgnE, .YSgnE, .XExpE, .YExpE, .XManE, .YManE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .XSNaNE, .YSNaNE, .FSrcXE, .FSrcYE, .CmpNVE, .CmpFpResE, .CmpIntResE); @@ -371,10 +381,10 @@ module fpu ( assign FpLoadStoreM = FResSelM[1]; - postprocess postprocess(.XSgnM, .YSgnM, .ZExpM, .XManM, .YManM, .ZManM, .FrmM, .FmtM, .ProdExpM, .EarlyTermShiftDiv2M, - .AddendStickyM, .KillProdM, .XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, .Quot, - .ZInfM, .XNaNM, .YNaNM, .ZNaNM, .XSNaNM, .YSNaNM, .ZSNaNM, .SumM, .DivCalcExpM, - .NegSumM, .InvZM(InvAM), .ZDenormM, .ZSgnEffM, .PSgnM, .FOpCtrlM, .FmaNormCntM, .DivNegStickyM, + postprocess postprocess(.Xs(XSgnM), .Ys(YSgnM), .Ze(ZExpM), .Xm(XManM), .Ym(YManM), .Zm(ZManM), .Frm(FrmM), .Fmt(FmtM), .ProdExpM, .EarlyTermShiftDiv2M, + .AddendStickyM, .KillProdM, .XZero(XZeroM), .YZero(YZeroM), .ZZero(ZZeroM), .XInfM, .YInfM, .Quot(QuotM), + .ZInfM, .XNaNM, .YNaNM, .ZNaNM, .XSNaNM, .YSNaNM, .ZSNaNM, .SumM, .DivCalcExpM, .DivDone(DivDoneM), + .NegSumM, .InvZM(InvAM), .ZDenormM, .ZSgnEffM, .PSgnM, .FOpCtrl(FOpCtrlM), .FmaNormCntM, .DivNegStickyM, .CvtCalcExpM, .CvtResDenormUfM,.CvtShiftAmtM, .CvtResSgnM, .FWriteIntM, .DivStickyM, .CvtLzcInM, .IntZeroM, .PostProcSelM, .PostProcResM, .PostProcFlgM, .FCvtIntResM); diff --git a/pipelined/src/fpu/fsgninj.sv b/pipelined/src/fpu/fsgninj.sv index 0d08d31f..17d15669 100755 --- a/pipelined/src/fpu/fsgninj.sv +++ b/pipelined/src/fpu/fsgninj.sv @@ -1,6 +1,6 @@ /////////////////////////////////////////// // -// Written: Katherine Parry +// Written: me@KatherineParry.com // Modified: 6/23/2021 // // Purpose: FPU Sign Injection instructions diff --git a/pipelined/src/fpu/lzacorrection.sv b/pipelined/src/fpu/lzacorrection.sv index a7a8143e..d1233393 100644 --- a/pipelined/src/fpu/lzacorrection.sv +++ b/pipelined/src/fpu/lzacorrection.sv @@ -1,3 +1,31 @@ +/////////////////////////////////////////// +// +// Written: me@KatherineParry.com +// Modified: 7/5/2022 +// +// Purpose: shift correction +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// MIT LICENSE +// Permission is hereby granted, free of charge, to any person obtaining a copy of this +// software and associated documentation files (the "Software"), to deal in the Software +// without restriction, including without limitation the rights to use, copy, modify, merge, +// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons +// to whom the Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or +// substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. +//////////////////////////////////////////////////////////////////////////////////////////////// `include "wally-config.vh" module lzacorrection( diff --git a/pipelined/src/fpu/negateintres.sv b/pipelined/src/fpu/negateintres.sv index 2dee1f18..318c04da 100644 --- a/pipelined/src/fpu/negateintres.sv +++ b/pipelined/src/fpu/negateintres.sv @@ -1,7 +1,35 @@ +/////////////////////////////////////////// +// +// Written: me@KatherineParry.com +// Modified: 7/5/2022 +// +// Purpose: Negate integer result +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// MIT LICENSE +// Permission is hereby granted, free of charge, to any person obtaining a copy of this +// software and associated documentation files (the "Software"), to deal in the Software +// without restriction, including without limitation the rights to use, copy, modify, merge, +// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons +// to whom the Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or +// substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. +//////////////////////////////////////////////////////////////////////////////////////////////// `include "wally-config.vh" module negateintres( - input logic XSgnM, + input logic Xs, input logic [`NORMSHIFTSZ-1:0] Shifted, input logic Signed, input logic Int64, @@ -12,7 +40,7 @@ module negateintres( // round and negate the positive res if needed - assign NegRes = XSgnM ? -({2'b0, Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`XLEN]}+{{`XLEN+1{1'b0}}, Plus1}) : {2'b0, Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`XLEN]}+{{`XLEN+1{1'b0}}, Plus1}; + assign NegRes = Xs ? -({2'b0, Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`XLEN]}+{{`XLEN+1{1'b0}}, Plus1}) : {2'b0, Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`XLEN]}+{{`XLEN+1{1'b0}}, Plus1}; assign NegResMSBS = Signed ? Int64 ? NegRes[`XLEN:`XLEN-1] : NegRes[32:31] : Int64 ? NegRes[`XLEN+1:`XLEN] : NegRes[33:32]; diff --git a/pipelined/src/fpu/normshift.sv b/pipelined/src/fpu/normshift.sv index aa694228..f382eed3 100644 --- a/pipelined/src/fpu/normshift.sv +++ b/pipelined/src/fpu/normshift.sv @@ -1,3 +1,31 @@ +/////////////////////////////////////////// +// +// Written: me@KatherineParry.com +// Modified: 7/5/2022 +// +// Purpose: normalization shifter +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// MIT LICENSE +// Permission is hereby granted, free of charge, to any person obtaining a copy of this +// software and associated documentation files (the "Software"), to deal in the Software +// without restriction, including without limitation the rights to use, copy, modify, merge, +// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons +// to whom the Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or +// substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. +//////////////////////////////////////////////////////////////////////////////////////////////// `include "wally-config.vh" diff --git a/pipelined/src/fpu/postprocess.sv b/pipelined/src/fpu/postprocess.sv index c81a896e..ce9d33ed 100644 --- a/pipelined/src/fpu/postprocess.sv +++ b/pipelined/src/fpu/postprocess.sv @@ -1,9 +1,9 @@ /////////////////////////////////////////// // -// Written: Katherine Parry, David Harris -// Modified: 6/23/2021 +// Written: me@KatherineParry.com +// Modified: 7/5/2022 // -// Purpose: Floating point multiply-accumulate of configurable size +// Purpose: Post-Processing // // A component of the Wally configurable RISC-V project. // @@ -31,13 +31,13 @@ module postprocess( // general signals - input logic XSgnM, YSgnM, // input signs - input logic [`NE-1:0] ZExpM, // input exponents - input logic [`NF:0] XManM, YManM, ZManM, // input mantissas - input logic [2:0] FrmM, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude - input logic [`FMTBITS-1:0] FmtM, // precision 1 = double 0 = single - input logic [2:0] FOpCtrlM, // choose which opperation (look below for values) - input logic XZeroM, YZeroM, ZZeroM, // inputs are zero + input logic Xs, Ys, // input signs + input logic [`NE-1:0] Ze, // input exponents + input logic [`NF:0] Xm, Ym, Zm, // input mantissas + input logic [2:0] Frm, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude + input logic [`FMTBITS-1:0] Fmt, // precision 1 = double 0 = single + input logic [2:0] FOpCtrl, // choose which opperation (look below for values) + input logic XZero, YZero, ZZero, // inputs are zero input logic XInfM, YInfM, ZInfM, // inputs are infinity input logic XNaNM, YNaNM, ZNaNM, // inputs are NaN input logic XSNaNM, YSNaNM, ZSNaNM, // inputs are signaling NaNs @@ -55,9 +55,10 @@ module postprocess( input logic [$clog2(3*`NF+7)-1:0] FmaNormCntM, // the normalization shift count //divide signals input logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2M, - input logic [`NE+1:0] DivCalcExpM, // the calculated expoent input logic DivStickyM, input logic DivNegStickyM, + input logic DivDone, + input logic [`NE+1:0] DivCalcExpM, input logic [`DIVLEN+2:0] Quot, // conversion signals input logic [`NE:0] CvtCalcExpM, // the calculated expoent @@ -125,15 +126,15 @@ module postprocess( logic Sqrt; // signals to help readability - assign Signed = FOpCtrlM[0]; - assign Int64 = FOpCtrlM[1]; - assign IntToFp = FOpCtrlM[2]; + assign Signed = FOpCtrl[0]; + assign Int64 = FOpCtrl[1]; + assign IntToFp = FOpCtrl[2]; assign ToInt = FWriteIntM; - assign Mult = FOpCtrlM[2]&~FOpCtrlM[1]&~FOpCtrlM[0]; + assign Mult = FOpCtrl[2]&~FOpCtrl[1]&~FOpCtrl[0]; assign CvtOp = (PostProcSelM == 2'b00); assign FmaOp = (PostProcSelM == 2'b10); - assign DivOp = (PostProcSelM == 2'b01); - assign Sqrt = FOpCtrlM[0]; + assign DivOp = (PostProcSelM == 2'b01)&DivDone; + assign Sqrt = FOpCtrl[0]; // is there an input of infinity or NaN being used assign InfIn = (XInfM&~(IntToFp&CvtOp))|(YInfM&~CvtOp)|(ZInfM&FmaOp); @@ -141,21 +142,21 @@ module postprocess( // choose the ouptut format depending on the opperation // - fp -> fp: OpCtrl contains the percision of the output - // - otherwise: FmtM contains the percision of the output + // - otherwise: Fmt contains the percision of the output if (`FPSIZES == 2) - assign OutFmt = IntToFp|~CvtOp ? FmtM : (FOpCtrlM[1:0] == `FMT); + assign OutFmt = IntToFp|~CvtOp ? Fmt : (FOpCtrl[1:0] == `FMT); else if (`FPSIZES == 3 | `FPSIZES == 4) - assign OutFmt = IntToFp|~CvtOp ? FmtM : FOpCtrlM[1:0]; + assign OutFmt = IntToFp|~CvtOp ? Fmt : FOpCtrl[1:0]; /////////////////////////////////////////////////////////////////////////////// // Normalization /////////////////////////////////////////////////////////////////////////////// - cvtshiftcalc cvtshiftcalc(.ToInt, .CvtCalcExpM, .CvtResDenormUfM, .XManM, .CvtLzcInM, - .XZeroM, .IntToFp, .OutFmt, .CvtResUf, .CvtShiftIn); - fmashiftcalc fmashiftcalc(.SumM, .ZExpM, .ProdExpM, .FmaNormCntM, .FmtM, .KillProdM, .ConvNormSumExp, + cvtshiftcalc cvtshiftcalc(.ToInt, .CvtCalcExpM, .CvtResDenormUfM, .Xm, .CvtLzcInM, + .XZero, .IntToFp, .OutFmt, .CvtResUf, .CvtShiftIn); + fmashiftcalc fmashiftcalc(.SumM, .Ze, .ProdExpM, .FmaNormCntM, .Fmt, .KillProdM, .ConvNormSumExp, .ZDenormM, .SumZero, .PreResultDenorm, .FmaShiftAmt, .FmaShiftIn); - divshiftcalc divshiftcalc(.FmtM, .Quot, .DivCalcExpM, .EarlyTermShiftDiv2M, .DivResDenorm, .DivDenormShift, .DivShiftAmt, .DivShiftIn); + divshiftcalc divshiftcalc(.Fmt, .DivCalcExpM, .Quot, .EarlyTermShiftDiv2M, .DivResDenorm, .DivDenormShift, .DivShiftAmt, .DivShiftIn); always_comb case(PostProcSelM) @@ -168,8 +169,13 @@ module postprocess( ShiftIn = {CvtShiftIn, {`NORMSHIFTSZ-`CVTLEN-`NF-1{1'b0}}}; end 2'b01: begin //div - ShiftAmt = DivShiftAmt; - ShiftIn = DivShiftIn; + if(DivDone) begin + ShiftAmt = DivShiftAmt; + ShiftIn = DivShiftIn; + end else begin + ShiftAmt = '0; + ShiftIn = '0; + end end default: begin ShiftAmt = {$clog2(`NORMSHIFTSZ){1'bx}}; @@ -193,27 +199,28 @@ module postprocess( // round to infinity // round to nearest max magnitude - round round(.OutFmt, .FrmM, .Sticky, .AddendStickyM, .ZZeroM, .Plus1, .PostProcSelM, .CvtCalcExpM, .CorrDivExp, + + roundsign roundsign(.PSgnM, .ZSgnEffM, .InvZM, .FmaOp, .DivOp, .CvtOp, .NegSumM, + .Xs, .Ys, .CvtResSgnM, .RoundSgn); + + round round(.OutFmt, .Frm, .Sticky, .AddendStickyM, .ZZero, .Plus1, .PostProcSelM, .CvtCalcExpM, .CorrDivExp, .InvZM, .RoundSgn, .SumExp, .FmaOp, .CvtOp, .CvtResDenormUfM, .CorrShifted, .ToInt, .CvtResUf, - .DivStickyM, .DivNegStickyM, + .DivStickyM, .DivNegStickyM, .DivDone, .DivOp, .UfPlus1, .FullResExp, .ResFrac, .ResExp, .Round, .RoundAdd, .UfLSBRes, .RoundExp); /////////////////////////////////////////////////////////////////////////////// // Sign calculation /////////////////////////////////////////////////////////////////////////////// - resultsign resultsign(.FrmM, .PSgnM, .ZSgnEffM, .SumExp, .Round, .Sticky, + resultsign resultsign(.Frm, .PSgnM, .ZSgnEffM, .SumExp, .Round, .Sticky, .FmaOp, .ZInfM, .InfIn, .SumZero, .Mult, .RoundSgn, .ResSgn); - - roundsign roundsign(.PSgnM, .ZSgnEffM, .InvZM, .FmaOp, .DivOp, .CvtOp, .NegSumM, - .XSgnM, .YSgnM, .CvtResSgnM, .RoundSgn); /////////////////////////////////////////////////////////////////////////////// // Flags /////////////////////////////////////////////////////////////////////////////// - flags flags(.XSNaNM, .YSNaNM, .ZSNaNM, .XInfM, .YInfM, .ZInfM, .InfIn, .XZeroM, .YZeroM, - .XSgnM, .Sqrt, .ToInt, .IntToFp, .Int64, .Signed, .OutFmt, .CvtCalcExpM, + flags flags(.XSNaNM, .YSNaNM, .ZSNaNM, .XInfM, .YInfM, .ZInfM, .InfIn, .XZero, .YZero, + .Xs, .Sqrt, .ToInt, .IntToFp, .Int64, .Signed, .OutFmt, .CvtCalcExpM, .XNaNM, .YNaNM, .NaNIn, .ZSgnEffM, .PSgnM, .Round, .IntInvalid, .DivByZero, .UfLSBRes, .Sticky, .UfPlus1, .CvtOp, .DivOp, .FmaOp, .FullResExp, .Plus1, .RoundExp, .NegResMSBS, .Invalid, .Overflow, .PostProcFlgM); @@ -222,9 +229,9 @@ module postprocess( // Select the result /////////////////////////////////////////////////////////////////////////////// - negateintres negateintres(.XSgnM, .Shifted, .Signed, .Int64, .Plus1, .NegResMSBS, .NegRes); - resultselect resultselect(.XSgnM, .XManM, .YManM, .ZManM, .XZeroM, .IntInvalid, - .IntZeroM, .FrmM, .OutFmt, .XNaNM, .YNaNM, .ZNaNM, .CvtResUf, + negateintres negateintres(.Xs, .Shifted, .Signed, .Int64, .Plus1, .NegResMSBS, .NegRes); + resultselect resultselect(.Xs, .Xm, .Ym, .Zm, .XZero, .IntInvalid, + .IntZeroM, .Frm, .OutFmt, .XNaNM, .YNaNM, .ZNaNM, .CvtResUf, .NaNIn, .IntToFp, .Int64, .Signed, .CvtOp, .FmaOp, .Plus1, .Invalid, .Overflow, .InfIn, .NegRes, .XInfM, .YInfM, .DivOp, .DivByZero, .FullResExp, .CvtCalcExpM, .ResSgn, .ResExp, .ResFrac, .PostProcResM, .FCvtIntResM); diff --git a/pipelined/src/fpu/resultselect.sv b/pipelined/src/fpu/resultselect.sv index 81f6a74d..7ead5cd0 100644 --- a/pipelined/src/fpu/resultselect.sv +++ b/pipelined/src/fpu/resultselect.sv @@ -1,14 +1,43 @@ +/////////////////////////////////////////// +// +// Written: me@KatherineParry.com +// Modified: 7/5/2022 +// +// Purpose: special case selection +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// MIT LICENSE +// Permission is hereby granted, free of charge, to any person obtaining a copy of this +// software and associated documentation files (the "Software"), to deal in the Software +// without restriction, including without limitation the rights to use, copy, modify, merge, +// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons +// to whom the Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or +// substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. +//////////////////////////////////////////////////////////////////////////////////////////////// + `include "wally-config.vh" module resultselect( - input logic XSgnM, // input signs - input logic [`NF:0] XManM, YManM, ZManM, // input mantissas + input logic Xs, // input signs + input logic [`NF:0] Xm, Ym, Zm, // input mantissas input logic XNaNM, YNaNM, ZNaNM, // inputs are NaN - input logic [2:0] FrmM, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude + input logic [2:0] Frm, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude input logic [`FMTBITS-1:0] OutFmt, // output format input logic InfIn, input logic XInfM, YInfM, - input logic XZeroM, + input logic XZero, input logic IntZeroM, input logic NaNIn, input logic IntToFp, @@ -39,29 +68,29 @@ module resultselect( // does the overflow result output the maximum normalized floating point number // output infinity if the input is infinity - assign OfResMax = (~InfIn|(IntToFp&CvtOp))&~DivByZero&((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResSgn) | (FrmM[1:0]==2'b11&ResSgn)); + assign OfResMax = (~InfIn|(IntToFp&CvtOp))&~DivByZero&((Frm[1:0]==2'b01) | (Frm[1:0]==2'b10&~ResSgn) | (Frm[1:0]==2'b11&ResSgn)); if (`FPSIZES == 1) begin //NaN res selection depending on standard if(`IEEE754) begin - assign XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, XManM[`NF-2:0]}; - assign YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, YManM[`NF-2:0]}; - assign ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, ZManM[`NF-2:0]}; + assign XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]}; + assign YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Ym[`NF-2:0]}; + assign ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Zm[`NF-2:0]}; assign InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}}; end else begin assign InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}}; end assign OfRes = OfResMax ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResSgn, {`NE{1'b1}}, {`NF{1'b0}}}; - assign UfRes = {ResSgn, {`FLEN-1{1'b0}}, Plus1&FrmM[1]&~(DivOp&YInfM)}; + assign UfRes = {ResSgn, {`FLEN-2{1'b0}}, Plus1&Frm[1]&~(DivOp&YInfM)}; assign NormRes = {ResSgn, ResExp, ResFrac}; end else if (`FPSIZES == 2) begin //will the format conversion in killprod work in other conversions? if(`IEEE754) begin - assign XNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, XManM[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, XManM[`NF-2:`NF-`NF1]}; - assign YNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, YManM[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, YManM[`NF-2:`NF-`NF1]}; - assign ZNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, ZManM[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`NF1]}; + assign XNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF1]}; + assign YNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, Ym[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Ym[`NF-2:`NF-`NF1]}; + assign ZNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, Zm[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Zm[`NF-2:`NF-`NF1]}; assign InvalidRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)}; end else begin assign InvalidRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)}; @@ -69,7 +98,7 @@ module resultselect( assign OfRes = OutFmt ? OfResMax ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResSgn, {`NE{1'b1}}, {`NF{1'b0}}} : OfResMax ? {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} : {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1{1'b1}}, (`NF1)'(0)}; - assign UfRes = OutFmt ? {ResSgn, (`FLEN-2)'(0), Plus1&FrmM[1]&~(DivOp&YInfM)} : {{`FLEN-`LEN1{1'b1}}, ResSgn, (`LEN1-2)'(0), Plus1&FrmM[1]&~(DivOp&YInfM)}; + assign UfRes = OutFmt ? {ResSgn, (`FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInfM)} : {{`FLEN-`LEN1{1'b1}}, ResSgn, (`LEN1-2)'(0), Plus1&Frm[1]&~(DivOp&YInfM)}; assign NormRes = OutFmt ? {ResSgn, ResExp, ResFrac} : {{`FLEN-`LEN1{1'b1}}, ResSgn, ResExp[`NE1-1:0], ResFrac[`NF-1:`NF-`NF1]}; end else if (`FPSIZES == 3) begin @@ -77,43 +106,43 @@ module resultselect( case (OutFmt) `FMT: begin if(`IEEE754) begin - XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, XManM[`NF-2:0]}; - YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, YManM[`NF-2:0]}; - ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, ZManM[`NF-2:0]}; + XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]}; + YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Ym[`NF-2:0]}; + ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Zm[`NF-2:0]}; InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}}; end else begin InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}}; end OfRes = OfResMax ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResSgn, {`NE{1'b1}}, {`NF{1'b0}}}; - UfRes = {ResSgn, (`FLEN-2)'(0), Plus1&FrmM[1]&~(DivOp&YInfM)}; + UfRes = {ResSgn, (`FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInfM)}; NormRes = {ResSgn, ResExp, ResFrac}; end `FMT1: begin if(`IEEE754) begin - XNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, XManM[`NF-2:`NF-`NF1]}; - YNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, YManM[`NF-2:`NF-`NF1]}; - ZNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`NF1]}; + XNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF1]}; + YNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Ym[`NF-2:`NF-`NF1]}; + ZNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Zm[`NF-2:`NF-`NF1]}; InvalidRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)}; end else begin InvalidRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)}; end OfRes = OfResMax ? {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} : {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1{1'b1}}, (`NF1)'(0)}; - UfRes = {{`FLEN-`LEN1{1'b1}}, ResSgn, (`LEN1-2)'(0), Plus1&FrmM[1]&~(DivOp&YInfM)}; + UfRes = {{`FLEN-`LEN1{1'b1}}, ResSgn, (`LEN1-2)'(0), Plus1&Frm[1]&~(DivOp&YInfM)}; NormRes = {{`FLEN-`LEN1{1'b1}}, ResSgn, ResExp[`NE1-1:0], ResFrac[`NF-1:`NF-`NF1]}; end `FMT2: begin if(`IEEE754) begin - XNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, XManM[`NF-2:`NF-`NF2]}; - YNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, YManM[`NF-2:`NF-`NF2]}; - ZNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`NF2]}; + XNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF2]}; + YNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, Ym[`NF-2:`NF-`NF2]}; + ZNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, Zm[`NF-2:`NF-`NF2]}; InvalidRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)}; end else begin InvalidRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)}; end OfRes = OfResMax ? {{`FLEN-`LEN2{1'b1}}, ResSgn, {`NE2-1{1'b1}}, 1'b0, {`NF2{1'b1}}} : {{`FLEN-`LEN2{1'b1}}, ResSgn, {`NE2{1'b1}}, (`NF2)'(0)}; - UfRes = {{`FLEN-`LEN2{1'b1}}, ResSgn, (`LEN2-2)'(0), Plus1&FrmM[1]&~(DivOp&YInfM)}; + UfRes = {{`FLEN-`LEN2{1'b1}}, ResSgn, (`LEN2-2)'(0), Plus1&Frm[1]&~(DivOp&YInfM)}; NormRes = {{`FLEN-`LEN2{1'b1}}, ResSgn, ResExp[`NE2-1:0], ResFrac[`NF-1:`NF-`NF2]}; end default: begin @@ -136,50 +165,50 @@ module resultselect( case (OutFmt) 2'h3: begin if(`IEEE754) begin - XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, XManM[`NF-2:0]}; - YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, YManM[`NF-2:0]}; - ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, ZManM[`NF-2:0]}; + XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]}; + YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Ym[`NF-2:0]}; + ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Zm[`NF-2:0]}; InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}}; end else begin InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}}; end OfRes = OfResMax ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResSgn, {`NE{1'b1}}, {`NF{1'b0}}}; - UfRes = {ResSgn, (`FLEN-2)'(0), Plus1&FrmM[1]&~(DivOp&YInfM)}; + UfRes = {ResSgn, (`FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInfM)}; NormRes = {ResSgn, ResExp, ResFrac}; end 2'h1: begin if(`IEEE754) begin - XNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, XManM[`NF-2:`NF-`D_NF]}; - YNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, YManM[`NF-2:`NF-`D_NF]}; - ZNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`D_NF]}; + XNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`D_NF]}; + YNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, Ym[`NF-2:`NF-`D_NF]}; + ZNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, Zm[`NF-2:`NF-`D_NF]}; InvalidRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)}; end else begin InvalidRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)}; end OfRes = OfResMax ? {{`FLEN-`D_LEN{1'b1}}, ResSgn, {`D_NE-1{1'b1}}, 1'b0, {`D_NF{1'b1}}} : {{`FLEN-`D_LEN{1'b1}}, ResSgn, {`D_NE{1'b1}}, (`D_NF)'(0)}; - UfRes = {{`FLEN-`D_LEN{1'b1}}, ResSgn, (`D_LEN-2)'(0), Plus1&FrmM[1]&~(DivOp&YInfM)}; + UfRes = {{`FLEN-`D_LEN{1'b1}}, ResSgn, (`D_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInfM)}; NormRes = {{`FLEN-`D_LEN{1'b1}}, ResSgn, ResExp[`D_NE-1:0], ResFrac[`NF-1:`NF-`D_NF]}; end 2'h0: begin if(`IEEE754) begin - XNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, XManM[`NF-2:`NF-`S_NF]}; - YNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, YManM[`NF-2:`NF-`S_NF]}; - ZNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`S_NF]}; + XNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`S_NF]}; + YNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, Ym[`NF-2:`NF-`S_NF]}; + ZNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, Zm[`NF-2:`NF-`S_NF]}; InvalidRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)}; end else begin InvalidRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)}; end OfRes = OfResMax ? {{`FLEN-`S_LEN{1'b1}}, ResSgn, {`S_NE-1{1'b1}}, 1'b0, {`S_NF{1'b1}}} : {{`FLEN-`S_LEN{1'b1}}, ResSgn, {`S_NE{1'b1}}, (`S_NF)'(0)}; - UfRes = {{`FLEN-`S_LEN{1'b1}}, ResSgn, (`S_LEN-2)'(0), Plus1&FrmM[1]&~(DivOp&YInfM)}; + UfRes = {{`FLEN-`S_LEN{1'b1}}, ResSgn, (`S_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInfM)}; NormRes = {{`FLEN-`S_LEN{1'b1}}, ResSgn, ResExp[`S_NE-1:0], ResFrac[`NF-1:`NF-`S_NF]}; end 2'h2: begin if(`IEEE754) begin - XNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, XManM[`NF-2:`NF-`H_NF]}; - YNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, YManM[`NF-2:`NF-`H_NF]}; - ZNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`H_NF]}; + XNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`H_NF]}; + YNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, Ym[`NF-2:`NF-`H_NF]}; + ZNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, Zm[`NF-2:`NF-`H_NF]}; InvalidRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)}; end else begin InvalidRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)}; @@ -187,7 +216,7 @@ module resultselect( OfRes = OfResMax ? {{`FLEN-`H_LEN{1'b1}}, ResSgn, {`H_NE-1{1'b1}}, 1'b0, {`H_NF{1'b1}}} : {{`FLEN-`H_LEN{1'b1}}, ResSgn, {`H_NE{1'b1}}, (`H_NF)'(0)}; // zero is exact fi dividing by infinity so don't add 1 - UfRes = {{`FLEN-`H_LEN{1'b1}}, ResSgn, (`H_LEN-2)'(0), Plus1&FrmM[1]&~(DivOp&YInfM)}; + UfRes = {{`FLEN-`H_LEN{1'b1}}, ResSgn, (`H_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInfM)}; NormRes = {{`FLEN-`H_LEN{1'b1}}, ResSgn, ResExp[`H_NE-1:0], ResFrac[`NF-1:`NF-`H_NF]}; end endcase @@ -202,7 +231,7 @@ module resultselect( // - do so if the res underflows, is zero (the exp doesnt calculate correctly). or the integer input is 0 // - dont set to zero if fp input is zero but not using the fp input // - dont set to zero if int input is zero but not using the int input - assign KillRes = CvtOp ? (CvtResUf|(XZeroM&~IntToFp)|(IntZeroM&IntToFp)) : FullResExp[`NE+1] | (((YInfM&~XInfM)|XZeroM)&DivOp);//Underflow & ~ResDenorm & (ResExp!=1); + assign KillRes = CvtOp ? (CvtResUf|(XZero&~IntToFp)|(IntZeroM&IntToFp)) : FullResExp[`NE+1] | (((YInfM&~XInfM)|XZero)&DivOp);//Underflow & ~ResDenorm & (ResExp!=1); assign SelOfRes = Overflow|DivByZero|(InfIn&~(YInfM&DivOp)); // output infinity with result sign if divide by zero if(`IEEE754) begin @@ -243,9 +272,9 @@ module resultselect( // unsigned | 2^32-1 | 2^64-1 | // // other: 32 bit unsinged res should be sign extended as if it were a signed number - assign OfIntRes = Signed ? XSgnM&~XNaNM ? Int64 ? {1'b1, {`XLEN-1{1'b0}}} : {{`XLEN-32{1'b1}}, 1'b1, {31{1'b0}}} : // signed negitive + assign OfIntRes = Signed ? Xs&~XNaNM ? Int64 ? {1'b1, {`XLEN-1{1'b0}}} : {{`XLEN-32{1'b1}}, 1'b1, {31{1'b0}}} : // signed negitive Int64 ? {1'b0, {`XLEN-1{1'b1}}} : {{`XLEN-32{1'b0}}, 1'b0, {31{1'b1}}} : // signed positive - XSgnM&~XNaNM ? {`XLEN{1'b0}} : // unsigned negitive + Xs&~XNaNM ? {`XLEN{1'b0}} : // unsigned negitive {`XLEN{1'b1}};// unsigned positive @@ -256,6 +285,6 @@ module resultselect( // - otherwise output a rounded 0 // - otherwise output the normal res (trmined and sign extended if nessisary) assign FCvtIntResM = IntInvalid ? OfIntRes : - CvtCalcExpM[`NE] ? XSgnM&Signed&Plus1 ? {{`XLEN{1'b1}}} : {{`XLEN-1{1'b0}}, Plus1} : //CalcExp has to come after invalid ***swap to actual mux at some point?? + CvtCalcExpM[`NE] ? Xs&Signed&Plus1 ? {{`XLEN{1'b1}}} : {{`XLEN-1{1'b0}}, Plus1} : //CalcExp has to come after invalid ***swap to actual mux at some point?? Int64 ? NegRes[`XLEN-1:0] : {{`XLEN-32{NegRes[31]}}, NegRes[31:0]}; endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/resultsign.sv b/pipelined/src/fpu/resultsign.sv index fe085d46..f0776de0 100644 --- a/pipelined/src/fpu/resultsign.sv +++ b/pipelined/src/fpu/resultsign.sv @@ -1,7 +1,35 @@ +/////////////////////////////////////////// +// +// Written: me@KatherineParry.com +// Modified: 7/5/2022 +// +// Purpose: calculating the result's sign +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// MIT LICENSE +// Permission is hereby granted, free of charge, to any person obtaining a copy of this +// software and associated documentation files (the "Software"), to deal in the Software +// without restriction, including without limitation the rights to use, copy, modify, merge, +// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons +// to whom the Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or +// substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. +//////////////////////////////////////////////////////////////////////////////////////////////// `include "wally-config.vh" module resultsign( - input logic [2:0] FrmM, + input logic [2:0] Frm, input logic PSgnM, ZSgnEffM, input logic ZInfM, input logic InfIn, @@ -25,7 +53,7 @@ module resultsign( // if multiply then Psgn // otherwise psign assign Underflow = SumExp[`NE+1] | ((SumExp == 0) & (Round|Sticky)); - assign ZeroSgn = (PSgnM^ZSgnEffM)&~Underflow&~Mult ? FrmM[1:0] == 2'b10 : PSgnM; + assign ZeroSgn = (PSgnM^ZSgnEffM)&~Underflow&~Mult ? Frm[1:0] == 2'b10 : PSgnM; // is the result negitive diff --git a/pipelined/src/fpu/round.sv b/pipelined/src/fpu/round.sv index 532e1729..a038151e 100644 --- a/pipelined/src/fpu/round.sv +++ b/pipelined/src/fpu/round.sv @@ -1,3 +1,31 @@ +/////////////////////////////////////////// +// +// Written: me@KatherineParry.com +// Modified: 7/5/2022 +// +// Purpose: Rounder +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// MIT LICENSE +// Permission is hereby granted, free of charge, to any person obtaining a copy of this +// software and associated documentation files (the "Software"), to deal in the Software +// without restriction, including without limitation the rights to use, copy, modify, merge, +// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons +// to whom the Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or +// substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. +//////////////////////////////////////////////////////////////////////////////////////////////// `include "wally-config.vh" // what position is XLEN in? // options: @@ -9,17 +37,18 @@ module round( input logic [`FMTBITS-1:0] OutFmt, // precision 1 = double 0 = single - input logic [2:0] FrmM, // rounding mode + input logic [2:0] Frm, // rounding mode input logic FmaOp, input logic DivOp, input logic CvtOp, input logic ToInt, + input logic DivDone, input logic [1:0] PostProcSelM, input logic CvtResDenormUfM, input logic CvtResUf, input logic [`CORRSHIFTSZ-1:0] CorrShifted, input logic AddendStickyM, // addend's sticky bit - input logic ZZeroM, // is Z zero + input logic ZZero, // is Z zero input logic InvZM, // invert Z input logic [`NE+1:0] SumExp, // exponent of the normalized sum input logic RoundSgn, // the result's sign @@ -227,13 +256,13 @@ module round( // Deterimine if a small number was supposed to be subtrated // - for FMA or if division has a negitive sticky bit - assign SubBySmallNum = ((AddendStickyM&FmaOp&~ZZeroM&InvZM) | (DivNegStickyM&DivOp)) & ~(NormSumSticky|UfRound); - assign UfSubBySmallNum = ((AddendStickyM&FmaOp&~ZZeroM&InvZM) | (DivNegStickyM&DivOp)) & ~NormSumSticky; + assign SubBySmallNum = ((AddendStickyM&FmaOp&~ZZero&InvZM) | (DivNegStickyM&DivOp)) & ~(NormSumSticky|UfRound); + assign UfSubBySmallNum = ((AddendStickyM&FmaOp&~ZZero&InvZM) | (DivNegStickyM&DivOp)) & ~NormSumSticky; always_comb begin // Determine if you add 1 - case (FrmM) + case (Frm) 3'b000: CalcPlus1 = Round & ((Sticky| LSBRes)&~SubBySmallNum);//round to nearest even 3'b001: CalcPlus1 = 0;//round to zero 3'b010: CalcPlus1 = RoundSgn & ~(SubBySmallNum & ~Round);//round down @@ -242,7 +271,7 @@ module round( default: CalcPlus1 = 1'bx; endcase // Determine if you add 1 (for underflow flag) - case (FrmM) + case (Frm) 3'b000: UfCalcPlus1 = UfRound & ((UfSticky| UfLSBRes)&~UfSubBySmallNum);//round to nearest even 3'b001: UfCalcPlus1 = 0;//round to zero 3'b010: UfCalcPlus1 = RoundSgn & ~(UfSubBySmallNum & ~UfRound);//round down @@ -251,7 +280,7 @@ module round( default: UfCalcPlus1 = 1'bx; endcase // Determine if you subtract 1 - case (FrmM) + case (Frm) 3'b000: CalcMinus1 = 0;//round to nearest even 3'b001: CalcMinus1 = SubBySmallNum & ~Round;//round to zero 3'b010: CalcMinus1 = ~RoundSgn & ~Round & SubBySmallNum;//round down @@ -309,8 +338,8 @@ module round( case(PostProcSelM) 2'b10: RoundExp = SumExp; // fma 2'b00: RoundExp = {CvtCalcExpM[`NE], CvtCalcExpM}&{`NE+2{~CvtResDenormUfM|CvtResUf}}; // cvt - 2'b01: RoundExp = CorrDivExp; // divide - default: RoundExp = 0; + 2'b01: RoundExp = DivDone ? CorrDivExp : '0; // divide + default: RoundExp = '0; endcase // round the result diff --git a/pipelined/src/fpu/roundsign.sv b/pipelined/src/fpu/roundsign.sv index a5a34642..2365f6d9 100644 --- a/pipelined/src/fpu/roundsign.sv +++ b/pipelined/src/fpu/roundsign.sv @@ -1,10 +1,38 @@ +/////////////////////////////////////////// +// +// Written: me@KatherineParry.com +// Modified: 7/5/2022 +// +// Purpose: Sign calculation ofr rounding +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// MIT LICENSE +// Permission is hereby granted, free of charge, to any person obtaining a copy of this +// software and associated documentation files (the "Software"), to deal in the Software +// without restriction, including without limitation the rights to use, copy, modify, merge, +// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons +// to whom the Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or +// substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. +//////////////////////////////////////////////////////////////////////////////////////////////// `include "wally-config.vh" module roundsign( input logic PSgnM, ZSgnEffM, input logic InvZM, - input logic XSgnM, - input logic YSgnM, + input logic Xs, + input logic Ys, input logic NegSumM, input logic FmaOp, input logic DivOp, @@ -24,7 +52,7 @@ module roundsign( // assign FmaResSgnTmp = InvZM&(ZSgnEffM)&NegSumM | InvZM&PSgnM&~NegSumM | (ZSgnEffM&PSgnM); - assign DivSgn = XSgnM^YSgnM; + assign DivSgn = Xs^Ys; // Sign for rounding calulation assign RoundSgn = (FmaResSgnTmp&FmaOp) | (CvtResSgnM&CvtOp) | (DivSgn&DivOp); diff --git a/pipelined/srt/srt-radix4.sv b/pipelined/src/fpu/srt-radix4.sv similarity index 65% rename from pipelined/srt/srt-radix4.sv rename to pipelined/src/fpu/srt-radix4.sv index 39432c9e..741d4e83 100644 --- a/pipelined/srt/srt-radix4.sv +++ b/pipelined/src/fpu/srt-radix4.sv @@ -1,8 +1,8 @@ /////////////////////////////////////////// // srt.sv // -// Written: David_Harris@hmc.edu 13 January 2022 -// Modified: +// Written: David_Harris@hmc.edu, me@KatherineParry.com, Cedar Turek +// Modified:13 January 2022 // // Purpose: Combined Divide and Square Root Floating Point and Integer Unit // @@ -33,38 +33,27 @@ module srtradix4 ( input logic clk, input logic DivStart, + input logic DivBusy, + input logic [`FMTBITS-1:0] FmtE, input logic [`NE-1:0] XExpE, YExpE, - input logic [`NF:0] XManE, YManE, - input logic [`XLEN-1:0] SrcA, SrcB, - input logic XInfE, YInfE, input logic XZeroE, YZeroE, - input logic XNaNE, YNaNE, - input logic W64, // 32-bit ints on XLEN=64 - input logic Signed, // Interpret integers as signed 2's complement - input logic Int, // Choose integer inputs - input logic Sqrt, // perform square root, not divide - output logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2E, - output logic DivDone, - output logic DivStickyE, - output logic DivNegStickyE, + input logic [`DIVLEN-1:0] X, + input logic [`DIVLEN-1:0] Dpreproc, + input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt, output logic [`DIVLEN+2:0] Quot, - output logic [`XLEN-1:0] Rem, // *** later handle integers - output logic [`NE+1:0] DivCalcExpE + output logic [`DIVLEN+3:0] WSN, WCN, + output logic [`DIVLEN+3:0] WS, WC, + output logic [`NE+1:0] DivCalcExpM, + output logic [`XLEN-1:0] Rem ); logic [3:0] q; - logic [`NE+1:0] DivCalcExp; - logic [`DIVLEN-1:0] X; - logic [`DIVLEN-1:0] Dpreproc; - logic [`DIVLEN+3:0] WS, WSA, WSN; - logic [`DIVLEN+3:0] WC, WCA, WCN; + logic [`DIVLEN+3:0] WSA; + logic [`DIVLEN+3:0] WCA; logic [`DIVLEN+3:0] D, DBar, D2, DBar2, Dsel; + logic [`NE+1:0] DivCalcExp; logic [$clog2(`XLEN+1)-1:0] intExp; - logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt; logic intSign; - - srtpreproc preproc(.SrcA, .SrcB, .XManE, .YManE, .W64, .Signed, .Int, .Sqrt, .X, - .XZeroCnt, .YZeroCnt, .Dpreproc, .intExp, .intSign); // Top Muxes and Registers // When start is asserted, the inputs are loaded into the divider. @@ -79,6 +68,7 @@ module srtradix4 ( mux2 #(`DIVLEN+4) wcmux({WCA[`DIVLEN+1:0], 2'b0}, {`DIVLEN+4{1'b0}}, DivStart, WCN); flop #(`DIVLEN+4) wcflop(clk, WCN, WC); flopen #(`DIVLEN+4) dflop(clk, DivStart, {4'b0001, Dpreproc}, D); + flopen #(`NE+2) expflop(clk, DivStart, DivCalcExp, DivCalcExpM); // Quotient Selection logic // Given partial remainder, select quotient of +1, 0, or -1 (qp, qz, pm) @@ -91,9 +81,6 @@ module srtradix4 ( // 0001 = -2 qsel4 qsel4(.D, .WS, .WC, .q); - // Store the expoenent and sign until division is DivDone - flopen #(`NE+2) expflop(clk, DivStart, DivCalcExp, DivCalcExpE); - // Divisor Selection logic // *** radix 4 change to choose -2 to 2 // - choose the negitive version of what's being selected @@ -116,12 +103,9 @@ module srtradix4 ( csa #(`DIVLEN+4) csa(WS, WC, Dsel, |q[3:2], WSA, WCA); //*** change for radix 4 - otfc4 otfc4(.clk, .DivStart, .q, .Quot); + otfc4 otfc4(.clk, .DivStart, .DivBusy, .q, .Quot); - expcalc expcalc(.XExpE, .YExpE, .XZeroE, .XZeroCnt, .YZeroCnt, .DivCalcExp); - - earlytermination earlytermination(.clk, .WC, .WS, .XZeroE, .YZeroE, .XInfE, .EarlyTermShiftDiv2E, - .YInfE, .XNaNE, .YNaNE, .DivStickyE, .DivNegStickyE, .DivStart, .DivDone); + expcalc expcalc(.FmtE, .XExpE, .YExpE, .XZeroE, .XZeroCnt, .YZeroCnt, .DivCalcExp); endmodule @@ -129,38 +113,7 @@ endmodule // Submodules // //////////////// -module earlytermination( - input logic clk, - input logic [`DIVLEN+3:0] WS, WC, - input logic XInfE, YInfE, - input logic XZeroE, YZeroE, - input logic XNaNE, YNaNE, - input logic DivStart, - output logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2E, - output logic DivStickyE, - output logic DivNegStickyE, - output logic DivDone); - - logic [$clog2(`DIVLEN/2+3)-1:0] Count; - logic WZero; - logic [`DIVLEN+3:0] W; - assign WZero = ((WS^WC)=={WS[`DIVLEN+2:0]|WC[`DIVLEN+2:0], 1'b0})|XZeroE|YZeroE|XInfE|YInfE|XNaNE|YNaNE; - assign DivDone = (DivStickyE | WZero); - assign DivStickyE = ~|Count; - assign W = WC+WS; - assign DivNegStickyE = W[`DIVLEN+3]; //*** is there a better way to do this??? - assign EarlyTermShiftDiv2E = Count; - // +1 for setup - // `DIVLEN/2 to get required number of bits - // +1 for possible .5 and round bit - // Count down Counter - always @(posedge clk) - begin - if (DivStart) Count <= #1 `DIVLEN/2+2; - else Count <= #1 Count-1; - end -endmodule module qsel4 ( input logic [`DIVLEN+3:0] D, @@ -234,58 +187,13 @@ module qsel4 ( endmodule -/////////////////// -// Preprocessing // -/////////////////// -module srtpreproc ( - input logic [`XLEN-1:0] SrcA, SrcB, - input logic [`NF:0] XManE, YManE, - input logic W64, // 32-bit ints on XLEN=64 - input logic Signed, // Interpret integers as signed 2's complement - input logic Int, // Choose integer inputs - input logic Sqrt, // perform square root, not divide - output logic [`DIVLEN-1:0] X, - output logic [`DIVLEN-1:0] Dpreproc, - output logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt, - output logic [$clog2(`XLEN+1)-1:0] intExp, // Quotient integer exponent - output logic intSign // Quotient integer sign -); - // logic [`XLEN-1:0] PosA, PosB; - // logic [`DIVLEN-1:0] ExtraA, ExtraB, PreprocA, PreprocB, PreprocX, PreprocY; - logic [`DIVLEN-1:0] PreprocA, PreprocX; - logic [`DIVLEN-1:0] PreprocB, PreprocY; - - // assign PosA = (Signed & SrcA[`XLEN - 1]) ? -SrcA : SrcA; - // assign PosB = (Signed & SrcB[`XLEN - 1]) ? -SrcB : SrcB; - // lzc #(`XLEN) lzcA (PosA, zeroCntA); - // lzc #(`XLEN) lzcB (PosB, zeroCntB); - - // ***can probably merge X LZC with conversion - // cout the number of leading zeros - lzc #(`NF+1) lzcA (XManE, XZeroCnt); - lzc #(`NF+1) lzcB (YManE, YZeroCnt); - - // assign ExtraA = {PosA, {`DIVLEN-`XLEN{1'b0}}}; - // assign ExtraB = {PosB, {`DIVLEN-`XLEN{1'b0}}}; - - // assign PreprocA = ExtraA << zeroCntA; - // assign PreprocB = ExtraB << (zeroCntB + 1); - assign PreprocX = {XManE[`NF-1:0]<Fp?, is the integer a long} casex ({OpCtrl[2:1]}) 2'b11: begin // long -> double @@ -1172,7 +1199,7 @@ module readvectors ( end endcase end - 2'b00: begin // single + 2'b00: if (`S_SUPPORTED)begin // single // {is the integer a long, is the opperation to an integer} casex ({OpCtrl[2:1]}) 2'b11: begin // long -> single diff --git a/pipelined/testbench/tests.vh b/pipelined/testbench/tests.vh index 0d57dbdd..8971e544 100644 --- a/pipelined/testbench/tests.vh +++ b/pipelined/testbench/tests.vh @@ -203,16 +203,16 @@ string imperas32f[] = '{ "rv32i_m/F/FCVT-WU-S-RNE-01", "rv32i_m/F/FCVT-WU-S-RTZ-01", "rv32i_m/F/FCVT-WU-S-RUP-01", - // "rv32i_m/F/FDIV-S-DYN-RDN-01", - // "rv32i_m/F/FDIV-S-DYN-RMM-01", - // "rv32i_m/F/FDIV-S-DYN-RNE-01", - // "rv32i_m/F/FDIV-S-DYN-RTZ-01", - // "rv32i_m/F/FDIV-S-DYN-RUP-01", - // "rv32i_m/F/FDIV-S-RDN-01", - // "rv32i_m/F/FDIV-S-RMM-01", - // "rv32i_m/F/FDIV-S-RNE-01", - // "rv32i_m/F/FDIV-S-RTZ-01", - // "rv32i_m/F/FDIV-S-RUP-01", + "rv32i_m/F/FDIV-S-DYN-RDN-01", + "rv32i_m/F/FDIV-S-DYN-RMM-01", + "rv32i_m/F/FDIV-S-DYN-RNE-01", + "rv32i_m/F/FDIV-S-DYN-RTZ-01", + "rv32i_m/F/FDIV-S-DYN-RUP-01", + "rv32i_m/F/FDIV-S-RDN-01", + "rv32i_m/F/FDIV-S-RMM-01", + "rv32i_m/F/FDIV-S-RNE-01", + "rv32i_m/F/FDIV-S-RTZ-01", + "rv32i_m/F/FDIV-S-RUP-01", "rv32i_m/F/FEQ-S-01", "rv32i_m/F/FLE-S-01", "rv32i_m/F/FLT-S-01", @@ -390,16 +390,16 @@ string imperas32f[] = '{ "rv64i_m/F/FCVT-WU-S-RNE-01", "rv64i_m/F/FCVT-WU-S-RTZ-01", "rv64i_m/F/FCVT-WU-S-RUP-01", - // "rv64i_m/F/FDIV-S-DYN-RDN-01", - // "rv64i_m/F/FDIV-S-DYN-RMM-01", - // "rv64i_m/F/FDIV-S-DYN-RNE-01", - // "rv64i_m/F/FDIV-S-DYN-RTZ-01", - // "rv64i_m/F/FDIV-S-DYN-RUP-01", - // "rv64i_m/F/FDIV-S-RDN-01", - // "rv64i_m/F/FDIV-S-RMM-01", - // "rv64i_m/F/FDIV-S-RNE-01", - // "rv64i_m/F/FDIV-S-RTZ-01", - // "rv64i_m/F/FDIV-S-RUP-01", + "rv64i_m/F/FDIV-S-DYN-RDN-01", + "rv64i_m/F/FDIV-S-DYN-RMM-01", + "rv64i_m/F/FDIV-S-DYN-RNE-01", + "rv64i_m/F/FDIV-S-DYN-RTZ-01", + "rv64i_m/F/FDIV-S-DYN-RUP-01", + "rv64i_m/F/FDIV-S-RDN-01", + "rv64i_m/F/FDIV-S-RMM-01", + "rv64i_m/F/FDIV-S-RNE-01", + "rv64i_m/F/FDIV-S-RTZ-01", + "rv64i_m/F/FDIV-S-RUP-01", "rv64i_m/F/FEQ-S-01", "rv64i_m/F/FLE-S-01", "rv64i_m/F/FLT-S-01", @@ -570,16 +570,16 @@ string imperas32f[] = '{ "rv64i_m/D/FCVT-WU-D-RNE-01", "rv64i_m/D/FCVT-WU-D-RTZ-01", "rv64i_m/D/FCVT-WU-D-RUP-01", - // "rv64i_m/D/FDIV-D-DYN-RDN-01", - // "rv64i_m/D/FDIV-D-DYN-RMM-01", - // "rv64i_m/D/FDIV-D-DYN-RNE-01", - // "rv64i_m/D/FDIV-D-DYN-RTZ-01", - // "rv64i_m/D/FDIV-D-DYN-RUP-01", - // "rv64i_m/D/FDIV-D-RDN-01", - // "rv64i_m/D/FDIV-D-RMM-01", - // "rv64i_m/D/FDIV-D-RNE-01", - // "rv64i_m/D/FDIV-D-RTZ-01", - // "rv64i_m/D/FDIV-D-RUP-01", + "rv64i_m/D/FDIV-D-DYN-RDN-01", + "rv64i_m/D/FDIV-D-DYN-RMM-01", + "rv64i_m/D/FDIV-D-DYN-RNE-01", + "rv64i_m/D/FDIV-D-DYN-RTZ-01", + "rv64i_m/D/FDIV-D-DYN-RUP-01", + "rv64i_m/D/FDIV-D-RDN-01", + "rv64i_m/D/FDIV-D-RMM-01", + "rv64i_m/D/FDIV-D-RNE-01", + "rv64i_m/D/FDIV-D-RTZ-01", + "rv64i_m/D/FDIV-D-RUP-01", "rv64i_m/D/FEQ-D-01", "rv64i_m/D/FLD-01", "rv64i_m/D/FLE-D-01", @@ -1119,17 +1119,17 @@ string imperas32f[] = '{ "rv64i_m/D/src/d_fcvt.wu.d_b27-01.S/ref/Ref", "rv64i_m/D/src/d_fcvt.wu.d_b28-01.S/ref/Ref", "rv64i_m/D/src/d_fcvt.wu.d_b29-01.S/ref/Ref", - // "rv64i_m/D/src/d_fdiv_b1-01.S/ref/Ref", - // "rv64i_m/D/src/d_fdiv_b20-01.S/ref/Ref", - // "rv64i_m/D/src/d_fdiv_b2-01.S/ref/Ref", - // "rv64i_m/D/src/d_fdiv_b21-01.S/ref/Ref", - // "rv64i_m/D/src/d_fdiv_b3-01.S/ref/Ref", - // "rv64i_m/D/src/d_fdiv_b4-01.S/ref/Ref", - // "rv64i_m/D/src/d_fdiv_b5-01.S/ref/Ref", - // "rv64i_m/D/src/d_fdiv_b6-01.S/ref/Ref", - // "rv64i_m/D/src/d_fdiv_b7-01.S/ref/Ref", - // "rv64i_m/D/src/d_fdiv_b8-01.S/ref/Ref", - // "rv64i_m/D/src/d_fdiv_b9-01.S/ref/Ref", + "rv64i_m/D/src/d_fdiv_b1-01.S/ref/Ref", + "rv64i_m/D/src/d_fdiv_b20-01.S/ref/Ref", + "rv64i_m/D/src/d_fdiv_b2-01.S/ref/Ref", + "rv64i_m/D/src/d_fdiv_b21-01.S/ref/Ref", + "rv64i_m/D/src/d_fdiv_b3-01.S/ref/Ref", + "rv64i_m/D/src/d_fdiv_b4-01.S/ref/Ref", + "rv64i_m/D/src/d_fdiv_b5-01.S/ref/Ref", + "rv64i_m/D/src/d_fdiv_b6-01.S/ref/Ref", + "rv64i_m/D/src/d_fdiv_b7-01.S/ref/Ref", + "rv64i_m/D/src/d_fdiv_b8-01.S/ref/Ref", + "rv64i_m/D/src/d_fdiv_b9-01.S/ref/Ref", "rv64i_m/D/src/d_feq_b1-01.S/ref/Ref", "rv64i_m/D/src/d_feq_b19-01.S/ref/Ref", "rv64i_m/D/src/d_fle_b1-01.S/ref/Ref", @@ -1291,17 +1291,17 @@ string imperas32f[] = '{ "rv32i_m/F/src/fcvt.wu.s_b27-01.S/ref/Ref", "rv32i_m/F/src/fcvt.wu.s_b28-01.S/ref/Ref", "rv32i_m/F/src/fcvt.wu.s_b29-01.S/ref/Ref", - // "rv32i_m/F/src/fdiv_b1-01.S/ref/Ref", - // "rv32i_m/F/src/fdiv_b20-01.S/ref/Ref", - // "rv32i_m/F/src/fdiv_b2-01.S/ref/Ref", - // "rv32i_m/F/src/fdiv_b21-01.S/ref/Ref", - // "rv32i_m/F/src/fdiv_b3-01.S/ref/Ref", - // "rv32i_m/F/src/fdiv_b4-01.S/ref/Ref", - // "rv32i_m/F/src/fdiv_b5-01.S/ref/Ref", - // "rv32i_m/F/src/fdiv_b6-01.S/ref/Ref", - // "rv32i_m/F/src/fdiv_b7-01.S/ref/Ref", - // "rv32i_m/F/src/fdiv_b8-01.S/ref/Ref", - // "rv32i_m/F/src/fdiv_b9-01.S/ref/Ref", + "rv32i_m/F/src/fdiv_b1-01.S/ref/Ref", + "rv32i_m/F/src/fdiv_b20-01.S/ref/Ref", + "rv32i_m/F/src/fdiv_b2-01.S/ref/Ref", + "rv32i_m/F/src/fdiv_b21-01.S/ref/Ref", + "rv32i_m/F/src/fdiv_b3-01.S/ref/Ref", + "rv32i_m/F/src/fdiv_b4-01.S/ref/Ref", + "rv32i_m/F/src/fdiv_b5-01.S/ref/Ref", + "rv32i_m/F/src/fdiv_b6-01.S/ref/Ref", + "rv32i_m/F/src/fdiv_b7-01.S/ref/Ref", + "rv32i_m/F/src/fdiv_b8-01.S/ref/Ref", + "rv32i_m/F/src/fdiv_b9-01.S/ref/Ref", "rv32i_m/F/src/feq_b1-01.S/ref/Ref", "rv32i_m/F/src/feq_b19-01.S/ref/Ref", "rv32i_m/F/src/fle_b1-01.S/ref/Ref",