cvw/src/fpu/postproc/round.sv

331 lines
16 KiB
Systemverilog

///////////////////////////////////////////
// round.sv
//
// Written: me@KatherineParry.com
// Modified: 7/5/2022
//
// Purpose: Rounder
//
// Documentation: RISC-V System on Chip Design Chapter 13
//
// A component of the CORE-V-WALLY configurable RISC-V project.
//
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
//
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
//
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
// may obtain a copy of the License at
//
// https://solderpad.org/licenses/SHL-2.1/
//
// Unless required by applicable law or agreed to in writing, any work distributed under the
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
module round import cvw::*; #(parameter cvw_t P) (
input logic [P.FMTBITS-1:0] OutFmt, // output format
input logic [2:0] Frm, // rounding mode
input logic [1:0] PostProcSel, // select the postprocessor output
input logic Ms, // normalized sign
input logic [P.CORRSHIFTSZ-1:0] Mf, // normalized fraction
// fma
input logic FmaOp, // is an fma opperation being done?
input logic [P.NE+1:0] FmaMe, // exponent of the normalized sum for fma
input logic FmaASticky, // addend's sticky bit
// divsqrt
input logic DivOp, // is a division opperation being done
input logic DivSticky, // divsqrt sticky bit
input logic [P.NE+1:0] Qe, // the divsqrt calculated expoent
// cvt
input logic CvtOp, // is a convert opperation being done
input logic ToInt, // is the cvt op a cvt to integer
input logic CvtResSubnormUf, // is the cvt result subnormal or underflow
input logic CvtResUf, // does the cvt result underflow
input logic [P.NE:0] CvtCe, // the cvt calculated expoent
// outputs
output logic [P.NE+1:0] Me, // normalied fraction
output logic UfPlus1, // do you add one to the result if given an unbounded exponent
output logic [P.NE+1:0] FullRe, // Re with bits to determine sign and overflow
output logic [P.NE-1:0] Re, // Result exponent
output logic [P.NF-1:0] Rf, // Result fractionNormS
output logic Sticky, // sticky bit
output logic Plus1, // do you add one to the final result
output logic Round, Guard // bits needed to calculate rounding
);
logic UfCalcPlus1; // calculated plus one for unbounded exponent
logic NormSticky; // normalized sum's sticky bit
logic [P.NF-1:0] RoundFrac; // rounded fraction
logic FpRes; // is the result a floating point
logic IntRes; // is the result an integer
logic FpGuard, FpRound; // floating point round/guard bits
logic FpLsbRes; // least significant bit of floating point result
logic LsbRes; // lsb of result
logic CalcPlus1; // calculated plus1
logic FpPlus1; // do you add one to the fp result
logic [P.FLEN:0] RoundAdd; // how much to add to the result
// what position is XLEN in?
// options:
// 1: XLEN > NF > NF1
// 2: NF > XLEN > NF1
// 3: NF > NF1 > XLEN
// single and double will always be smaller than XLEN
//`define XLENPOS ((`XLEN>`NF) ? 1 : (`XLEN>`NF1) ? 2 : 3)
localparam XLENPOS = P.XLEN > P.NF ? 1 : P.XLEN > P.NF1 ? 2 : 3;
///////////////////////////////////////////////////////////////////////////////
// Rounding
///////////////////////////////////////////////////////////////////////////////
// round to nearest even
// {Round, Sticky}
// 0x - do nothing
// 10 - tie - Plus1 if result is odd (LSBNormSum = 1)
// - don't add 1 if a small number was supposed to be subtracted
// 11 - do nothing if a small number was supposed to subtracted (the sticky bit was set by the small number)
// - plus 1 otherwise
// round to zero - subtract 1 if a small number was supposed to be subtracted from a positive result with guard and round bits of 0
// round to -infinity
// - Plus1 if negative unless a small number was supposed to be subtracted from a result with guard and round bits of 0
// - subtract 1 if a small number was supposed to be subtracted from a positive result with guard and round bits of 0
// round to infinity
// - Plus1 if positive unless a small number was supposed to be subtracted from a result with guard and round bits of 0
// - subtract 1 if a small number was supposed to be subtracted from a negative result with guard and round bits of 0
// round to nearest max magnitude
// {Guard, Round, Sticky}
// 0x - do nothing
// 10 - tie - Plus1
// - don't add 1 if a small number was supposed to be subtracted
// 11 - do nothing if a small number was supposed to subtracted (the sticky bit was set by the small number)
// - Plus 1 otherwise
// determine what format the final result is in: int or fp
assign IntRes = ToInt;
assign FpRes = ~IntRes;
// sticky bit calculation
if (P.FPSIZES == 1) begin
// 1: XLEN > NF
// | XLEN |
// | NF |1|1|
// ^ ^ if floating point result
// ^ if not an FMA result
if (XLENPOS == 1)assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF-2:P.CORRSHIFTSZ-P.XLEN-1]&FpRes) |
(|Mf[P.CORRSHIFTSZ-P.XLEN-2:0]);
// 2: NF > XLEN
if (XLENPOS == 2)assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.NF-1]&IntRes) |
(|Mf[P.CORRSHIFTSZ-P.NF-2:0]);
end else if (P.FPSIZES == 2) begin
// XLEN is either 64 or 32
// so half and single are always smaller then XLEN
// 1: XLEN > NF > NF1
if (XLENPOS == 1) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF1-2:P.CORRSHIFTSZ-P.NF-1]&FpRes&~OutFmt) |
(|Mf[P.CORRSHIFTSZ-P.NF-2:P.CORRSHIFTSZ-P.XLEN-1]&FpRes) |
(|Mf[P.CORRSHIFTSZ-P.XLEN-2:0]);
// 2: NF > XLEN > NF1
if (XLENPOS == 2) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF1-2:P.CORRSHIFTSZ-P.XLEN-1]&FpRes&~OutFmt) |
(|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.NF-1]&(IntRes|~OutFmt)) |
(|Mf[P.CORRSHIFTSZ-P.NF-2:0]);
// 3: NF > NF1 > XLEN
if (XLENPOS == 3) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.NF1-1]&IntRes) |
(|Mf[P.CORRSHIFTSZ-P.NF1-2:P.CORRSHIFTSZ-P.NF-1]&(~OutFmt|IntRes)) |
(|Mf[P.CORRSHIFTSZ-P.NF-2:0]);
end else if (P.FPSIZES == 3) begin
// 1: XLEN > NF > NF1
if (XLENPOS == 1) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF2-2:P.CORRSHIFTSZ-P.NF1-1]&FpRes&(OutFmt==P.FMT1)) |
(|Mf[P.CORRSHIFTSZ-P.NF1-2:P.CORRSHIFTSZ-P.NF-1]&FpRes&~(OutFmt==P.FMT)) |
(|Mf[P.CORRSHIFTSZ-P.NF-2:P.CORRSHIFTSZ-P.XLEN-1]&FpRes) |
(|Mf[P.CORRSHIFTSZ-P.XLEN-2:0]);
// 2: NF > XLEN > NF1
if (XLENPOS == 2) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF2-2:P.CORRSHIFTSZ-P.NF1-1]&FpRes&(OutFmt==P.FMT1)) |
(|Mf[P.CORRSHIFTSZ-P.NF1-2:P.CORRSHIFTSZ-P.XLEN-1]&FpRes&~(OutFmt==P.FMT)) |
(|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.NF-1]&(IntRes|~(OutFmt==P.FMT))) |
(|Mf[P.CORRSHIFTSZ-P.NF-2:0]);
// 3: NF > NF1 > XLEN
if (XLENPOS == 3) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF2-2:P.CORRSHIFTSZ-P.XLEN-1]&FpRes&(OutFmt==P.FMT1)) |
(|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.NF1-1]&((OutFmt==P.FMT1)|IntRes)) |
(|Mf[P.CORRSHIFTSZ-P.NF1-2:P.CORRSHIFTSZ-P.NF-1]&(~(OutFmt==P.FMT)|IntRes)) |
(|Mf[P.CORRSHIFTSZ-P.NF-2:0]);
end else if (P.FPSIZES == 4) begin
// Quad precision will always be greater than XLEN
// 2: NF > XLEN > NF1
if (XLENPOS == 2) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.H_NF-2:P.CORRSHIFTSZ-P.S_NF-1]&FpRes&(OutFmt==P.H_FMT)) |
(|Mf[P.CORRSHIFTSZ-P.S_NF-2:P.CORRSHIFTSZ-P.D_NF-1]&FpRes&((OutFmt==P.S_FMT)|(OutFmt==P.H_FMT))) |
(|Mf[P.CORRSHIFTSZ-P.D_NF-2:P.CORRSHIFTSZ-P.XLEN-1]&FpRes&~(OutFmt==P.Q_FMT)) |
(|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.Q_NF-1]&(~(OutFmt==P.Q_FMT)|IntRes)) |
(|Mf[P.CORRSHIFTSZ-P.Q_NF-2:0]);
// 3: NF > NF1 > XLEN
// The extra XLEN bit will be ored later when caculating the final sticky bit - the ufplus1 not needed for integer
if (XLENPOS == 3) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.H_NF-2:P.CORRSHIFTSZ-P.S_NF-1]&FpRes&(OutFmt==P.H_FMT)) |
(|Mf[P.CORRSHIFTSZ-P.S_NF-2:P.CORRSHIFTSZ-P.XLEN-1]&FpRes&((OutFmt==P.S_FMT)|(OutFmt==P.H_FMT))) |
(|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.D_NF-1]&((OutFmt==P.S_FMT)|(OutFmt==P.H_FMT)|IntRes)) |
(|Mf[P.CORRSHIFTSZ-P.D_NF-2:P.CORRSHIFTSZ-P.Q_NF-1]&(~(OutFmt==P.Q_FMT)|IntRes)) |
(|Mf[P.CORRSHIFTSZ-P.Q_NF-2:0]);
end
// only add the Addend sticky if doing an FMA opperation
// - the shifter shifts too far left when there's an underflow (shifting out all possible sticky bits)
assign Sticky = FmaASticky&FmaOp | NormSticky | CvtResUf&CvtOp | FmaMe[P.NE+1]&FmaOp | DivSticky&DivOp;
// determine round and LSB of the rounded value
// - underflow round bit is used to determint the underflow flag
if (P.FPSIZES == 1) begin
assign FpGuard = Mf[P.CORRSHIFTSZ-P.NF-1];
assign FpLsbRes = Mf[P.CORRSHIFTSZ-P.NF];
assign FpRound = Mf[P.CORRSHIFTSZ-P.NF-2];
end else if (P.FPSIZES == 2) begin
assign FpGuard = OutFmt ? Mf[P.CORRSHIFTSZ-P.NF-1] : Mf[P.CORRSHIFTSZ-P.NF1-1];
assign FpLsbRes = OutFmt ? Mf[P.CORRSHIFTSZ-P.NF] : Mf[P.CORRSHIFTSZ-P.NF1];
assign FpRound = OutFmt ? Mf[P.CORRSHIFTSZ-P.NF-2] : Mf[P.CORRSHIFTSZ-P.NF1-2];
end else if (P.FPSIZES == 3) begin
always_comb
case (OutFmt)
P.FMT: begin
FpGuard = Mf[P.CORRSHIFTSZ-P.NF-1];
FpLsbRes = Mf[P.CORRSHIFTSZ-P.NF];
FpRound = Mf[P.CORRSHIFTSZ-P.NF-2];
end
P.FMT1: begin
FpGuard = Mf[P.CORRSHIFTSZ-P.NF1-1];
FpLsbRes = Mf[P.CORRSHIFTSZ-P.NF1];
FpRound = Mf[P.CORRSHIFTSZ-P.NF1-2];
end
P.FMT2: begin
FpGuard = Mf[P.CORRSHIFTSZ-P.NF2-1];
FpLsbRes = Mf[P.CORRSHIFTSZ-P.NF2];
FpRound = Mf[P.CORRSHIFTSZ-P.NF2-2];
end
default: begin
FpGuard = 1'bx;
FpLsbRes = 1'bx;
FpRound = 1'bx;
end
endcase
end else if (P.FPSIZES == 4) begin
always_comb
case (OutFmt)
2'h3: begin
FpGuard = Mf[P.CORRSHIFTSZ-P.Q_NF-1];
FpLsbRes = Mf[P.CORRSHIFTSZ-P.Q_NF];
FpRound = Mf[P.CORRSHIFTSZ-P.Q_NF-2];
end
2'h1: begin
FpGuard = Mf[P.CORRSHIFTSZ-P.D_NF-1];
FpLsbRes = Mf[P.CORRSHIFTSZ-P.D_NF];
FpRound = Mf[P.CORRSHIFTSZ-P.D_NF-2];
end
2'h0: begin
FpGuard = Mf[P.CORRSHIFTSZ-P.S_NF-1];
FpLsbRes = Mf[P.CORRSHIFTSZ-P.S_NF];
FpRound = Mf[P.CORRSHIFTSZ-P.S_NF-2];
end
2'h2: begin
FpGuard = Mf[P.CORRSHIFTSZ-P.H_NF-1];
FpLsbRes = Mf[P.CORRSHIFTSZ-P.H_NF];
FpRound = Mf[P.CORRSHIFTSZ-P.H_NF-2];
end
endcase
end
assign Guard = ToInt&CvtOp ? Mf[P.CORRSHIFTSZ-P.XLEN-1] : FpGuard;
assign LsbRes = ToInt&CvtOp ? Mf[P.CORRSHIFTSZ-P.XLEN] : FpLsbRes;
assign Round = ToInt&CvtOp ? Mf[P.CORRSHIFTSZ-P.XLEN-2] : FpRound;
always_comb begin
// Determine if you add 1
case (Frm)
3'b000: CalcPlus1 = Guard & (Round|Sticky|LsbRes);//round to nearest even
3'b001: CalcPlus1 = 0;//round to zero
3'b010: CalcPlus1 = Ms;//round down
3'b011: CalcPlus1 = ~Ms;//round up
3'b100: CalcPlus1 = Guard;//round to nearest max magnitude
default: CalcPlus1 = 1'bx;
endcase
// Determine if you add 1 (for underflow flag)
case (Frm)
3'b000: UfCalcPlus1 = Round & (Sticky|Guard);//round to nearest even
3'b001: UfCalcPlus1 = 0;//round to zero
3'b010: UfCalcPlus1 = Ms;//round down
3'b011: UfCalcPlus1 = ~Ms;//round up
3'b100: UfCalcPlus1 = Round;//round to nearest max magnitude
default: UfCalcPlus1 = 1'bx;
endcase
end
// If an answer is exact don't round
assign Plus1 = CalcPlus1 & (Sticky|Round|Guard);
assign FpPlus1 = Plus1&~(ToInt&CvtOp);
assign UfPlus1 = UfCalcPlus1 & (Sticky|Round);
// place Plus1 into the proper position for the format
if (P.FPSIZES == 1) begin
assign RoundAdd = {{P.FLEN{1'b0}}, FpPlus1};
end else if (P.FPSIZES == 2) begin
// \/FLEN+1
// | NE+2 | NF |
// '-NE+2-^----NF1----^
// P.FLEN+1-P.NE-2-P.NF1 = FLEN-1-NE-NF1
assign RoundAdd = {(P.NE+1+P.NF1)'(0), FpPlus1&~OutFmt, (P.NF-P.NF1-1)'(0), FpPlus1&OutFmt};
end else if (P.FPSIZES == 3) begin
assign RoundAdd = {(P.NE+1+P.NF2)'(0), FpPlus1&(OutFmt==P.FMT2), (P.NF1-P.NF2-1)'(0), FpPlus1&(OutFmt==P.FMT1), (P.NF-P.NF1-1)'(0), FpPlus1&(OutFmt==P.FMT)};
end else if (P.FPSIZES == 4)
assign RoundAdd = {(P.Q_NE+1+P.H_NF)'(0), FpPlus1&(OutFmt==P.H_FMT), (P.S_NF-P.H_NF-1)'(0), FpPlus1&(OutFmt==P.S_FMT), (P.D_NF-P.S_NF-1)'(0), FpPlus1&(OutFmt==P.D_FMT), (P.Q_NF-P.D_NF-1)'(0), FpPlus1&(OutFmt==P.Q_FMT)};
// trim unneeded bits from fraction
assign RoundFrac = Mf[P.CORRSHIFTSZ-1:P.CORRSHIFTSZ-P.NF];
// select the exponent
always_comb
case(PostProcSel)
2'b10: Me = FmaMe; // fma
2'b00: Me = {CvtCe[P.NE], CvtCe}&{P.NE+2{~CvtResSubnormUf|CvtResUf}}; // cvt
// 2'b01: Me = DivDone ? Qe : '0; // divide
2'b01: Me = Qe; // divide
default: Me = '0;
endcase
// round the result
// - if the fraction overflows one should be added to the exponent
assign {FullRe, Rf} = {Me, RoundFrac} + RoundAdd;
assign Re = FullRe[P.NE-1:0];
endmodule