Merge branch 'main' of github.com:davidharrishmc/riscv-wally into main

This commit is contained in:
Ross Thompson 2021-06-04 15:16:39 -05:00
commit 41a1e6112a
28 changed files with 220102 additions and 124314 deletions

View File

@ -0,0 +1 @@
vsim -do wally-pipelined-rv64icfd.do

View File

@ -0,0 +1,50 @@
# wally-pipelined.do
#
# Modification by Oklahoma State University & Harvey Mudd College
# Use with Testbench
# James Stine, 2008; David Harris 2021
# Go Cowboys!!!!!!
#
# Purpose: rebuild the "work" library, compile the Imperas testbench and all
# sources under ../src, optimize the design, and run the simulation to completion
# with the wave window open.
#
# Takes 1:10 to run RV64IC tests using gui
# Use this wally-pipelined.do file to run this example.
# Either bring up ModelSim and type the following at the "ModelSim>" prompt:
# do wally-pipelined.do
# or, to run from a shell, type the following at the shell prompt:
# vsim -do wally-pipelined.do -c
# (omit the "-c" to see the GUI while running from the shell)
# NOTE(review): the names above say wally-pipelined.do; if this file is saved under
# a different name (e.g. an rv64icfd variant), substitute that name -- confirm.

# when the simulation hits a break, keep executing this macro instead of stopping
onbreak {resume}
# create library
# start from a clean slate: delete any existing work library before recreating it
if [file exists work] {
vdel -all
}
vlib work
# compile source files
# suppress spurious warnings about
# "Extra checking for conflicts with always_comb done at vopt time"
# because vsim will run vopt
# default to config/rv64icfd, but allow this to be overridden at the command line. For example:
# do wally-pipelined.do ../config/rv32ic
# With one argument, $1 is used as the configuration include directory.
# Any other argument count matches no case, so nothing is compiled.
# NOTE(review): the one-argument case also compiles ../testbench/function_radix.sv,
# but the default (no-argument) case does not -- confirm this difference is intended.
switch $argc {
0 {vlog +incdir+../config/rv64icfd +incdir+../config/shared ../testbench/testbench-imperas.sv ../src/*/*.sv -suppress 2583}
1 {vlog +incdir+$1 +incdir+../config/shared ../testbench/testbench-imperas.sv ../testbench/function_radix.sv ../src/*/*.sv -suppress 2583}
}
# start and run simulation
# remove +acc flag for faster sim during regressions if there is no need to access internal signals
# optimize work.testbench into the design unit "workopt", then load that optimized design
vopt +acc work.testbench -o workopt
vsim workopt
view wave
-- display input and output signals as hexidecimal values
# load the default set of wave-window signals from a separate do file
do ./wave-dos/default-waves.do
-- Run the Simulation
# commented-out alternatives: run for a fixed time / quit when finished
#run 5000
run -all
#quit
# after the run: dismiss the testbench source view and bring the wave window back up
noview ../testbench/testbench-imperas.sv
view wave

View File

@ -1,103 +1,137 @@
////////////////////////////////////////////////////////////////////////////////
// Block Name: fmac.v
// Author: David Harris
// Date: 11/2/1995
//
// Block Description:
// This is the top level block of a floating-point multiply/accumulate
// unit(FMAC). It instantiates the following sub-blocks:
//
// array Booth encoding, partial product generation, product summation
// expgen Exponent summation, compare, and adjust
// align Alignment shifter
// add Carry-save adder for accumulate, carry propagate adder
// lza Leading zero anticipator to control normalization shifter
// normalize Normalization shifter
// round Rounding of result
// exception Handles exceptional cases
// bypass Handles bypass of result to ReadData1E or ReadData3E inputs
// sign One bit sign handling block
// special Catch special cases (inputs = 0 / infinity / etc.)
//
// The FMAC computes FmaResultM=ReadData1E*ReadData2E+ReadData3E, rounded with the mode specified by
// RN, RZ, RM, or RP. The result is optionally bypassed back to
// the ReadData1E or ReadData3E inputs for use on the next cycle. In addition, four signals
// are produced: trap, overflow, underflow, and inexact. Trap indicates
// an infinity, NaN, or denormalized number to be handled in software;
// the other three signals are IEEE flags.
//
/////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////
module fma1(ReadData1E, ReadData2E, ReadData3E, FrmE,
rE, sE, tE, bsE, killprodE, sumshiftE, sumshiftzeroE, aligncntE, aeE
, xzeroE, yzeroE, zzeroE, xnanE,ynanE, znanE, xdenormE, ydenormE, zdenormE,
xinfE, yinfE, zinfE, nanE, prodinfE);
/////////////////////////////////////////////////////////////////////////////
module fma1(
input logic [63:0] ReadData1E; // input 1
input logic [63:0] ReadData2E; // input 2
input logic [63:0] ReadData3E; // input 3
input logic [2:0] FrmE; // Rounding mode
output logic [12:0] aligncntE; // status flags
output logic [105:0] rE; // one result of partial product sum
output logic [105:0] sE; // other result of partial products
output logic [163:0] tE; // output logic of alignment shifter
output logic [12:0] aeE; // multiplier expoent
output logic bsE; // sticky bit of addend
output logic killprodE; // ReadData3E >> product
output logic xzeroE;
output logic yzeroE;
output logic zzeroE;
output logic xdenormE;
output logic ydenormE;
output logic zdenormE;
output logic xinfE;
output logic yinfE;
output logic zinfE;
output logic xnanE;
output logic ynanE;
output logic znanE;
output logic nanE;
output logic prodinfE;
output logic [8:0] sumshiftE;
output logic sumshiftzeroE;
input logic [63:0] ReadData1E,
input logic [63:0] ReadData2E,
input logic [63:0] ReadData3E,
output logic [105:0] ProdManE,
output logic [161:0] AlignedAddendE,
output logic [12:0] ProdExpE,
output logic AddendStickyE,
output logic KillProdE,
output logic XZeroE, YZeroE, ZZeroE,
output logic XInfE, YInfE, ZInfE,
output logic XNaNE, YNaNE, ZNaNE);
// Internal nodes
// output logic [12:0] aligncntE; // shift count for alignment
logic [51:0] XMan,YMan,ZMan;
logic [10:0] XExp,YExp,ZExp;
logic XSgn,YSgn,ZSgn;
logic [12:0] AlignCnt;
logic [211:0] Shift;
logic XDenormE, YDenormE, ZDenormE;
logic prodof; // ReadData1E*ReadData2E out of range
// split inputs into the sign bit, mantissa, and exponent for readability
assign XSgn = ReadData1E[63];
assign YSgn = ReadData2E[63];
assign ZSgn = ReadData3E[63];
assign XExp = ReadData1E[62:52];
assign YExp = ReadData2E[62:52];
assign ZExp = ReadData3E[62:52];
assign XMan = ReadData1E[51:0];
assign YMan = ReadData2E[51:0];
assign ZMan = ReadData3E[51:0];
// determine if an input is a special value
assign XNaNE = &ReadData1E[62:52] && |ReadData1E[51:0];
assign YNaNE = &ReadData2E[62:52] && |ReadData2E[51:0];
assign ZNaNE = &ReadData3E[62:52] && |ReadData3E[51:0];
assign XDenormE = ~(|ReadData1E[62:52]) && |ReadData1E[51:0];
assign YDenormE = ~(|ReadData2E[62:52]) && |ReadData2E[51:0];
assign ZDenormE = ~(|ReadData3E[62:52]) && |ReadData3E[51:0];
assign XInfE = &ReadData1E[62:52] && ~(|ReadData1E[51:0]);
assign YInfE = &ReadData2E[62:52] && ~(|ReadData2E[51:0]);
assign ZInfE = &ReadData3E[62:52] && ~(|ReadData3E[51:0]);
assign XZeroE = ~(|ReadData1E[62:0]);
assign YZeroE = ~(|ReadData2E[62:0]);
assign ZZeroE = ~(|ReadData3E[62:0]);
// Calculate the product's exponent
// - When multiplying two fp numbers, add the exponents
// - Subtract 3ff to remove one of the biases (XExp + YExp has two biases, one from each exponent)
// - Denormal numbers have an exponent value of 1, however they are
// represented with an exponent of 0. Add one if there is a denormal number
assign ProdExpE = (XZeroE|YZeroE) ? 13'b0 :
{2'b0, XExp} + {2'b0, YExp} - 13'h3ff + XDenormE + YDenormE;
// Calculate the product's mantissa
// - Add the assumed one. If the number is denormalized or zero, it does not have an assumed one.
assign ProdManE = {53'b0,~(XDenormE|XZeroE),XMan} * {53'b0,~(YDenormE|YZeroE),YMan};
// determine the shift count for alignment
// - negative means Z is larger, so shift Z left
// - positive means the product is larger, so shift Z right
// - Denormal numbers have an exponent value of 1, however they are
// represented with an exponent of 0. Add one to the exponent if it is a denormal number
assign AlignCnt = ProdExpE - ZExp - ZDenormE;
// Alignment shifter
// Default addition without shifting
// | 55'b0 | 106'b(product) | 2'b0 |
// |1'b0| addend |
// the 1'b0 before the addend is because the product's mantissa has two bits before the decimal point (xx.xxxxxxxxxx...)
always_comb
begin
// Set default values
AddendStickyE = 0;
KillProdE = 0;
// If the product is too small to affect the sum, kill the product
// | 55'b0 | 106'b(product) | 2'b0 |
// | addend |
if ($signed(AlignCnt) <= $signed(-56)) begin
KillProdE = 1;
AlignedAddendE = {55'b0, ~(ZZeroE|ZDenormE),ZMan,2'b0};
AddendStickyE = ~(XZeroE|YZeroE);
// If the Addend is shifted left (negative AlignCnt)
// | 55'b0 | 106'b(product) | 2'b0 |
// | addend |
end else if($signed(AlignCnt) <= $signed(0)) begin
Shift = {55'b0, ~(ZZeroE|ZDenormE),ZMan, 104'b0} << -AlignCnt;
AlignedAddendE = Shift[211:50];
AddendStickyE = |(Shift[49:0]);
// If the Addend is shifted right (positive AlignCnt)
// | 55'b0 | 106'b(product) | 2'b0 |
// | addend |
end else if ($signed(AlignCnt)<=$signed(105)) begin
Shift = {55'b0, ~(ZZeroE|ZDenormE),ZMan, 104'b0} >> AlignCnt;
AlignedAddendE = Shift[211:50];
AddendStickyE = |(Shift[49:0]);
// If the addend is too small to affect the addition
// - The addend has to shift two past the end of the addend to be considered too small
// - The 2 extra bits are needed for rounding
// | 55'b0 | 106'b(product) | 2'b0 |
// | addend |
end else begin
AlignedAddendE = 162'b0;
AddendStickyE = ~ZZeroE;
// Instantiate fraction datapath
multiply multiply(.xman(ReadData1E[51:0]), .yman(ReadData2E[51:0]), .*);
align align(.zman(ReadData3E[51:0]),.*);
// Instantiate exponent datapath
expgen1 expgen1(.xexp(ReadData1E[62:52]),.yexp(ReadData2E[62:52]),.zexp(ReadData3E[62:52]),.*);
// Instantiate special case detection across datapath & exponent path
special special(.*);
// Instantiate control output logic
flag1 flag1(.*);
end
end
endmodule

View File

@ -1,104 +1,107 @@
////////////////////////////////////////////////////////////////////////////////
// Block Name: fmac.v
// Author: David Harris
// Date: 11/2/1995
//
// Block Description:
// This is the top level block of a floating-point multiply/accumulate
// unit(FMAC). It instantiates the following sub-blocks:
//
// array Booth encoding, partial product generation, product summation
// expgen Exponent summation, compare, and adjust
// align Alignment shifter
// add Carry-save adder for accumulate, carry propagate adder
// lza Leading zero anticipator to control normalization shifter
// normalize Normalization shifter
// round Rounding of result
// exception Handles exceptional cases
// bypass Handles bypass of result to ReadData1M or ReadData3M inputs
// sign One bit sign handling block
// special Catch special cases (inputs = 0 / infinity / etc.)
//
// The FMAC computes FmaResultM=ReadData1M*ReadData2M+ReadData3M, rounded with the mode specified by
// RN, RZ, RM, or RP. The result is optionally bypassed back to
// the ReadData1M or ReadData3M inputs for use on the next cycle. In addition, four signals
// are produced: trap, overflow, underflow, and inexact. Trap indicates
// an infinity, NaN, or denormalized number to be handled in software;
// the other three signals are IEEE flags.
//
/////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////
module fma2(ReadData1M, ReadData2M, ReadData3M, FrmM,
FmaResultM, FmaFlagsM, aligncntM, rM, sM,
tM, normcntM, aeM, bsM,killprodM,
xzeroM, yzeroM,zzeroM,xdenormM,ydenormM,
zdenormM,xinfM,yinfM,zinfM,xnanM,ynanM,znanM,
nanM,sumshiftM,sumshiftzeroM,prodinfM
);
/////////////////////////////////////////////////////////////////////////////
module fma2(
input logic [63:0] ReadData1M; // input logic 1
input logic [63:0] ReadData2M; // input logic 2
input logic [63:0] ReadData3M; // input logic 3
input logic [2:0] FrmM; // Rounding mode
input logic [12:0] aligncntM; // status flags
input logic [105:0] rM; // one result of partial product sum
input logic [105:0] sM; // other result of partial products
input logic [163:0] tM; // output of alignment shifter
input logic [8:0] normcntM; // shift count for normalizer
input logic [12:0] aeM; // multiplier expoent
input logic bsM; // sticky bit of addend
input logic killprodM; // ReadData3M >> product
input logic prodinfM;
input logic xzeroM;
input logic yzeroM;
input logic zzeroM;
input logic xdenormM;
input logic ydenormM;
input logic zdenormM;
input logic xinfM;
input logic yinfM;
input logic zinfM;
input logic xnanM;
input logic ynanM;
input logic znanM;
input logic nanM;
input logic [8:0] sumshiftM;
input logic sumshiftzeroM;
output logic [63:0] FmaResultM; // output FmaResultM=ReadData1M*ReadData2M+ReadData3M
output logic [4:0] FmaFlagsM; // status flags
input logic [63:0] ReadData1M,
input logic [63:0] ReadData2M,
input logic [63:0] ReadData3M,
input logic [2:0] FrmM,
input logic [105:0] ProdManM,
input logic [161:0] AlignedAddendM,
input logic [12:0] ProdExpM,
input logic AddendStickyM,
input logic KillProdM,
input logic [3:0] FOpCtrlM,
input logic XZeroM, YZeroM, ZZeroM,
input logic XInfM, YInfM, ZInfM,
input logic XNaNM, YNaNM, ZNaNM,
output logic [63:0] FmaResultM,
output logic [4:0] FmaFlagsM);
// Internal nodes
logic [163:0] sum; // output of carry prop adder
logic [53:0] v; // normalized sum, R, S bits
// logic [12:0] aligncnt; // shift count for alignment
logic [8:0] normcnt; // shift count for normalizer
logic negsum; // negate sum
logic invz; // invert addend
logic selsum1; // select +1 mode of sum
logic negsum0; // sum +0 < 0
logic negsum1; // sum +1 < 0
logic sumzero; // sum = 0
logic infinity; // generate infinity on overflow
logic sumof; // result out of range
logic zexpsel;
logic denorm0;
logic resultdenorm;
logic inf;
logic specialsel;
logic expplus1;
logic sumuf;
logic psign;
logic sticky;
logic [12:0] de0;
logic isAdd;
assign isAdd = 1;
logic [51:0] XMan, YMan, ZMan, WMan;
logic [10:0] XExp, YExp, ZExp, WExp;
logic XSgn, YSgn, ZSgn, WSgn, PSgn;
logic IsSub;
logic [105:0] ProdMan2;
logic [162:0] AlignedAddend2;
logic [161:0] Sum;
logic [162:0] SumTmp;
logic [12:0] SumExp;
logic [12:0] SumExpMinus1;
logic [12:0] SumExpTmp, WExpTmp;
logic [53:0] NormSum;
logic [161:0] NormSumTmp;
logic [8:0] NormCnt;
logic NormSumSticky;
logic SumZero;
logic NegSum;
logic InvZ;
logic ResultDenorm;
logic Sticky;
logic Plus1, Minus1, Plus1Tmp, Minus1Tmp;
logic Invalid,Underflow,Overflow,Inexact;
logic [8:0] DenormShift;
logic ProdInf, ProdOf, ProdUf;
logic [63:0] FmaResultTmp;
logic SubBySmallNum;
// split inputs into the sign bit, mantissa, and exponent for readability
assign XSgn = ReadData1M[63];
assign YSgn = ReadData2M[63];
assign ZSgn = ReadData3M[63];
assign XExp = ReadData1M[62:52];
assign YExp = ReadData2M[62:52];
assign ZExp = ReadData3M[62:52];
assign XMan = ReadData1M[51:0];
assign YMan = ReadData2M[51:0];
assign ZMan = ReadData3M[51:0];
// is it an FMSUB or FNMSUB instruction
assign IsSub = FOpCtrlM[0];
// Addition
// Negate Z when doing one of the following operations:
// -prod + Z
// prod - Z
assign InvZ = IsSub ? ~(ZSgn ^ PSgn) : (ZSgn ^ PSgn);
// Choose an inverted or non-inverted addend - the one is added later
assign AlignedAddend2 = InvZ ? ~{2'b0,AlignedAddendM} : {2'b0,AlignedAddendM};
// Kill the product if the product is too small to affect the addition (determined in fma1.sv)
assign ProdMan2 = KillProdM ? 106'b0 : ProdManM;
// Do the addition
// - add one to negate if the addend was inverted
// - the 2 extra bits at the beginning and end are needed for rounding
assign SumTmp = AlignedAddend2 + {55'b0, ProdMan2,2'b0} + InvZ;
// Is the sum negative
assign NegSum = SumTmp[162];
// If the sum is negative, negate the sum.
assign Sum = NegSum ? -SumTmp[161:0] : SumTmp[161:0];
// Leading one detector
logic [8:0] i;
always_comb begin
i = 0;
while (~Sum[161-i] && $unsigned(i) <= $unsigned(9'd161)) i = i+1; // search for leading one
NormCnt = i+1; // compute shift count
end
@ -110,25 +113,163 @@ module fma2(ReadData1M, ReadData2M, ReadData3M, FrmM,
// Normalization
// Determine if the sum is zero
assign SumZero = ~(|Sum);
// Determine if the result is denormal
assign ResultDenorm = $signed(SumExpTmp)<=0 & ($signed(SumExpTmp+13'd52)>=0);
// Determine the shift needed for denormal results
assign DenormShift = ResultDenorm ? SumExpTmp-1 : 6'b0;
// Normalize the sum
assign NormSumTmp = SumZero ? 162'b0 : Sum << NormCnt+DenormShift;
assign NormSum = NormSumTmp[161:108];
// Calculate the sticky bit
assign NormSumSticky = (|NormSumTmp[107:0]);
assign Sticky = AddendStickyM | NormSumSticky;
// Determine sum's exponent
assign SumExpTmp = KillProdM ? ZExp : ProdExpM + -({5'b0, NormCnt} - 13'd56);
assign SumExp = SumZero ? 12'b0 :
ResultDenorm ? 12'b0 :
SumExpTmp;
// Instantiate fraction datapath
add add(.*);
lza lza(.*);
normalize normalize(.zexp(ReadData3M[62:52]),.*);
round round(.xman(ReadData1M[51:0]), .yman(ReadData2M[51:0]),.zman(ReadData3M[51:0]), .wman(FmaResultM[51:0]),.wsign(FmaResultM[63]),.*);
// Instantiate exponent datapath
expgen2 expgen2(.xexp(ReadData1M[62:52]),.yexp(ReadData2M[62:52]),.zexp(ReadData3M[62:52]),.wexp(FmaResultM[62:52]),.*);
// Instantiate control logic
// Rounding
// round to nearest even
// {NormSum[1], NormSum[0], Sticky}
// 0xx - do nothing
// 100 - tie - Plus1 if NormSum[2] = 1
// - don't add 1 if there was supposed to be a subtraction by a small number that didn't happen
// 101/110/111 - Plus1
// round to zero - do nothing
// - subtract 1 if a small number was supposed to be subtracted from the positive result
// round to -infinity - Plus1 if negative
// - don't add 1 if there was supposed to be a subtraction by a small number that didn't happen
// - subtract 1 if a small number was supposed to be subtracted from the positive result
// round to infinity - Plus1 if positive
// - don't add 1 if there was supposed to be a subtraction by a small number that didn't happen
// - subtract 1 if a small number was supposed to be subtracted from the negative result
// round to nearest max magnitude
// {NormSum[1], NormSum[0], Sticky}
// 0xx - do nothing
// 100 - tie - Plus1
// - don't add 1 if there was supposed to be a subtraction by a small number that didn't happen
// 101/110/111 - Plus1
// Determine if the result was supposed to be subtracted by a small number
assign SubBySmallNum = AddendStickyM&InvZ&~NormSumSticky;
always_comb begin
// Determine if you add 1
case (FrmM)
3'b000: Plus1Tmp = NormSum[1] & (NormSum[0] | (Sticky&~(~NormSum[0]&SubBySmallNum)) | (~NormSum[0]&~Sticky&NormSum[2]));//round to nearest even
3'b001: Plus1Tmp = 0;//round to zero
3'b010: Plus1Tmp = WSgn & ~(SubBySmallNum);//round down
3'b011: Plus1Tmp = ~WSgn & ~(SubBySmallNum);//round up
3'b100: Plus1Tmp = (NormSum[1] & (NormSum[0] | (Sticky&~(~NormSum[0]&SubBySmallNum)) | (~NormSum[0]&~Sticky)));//round to nearest max magnitude
default: Plus1Tmp = 1'bx;
endcase
// Determine if you subtract 1
case (FrmM)
3'b000: Minus1Tmp = 0;//round to nearest even
3'b001: Minus1Tmp = SubBySmallNum;//round to zero
3'b010: Minus1Tmp = ~WSgn & SubBySmallNum;//round down
3'b011: Minus1Tmp = WSgn & SubBySmallNum;//round up
3'b100: Minus1Tmp = 0;//round to nearest max magnitude
default: Minus1Tmp = 1'bx;
endcase
end
// If an answer is exact don't round
assign Plus1 = Sticky | (|NormSum[1:0]) ? Plus1Tmp : 0;
assign Minus1 = Sticky | (|NormSum[1:0]) ? Minus1Tmp : 0;
// Compute rounded result
assign {WExpTmp, WMan} = {SumExp, NormSum[53:2]} + Plus1 - Minus1;
assign WExp = WExpTmp[10:0];
// Sign calculation
// Calculate the product's sign
assign PSgn = XSgn ^ YSgn;
// Determine the sign if the sum is zero
// if product underflows then use psign
// otherwise
// if cancelation then 0 unless round to -inf
// otherwise psign
assign zerosign = Underflow ? PSgn :
(IsSub ? (PSgn^ZSgn ? PSgn : FrmM == 3'b010) :
(PSgn^ZSgn ? FrmM == 3'b010 : PSgn));
// is the result negative
// if p - z is the Sum negative
// if -p + z is the Sum positive
// if -p - z then the Sum is negative
assign resultsgn = InvZ&ZSgn&NegSum | InvZ&PSgn&~NegSum | (ZSgn&PSgn);
assign WSgn = SumZero ? zerosign : resultsgn;
sign sign(.xsign(ReadData1M[63]),.ysign(ReadData2M[63]),.zsign(ReadData3M[63]),.wsign(FmaResultM[63]),.*);
flag2 flag2(.xsign(ReadData1M[63]),.ysign(ReadData2M[63]),.zsign(ReadData3M[63]),.vbits(v[1:0]),.*);
// Select the result
assign FmaResultTmp = XNaNM ? {XSgn, XExp, 1'b1,XMan[50:0]} :
YNaNM ? {YSgn, YExp, 1'b1,YMan[50:0]} :
ZNaNM ? {ZSgn, ZExp, 1'b1,ZMan[50:0]} :
Invalid ? {WSgn, 11'h7ff, 1'b1, 51'b0} : // has to be before inf
XInfM ? {PSgn, XExp, XMan} :
YInfM ? {PSgn, YExp, YMan} :
ZInfM ? {ZSgn^IsSub, ZExp, ZMan} :
Overflow ? {WSgn, 11'h7ff, 52'b0} :
Underflow ? {WSgn, 63'b0} :
KillProdM ? ReadData3M - (Minus1&AddendStickyM) + (Plus1&AddendStickyM): // has to be after Underflow
{WSgn,WExp,WMan};
// Negate the result if FNMADD or FNMSUB instruction
assign FmaResultM[63] = FOpCtrlM[1] ? ~FmaResultTmp[63] : FmaResultTmp[63];
assign FmaResultM[62:0] = FmaResultTmp[62:0];
// Set Invalid flag for following cases:
// 1) Inf - Inf
// 2) 0 * Inf
// 3) any input is a signaling NaN
assign ProdOf = (ProdExpM >= 2047 && ~ProdExpM[12]);
assign ProdInf = ProdOf && ~XNaNM && ~YNaNM;
assign Invalid = (XNaNM&~XMan[51]) | (YNaNM&~YMan[51]) | (ZNaNM&~ZMan[51]) | ((XInfM || YInfM || ProdInf) & ZInfM & (XSgn ^ YSgn ^ ZSgn)) | (XZeroM & YInfM) | (YZeroM & XInfM);
// Set Overflow flag if the number is too big to be represented
assign Overflow = WExpTmp >= 2047 & ~WExpTmp[12];
// Set Underflow flag if the number is too small to be represented and isn't denormalized
assign ProdUf = KillProdM & ZZeroM;
assign Underflow = (WExpTmp[12] & ~ResultDenorm) | ProdUf;
// Set Inexact flag if the result is different from what would be output given infinite precision
assign Inexact = Sticky|Overflow|Underflow | (|NormSum[1:0]);
// Combine flags - FMA can't set the Divide by zero flag
assign FmaFlagsM = {Invalid, 1'b0, Overflow, Underflow, Inexact};
endmodule

View File

@ -1 +1,170 @@
c3f000200003fffe 0000000000000001 001ffffffffffffe 80cffc400007fffd 80cffc400007fffc Wrong FmaResultM= -64 ydenorm 1119653
cce008007fffffff 7fe6e0fac3dc6e26 401ffffffffffffe fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 28027
c03fffffffffc800 7fdfffffffffe000 37f07ffffffffffc fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 44043
c7f000ffffffffef 7fefffffffffde00 4e1ffffffffffe7f fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 107106
c7f00000dffffffe 7fe0000000000000 8000000000000000 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 238237
ffdf0000001fffff 7feffffffffffffe 7fe0000000000000 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 310309
c79ff80003fffffe 7feffc0000003ffe 2bd0020000000001 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 426425
ffeffffeffc00000 3fffffffffffffff 8000000000000000 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 436435
d16ff800007fffff 7fe0000000000000 c000000000000000 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 517516
d10ffffffff3fffe 7feffffffffffffe b9d07f0000000000 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 519518
442ff9fffffffffe ffefffffffffffff 3ff0000000000000 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 553552
c34f24b48d2af3e7 7fef7fe000000000 800ffffffffffffe fff0000000000000 ffefffffffffffff Wrong FmaResultM= z zdenorm ovrflw FmaResultM=-inf 577576
7fdfffffff8000ff c3f0100000000002 39300dfffffffffe fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 593592
ffe00007fffffdfe 4340000000000001 ffd34131592163f6 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 654653
4b98eba3e512fb7b ffe84639040d967a 42c00000010001fe fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 683682
ffed83a6b2e656b1 7fe0000000000001 0010000000000000 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 796795
7fd5220b51609cf6 c030000000001020 7fdfbfffffffffdf fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 903902
c3d6eb6dede43198 7feffffffffffffe 3a6008000000000f fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 1078076
c1f02000001fffff 7fe0000000000001 e8f000000040000f fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 1285283
c1cdfffbffffffff 7fe0000000000001 bca0000000000000 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 1355353
43447336acaf7bd8 ffeffffffffffffe 0010000000000000 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 1391389
4010000000fff7ff ffe0000000000000 7fdfffc000003ffe fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 1528526
ffe0000002000003 47fffc00000007ff 93b0040000002000 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 1597595
4060000200000400 ffe0000000000000 7fe0000000000000 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 1598596
fe7007fffdffffff 7fdffffffffff03e 001ffffffffffffe fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 1631629
4000000000000000 ffe0000000000001 3fdffffffffffffe fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 1738736
4000000000000000 ffeffffffffffffe 4263dd4adb450db9 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 1740738
40200001ffc00000 ffe0000000000000 3fdfcfffffffffff fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 1807805
400ffffffffffffe ffd00013fffffffe 40200000100001ff fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 1941939
400ffffffffffffe ffe0000000000001 c00fffe003ffffff fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 1947945
7fe00000080000fe bfffffffffffffff 3fd002000000003f fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2006003
4010000000000000 ffe0000000000001 7feffffffffffffe fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2018015
4010000000000000 ffeffffffffffffe bf7ffffffff80001 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2020017
43ffffd000000000 ffe0000000000000 613ffffffffffe1e fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2087084
c1fb6efe117a3ae3 7fefffffffffffff 43c0000001effffe fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2123120
ffdfffffc0000000 7fe0000002002000 3fffffffffbfff80 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2147144
401ffffffffffffe ffe0000000000001 7c300040000000ff fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2227224
4340000000000000 ffe0000000000001 bfeffffffffffffe fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2297294
c0f0000000203fff 7fefffffffffffff c921fffffffffefe fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2402399
7fedffffffdfffff c7f0400000000008 401ffffffffffffe fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2406403
434fffffffffffff ffd0000008fffffe c03fffffffffffff fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2419416
41dfffffffe00003 ffe0000000000001 3ff0000000000000 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2436433
c1f0000000037fff 7fdffffffff7ffc0 3fdffffffffffffe fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2451448
ffebfffffffffbff 4010000000000001 bf20001fffffffe0 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2465462
ffe000020001ffff 7fdfdffff7ffffff 41d000083fffffff fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2471468
434ffffffffffffe ffe0000000000001 bf1fffffc00003ff fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2506503
7fe0000000000000 c1c0000001ffffbf 0000000000000001 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z zdenorm ovrflw FmaResultM=-inf 2538535
7fe0000000000000 c1d264933e9e988c 3ca0000000000001 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2565562
7fe0000000000000 c00fffffffffffff bcaffffffffffffe fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2567564
7fe0000000000000 c010000000000001 403400003fffffff fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2569566
7fe0000000000001 c3d0bfffffffffff a9817e19c25e6ffe fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2590587
7fe0000000000001 c1c01feffffffffe 3fe0000000000001 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2592589
7fe0000000000001 f860000ffbfffffe 4000000000000001 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2619616
7fe0000000000001 c1e29f751d0db106 41dff88000000000 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2626623
7fe0000000000001 c010000000000001 800ffffffffffffe fff0000000000000 ffefffffffffffff Wrong FmaResultM= z zdenorm ovrflw FmaResultM=-inf 2639636
7fe0000000000001 c340000000000000 41e9bfbd1705ab74 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2641638
7fe0000000000001 c1ffffc0007fffff c0e00000003f8000 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2644641
7fefffffffffffff c3cfff000003ffff c01fffffefbfffff fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2653650
c00000ffc0000000 7fefffffffff81ff 00199d0888644678 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2660657
7fefffffffffffff c01fffe00000003e 3cdedfffffffffff fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2671668
7fefffffffffffff c7e00800ffffffff c010000000000001 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2682679
7fefffffffffffff c3f50270323fdbca 3fe0000000000001 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2691688
7fefffffffffffff c06f000000000006 8010000000000001 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2700697
7fefffffffffffff bff0000000000001 001ffffffffffffe fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2702699
7fefffffffffffff bffffffffffffffe 47edd848c981ea6a fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2704701
7fefffffffffffff d6f0007fbfffffff 380ff8000000001f fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2707704
7fefffffffffffff c167c6ca402625fe ffe0000000000001 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2709706
7fefffffffffffff c340000000000000 7feffffffffffffe fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2711708
7fefffffffffffff c34fffffffffffff c1a3cdb48240da83 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2713710
7feffffffffffffe c01580f1a3e9c31d 3d258f8ba280bed4 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2725722
7feffffffffffffe ffd800001fffffff bfd0000000000001 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2727724
7feffffffffffffe c27a98a4d75fad64 0000000000000001 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z zdenorm ovrflw FmaResultM=-inf 2736733
c01ffffffe03ffff 7fd00000000c0000 c00ffffffffffffe fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2739736
7feffffffffffffe c3f01ffffff00000 4340000000000001 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2745742
7feffffffffffffe c0550d69ccececd4 403ffffff83fffff fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2761758
7feffffffffffffe c00fffffffffffff b81080ffffffffff fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2776773
7feffffffffffffe c0020ec4bd7f8123 403894684b0415af fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2779776
7feffffffffffffe c34ffffffffffffe 401ffffffffffffe fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2783780
7feffffffffffffe ffe0000000000001 43c0000000000bfe fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2785782
7feffffffffffffe c1f000000003ff7f 40017ffffffffffe fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2788785
bf9ffffffd800000 7fefffffffffffff ffefffffffbfffff fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2960957
e8d01e2c59865900 7fe05fffffffffff c34ffffffffffffe fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2964961
ffd917679344f70e 401fffffffffffff c000000000000000 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 3094090
4470000023ffffff ffe0000000000001 b802000001ffffff fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 3204200
43627f4abb7a5c8e ffefffffffffffff 0010000000000000 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 3274270
c1c0000820000000 7feffffffff8001f 402000100000007f fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 3332328
c1cd41643238b450 7feffffffffffffe 3f4012189596a55a fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 3519515
c80ea7921c438451 7fe008000000007e 424153696dc450d3 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 3552548
4f000fffffffffff ffefffffffffffff 4010000000000000 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 3553549
7fe1868cfb076bc1 c34000000000037f b7effffc003ffffe fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 3719715
c3fff9fffffffffe 7fe0000000000000 3d6000008000000e fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 3726722
43f007ffbfffffff ffefffffffffffff 43dffffeffffffbf fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 3762758
7fdfffdfffffffbe c01fffffffffffff 3fd0000000000000 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 3895891
ffeefffffffffff7 43e0003ffffeffff b7f000001fdfffff fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 4125120
4800002000000007 ffe0000000000000 3ff0000000000000 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 4319314
43f856a5096bfc0d ffeffffffffffffe 3fd0000000000000 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 4391386
c009c2b9147e606c 7fe0000002007fff bfa004001ffffffe fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 4440435
4030008000003fff ffe0000000000000 b810eaddea941d3f fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 4528523
67affffff8000006 f3016e70e2a6bd2f c1edddf29e459b21 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 4548543
ffe07ffbffffffff 5026589203bb88d1 401ffffffffffffe fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 4586581
43dffffc00000003 ffe0000000000000 8000000000000000 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 4598593
ffdfffffff800003 4010000000000001 c290000080000002 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 4627622
ffd001fffffffbff 4010000000000001 8000000000000000 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 4697692
bffffffffffffffe 7fefffffffffffff 3d30040000200000 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 4704699
c000000000000000 7fefffffffffffff bfeffffffffffffe fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 4774769
c000000000000000 7fe9d625d7f2ee96 380ffeffffffc000 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 4797792
41efffffbfffdfff ffe0000000000000 bbf0000003f80000 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 4807802
fcf00000000003e0 7fdfffffffc02000 bfeffffffffffffe fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 4892887
c00ffffffffffffe 7fe0000000000000 001ffffffffffffe fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 4981976
c00ffffffffffffe 7fefffffffffffff 4020e8f734a930e7 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 4983978
ffeffffc01fffffe 43d0000000000000 3806864c983757ae fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 5030024
41b0000000010007 ffe0000000000001 0010000000000000 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 5157151
c3e413dc0ee29162 7fefffffffffffff 8000000000000000 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 5193187
c01ffffffffffffe 7fe0000000000000 401ffffffffffffe fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 5261255
c01ffffffffffffe 7fefffffffffffff c1c177d35a8a07ad fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 5263257
c340000000000000 7feffffffffffffe 3ffffffffffffffe fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 5333327
c34ff0000003fffe 7fefffffffffffff c0101442690e84e3 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 5402396
c340000000000001 7fe41774eee28bfa 37efffff000000ff fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 5437431
c34fffffffffffff 7fe0000000000000 4010008001fffffe fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 5470464
c34ffffffffffffe 7fe0000000000000 bcaffffffffffffe fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 5540534
c34ffffffffffffe 7feffffffffffffe c7e6b68e99fe64db fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 5542536
ffe0000000000000 41effffff7fffffe 2a7000207fffffff fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 5590584
ffe0000000000000 40b00000000008ff 4013ac1788ee2681 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 5599593
ffe0000000000000 4010000000000000 3fdffffffffffffe fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 5603597
ffe0000000000000 401fffffffffffff 0012000000000001 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 5605599
ffe0000000000000 45e00007fff7ffff 9c80852a49e348a6 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 5608602
ffe0000000000000 41e6d2bd893fa49f 0000000000000001 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z zdenorm ovrflw FmaResultM=-inf 5610604
ffe0000000000000 7feffffffffffffe 800ffffffffffffe fff0000000000000 ffefffffffffffff Wrong FmaResultM= z zdenorm ovrflw FmaResultM=-inf 5612606
ffe0000000000000 4804ecddd4dee74f 9700000101fffffe fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 5617611
ffe0000000000000 47e0400000000100 4340000000000001 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 5619613
ffe0000000000000 41d0000000001fff 800007ffffffdfff fff0000000000000 ffefffffffffffff Wrong FmaResultM= z zdenorm ovrflw FmaResultM=-inf 5626620
ffe0000000000001 4c7ffffffff87fff 3fbfdffffffffff7 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 5662656
ffe0000000000001 401ffffffffffffe 001ffffffffffffe fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 5675669
ffe0000000000001 4340000000000001 48700003fffefffe fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 5677671
ffe0000000000001 4000f2f5230ef1a6 382efffffeffffff fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 5689683
ffe0000000000001 407b2a20706ca02f bcc8eea3de85c218 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 5707701
41efdffffffbfffe ffe0000000000001 bca0000000000000 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 5715709
ffe0000000000001 43e000000000ffff 4340000000000001 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 5718712
ffedffffffff7fff 7f500000001fffff 469cefa7e05db8e7 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 5728722
ffefffffffffffff 3fffffffffffffff bcaffffffffffffe fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 5738732
ffefffffffffffff 4000000000000001 800ffffffdffe000 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z zdenorm ovrflw FmaResultM=-inf 5740734
ffefffffffffffff 7fe0000000000000 3fdffffffffffe1f fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 5749743
ffd44208deea7d5b 7fdffffcffffffff caf0000000007fff fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 5764758
ffefffffffffffff 43cffff6ffffffff 47ffba85ed27c05e fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 5779773
ffeffffffffffffe 40b0000fffffffc0 bfd0000000000001 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 5799793
ffeffffffffffffe 43ea49f9e3cf97b4 0000000000000001 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z zdenorm ovrflw FmaResultM=-inf 5808802
ffeffffffffffffe 4000000000000001 800ffffffffffffe fff0000000000000 ffefffffffffffff Wrong FmaResultM= z zdenorm ovrflw FmaResultM=-inf 5810804
ffeffffffffffffe 4010000000000000 bc800001ffffffe0 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 5812806
ffeffffffffffffe 7fe0000000000000 c34ffffffffffffe fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 5819813
ffeffffffffffffe 7feffffffffffffe c1efff801fffffff fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 5821815
ffdfffffc0007ffe 4340000000000001 8000000000000000 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 5886880
c4a000001ffeffff 7fe0000000000000 b80fc03ffffffffe fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 5888882
ffdfffff00000040 48f00001bfffffff c00ffffffffffffe fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 5910904
c37ffffffffffbf0 7fd1800000000000 bfa7e7cad560a3d0 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 5912906
c1700000000007f7 7feffffffffffffe 3f6ff7ffffffefff fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 6240233
c3fffffffdfe0000 7fe0000000000000 c34fff6000000000 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 6447440
400ffffdfffff7fe ffefffffffffffff 41de000000007ffe fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 6483476
4030000000004020 ffe88b9c477c3a97 ffe007ffff000000 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 6575568
7fe00807ffffffff c1e0000000007fe0 bfeffffffffffffe fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 6676669
ffdfc00000000800 7fe0000000000000 bcffffffffffefef fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 6726719
7feffffeffffbfff c34ffffffffffffe c000000000000000 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 6760753
42bff00000000010 ffefffffffffffff c3003a94038a1ec3 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 6762755
c3c00ffffffffeff 7feddda224891f86 43d0aa9335103e61 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 6782775
c08ff80000000400 7fe0000000000001 3ff0000000000000 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 6796789
c07fffdfffffffbe 7feffffffffffffe 474ffffffdffff80 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 6798791
c01fffffeffff7ff 7fd0080080000000 bff26df7cf61cdd5 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 6827820
c7effff000000004 7fe0000008000fff 4770000007ffbfff fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 6863856
7fe85e6f4033d7dd c000000000000000 bfe0000000000000 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 7031023
c1f732bc454b0563 7fe0000000000001 8000000000000000 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 7076068
ffe000000fffffbe 401ffffffffffffe b80d2116944eef72 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 7141133
ffd0002000001fff 40e00003ffffefff c03fffffffe80000 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 7242234

Binary file not shown.

View File

@ -26,13 +26,13 @@ void main() {
char ans[81];
char flags[3];
int FrmE;
long stop = 1119653;
int debug = 1;
long stop = 5587581;
int debug = 0;
//my_string = (char *) malloc (nbytes + 1);
//bytes_read = getline (&my_string, &nbytes, stdin);
for(n=0; n < 305; n++) {//613 for 10000
for(n=0; n < 1000; n++) {//613 for 10000
if(getline(&ln,&nbytes,fp) < 0 || feof(fp)) break;
if(k == stop && debug == 1) break;
k++;

File diff suppressed because it is too large Load Diff

View File

@ -11,26 +11,25 @@ module tb;
wire [4:0] FmaFlagsM;
wire [12:0] aligncntE; // status flags
wire [105:0] rE; // one result of partial product sum
wire [105:0] sE; // other result of partial products
wire [163:0] tE; // wire of alignment shifter
wire [105:0] ProdManE; // other result of partial products
wire [161:0] AlignedAddendE; // wire of alignment shifter
wire [8:0] normcntE; // shift count for normalizer
wire [12:0] aeE; // multiplier expoent
wire bsE; // sticky bit of addend
wire killprodE; // ReadData3E >> product
wire [12:0] ProdExpE; // multiplier expoent
wire AddendStickyE; // sticky bit of addend
wire KillProdE; // ReadData3E >> product
wire prodofE; // ReadData1E*ReadData2E out of range
wire xzeroE;
wire XZeroE;
wire yzeroE;
wire zzeroE;
wire xdenormE;
wire ydenormE;
wire zdenormE;
wire xinfE;
wire yinfE;
wire zinfE;
wire xnanE;
wire ynanE;
wire znanE;
wire XDenormE;
wire YDenormE;
wire ZDenormE;
wire XInfE;
wire YInfE;
wire ZInfE;
wire XNaNE;
wire YNaNE;
wire ZNaNE;
wire nanE;
wire [8:0] sumshiftE;
wire sumshiftzeroE;
@ -45,16 +44,16 @@ reg ansnan;
reg [105:0] s; // partial product 2
reg [51:0] xnorm;
reg [51:0] ynorm;
wire [3:0] FOpCtrlM;
assign FOpCtrlM = 4'b0;
localparam period = 20;
fma1 UUT1(.*);
fma2 UUT2(.ReadData1M(ReadData1E), .ReadData2M(ReadData2E), .ReadData3M(ReadData3E), .FrmM(FrmE),
.aligncntM(aligncntE), .rM(rE), .sM(sE),
.tM(tE), .normcntM(normcntE), .aeM(aeE), .bsM(bsE),.killprodM(killprodE),
.xzeroM(xzeroE), .yzeroM(yzeroE),.zzeroM(zzeroE),.xdenormM(xdenormE),.ydenormM(ydenormE),
.zdenormM(zdenormE),.xinfM(xinfE),.yinfM(yinfE),.zinfM(zinfE),.xnanM(xnanE),.ynanM(ynanE),.znanM(znanE),
.nanM(nanE),.sumshiftM(sumshiftE),.sumshiftzeroM(sumshiftzeroE), .prodinfM(prodinfE), .*);
fma2 UUT2(.ReadData1M(ReadData1E), .ReadData2M(ReadData2E), .ReadData3M(ReadData3E), .FrmM(FrmE), .ProdManM(ProdManE),
.AlignedAddendM(AlignedAddendE), .ProdExpM(ProdExpE), .AddendStickyM(AddendStickyE),.KillProdM(KillProdE),
.XZeroM(XZeroE),.YZeroM(YZeroE),.ZZeroM(ZZeroE),.XInfM(XInfE),.YInfM(YInfE),.ZInfM(ZInfE),.XNaNM(XNaNE),.YNaNM(YNaNE),.ZNaNM(ZNaNE), .*);
initial

View File

@ -1,65 +0,0 @@
////////////////////////////////////////////////////////////////////////////////
//
// Block Name: add.v
// Author: David Harris
// Date: 11/12/1995
//
// Block Description:
// This block performs the addition of the product and addend. It also
// contains logic necessary to adjust the signs for effective subtracts
// and negative results.
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
module add(rM, sM, tM, sum,
           negsum, invz, selsum1, negsum0, negsum1, killprodM);
////////////////////////////////////////////////////////////////////////////////
    // Adds the product (held as two partial products rM and sM) to the
    // aligned addend tM, in both a "+0" and a "+1" flavour, and returns the
    // selected flavour, optionally negated.
    //
    // The product r2+s2 is placed two bits above the addend LSB and
    // zero-extended to 165 bits; the extra top bit (bit 164) of each raw sum
    // is reported as negsum0/negsum1.
    input logic [105:0]     rM;         // partial product 1
    input logic [105:0]     sM;         // partial product 2
    input logic [163:0]     tM;         // aligned addend
    input logic             invz;       // invert addend
    input logic             selsum1;    // select +1 mode of compound adder
    input logic             killprodM;  // z >> product
    input logic             negsum;     // negate sum
    output logic [163:0]    sum;        // selected sum, negated when negsum is set
    output logic            negsum0;    // sum was negative in +0 mode
    output logic            negsum1;    // sum was negative in +1 mode

    // Internal nodes
    wire [105:0]    r2;             // partial product 1, zeroed when killprodM
    wire [105:0]    s2;             // partial product 2, zeroed when killprodM
    wire [164:0]    t2;             // zero-extended addend, inverted when invz
    wire [164:0]    prodaligned;    // r2+s2 aligned against the addend
    wire [164:0]    sum0;           // compound adder result, +0 mode
    wire [164:0]    sum1;           // compound adder result, +1 mode

    // Invert the addend if Z's sign is different from the product's sign
    assign t2 = invz ? ~{1'b0, tM} : {1'b0, tM};

    // Zero out the product if Z >> product
    assign r2 = killprodM ? 106'b0 : rM;
    assign s2 = killprodM ? 106'b0 : sM;

    // The addition inside the concatenation is self-determined, so r2+s2 is
    // kept to 106 bits exactly as before: 57 + 106 + 2 = 165 bits.
    assign prodaligned = {57'b0, r2 + s2, 2'b0};

    //***replace this with a more structural cpa that synthesizes better
    // Compound adder: the same operands summed with and without a carry-in.
    assign sum0 = t2 + prodaligned;
    assign sum1 = t2 + prodaligned + 165'd1;    // +1 from invert of z above

    // Check sign bits in +0/+1 modes
    assign negsum0 = sum0[164];
    assign negsum1 = sum1[164];

    // Mux the proper result (+0/+1 mode, with or without negation): 4:1 mux
    assign sum = selsum1 ? (negsum ? -sum1[163:0] : sum1[163:0])
                         : (negsum ? -sum0[163:0] : sum0[163:0]);

endmodule

View File

@ -1,88 +0,0 @@
///////////////////////////////////////////////////////////////////////////////
// Block Name: align.v
// Author: David Harris
// Date: 11/2/1995
//
// Block Description:
// This block implements the alignment shifter. It is responsible for
// adjusting the fraction portion of the addend relative to the fraction
// produced in the multiplier array.
//
/////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////
module align(zman, aligncntE, xzeroE, yzeroE, zzeroE, zdenormE, tE, bsE,
             killprodE, sumshiftE, sumshiftzeroE);
/////////////////////////////////////////////////////////////////////////////
    // Alignment shifter: shifts {leading bit, zman} right by a count derived
    // from aligncntE, returns the upper 164 bits as the aligned addend tE and
    // ORs the 52 bits shifted out below it into the sticky bit bsE.
    input logic [51:0]      zman;           // Fraction of addend z;
    input logic [12:0]      aligncntE;      // amount to shift
    input logic             xzeroE;         // Input X = 0
    input logic             yzeroE;         // Input Y = 0
    input logic             zzeroE;         // Input Z = 0
    input logic             zdenormE;       // Input Z is denormalized
    output logic [163:0]    tE;             // aligned addend (54 bits left of bpt)
    output logic            bsE;            // sticky bit of addend
    output logic            killprodE;      // Z >> product
    output logic [8:0]      sumshiftE;      // right-shift amount applied to the addend
    output logic            sumshiftzeroE;  // set only in the saturated addend-anchored case

    // Internal nodes
    reg   [215:0]   shift;      // aligned addend from shifter
    logic [12:0]    tmp;        // 57 - aligncntE (p + 4 - d)
    logic           leadbit;    // bit placed above zman before shifting

    always_comb
        begin
            // Every variable below gets a value on every path through the
            // block, so no latch is inferred (tmp used to be written in only
            // two of the four branches).
            tmp = 13'd57 - aligncntE;

            // Default to using the product as primary operand in the adder
            killprodE = xzeroE | yzeroE;
            sumshiftzeroE = 1'b0;
            leadbit = ~zdenormE;

            // d = aligncntE
            // p = 53
            if ($signed(aligncntE) <= $signed(-(13'd105))) begin        // d <= -2p+1
                // product anchored case with saturated shift
                sumshiftE = 9'd163;     // 3p+4
                // NOTE(review): the leading bit is forced to 1 here even when
                // zdenormE is set, unlike the other cases. Presumably this
                // guarantees a nonzero LSB for any nonzero addend - confirm.
                leadbit = 1'b1;
            end else if ($signed(aligncntE) <= $signed(13'd55)) begin   // -2p+1 < d <= p+2
                // product anchored / cancellation (d <= 2) and addend
                // anchored (2 < d <= p+2) cases share the same shift count
                sumshiftE = tmp[8:0];
            end else begin                                              // d >= p+3
                // addend anchored case with saturated shift
                sumshiftE = 9'd0;
                sumshiftzeroE = 1'b1;
                killprodE = 1'b1;
            end

            // Single shifter shared by all cases
            shift = {leadbit, zman, 163'b0} >> sumshiftE;
            tE    = zzeroE ? 0 : {shift[215:52]};
            bsE   = |(shift[51:0]);
        end

endmodule

View File

@ -1,53 +0,0 @@
module booth(xExt, choose, add1, e, pp);
/////////////////////////////////////////////////////////////////////////////
    // Decodes one 3-bit selector into a partial product of xExt.
    //
    // Negative multiples are formed by bitwise inversion only; add1 carries
    // the value reported alongside them (see the table for the exact codes),
    // and e is set for every selector whose top bit is 1.
    input logic [53:0]  xExt;   // multiplicand xExt
    input logic [2:0]   choose; // bits needed to choose which encoding
    output logic [1:0]  add1;   // do you add 1
    output logic        e;      // set for selectors 100..111
    output logic [54:0] pp;     // the resultant encoding

    logic [53:0] negx;          // bitwise complement of xExt

    assign negx = ~xExt;

    // One table drives all three outputs so the encodings cannot drift apart.
    // All eight selector values are listed and no default is given, which
    // keeps the behaviour for unknown (X/Z) selectors unchanged.
    always_comb
        case (choose)
            3'b000 : begin pp = 55'b0;        e = 1'b0; add1 = 2'b00; end //  0
            3'b001 : begin pp = {1'b0, xExt}; e = 1'b0; add1 = 2'b00; end //  1
            3'b010 : begin pp = {1'b0, xExt}; e = 1'b0; add1 = 2'b00; end //  1
            3'b011 : begin pp = {xExt, 1'b0}; e = 1'b0; add1 = 2'b00; end //  2
            3'b100 : begin pp = {negx, 1'b0}; e = 1'b1; add1 = 2'b10; end // -2
            3'b101 : begin pp = {1'b1, negx}; e = 1'b1; add1 = 2'b01; end // -1
            3'b110 : begin pp = {1'b1, negx}; e = 1'b1; add1 = 2'b01; end // -1
            3'b111 : begin pp = '1;           e = 1'b1; add1 = 2'b01; end // -0
        endcase

endmodule

View File

@ -1,93 +0,0 @@
// //***breaks lint with warnings like: %Warning-UNOPTFLAT: Example path: src/fpu/compressors.sv:37: ASSIGNW
// //%Warning-UNOPTFLAT: Example path: src/fpu/compressors.sv:32: wallypipelinedsoc.hart.fpu.fma1.multiply.genblk5[0].add4.cout
// module add3comp2(a, b, c, carry, sum);
// /////////////////////////////////////////////////////////////////////////////
// //look into different implementations of the compressors?
// parameter BITS = 4;
// input logic [BITS-1:0] a;
// input logic [BITS-1:0] b;
// input logic [BITS-1:0] c;
// output logic [BITS-1:0] carry;
// output logic [BITS-1:0] sum;
// genvar i;
// generate
// for(i= 0; i<BITS; i=i+1) begin
// sng3comp2 add0(a[i], b[i], c[i], carry[i], sum[i]);
// end
// endgenerate
// endmodule
// module add4comp2(a, b, c, d, carry, sum);
// /////////////////////////////////////////////////////////////////////////////
// parameter BITS = 4;
// input logic [BITS-1:0] a;
// input logic [BITS-1:0] b;
// input logic [BITS-1:0] c;
// input logic [BITS-1:0] d;
// output logic [BITS:0] carry;
// output logic [BITS-1:0] sum;
// logic [BITS-1:0] cout;
// logic carryTmp;
// genvar i;
// sng4comp2 add0(a[0], b[0], c[0], d[0], 1'b0, cout[0], carry[0], sum[0]);
// generate
// for(i= 1; i<BITS-1; i=i+1) begin
// sng4comp2 add1(a[i], b[i], c[i], d[i], cout[i-1], cout[i], carry[i], sum[i]);
// end
// endgenerate
// sng4comp2 add2(a[BITS-1], b[BITS-1], c[BITS-1], d[BITS-1], cout[BITS-2], cout[BITS-1], carryTmp, sum[BITS-1]);
// assign carry[BITS-1] = carryTmp & cout[BITS-1];
// assign carry[BITS] = carryTmp ^ cout[BITS-1];
// endmodule
// module sng3comp2(a, b, c, carry, sum);
// /////////////////////////////////////////////////////////////////////////////
// //look into different implementations of the compressors?
// input logic a;
// input logic b;
// input logic c;
// output logic carry;
// output logic sum;
// logic axorb;
// assign axorb = a ^ b;
// assign sum = axorb ^ c;
// assign carry = axorb ? c : a;
// endmodule
// module sng4comp2(a, b, c, d, cin, cout, carry, sum);
// /////////////////////////////////////////////////////////////////////////////
// //look into pass gate 4:2 counters?
// input logic a;
// input logic b;
// input logic c;
// input logic d;
// input logic cin;
// output logic cout;
// output logic carry;
// output logic sum;
// logic TmpSum;
// sng3comp2 add1(.carry(cout), .sum(TmpSum),.*);
// sng3comp2 add2(.a(TmpSum), .b(d), .c(cin), .*);
// endmodule

View File

@ -1,90 +0,0 @@
///////////////////////////////////////////////////////////////////////////////
// Block Name: expgen.v
// Author: David Harris
// Date: 11/2/1995
//
// Block Description:
// This block implements the exponent path of the FMAC. It performs the
// following operations:
//
// 1) Compute exponent of multiply.
// 2) Compare multiply and add exponents to generate alignment shift count
// 3) Adjust exponent based on normalization
// 4) Increment exponent based on postrounding renormalization
//
/////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////
// expgen1: first-stage exponent path of the FMA.
//
// Produces the product exponent (aeE), a flag telling whether that exponent
// is above the largest finite biased exponent (prodof), and the shift count
// handed to the alignment shifter (aligncntE). All arithmetic is carried in
// 13 bits and wraps modulo 2^13.
/////////////////////////////////////////////////////////////////////////////
module expgen1(
  input  logic [62:52] xexp,       // Exponent of multiplicand x
  input  logic [62:52] yexp,       // Exponent of multiplicand y
  input  logic [62:52] zexp,       // Exponent of addend z
  input  logic         xzeroE,     // x is zero
  input  logic         yzeroE,     // y is zero
  input  logic         xdenormE,   // x is denormalized
  input  logic         ydenormE,   // y is denormalized
  input  logic         zdenormE,   // z is denormalized
  output logic [12:0]  aligncntE,  // shift count for alignment shifter
  output logic         prodof,     // X*Y exponent out of bounds
  output logic [12:0]  aeE         // exponent of multiply
);

  logic [12:0] expSum;     // xexp + yexp with one bias (1023) removed
  logic [12:0] denormAdj;  // constant -1 plus the per-operand "not denormal" terms

  // Exponent of the product. A zero multiplicand forces the exponent to zero.
  assign expSum = {2'b0, xexp} + {2'b0, yexp} - 13'd1023;
  assign aeE    = (xzeroE | yzeroE) ? 13'b0 : expSum;

  // Out of bounds when the exponent exceeds 2046 while bit 12 is clear,
  // i.e. the 13-bit value has not wrapped into the "negative" half.
  assign prodof = (aeE > 2046) && ~aeE[12];

  // Alignment shift count:
  //   zexp - aeE - 1 + ~xdenormE + ~ydenormE - ~zdenormE
  // Each ~denorm term is a single bit zero-extended to 13 bits.
  assign denormAdj = {12'b0, ~xdenormE} + {12'b0, ~ydenormE}
                   - {12'b0, ~zdenormE} - 13'd1;
  assign aligncntE = {2'b0, zexp} - aeE + denormAdj;

endmodule

View File

@ -1,108 +0,0 @@
///////////////////////////////////////////////////////////////////////////////
// Block Name: expgen.v
// Author: David Harris
// Date: 11/2/1995
//
// Block Description:
// This block implements the exponent path of the FMAC. It performs the
// following operations:
//
// 1) Compute exponent of multiply.
// 2) Compare multiply and add exponents to generate alignment shift count
// 3) Adjust exponent based on normalization
// 4) Increment exponent based on postrounding renormalization
//
/////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////
// expgen2: second-stage exponent path of the FMA.
//
// Takes the normalized exponent de0, zeroes it for zero/denormal results,
// reports exponent overflow/underflow, and muxes in the exponent field for
// special results (NaN, overflow, infinity, underflow) when specialsel is set.
// FmaFlagsM bits read here: [4] invalid, [2] overflow, [1] underflow.
/////////////////////////////////////////////////////////////////////////////
module expgen2(
  input  logic [62:52] xexp,          // Exponent of multiplicand x
  input  logic [62:52] yexp,          // Exponent of multiplicand y
  input  logic [62:52] zexp,          // Exponent of addend z
  input  logic         sumzero,       // sum exactly equals zero
  input  logic         resultdenorm,  // result is denormalized
  input  logic         infinity,      // generate infinity on overflow
  input  logic [4:0]   FmaFlagsM,     // exception flags
  input  logic         inf,           // Some input is infinity
  input  logic         expplus1,      // add one to the result exponent
  input  logic         nanM,          // Some input is NaN
  input  logic [12:0]  de0,           // normalized exponent before special-case handling
  input  logic         xnanM,         // X is NaN
  input  logic         ynanM,         // Y is NaN
  input  logic         znanM,         // Z is NaN
  input  logic         specialsel,    // Select special result
  output logic [62:52] wexp,          // Exponent of result
  output logic         sumof,         // X*Y+Z exponent out of bounds
  output logic         sumuf          // X*Y+Z exponent underflows
);

  localparam logic [10:0] EXP_ALL_ONES   = 11'h7FF;  // Inf / NaN exponent field
  localparam logic [10:0] EXP_MAX_FINITE = 11'h7FE;  // largest finite exponent field

  logic [12:0] de;           // exponent after zero/denormal forcing
  logic [10:0] infinityres;  // Infinity or max number
  logic [10:0] nanres;       // NaN propagated or generated
  logic [10:0] specialres;   // Exceptional case result
  logic [10:0] normalexp;    // exponent field of the ordinary result

  // Zero and denormal results carry an exponent field of zero.
  assign de = (resultdenorm | sumzero) ? 13'b0 : de0;

  // Range checks on the forced exponent. Overflow needs bit 12 clear so that
  // wrapped (negative) exponents are not mistaken for large ones.
  assign sumof = (de > 2046) && ~de[12];
  assign sumuf = (de == 0) && ~sumzero && ~resultdenorm;

  // Overflow yields either infinity or the largest finite number, depending
  // on the `infinity` input.
  assign infinityres = infinity ? EXP_ALL_ONES : EXP_MAX_FINITE;

  // IEEE 754-2008 section 6.2.3: the payload of a NaN result should come
  // from one of the NaN inputs; the standard leaves the choice open.
  // Priority here is x, then y, then z, else a generated all-ones exponent.
  assign nanres = xnanM ? xexp :
                  ynanM ? yexp :
                  znanM ? zexp : EXP_ALL_ONES;

  // Special-result priority: invalid/NaN, overflow, infinity, underflow.
  // Anything else is a don't-care because specialsel should not pick it.
  assign specialres = (FmaFlagsM[4] | nanM) ? nanres       :  // invalid
                      FmaFlagsM[2]          ? infinityres  :  // overflow
                      inf                   ? EXP_ALL_ONES :
                      FmaFlagsM[1]          ? 11'b0        :  // underflow
                                              11'bx;

  // Final mux between the special result and the normal exponent, the latter
  // optionally incremented by expplus1.
  assign normalexp = de[10:0] + {10'b0, expplus1};
  assign wexp      = specialsel ? specialres : normalexp;

endmodule

View File

@ -168,8 +168,8 @@ module fctrl (
//fma/mult
// fmadd = ?000
// fmsub = ?001
// fnmadd = ?010
// fnmsub = ?011
// fnmsub = ?010 -(a*b)+c
// fnmadd = ?011 -(a*b)-c
// fmul = ?100
// {?, is mul, is negative, is sub}
3'b010 : begin FOpCtrlD = {1'b0, OpD[4:2]}; FInput2UsedD = 1'b1; FInput3UsedD = ~OpD[4]; end

View File

@ -1,34 +0,0 @@
///////////////////////////////////////////////////////////////////////////////
// Block Name: flag.v
// Author: David Harris
// Date: 12/6/1995
//
// Block Description:
// This block generates the flags: invalid, overflow, underflow, inexact.
/////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////
// flag1: first-stage special-value summary for the FMA.
//   nanE     - at least one of the three operands is a NaN
//   prodinfE - the product exponent overflowed and neither multiplicand is
//              a NaN
/////////////////////////////////////////////////////////////////////////////
module flag1(
  input  logic xnanE,     // X is NaN
  input  logic ynanE,     // Y is NaN
  input  logic znanE,     // Z is NaN
  input  logic prodof,    // X*Y overflows exponent
  output logic prodinfE,  // X*Y larger than max possible
  output logic nanE       // Some source is NaN
);

  // Any NaN operand is reported so the NaN can be propagated.
  assign nanE = |{xnanE, ynanE, znanE};

  // A product overflow only counts when the multiplicands are real numbers.
  assign prodinfE = prodof & ~(xnanE | ynanE);

endmodule

View File

@ -1,80 +0,0 @@
///////////////////////////////////////////////////////////////////////////////
// Block Name: flag.v
// Author: David Harris
// Date: 12/6/1995
//
// Block Description:
// This block generates the flags: invalid, overflow, underflow, inexact.
/////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////
// flag2: second-stage exception flag generation for the FMA.
//
// FmaFlagsM layout: [4] invalid, [3] divide by zero (always 0 for FMA),
//                   [2] overflow, [1] underflow, [0] inexact.
//
// Fix relative to the previous version: `inf` was widened to include
// `suminf`, but overflow was still computed as `suminf && ~inf` and inexact
// was still masked with `~inf`. That made the overflow flag constant 0 and
// suppressed inexact on overflow. Those two flags are now gated on *input*
// infinities only (inputinf); the `inf` output itself is unchanged.
/////////////////////////////////////////////////////////////////////////////
module flag2(
  input  logic       xsign,      // Sign of x
  input  logic       ysign,      // Sign of y
  input  logic       zsign,      // Sign of z
  input  logic       xnanM,      // X is NaN
  input  logic       ynanM,      // Y is NaN
  input  logic       znanM,      // Z is NaN
  input  logic       xinfM,      // X is Inf
  input  logic       yinfM,      // Y is Inf
  input  logic       zinfM,      // Z is Inf
  input  logic       sumof,      // X*Y + Z overflows exponent
  input  logic       sumuf,      // X*Y + Z underflows exponent
  input  logic       xzeroM,     // x = 0
  input  logic       yzeroM,     // y = 0
  input  logic       zzeroM,     // z = 0
  input  logic [1:0] vbits,      // R and S bits of result
  input  logic       killprodM,  // product killed by the alignment stage
  output logic       inf,        // Some source is Inf, or the sum overflowed
  input  logic       nanM,       // Some source is NaN
  output logic [4:0] FmaFlagsM,  // exception flags, layout in the header
  input  logic       sticky,     // sticky bit of the result
  input  logic       prodinfM    // X*Y larger than max possible
);

  // Internal nodes
  logic inputinf;  // at least one operand is an infinity
  logic suminf;    // sum overflowed and no operand is a NaN

  assign inputinf = xinfM || yinfM || zinfM;
  assign suminf   = sumof && ~xnanM && ~ynanM && ~znanM;

  // Infinity indication (inf - inf and 0 * inf don't propagate inf, but
  // that is fine because the invalid operation takes higher precedence).
  assign inf = inputinf || suminf;

  // Overflow: the finite-operand sum is out of bounds. An operation on an
  // infinite operand is exact and must not raise overflow.
  assign FmaFlagsM[2] = suminf && ~inputinf;

  // Underflow: the sum exponent underflowed on an otherwise ordinary result,
  // or the product was killed while the addend is zero and neither
  // multiplicand is zero (a nonzero product too small to represent).
  assign FmaFlagsM[1] = (sumuf && ~inf && ~prodinfM && ~nanM) ||
                        (killprodM & zzeroM & ~(yzeroM | xzeroM));

  // Inexact: any of the round/sticky bits is set, or the result overflowed.
  // Suppressed when an operand is an infinity or a NaN.
  assign FmaFlagsM[0] = (vbits[0] || vbits[1] || sticky || suminf) &&
                        ~(inputinf || nanM);

  // Invalid:
  //   1) Inf - Inf (infinite product and infinite addend of opposing sign)
  //   2) 0 * Inf
  assign FmaFlagsM[4] = ((xinfM || yinfM || prodinfM) && zinfM && (xsign ^ ysign ^ zsign)) ||
                        (xzeroM && yinfM) ||
                        (yzeroM && xinfM);

  // FMA can never raise divide by zero.
  assign FmaFlagsM[3] = 1'b0;

endmodule

View File

@ -1,103 +1,141 @@
////////////////////////////////////////////////////////////////////////////////
// Block Name: fmac.v
// Author: David Harris
// Date: 11/2/1995
//
// Block Description:
// This is the top level block of a floating-point multiply/accumulate
// unit(FMAC). It instantiates the following sub-blocks:
//
// array Booth encoding, partial product generation, product summation
// expgen Exponent summation, compare, and adjust
// align Alignment shifter
// add Carry-save adder for accumulate, carry propagate adder
// lza Leading zero anticipator to control normalization shifter
// normalize Normalization shifter
// round Rounding of result
// exception Handles exceptional cases
// bypass Handles bypass of result to FInput1E or FInput3E inputs
// sign One bit sign handling block
// special Catch special cases (inputs = 0 / infinity / etc.)
//
// The FMAC computes FmaResultM=FInput1E*FInput2E+FInput3E, rounded with the mode specified by
// RN, RZ, RM, or RP. The result is optionally bypassed back to
// the FInput1E or FInput3E inputs for use on the next cycle. In addition, four signals
// are produced: trap, overflow, underflow, and inexact. Trap indicates
// an infinity, NaN, or denormalized number to be handled in software;
// the other three signals are IEEE flags.
//
/////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////
module fma1(FInput1E, FInput2E, FInput3E, FrmE,
rE, sE, tE, bsE, killprodE, sumshiftE, sumshiftzeroE, aligncntE, aeE
, xzeroE, yzeroE, zzeroE, xnanE,ynanE, znanE, xdenormE, ydenormE, zdenormE,
xinfE, yinfE, zinfE, nanE, prodinfE);
/////////////////////////////////////////////////////////////////////////////
module fma1(
input logic [63:0] FInput1E; // input 1
input logic [63:0] FInput2E; // input 2
input logic [63:0] FInput3E; // input 3
input logic [2:0] FrmE; // Rounding mode
output logic [12:0] aligncntE; // status flags
output logic [105:0] rE; // one result of partial product sum
output logic [105:0] sE; // other result of partial products
output logic [163:0] tE; // output logic of alignment shifter
output logic [12:0] aeE; // multiplier expoent
output logic bsE; // sticky bit of addend
output logic killprodE; // FInput3E >> product
output logic xzeroE;
output logic yzeroE;
output logic zzeroE;
output logic xdenormE;
output logic ydenormE;
output logic zdenormE;
output logic xinfE;
output logic yinfE;
output logic zinfE;
output logic xnanE;
output logic ynanE;
output logic znanE;
output logic nanE;
output logic prodinfE;
output logic [8:0] sumshiftE;
output logic sumshiftzeroE;
input logic [63:0] FInput1E,
input logic [63:0] FInput2E,
input logic [63:0] FInput3E,
input logic [3:0] FOpCtrlE,
output logic [105:0] ProdManE,
output logic [161:0] AlignedAddendE,
output logic [12:0] ProdExpE,
output logic AddendStickyE,
output logic KillProdE,
output logic XZeroE, YZeroE, ZZeroE,
output logic XInfE, YInfE, ZInfE,
output logic XNaNE, YNaNE, ZNaNE);
// Internal nodes
// output logic [12:0] aligncntE; // shift count for alignment
logic [51:0] XMan,YMan,ZMan;
logic [10:0] XExp,YExp,ZExp;
logic XSgn,YSgn,ZSgn;
logic [12:0] AlignCnt;
logic [211:0] Shift;
logic XDenormE, YDenormE, ZDenormE;
logic [63:0] FInput3E2;
// Set addend to zero if FMUL instruction
assign FInput3E2 = FOpCtrlE[2] ? 64'b0 : FInput3E;
// split inputs into the sign bit, mantissa, and exponent for readability
assign XSgn = FInput1E[63];
assign YSgn = FInput2E[63];
assign ZSgn = FInput3E2[63];
assign XExp = FInput1E[62:52];
assign YExp = FInput2E[62:52];
assign ZExp = FInput3E2[62:52];
assign XMan = FInput1E[51:0];
assign YMan = FInput2E[51:0];
assign ZMan = FInput3E2[51:0];
logic prodof; // FInput1E*FInput2E out of range
// determine if an input is a special value
assign XNaNE = &FInput1E[62:52] && |FInput1E[51:0];
assign YNaNE = &FInput2E[62:52] && |FInput2E[51:0];
assign ZNaNE = &FInput3E2[62:52] && |FInput3E2[51:0];
assign XDenormE = ~(|FInput1E[62:52]) && |FInput1E[51:0];
assign YDenormE = ~(|FInput2E[62:52]) && |FInput2E[51:0];
assign ZDenormE = ~(|FInput3E2[62:52]) && |FInput3E2[51:0];
assign XInfE = &FInput1E[62:52] && ~(|FInput1E[51:0]);
assign YInfE = &FInput2E[62:52] && ~(|FInput2E[51:0]);
assign ZInfE = &FInput3E2[62:52] && ~(|FInput3E2[51:0]);
assign XZeroE = ~(|FInput1E[62:0]);
assign YZeroE = ~(|FInput2E[62:0]);
assign ZZeroE = ~(|FInput3E2[62:0]);
// Calculate the product's exponent
// - When multiplying two fp numbers, add the exponents
// - Subtract 3ff to remove one of the biases (XExp + YExp has two biases, one from each exponent)
// - Denormal numbers have an exponent value of 1, however they are
// represented with an exponent of 0. Add one if there is a denormal number
assign ProdExpE = (XZeroE|YZeroE) ? 13'b0 :
{2'b0, XExp} + {2'b0, YExp} - 13'h3ff + {12'b0, XDenormE} + {12'b0, YDenormE};
// Calculate the product's mantissa
// - Add the assumed one. If the number is denormalized or zero, it does not have an assumed one.
assign ProdManE = {53'b0,~(XDenormE|XZeroE),XMan} * {53'b0,~(YDenormE|YZeroE),YMan};
// Determine the shift count for alignment
// - negative means Z is larger, so shift Z left
// - positive means the product is larger, so shift Z right
// - Denormal numbers have an exponent value of 1, however they are
// represented with an exponent of 0. Add one to the exponent if it is a denormal number
assign AlignCnt = ProdExpE - {2'b0, ZExp} - {12'b0, ZDenormE};
// Alignment shifter
// Default addition without shifting
// | 55'b0 | 106'b(product) | 2'b0 |
// |1'b0| addend |
// the 1'b0 before the addend is there because the product's mantissa has two bits before the binary point (xx.xxxxxxxxxx...)
always_comb
begin
// Set default values
AddendStickyE = 0;
KillProdE = 0;
// If the product is too small to affect the sum, kill the product
// | 55'b0 | 106'b(product) | 2'b0 |
// | addend |
if ($signed(AlignCnt) <= $signed(-13'd56)) begin
KillProdE = 1;
AlignedAddendE = {107'b0, ~(ZZeroE|ZDenormE),ZMan,2'b0};
AddendStickyE = ~(XZeroE|YZeroE);
// If the addend is shifted left (negative AlignCnt)
// | 55'b0 | 106'b(product) | 2'b0 |
// | addend |
end else if($signed(AlignCnt) <= $signed(13'd0)) begin
Shift = {55'b0, ~(ZZeroE|ZDenormE),ZMan, 104'b0} << -AlignCnt;
AlignedAddendE = Shift[211:50];
AddendStickyE = |(Shift[49:0]);
// If the Addend is shifted right (positive AlignCnt)
// | 55'b0 | 106'b(product) | 2'b0 |
// | addnend |
end else if ($signed(AlignCnt)<=$signed(13'd105)) begin
Shift = {55'b0, ~(ZZeroE|ZDenormE),ZMan, 104'b0} >> AlignCnt;
AlignedAddendE = Shift[211:50];
AddendStickyE = |(Shift[49:0]);
// If the addend is too small to affect the addition
// - The addend has to shift two past the end of the addend to be considered too small
// - The 2 extra bits are needed for rounding
// | 55'b0 | 106'b(product) | 2'b0 |
// | addend |
end else begin
AlignedAddendE = 162'b0;
AddendStickyE = ~ZZeroE;
// Instantiate fraction datapath
multiply multiply(.xman(FInput1E[51:0]), .yman(FInput2E[51:0]), .*);
align align(.zman(FInput3E[51:0]),.*);
// Instantiate exponent datapath
expgen1 expgen1(.xexp(FInput1E[62:52]),.yexp(FInput2E[62:52]),.zexp(FInput3E[62:52]),.*);
// Instantiate special case detection across datapath & exponent path
special special(.*);
// Instantiate control output logic
flag1 flag1(.*);
end
end
endmodule

View File

@ -1,107 +1,110 @@
////////////////////////////////////////////////////////////////////////////////
// Block Name: fmac.v
// Author: David Harris
// Date: 11/2/1995
//
// Block Description:
// This is the top level block of a floating-point multiply/accumulate
// unit(FMAC). It instantiates the following sub-blocks:
//
// array Booth encoding, partial product generation, product summation
// expgen Exponent summation, compare, and adjust
// align Alignment shifter
// add Carry-save adder for accumulate, carry propagate adder
// lza Leading zero anticipator to control normalization shifter
// normalize Normalization shifter
// round Rounding of result
// exception Handles exceptional cases
// bypass Handles bypass of result to FInput1M or FInput3M inputs
// sign One bit sign handling block
// special Catch special cases (inputs = 0 / infinity / etc.)
//
// The FMAC computes FmaResultM=FInput1M*FInput2M+FInput3M, rounded with the mode specified by
// RN, RZ, RM, or RP. The result is optionally bypassed back to
// the FInput1M or FInput3M inputs for use on the next cycle. In addition, four signals
// are produced: trap, overflow, underflow, and inexact. Trap indicates
// an infinity, NaN, or denormalized number to be handled in software;
// the other three signals are IEEE flags.
//
/////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////
module fma2(FInput1M, FInput2M, FInput3M, FrmM,
FmaResultM, FmaFlagsM, aligncntM, rM, sM,
tM, normcntM, aeM, bsM,killprodM,
xzeroM, yzeroM,zzeroM,xdenormM,ydenormM,
zdenormM,xinfM,yinfM,zinfM,xnanM,ynanM,znanM,
nanM,sumshiftM,sumshiftzeroM,prodinfM
);
/////////////////////////////////////////////////////////////////////////////
module fma2(
input logic [63:0] FInput1M; // input logic 1
input logic [63:0] FInput2M; // input logic 2
input logic [63:0] FInput3M; // input logic 3
input logic [2:0] FrmM; // Rounding mode
input logic [12:0] aligncntM; // status flags
input logic [105:0] rM; // one result of partial product sum
input logic [105:0] sM; // other result of partial products
input logic [163:0] tM; // output of alignment shifter
input logic [8:0] normcntM; // shift count for normalizer
input logic [12:0] aeM; // multiplier expoent
input logic bsM; // sticky bit of addend
input logic killprodM; // FInput3M >> product
input logic prodinfM;
input logic xzeroM;
input logic yzeroM;
input logic zzeroM;
input logic xdenormM;
input logic ydenormM;
input logic zdenormM;
input logic xinfM;
input logic yinfM;
input logic zinfM;
input logic xnanM;
input logic ynanM;
input logic znanM;
input logic nanM;
input logic [8:0] sumshiftM;
input logic sumshiftzeroM;
output logic [63:0] FmaResultM; // output FmaResultM=FInput1M*FInput2M+FInput3M
output logic [4:0] FmaFlagsM; // status flags
input logic [63:0] FInput1M,
input logic [63:0] FInput2M,
input logic [63:0] FInput3M,
input logic [2:0] FrmM,
input logic [105:0] ProdManM,
input logic [161:0] AlignedAddendM,
input logic [12:0] ProdExpM,
input logic AddendStickyM,
input logic KillProdM,
input logic [3:0] FOpCtrlM,
input logic XZeroM, YZeroM, ZZeroM,
input logic XInfM, YInfM, ZInfM,
input logic XNaNM, YNaNM, ZNaNM,
output logic [63:0] FmaResultM,
output logic [4:0] FmaFlagsM);
// Internal nodes
logic [163:0] sum; // output of carry prop adder
logic [53:0] v; // normalized sum, R, S bits
// logic [12:0] aligncnt; // shift count for alignment
logic [8:0] normcnt; // shift count for normalizer
logic negsum; // negate sum
logic invz; // invert addend
logic selsum1; // select +1 mode of sum
logic negsum0; // sum +0 < 0
logic negsum1; // sum +1 < 0
logic sumzero; // sum = 0
logic infinity; // generate infinity on overflow
logic sumof; // result out of range
logic zexpsel;
logic denorm0;
logic resultdenorm;
logic inf;
logic specialsel;
logic expplus1;
logic sumuf;
logic psign;
logic sticky;
logic [12:0] de0;
logic isAdd;
logic wsign;
logic [51:0] wman;
logic [10:0] wexp;
assign isAdd = 1;
logic [51:0] XMan, YMan, ZMan, WMan;
logic [10:0] XExp, YExp, ZExp, WExp;
logic XSgn, YSgn, ZSgn, WSgn, PSgn;
logic [105:0] ProdMan2;
logic [162:0] AlignedAddend2;
logic [161:0] Sum;
logic [162:0] SumTmp;
logic [12:0] SumExp;
logic [12:0] SumExpMinus1;
logic [12:0] SumExpTmp, SumExpTmpMinus1, WExpTmp;
logic [53:0] NormSum;
logic [161:0] NormSumTmp;
logic [8:0] NormCnt;
logic NormSumSticky;
logic SumZero;
logic NegSum;
logic InvZ;
logic ResultDenorm;
logic Sticky;
logic Plus1, Minus1, Plus1Tmp, Minus1Tmp;
logic Invalid,Underflow,Overflow,Inexact;
logic [8:0] DenormShift;
logic ProdInf, ProdOf, ProdUf;
logic [63:0] FmaResultTmp;
logic SubBySmallNum;
logic [63:0] FInput3M2;
logic ZeroSgn, ResultSgn;
// Set addend to zero if FMUL instruction
assign FInput3M2 = FOpCtrlM[2] ? 64'b0 : FInput3M;
// split inputs into the sign bit, mantissa, and exponent for readability
assign XSgn = FInput1M[63];
assign YSgn = FInput2M[63];
assign ZSgn = FInput3M2[63]^FOpCtrlM[0]; //Negate Z if subtraction
assign XExp = FInput1M[62:52];
assign YExp = FInput2M[62:52];
assign ZExp = FInput3M2[62:52];
assign XMan = FInput1M[51:0];
assign YMan = FInput2M[51:0];
assign ZMan = FInput3M2[51:0];
// Calculate the product's sign
// Negate product's sign if FNMADD or FNMSUB
assign PSgn = XSgn ^ YSgn ^ FOpCtrlM[1];
// Addition
// Negate Z when doing one of the following operations:
// -prod + Z
// prod - Z
assign InvZ = ZSgn ^ PSgn;
// Choose an inverted or non-inverted addend - the one is added later
assign AlignedAddend2 = InvZ ? ~{1'b0,AlignedAddendM} : {1'b0,AlignedAddendM};
// Kill the product if the product is too small to affect the addition (determined in fma1.sv)
assign ProdMan2 = KillProdM ? 106'b0 : ProdManM;
// Do the addition
// - add one to complete the negation if the addend was inverted
// - the 2 extra bits at the beginning and end are needed for rounding
assign SumTmp = AlignedAddend2 + {55'b0, ProdMan2,2'b0} + {162'b0, InvZ};
// Is the sum negative
assign NegSum = SumTmp[162];
// If the sum is negative, negate the sum.
assign Sum = NegSum ? -SumTmp[161:0] : SumTmp[161:0];
// Leading one detector
logic [8:0] i;
always_comb begin
i = 0;
while (~Sum[161-i] && $unsigned(i) <= $unsigned(9'd161)) i = i+1; // search for leading one
NormCnt = i+1; // compute shift count
end
@ -113,27 +116,160 @@ module fma2(FInput1M, FInput2M, FInput3M, FrmM,
// Normalization
// Determine if the sum is zero
assign SumZero = ~(|Sum);
// Determine if the result is denormal
assign ResultDenorm = $signed(SumExpTmp)<=0 & ($signed(SumExpTmp+13'd52)>=0);
// Determine the shift needed for denormal results
assign SumExpTmpMinus1 = SumExpTmp-1;
assign DenormShift = ResultDenorm ? SumExpTmpMinus1[8:0] : 9'b0;
// Normalize the sum
assign NormSumTmp = SumZero ? 162'b0 : Sum << NormCnt+DenormShift;
assign NormSum = NormSumTmp[161:108];
// Calculate the sticky bit
assign NormSumSticky = (|NormSumTmp[107:0]);
assign Sticky = AddendStickyM | NormSumSticky;
// Determine sum's exponent
assign SumExpTmp = KillProdM ? {2'b0, ZExp} : ProdExpM + -({4'b0, NormCnt} - 13'd56);
assign SumExp = SumZero ? 13'b0 :
ResultDenorm ? 13'b0 :
SumExpTmp;
// Instantiate fraction datapath
add add(.*);
lza lza(.*);
normalize normalize(.zexp(FInput3M[62:52]),.*);
round round(.xman(FInput1M[51:0]), .yman(FInput2M[51:0]),.zman(FInput3M[51:0]),.*);
// Instantiate exponent datapath
expgen2 expgen2(.xexp(FInput1M[62:52]),.yexp(FInput2M[62:52]),.zexp(FInput3M[62:52]),.*);
// Instantiate control logic
// Rounding
// round to nearest even
// {NormSum[1], NormSum[0], Sticky}
// 0xx - do nothing
// 100 - tie - Plus1 if NormSum[2] = 1
// - don't add 1 if there was supposed to be a subtraction by a small number that didn't happen
// 101/110/111 - Plus1
// round to zero - do nothing
// - subtract 1 if a small number was supposed to be subtracted from the positive result
// round to -infinity - Plus1 if negative
// - don't add 1 if there was supposed to be a subtraction by a small number that didn't happen
// - subtract 1 if a small number was supposed to be subtracted from the positive result
// round to infinity - Plus1 if positive
// - don't add 1 if there was supposed to be a subtraction by a small number that didn't happen
// - subtract 1 if a small number was supposed to be subtracted from the negative result
// round to nearest max magnitude
// {NormSum[1], NormSum[0], Sticky}
// 0xx - do nothing
// 100 - tie - Plus1
// - don't add 1 if there was supposed to be a subtraction by a small number that didn't happen
// 101/110/111 - Plus1
// Determine if a small number was supposed to be subtracted from the result
assign SubBySmallNum = AddendStickyM&InvZ&~NormSumSticky;
always_comb begin
// Determine if you add 1
case (FrmM)
3'b000: Plus1Tmp = NormSum[1] & (NormSum[0] | (Sticky&~(~NormSum[0]&SubBySmallNum)) | (~NormSum[0]&~Sticky&NormSum[2]));//round to nearest even
3'b001: Plus1Tmp = 0;//round to zero
3'b010: Plus1Tmp = WSgn & ~(SubBySmallNum);//round down
3'b011: Plus1Tmp = ~WSgn & ~(SubBySmallNum);//round up
3'b100: Plus1Tmp = (NormSum[1] & (NormSum[0] | (Sticky&~(~NormSum[0]&SubBySmallNum)) | (~NormSum[0]&~Sticky)));//round to nearest max magnitude
default: Plus1Tmp = 1'bx;
endcase
// Determine if you subtract 1
case (FrmM)
3'b000: Minus1Tmp = 0;//round to nearest even
3'b001: Minus1Tmp = SubBySmallNum;//round to zero
3'b010: Minus1Tmp = ~WSgn & SubBySmallNum;//round down
3'b011: Minus1Tmp = WSgn & SubBySmallNum;//round up
3'b100: Minus1Tmp = 0;//round to nearest max magnitude
default: Minus1Tmp = 1'bx;
endcase
end
// If an answer is exact don't round
assign Plus1 = Sticky | (|NormSum[1:0]) ? Plus1Tmp : 1'b0;
assign Minus1 = Sticky | (|NormSum[1:0]) ? Minus1Tmp : 1'b0;
// Compute rounded result
assign {WExpTmp, WMan} = {SumExp, NormSum[53:2]} - {64'b0, Minus1} + {64'b0, Plus1};
assign WExp = WExpTmp[10:0];
// Sign calculation
// Determine the sign if the sum is zero
// if product underflows then use psign
// otherwise
// if cancelation then 0 unless round to -inf
// otherwise psign
assign ZeroSgn = Underflow & ~ResultDenorm ? PSgn :
(PSgn^ZSgn ? FrmM == 3'b010 : PSgn);
// is the result negative
// if p - z is the Sum negative
// if -p + z is the Sum positive
// if -p - z then the Sum is negative
assign ResultSgn = InvZ&(ZSgn)&NegSum | InvZ&PSgn&~NegSum | ((ZSgn)&PSgn);
assign WSgn = SumZero ? ZeroSgn : ResultSgn;
sign sign(.xsign(FInput1M[63]),.ysign(FInput2M[63]),.zsign(FInput3M[63]),.*);
flag2 flag2(.xsign(FInput1M[63]),.ysign(FInput2M[63]),.zsign(FInput3M[63]),.vbits(v[1:0]),.*);
// Select the result
assign FmaResultM = XNaNM ? {XSgn, XExp, 1'b1,XMan[50:0]} :
YNaNM ? {YSgn, YExp, 1'b1,YMan[50:0]} :
ZNaNM ? {ZSgn, ZExp, 1'b1,ZMan[50:0]} :
Invalid ? {WSgn, 11'h7ff, 1'b1, 51'b0} : // has to be before inf
XInfM ? {PSgn, XExp, XMan} :
YInfM ? {PSgn, YExp, YMan} :
ZInfM ? {ZSgn, ZExp, ZMan} :
Overflow ? {WSgn, 11'h7ff, 52'b0} :
Underflow & ~ResultDenorm ? {WSgn, 63'b0} - {63'b0, (Minus1&AddendStickyM)} + {63'b0, (Plus1&AddendStickyM)} :
KillProdM ? {ZSgn, ZExp, ZMan} - {63'b0, (Minus1&AddendStickyM)} + {63'b0, (Plus1&AddendStickyM)}: // has to be after Underflow
{WSgn,WExp,WMan};
assign FmaResultM = {wsign,wexp,wman};
// Set Invalid flag for following cases:
// 1) Inf - Inf
// 2) 0 * Inf
// 3) any input is a signaling NaN
assign ProdOf = (ProdExpM >= 2047 && ~ProdExpM[12]);
assign ProdInf = ProdOf && ~XNaNM && ~YNaNM;
assign Invalid = (XNaNM&~XMan[51]) | (YNaNM&~YMan[51]) | (ZNaNM&~ZMan[51]) | ((XInfM || YInfM || ProdInf) & ZInfM & (XSgn ^ YSgn ^ ZSgn)) | (XZeroM & YInfM) | (YZeroM & XInfM);
// Set Overflow flag if the number is too big to be represented
assign Overflow = WExpTmp >= 2047 & ~WExpTmp[12];
// Set Underflow flag if the number is too small to be represented in normal numbers
assign ProdUf = KillProdM & ZZeroM;
assign Underflow = SumExp[12] | ProdUf;
// Set Inexact flag if the result is different from what would be output given infinite precision
assign Inexact = Sticky|Overflow| (|NormSum[1:0]);
// Combine flags
// - FMA can't set the Divide by zero flag
// - Don't set the underflow flag if the result is exact
assign FmaFlagsM = {Invalid, 1'b0, Overflow, Underflow & Inexact, Inexact};
endmodule

View File

@ -80,34 +80,17 @@ module fpu (
logic [4:0] FDivFlagsM, FDivFlagsW;
// FMA signals
logic [12:0] aligncntE, aligncntM;
logic [105:0] rE, rM;
logic [105:0] sE, sM;
logic [163:0] tE, tM;
logic [8:0] normcntE, normcntM;
logic [12:0] aeE, aeM;
logic bsE, bsM;
logic killprodE, killprodM;
logic prodofE, prodofM;
logic xzeroE, xzeroM;
logic yzeroE, yzeroM;
logic zzeroE, zzeroM;
logic xdenormE, xdenormM;
logic ydenormE, ydenormM;
logic zdenormE, zdenormM;
logic xinfE, xinfM;
logic yinfE, yinfM;
logic zinfE, zinfM;
logic xnanE, xnanM;
logic ynanE, ynanM;
logic znanE, znanM;
logic nanE, nanM;
logic [8:0] sumshiftE, sumshiftM;
logic sumshiftzeroE, sumshiftzeroM;
logic prodinfE, prodinfM;
logic [63:0] FmaResultM, FmaResultW;
logic [4:0] FmaFlagsM, FmaFlagsW;
logic [105:0] ProdManE, ProdManM;
logic [161:0] AlignedAddendE, AlignedAddendM;
logic [12:0] ProdExpE, ProdExpM;
logic AddendStickyE, AddendStickyM;
logic KillProdE, KillProdM;
logic XZeroE, YZeroE, ZZeroE, XZeroM, YZeroM, ZZeroM;
logic XInfE, YInfE, ZInfE, XInfM, YInfM, ZInfM;
logic XNaNE, YNaNE, ZNaNE, XNaNM, YNaNM, ZNaNM;
logic [63:0] FmaResultM, FmaResultW;
logic [4:0] FmaFlagsM, FmaFlagsW;
// add/cvt signals
logic [63:0] AddSumE, AddSumTcE;
logic [3:0] AddSelInvE;
@ -241,7 +224,7 @@ module fpu (
.CLK(clk),
.ECLK(fpdivClk));
fpdiv fpdivsqrt (.DivOpType(FOpCtrlE[0]), .clk(fpdivClk));
fpdiv fpdivsqrt (.DivOpType(FOpCtrlE[0]), .clk(fpdivClk), .*);
// first of two-stage instance of floating-point add/cvt unit
fpuaddcvt1 fpadd1 (.*);
@ -265,31 +248,20 @@ module fpu (
//*****************
// fma E/M pipe registers
//*****************
flopenrc #(13) EMRegFma1(clk, reset, PipeClearEM, PipeEnableEM, aligncntE, aligncntM);
flopenrc #(106) EMRegFma2(clk, reset, PipeClearEM, PipeEnableEM, rE, rM);
flopenrc #(106) EMRegFma3(clk, reset, PipeClearEM, PipeEnableEM, sE, sM);
flopenrc #(164) EMRegFma4(clk, reset, PipeClearEM, PipeEnableEM, tE, tM);
flopenrc #(9) EMRegFma5(clk, reset, PipeClearEM, PipeEnableEM, normcntE, normcntM);
flopenrc #(13) EMRegFma6(clk, reset, PipeClearEM, PipeEnableEM, aeE, aeM);
flopenrc #(1) EMRegFma7(clk, reset, PipeClearEM, PipeEnableEM, bsE, bsM);
flopenrc #(1) EMRegFma8(clk, reset, PipeClearEM, PipeEnableEM, killprodE, killprodM);
flopenrc #(1) EMRegFma9(clk, reset, PipeClearEM, PipeEnableEM, prodofE, prodofM);
flopenrc #(1) EMRegFma10(clk, reset, PipeClearEM, PipeEnableEM, xzeroE, xzeroM);
flopenrc #(1) EMRegFma11(clk, reset, PipeClearEM, PipeEnableEM, yzeroE, yzeroM);
flopenrc #(1) EMRegFma12(clk, reset, PipeClearEM, PipeEnableEM, zzeroE, zzeroM);
flopenrc #(1) EMRegFma13(clk, reset, PipeClearEM, PipeEnableEM, xdenormE, xdenormM);
flopenrc #(1) EMRegFma14(clk, reset, PipeClearEM, PipeEnableEM, ydenormE, ydenormM);
flopenrc #(1) EMRegFma15(clk, reset, PipeClearEM, PipeEnableEM, zdenormE, zdenormM);
flopenrc #(1) EMRegFma16(clk, reset, PipeClearEM, PipeEnableEM, xinfE, xinfM);
flopenrc #(1) EMRegFma17(clk, reset, PipeClearEM, PipeEnableEM, yinfE, yinfM);
flopenrc #(1) EMRegFma18(clk, reset, PipeClearEM, PipeEnableEM, zinfE, zinfM);
flopenrc #(1) EMRegFma19(clk, reset, PipeClearEM, PipeEnableEM, xnanE, xnanM);
flopenrc #(1) EMRegFma20(clk, reset, PipeClearEM, PipeEnableEM, ynanE, ynanM);
flopenrc #(1) EMRegFma21(clk, reset, PipeClearEM, PipeEnableEM, znanE, znanM);
flopenrc #(1) EMRegFma22(clk, reset, PipeClearEM, PipeEnableEM, nanE, nanM);
flopenrc #(9) EMRegFma23(clk, reset, PipeClearEM, PipeEnableEM, sumshiftE, sumshiftM);
flopenrc #(1) EMRegFma24(clk, reset, PipeClearEM, PipeEnableEM, sumshiftzeroE, sumshiftzeroM);
flopenrc #(1) EMRegFma25(clk, reset, PipeClearEM, PipeEnableEM, prodinfE, prodinfM);
flopenrc #(106) EMRegFma3(clk, reset, PipeClearEM, PipeEnableEM, ProdManE, ProdManM);
flopenrc #(162) EMRegFma4(clk, reset, PipeClearEM, PipeEnableEM, AlignedAddendE, AlignedAddendM);
flopenrc #(13) EMRegFma6(clk, reset, PipeClearEM, PipeEnableEM, ProdExpE, ProdExpM);
flopenrc #(1) EMRegFma7(clk, reset, PipeClearEM, PipeEnableEM, AddendStickyE, AddendStickyM);
flopenrc #(1) EMRegFma8(clk, reset, PipeClearEM, PipeEnableEM, KillProdE, KillProdM);
flopenrc #(1) EMRegFma10(clk, reset, PipeClearEM, PipeEnableEM, XZeroE, XZeroM);
flopenrc #(1) EMRegFma11(clk, reset, PipeClearEM, PipeEnableEM, YZeroE, YZeroM);
flopenrc #(1) EMRegFma12(clk, reset, PipeClearEM, PipeEnableEM, ZZeroE, ZZeroM);
flopenrc #(1) EMRegFma16(clk, reset, PipeClearEM, PipeEnableEM, XInfE, XInfM);
flopenrc #(1) EMRegFma17(clk, reset, PipeClearEM, PipeEnableEM, YInfE, YInfM);
flopenrc #(1) EMRegFma18(clk, reset, PipeClearEM, PipeEnableEM, ZInfE, ZInfM);
flopenrc #(1) EMRegFma19(clk, reset, PipeClearEM, PipeEnableEM, XNaNE, XNaNM);
flopenrc #(1) EMRegFma20(clk, reset, PipeClearEM, PipeEnableEM, YNaNE, YNaNM);
flopenrc #(1) EMRegFma21(clk, reset, PipeClearEM, PipeEnableEM, ZNaNE, ZNaNM);
//*****************
// fpadd E/M pipe registers

View File

@ -1,40 +0,0 @@
///////////////////////////////////////////////////////////////////////////////
// Block Name: lop.v
// Author: David Harris
// Date: 11/2/1995
//
// Block Description:
// This block implements a Leading One Predictor used to determine
// the normalization shift count.
///////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////
module lza(sum, normcnt, sumzero);
  /////////////////////////////////////////////////////////////////////////////
  // Behavioral leading-zero counter for the 164-bit sum.
  //
  //   normcnt - number of zero bits above the most significant 1 of sum,
  //             i.e. the left shift that brings that 1 into bit 163.
  //             Equals 164 when sum is all zeros.
  //   sumzero - asserted when sum is all zeros.
  //
  // A real LOP uses a fast carry chain to find only the first 0.
  // It is an example of a parallel prefix algorithm.  For the sake
  // of simplicity, this model is behavioral instead.
  // A real LOP would also operate on the sources of the adder, not
  // the result!
  /////////////////////////////////////////////////////////////////////////////
  input  logic [163:0] sum;      // sum
  output logic [8:0]   normcnt;  // normalization shift count
  output logic         sumzero;  // sum = 0

  // Scan from the LSB upward so the last match (the most significant 1)
  // wins.  The loop bounds are constant, so sum is never indexed outside
  // [163:0]; the previous while-loop evaluated sum[163-i] with i == 164
  // when the sum was zero.
  always_comb begin
    normcnt = 9'd164;                       // value when no 1 is found
    for (int b = 0; b <= 163; b++)
      if (sum[b]) normcnt = 9'(163 - b);    // leading zeros above bit b
  end

  // Also check if sum is zero
  assign sumzero = ~(|sum);
endmodule

View File

@ -1,138 +0,0 @@
module multiply(xman, yman, xdenormE, ydenormE, xzeroE, yzeroE, rE, sE);
  /////////////////////////////////////////////////////////////////////////////
  // Behavioral 53x53-bit significand multiplier for the FMA.
  //
  // Each significand is the 52-bit fraction with the implicit leading bit
  // prepended; the leading bit is 0 when the operand is flagged as
  // denormalized or zero and 1 otherwise.
  //
  // The interface exposes two partial products.  In this model the whole
  // product is delivered on sE and rE is tied to zero.
  //
  // NOTE: a Booth-encoded partial-product array reduced by add3comp2 /
  // add4comp2 compressor levels was prototyped here and then disabled
  // because it produced Verilator UNOPTFLAT lint warnings (the adders also
  // still needed resizing).  Recover it from version control if a
  // structural multiplier is wanted.
  /////////////////////////////////////////////////////////////////////////////
  input  logic [51:0]  xman;      // Fraction of multiplicand x
  input  logic [51:0]  yman;      // Fraction of multiplicand y
  input  logic         xdenormE;  // x is denormalized
  input  logic         ydenormE;  // y is denormalized
  input  logic         xzeroE;    // x is zero
  input  logic         yzeroE;    // y is zero
  output logic [105:0] rE;        // partial product 1 (always zero in this model)
  output logic [105:0] sE;        // partial product 2 (the complete product)

  logic [52:0] xsig;              // x significand with implicit leading bit
  logic [52:0] ysig;              // y significand with implicit leading bit

  assign xsig = {~(xdenormE|xzeroE), xman};
  assign ysig = {~(ydenormE|yzeroE), yman};

  // Zero-extend both operands to the 106-bit result width before multiplying
  assign sE = {53'b0, xsig} * {53'b0, ysig};
  assign rE = 106'b0;
endmodule

View File

@ -1,147 +0,0 @@
///////////////////////////////////////////////////////////////////////////////
// Block Name: normalize.v
// Author: David Harris
// Date: 11/2/1995
//
// Block Description:
// This block performs the normalization shift. It also
// generates the Rands bits for rounding. Finally, it
// handles the special case of a zero sum.
//
// v[53:2] is the fraction component of the prerounded result.
// It can be bypassed back to the X or Z inputs of the FMAC
// for back-to-back operations.
/////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////
module normalize(sum, zexp, normcnt, aeM, aligncntM, sumshiftM, sumshiftzeroM, sumzero,
                 xzeroM, zzeroM, yzeroM, bsM, xdenormM, ydenormM, zdenormM, sticky, de0, resultdenorm, v);
  /////////////////////////////////////////////////////////////////////////////
  // Normalization shifter for the FMA sum.
  //
  // Shifts the 164-bit sum so that the significant bits land in v, ORs the
  // bits shifted below v (together with the addend sticky bit bsM) into
  // sticky, and produces the exponent value de0 that goes with v.
  // resultdenorm is raised when de0 is zero or negative (de0[12] set), or
  // unconditionally on the path where the product exponent is below emin.
  /////////////////////////////////////////////////////////////////////////////
  input  logic [163:0] sum;            // sum
  input  logic [62:52] zexp;           // exponent field of Z
  input  logic [8:0]   normcnt;        // normalization shift count (l)
  input  logic [12:0]  aeM;            // sum of the multiplicand exponents (ea + eb)
  input  logic [12:0]  aligncntM;      // alignment shift count (d)
  input  logic [8:0]   sumshiftM;      // shift count used when the addend is anchored
  input  logic         sumshiftzeroM;  // when set, the sum is used unshifted
  input  logic         sumzero;        // sum is zero
  input  logic         bsM;            // sticky bit for addend
  input  logic         xdenormM;       // Input X is denormalized
  input  logic         ydenormM;       // Input Y is denormalized
  input  logic         zdenormM;       // Input Z is denormalized
  input  logic         xzeroM;         // Input X is zero
  input  logic         yzeroM;         // Input Y is zero
  input  logic         zzeroM;         // Input Z is zero
  output logic         sticky;         // sticky bit
  output logic [12:0]  de0;            // exponent accompanying v (13 bits, bit 12 is the sign)
  output logic         resultdenorm;   // result is denormalized
  output logic [53:0]  v;              // normalized sum, R, S bits

  // Internal nodes
  logic [163:0] sumshifted;            // shifted sum
  logic [9:0]   sumshifttmp;           // sumshiftM - 2; bit 9 set means the difference is negative
  logic         isShiftLeft1;

  // When the sum is zero, normalization does not apply and only the
  // sticky bit must be computed.  Otherwise, the sum is shifted
  // and the R and S bits (v[1] and v[0], respectively) are assigned.
  // The R bit is also set on denormalized numbers where the exponent
  // was computed to be exactly -1023 and the L bit was set.  This
  // is required for correct rounding up of multiplication results.
  // The sticky bit calculation is actually built into the shifter and
  // does not require a true subtraction shown in the model.

  assign isShiftLeft1 = (aligncntM == 13'b1 ||aligncntM == 13'b0 || $signed(aligncntM) == $signed(-(13'b1)))&& zexp == 11'h2;

  always_comb
    begin
      // d = aligncntM
      // l = normcnt
      // p = 53
      // ea + eb = aeM
      // set d<=2 to d<=0

      // Default so that sumshifttmp is driven on every path through this
      // block; it was previously assigned only in the d > 2 branch, which
      // infers a latch inside always_comb.
      sumshifttmp = 10'b0;

      if ($signed(aligncntM)<=$signed(13'd2)) begin //d<=2
        // product anchored or cancellation
        if ($signed(aeM-{{4{normcnt[8]}},normcnt}+13'd2) >= $signed(-(13'd1022))) begin //ea+eb-l+2 >= emin
          //normal result
          de0 = xzeroM|yzeroM ? {2'b0,zexp} : aeM-{{4{normcnt[8]}},normcnt}+{12'b0,xdenormM}+{12'b0,ydenormM}+13'd57;
          resultdenorm = |sum & ~|de0 | de0[12];
          // if z is zero then there was a 56 bit shift of the product
          sumshifted = resultdenorm ? sum << sumshiftM-{8'b0,zzeroM}+{8'b0,isShiftLeft1} : sum << normcnt; // p+2+l
          v = sumshifted[162:109];
          sticky = (|sumshifted[108:0]) | bsM;
          //de0 = aeM-normcnt+2-1023;
        end else begin
          // product exponent is below emin: shift by a fixed offset from aeM
          sumshifted = sum << (13'd1080+aeM);
          v = sumshifted[162:109];
          sticky = (|sumshifted[108:0]) | bsM;
          resultdenorm = 1;
          de0 = 0;
        end

      end else begin // extract normalized bits
        sumshifttmp = {1'b0,sumshiftM} - 2;
        sumshifted = sumshifttmp[9] ? sum : sum << sumshifttmp;

        // for some reason use exp = zexp + {0,1,2}
        // the book says exp = zexp + {-1,0,1}
        if(sumshiftzeroM) begin
          v = sum[162:109];
          sticky = (|sum[108:0]) | bsM;
          de0 = {2'b0,zexp};
        end else if(sumshifted[163] & ~sumshifttmp[9])begin
          v = sumshifted[162:109];
          sticky = (|sumshifted[108:0]) | bsM;
          de0 = {2'b0,zexp} +13'd2;
        end else if ((sumshifttmp[9] & sumshiftM[0]) || sumshifted[162]) begin
          v = sumshifted[161:108];
          sticky = (|sumshifted[107:0]) | bsM;
          de0 = {2'b0,zexp}+13'd1;
        end else if (sumshifted[161] || (sumshifttmp[9] & sumshiftM[1])) begin
          v = sumshifted[160:107];
          sticky = (|sumshifted[106:0]) | bsM;
          //de0 = zexp-1;
          de0 = {2'b0,zexp}+{12'b0,zdenormM};
        end else if(sumshifted[160]& ~zdenormM) begin
          // de0 must be assigned before v: v selects on whether de0 is zero
          de0 = {2'b0,zexp}-13'b1;
          v = ~|de0&~sumzero ? sumshifted[160:107] : sumshifted[159:106];
          sticky = (|sumshifted[105:0]) | bsM;
          //de0 = zexp-1;
        end else if(sumshifted[159]& ~zdenormM) begin
          //v = sumshifted[158:105];
          // de0 must be assigned before v: v selects on de0 being zero or negative
          de0 = {2'b0,zexp}-13'd2;
          v = (~|de0 | de0[12])&~sumzero ? sumshifted[161:108] : sumshifted[158:105];
          sticky = (|sumshifted[104:0]) | bsM;
          //de0 = zexp-1;
        end else if(zdenormM) begin
          v = sumshifted[160:107];
          sticky = (|sumshifted[106:0]) | bsM;
          //de0 = zexp-1;
          de0 = {{2{zexp[62]}},zexp};
        end else begin
          de0 = 0;
          sumshifted = sum << sumshiftM-1; // p+2+l
          v = sumshifted[162:109];
          sticky = (|sumshifted[108:0]) | bsM;
        end

        resultdenorm = (~|de0 | de0[12]);
      end
    end

  // shift sum left by normcnt, filling the right with zeros
  //assign sumshifted = sum << normcnt;
endmodule

View File

@ -1,122 +0,0 @@
/////////////////////////////////////////////////////////////////////////////
// Block Name: round.v
// Author: David Harris
// Date: 11/2/1995
//
// Block Description:
// This block is responsible for rounding the normalized result of
// the FMAC. Because prenormalized results may be bypassed back to
// the FMAC X and Z inputs, rounding does not appear in the critical
// path of most floating point code. This is good because rounding
// requires an entire 52 bit carry-propagate half-adder delay.
//
// The results from other FPU blocks (e.g. FCVT, FDIV, etc) are also
// muxed in to form the actual result for register file writeback. This
// saves a mux from the writeback path.
//
/////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////
module round(v, sticky, FrmM, wsign,
             FmaFlagsM, inf, nanM, xnanM, ynanM, znanM,
             xman, yman, zman,
             wman, infinity, specialsel,expplus1);
  /////////////////////////////////////////////////////////////////////////////
  // Rounds the normalized FMA mantissa and selects special results.
  //
  // v[53:2] is the prerounded fraction; {v[1], v[0], sticky} are the bits
  // below it.  The fraction is a magnitude (sign is carried separately in
  // wsign), so "plus1" always moves the result away from zero.
  //
  // FrmM encodings handled here:
  //   000 round to nearest, ties to even
  //   001 round toward zero
  //   010 round down (toward -infinity)
  //   011 round up   (toward +infinity)
  //   100 round to nearest, ties to max magnitude
  /////////////////////////////////////////////////////////////////////////////
  input  logic [53:0]  v;          // normalized sum, R, S bits
  input  logic         sticky;     // sticky bit
  input  logic [2:0]   FrmM;       // rounding mode
  input  logic         wsign;      // Sign of result
  input  logic [4:0]   FmaFlagsM;  // flags: [4] invalid, [2] overflow, [1] underflow
  input  logic         inf;        // Some input is infinity
  input  logic         nanM;       // Some input is NaN
  input  logic         xnanM;      // X is NaN
  input  logic         ynanM;      // Y is NaN
  input  logic         znanM;      // Z is NaN
  input  logic [51:0]  xman;       // fraction of input X
  input  logic [51:0]  yman;       // fraction of input Y
  input  logic [51:0]  zman;       // fraction of input Z
  output logic [51:0]  wman;       // rounded result of FMAC
  output logic         infinity;   // Generate infinity on overflow
  output logic         specialsel; // Select special result
  output logic         expplus1;   // rounding carried out of the fraction

  // Internal nodes
  logic        plus1;        // Round by adding one
  logic        inexact;      // some bit below the fraction is set
  wire  [52:0] v1;           // Result + 1 (for rounding)
  wire  [51:0] specialres;   // Result of exceptional case
  wire  [51:0] infinityres;  // Infinity or largest real number
  wire  [51:0] nanres;       // Propagated or generated NaN

  // An exact result must never be changed by rounding
  assign inexact = v[1] | v[0] | sticky;

  // Compute if round should occur.  This equation is derived from
  // the rounding tables.
  //   round up            - plus1 if positive and inexact
  //   round down          - plus1 if negative and inexact
  //   round to zero       - do nothing
  //   round to nearest even
  //     {v[1], v[0], sticky}
  //       0xx         - do nothing
  //       100         - tie - plus1 if v[2] = 1
  //       101/110/111 - plus1
  //   round to nearest max magnitude
  //       0xx         - do nothing
  //       1xx         - plus1 (a tie also rounds away from zero, whatever the sign)
  always_comb begin
    case (FrmM)
      3'b000: plus1 = (v[1] & (v[0] | sticky | (~v[0]&~sticky&v[2])));//round to nearest even
      3'b001: plus1 = 0;//round to zero
      3'b010: plus1 = wsign & inexact;//round down
      3'b011: plus1 = ~wsign & inexact;//round up
      3'b100: plus1 = v[1];//round to nearest max magnitude
      default: plus1 = 1'bx;
    endcase
  end

  // Compute rounded result
  assign v1 = v[53:2] + 1;

  // Determine special result in event of selection of a result from
  // another FPU functional unit, infinity, NAN, or underflow
  // The special result mux is a 4:1 mux that should not appear in the
  // critical path of the machine.  It is not priority encoded, despite
  // the code below suggesting otherwise.  Also, several of the identical data
  // inputs to the wide muxes can be combined at the expense of more
  // complicated non-critical control in the circuit implementation.
  assign specialsel = FmaFlagsM[2] || FmaFlagsM[1] || FmaFlagsM[4] || //overflow underflow invalid
                      nanM || inf;
  assign specialres = FmaFlagsM[4] | nanM ? nanres : //invalid
                      FmaFlagsM[2] ? infinityres : //overflow
                      inf ? 52'b0 :
                      FmaFlagsM[1] ? 52'b0 : 52'bx; // underflow

  // Overflow is handled differently for different rounding modes.
  // The result is infinity when the rounding direction points away from
  // zero for this sign, and the largest finite number otherwise:
  //   nearest (even / max magnitude) - infinity
  //   toward zero                    - largest finite
  //   round down                     - infinity only if negative
  //   round up                       - infinity only if positive
  // Reserved encodings keep the previous behavior (infinity).
  always_comb begin
    case (FrmM)
      3'b000:  infinity = 1'b1;
      3'b001:  infinity = 1'b0;
      3'b010:  infinity = wsign;
      3'b011:  infinity = ~wsign;
      3'b100:  infinity = 1'b1;
      default: infinity = 1'b1;
    endcase
  end
  assign infinityres = infinity ? 52'b0 : {52{1'b1}};

  // Invalid operations produce a quiet NaN.  The result should
  // propagate an input if the input is NaN.  The top fraction bit is forced
  // to 1 so that a propagated NaN is always quiet.
  // IEEE 754-2008 section 6.2.3 states:
  // "If two or more inputs are NaN, then the payload of the resulting NaN should be
  //  identical to the payload of one of the input NaNs if representable in the destination
  //  format. This standard does not specify which of the input NaNs will provide the payload."
  assign nanres = xnanM ? {1'b1, xman[50:0]}: (ynanM ? {1'b1, yman[50:0]} : (znanM ? {1'b1, zman[50:0]} : {1'b1, 51'b0}));// KEP 210112 add the 1 to make NaNs quiet

  // Select result
  // expplus1 reports that the increment overflowed the 52-bit fraction
  assign expplus1 = v1[52] & ~specialsel & plus1;
  assign wman = specialsel ? specialres : (plus1 ? v1[51:0] : v[53:2]);
endmodule

View File

@ -1,112 +0,0 @@
///////////////////////////////////////////////////////////////////////////////
// Block Name: sign.v
// Author: David Harris
// Date: 12/1/1995
//
// Block Description:
// This block manages the signs of the numbers.
// 1 = negative
//
/////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////
module sign(xsign, ysign, zsign, negsum0, negsum1, bsM, FrmM, FmaFlagsM,
            sumzero, zinfM, inf, wsign, invz, negsum, selsum1, isAdd);
  /////////////////////////////////////////////////////////////////////////////
  // Sign logic for the FMA (1 = negative).
  //
  // Derives the product sign, decides whether the addend must be inverted
  // going into the adder, selects which compound-adder result to use, and
  // produces the sign of the final result, including the zero, infinity
  // and invalid special cases.
  //
  // bsM is part of the port list but is not read by the current logic.
  /////////////////////////////////////////////////////////////////////////////
  input  logic       xsign;      // Sign of X
  input  logic       ysign;      // Sign of Y
  input  logic       zsign;      // Sign of Z
  input  logic       isAdd;      // selects which zero-sign rule applies (see zerosign)
  input  logic       negsum0;    // Sum in +0 mode is negative
  input  logic       negsum1;    // Sum in +1 mode is negative
  input  logic       bsM;        // sticky bit from addend
  input  logic [2:0] FrmM;       // rounding mode (3'b010 = round toward minus infinity)
  input  logic [4:0] FmaFlagsM;  // flags: [4] invalid, [1] underflow
  input  logic       sumzero;    // Sum = 0
  input  logic       zinfM;      // Z = Inf
  input  logic       inf;        // Some input = Inf
  output logic       wsign;      // Sign of W
  output logic       invz;       // Invert addend into adder
  output logic       negsum;     // Negate result of adder
  output logic       selsum1;    // Select +1 mode from compound adder

  // Internal nodes
  wire  zerosign;   // sign if result = 0
  wire  sumneg;     // sign of a nonzero, finite result
  wire  infsign;    // sign if result = Inf
  logic psign;      // sign of the product

  // Compute sign of product
  assign psign = xsign ^ ysign;

  // Invert addend if sign of Z is different from sign of product
  assign invz = (zsign ^ psign);

  // The +1 compound-adder result is used exactly when the addend is inverted
  assign selsum1 = invz;

  // negate sum if it is negative
  assign negsum = (selsum1&negsum1) | (~selsum1&negsum0);

  // Is the sum negative?
  //    p - z : negative when the +1 sum is negative
  //   -p + z : negative when the +1 sum is positive
  //   -p - z : always negative
  assign sumneg = invz&zsign&negsum1 | invz&psign&~negsum1 | (zsign&psign);

  // Compute sign of result
  // This involves a special case when the sum is zero:
  //   x+x retains the same sign as x even when x = +/- 0.
  //   otherwise, x-x = +0 unless in the RM mode when x-x = -0
  // There is also a special case for NaNs and invalid results;
  // the sign of the NaN produced is forced to be 0.
  // Sign calculation is not in the critical path so the cases
  // can be tolerated.
  // IEEE 754-2008 section 6.3 states
  // "When either an input or result is NaN, this standard does not interpret the sign of a NaN."
  // also pertaining to negZero it states:
  // "When the sum/difference of two operands with opposite signs is exactly zero, the sign of that sum/difference
  //  shall be +0 in all rounding attributes EXCEPT roundTowardNegative. Under that attribute, the sign of an exact zero
  //  sum/difference shall be -0. However, x+x = x-(-X) retains the same sign as x even when x is zero."

  // zero sign
  //   if product underflows then use psign
  //   otherwise
  //     addition
  //       if signs differ (cancellation) then 0 unless round to -inf
  //       otherwise psign
  //     subtraction
  //       if signs differ then psign
  //       otherwise (cancellation) 0 unless round to -inf
  assign zerosign = FmaFlagsM[1] ? psign :
                    (isAdd ? (psign^zsign ? FrmM == 3'b010 : psign) :
                             (psign^zsign ? psign : FrmM == 3'b010));

  assign infsign = zinfM ? zsign : psign; //KEP 210112 keep the correct sign when result is infinity

  // Priority: invalid, then infinity, then zero, then the ordinary sum sign
  assign wsign = FmaFlagsM[4] ? 0 : (inf ? infsign :(sumzero ? zerosign : sumneg));
endmodule

View File

@ -1,67 +0,0 @@
///////////////////////////////////////////////////////////////////////////////
// Block Name: special.v
// Author: David Harris
// Date: 12/2/1995
//
// Block Description:
// This block implements special case handling for unusual operands (e.g.
// 0, NaN, denormalize, infinity). The block consists of zero/one detectors.
//
/////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////
module special(FInput1E, FInput2E, FInput3E, xzeroE, yzeroE, zzeroE,
               xnanE, ynanE, znanE, xdenormE, ydenormE, zdenormE, xinfE, yinfE, zinfE);
  /////////////////////////////////////////////////////////////////////////////
  // Classifies the three FMA operands.  For each 64-bit input the exponent
  // field is bits 62:52 and the fraction is bits 51:0; the sign bit is
  // ignored.  The classes are:
  //   NaN      - exponent all ones,  fraction nonzero
  //   infinity - exponent all ones,  fraction zero
  //   denorm   - exponent all zeros, fraction nonzero
  //   zero     - exponent all zeros, fraction zero
  // Denormalized inputs are NOT reported as zero.
  /////////////////////////////////////////////////////////////////////////////
  input  logic [63:0] FInput1E;   // Input X
  input  logic [63:0] FInput2E;   // Input Y
  input  logic [63:0] FInput3E;   // Input Z
  output logic        xzeroE;     // X = 0
  output logic        yzeroE;     // Y = 0
  output logic        zzeroE;     // Z = 0
  output logic        xnanE;      // X is NaN
  output logic        ynanE;      // Y is NaN
  output logic        znanE;      // Z is NaN
  output logic        xdenormE;   // X is denormalized
  output logic        ydenormE;   // Y is denormalized
  output logic        zdenormE;   // Z is denormalized
  output logic        xinfE;      // X is infinity
  output logic        yinfE;      // Y is infinity
  output logic        zinfE;      // Z is infinity

  // Shared detectors on the exponent and fraction fields, one set per operand
  logic xExpOnes, yExpOnes, zExpOnes;   // exponent is all ones
  logic xExpNone, yExpNone, zExpNone;   // exponent is all zeros
  logic xFracAny, yFracAny, zFracAny;   // fraction has at least one bit set

  assign xExpOnes = &FInput1E[62:52];
  assign yExpOnes = &FInput2E[62:52];
  assign zExpOnes = &FInput3E[62:52];

  assign xExpNone = ~(|FInput1E[62:52]);
  assign yExpNone = ~(|FInput2E[62:52]);
  assign zExpNone = ~(|FInput3E[62:52]);

  assign xFracAny = |FInput1E[51:0];
  assign yFracAny = |FInput2E[51:0];
  assign zFracAny = |FInput3E[51:0];

  // Operand X
  assign xnanE    = xExpOnes && xFracAny;
  assign xinfE    = xExpOnes && ~xFracAny;
  assign xdenormE = xExpNone && xFracAny;
  assign xzeroE   = xExpNone && ~xFracAny;

  // Operand Y
  assign ynanE    = yExpOnes && yFracAny;
  assign yinfE    = yExpOnes && ~yFracAny;
  assign ydenormE = yExpNone && yFracAny;
  assign yzeroE   = yExpNone && ~yFracAny;

  // Operand Z
  assign znanE    = zExpOnes && zFracAny;
  assign zinfE    = zExpOnes && ~zFracAny;
  assign zdenormE = zExpNone && zFracAny;
  assign zzeroE   = zExpNone && ~zFracAny;
endmodule

View File

@ -122,6 +122,9 @@ string tests32f[] = '{
};
string tests64d[] = '{
"rv64d/I-FNMADD-D-01", "2000",
"rv64d/I-FNMSUB-D-01", "2000",
"rv64d/I-FMSUB-D-01", "2000",
"rv64d/I-FMAX-D-01", "2000",
"rv64d/I-FMIN-D-01", "2000",
"rv64d/I-FLE-D-01", "2000",
@ -143,12 +146,9 @@ string tests32f[] = '{
"rv64d/I-FSD-01", "2000",
"rv64d/I-FLD-01", "2420",
"rv64d/I-FMADD-D-01", "2000",
// "rv64d/I-FMSUB-D-01", "2000",
// "rv64d/I-FMUL-D-01", "2000",
"rv64d/I-FMV-D-X-01", "2000",
"rv64d/I-FMV-X-D-01", "2000",
// "rv64d/I-FNMADD-D-01", "2000",
// "rv64d/I-FNMSUB-D-01", "2000",
"rv64d/I-FMUL-D-01", "2000",
// "rv64d/I-FMV-D-X-01", "2000",
// "rv64d/I-FMV-X-D-01", "2000",
"rv64d/I-FSGNJ-D-01", "2000",
"rv64d/I-FSGNJN-D-01", "2000",
"rv64d/I-FSGNJX-D-01", "2000",