forked from Github_Repos/cvw
Merge branch 'main' of github.com:davidharrishmc/riscv-wally into main
This commit is contained in:
commit
41a1e6112a
1
wally-pipelined/regression/sim-wally-rv64icfd
Executable file
1
wally-pipelined/regression/sim-wally-rv64icfd
Executable file
@ -0,0 +1 @@
|
||||
vsim -do wally-pipelined-rv64icfd.do
|
50
wally-pipelined/regression/wally-pipelined-rv64icfd.do
Normal file
50
wally-pipelined/regression/wally-pipelined-rv64icfd.do
Normal file
@ -0,0 +1,50 @@
|
||||
# wally-pipelined.do
|
||||
#
|
||||
# Modification by Oklahoma State University & Harvey Mudd College
|
||||
# Use with Testbench
|
||||
# James Stine, 2008; David Harris 2021
|
||||
# Go Cowboys!!!!!!
|
||||
#
|
||||
# Takes 1:10 to run RV64IC tests using gui
|
||||
|
||||
# Use this wally-pipelined.do file to run this example.
|
||||
# Either bring up ModelSim and type the following at the "ModelSim>" prompt:
|
||||
# do wally-pipelined.do
|
||||
# or, to run from a shell, type the following at the shell prompt:
|
||||
# vsim -do wally-pipelined.do -c
|
||||
# (omit the "-c" to see the GUI while running from the shell)
|
||||
|
||||
onbreak {resume}
|
||||
|
||||
# create library
|
||||
if [file exists work] {
|
||||
vdel -all
|
||||
}
|
||||
vlib work
|
||||
|
||||
# compile source files
|
||||
# suppress spurious warnings about
|
||||
# "Extra checking for conflicts with always_comb done at vopt time"
|
||||
# because vsim will run vopt
|
||||
|
||||
# default to config/rv64icfd, but allow this to be overridden at the command line. For example:
|
||||
# do wally-pipelined.do ../config/rv32ic
|
||||
switch $argc {
|
||||
0 {vlog +incdir+../config/rv64icfd +incdir+../config/shared ../testbench/testbench-imperas.sv ../src/*/*.sv -suppress 2583}
|
||||
1 {vlog +incdir+$1 +incdir+../config/shared ../testbench/testbench-imperas.sv ../testbench/function_radix.sv ../src/*/*.sv -suppress 2583}
|
||||
}
|
||||
# start and run simulation
|
||||
# remove +acc flag for faster sim during regressions if there is no need to access internal signals
|
||||
vopt +acc work.testbench -o workopt
|
||||
vsim workopt
|
||||
|
||||
view wave
|
||||
-- display input and output signals as hexadecimal values
|
||||
do ./wave-dos/default-waves.do
|
||||
|
||||
-- Run the Simulation
|
||||
#run 5000
|
||||
run -all
|
||||
#quit
|
||||
noview ../testbench/testbench-imperas.sv
|
||||
view wave
|
@ -1,103 +1,137 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Block Name: fmac.v
|
||||
// Author: David Harris
|
||||
// Date: 11/2/1995
|
||||
//
|
||||
// Block Description:
|
||||
// This is the top level block of a floating-point multiply/accumulate
|
||||
// unit(FMAC). It instantiates the following sub-blocks:
|
||||
//
|
||||
// array Booth encoding, partial product generation, product summation
|
||||
// expgen Exponent summation, compare, and adjust
|
||||
// align Alignment shifter
|
||||
// add Carry-save adder for accumulate, carry propagate adder
|
||||
// lza Leading zero anticipator to control normalization shifter
|
||||
// normalize Normalization shifter
|
||||
// round Rounding of result
|
||||
// exception Handles exceptional cases
|
||||
// bypass Handles bypass of result to ReadData1E or ReadData3E inputs
|
||||
// sign One bit sign handling block
|
||||
// special Catch special cases (inputs = 0 / infinity / etc.)
|
||||
//
|
||||
// The FMAC computes FmaResultM=ReadData1E*ReadData2E+ReadData3E, rounded with the mode specified by
|
||||
// RN, RZ, RM, or RP. The result is optionally bypassed back to
|
||||
// the ReadData1E or ReadData3E inputs for use on the next cycle. In addition, four signals
|
||||
// are produced: trap, overflow, underflow, and inexact. Trap indicates
|
||||
// an infinity, NaN, or denormalized number to be handled in software;
|
||||
// the other three signals are IEEE flags.
|
||||
//
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
module fma1(ReadData1E, ReadData2E, ReadData3E, FrmE,
|
||||
rE, sE, tE, bsE, killprodE, sumshiftE, sumshiftzeroE, aligncntE, aeE
|
||||
, xzeroE, yzeroE, zzeroE, xnanE,ynanE, znanE, xdenormE, ydenormE, zdenormE,
|
||||
xinfE, yinfE, zinfE, nanE, prodinfE);
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
module fma1(
|
||||
|
||||
input logic [63:0] ReadData1E; // input 1
|
||||
input logic [63:0] ReadData2E; // input 2
|
||||
input logic [63:0] ReadData3E; // input 3
|
||||
input logic [2:0] FrmE; // Rounding mode
|
||||
output logic [12:0] aligncntE; // status flags
|
||||
output logic [105:0] rE; // one result of partial product sum
|
||||
output logic [105:0] sE; // other result of partial products
|
||||
output logic [163:0] tE; // output logic of alignment shifter
|
||||
output logic [12:0] aeE; // multiplier expoent
|
||||
output logic bsE; // sticky bit of addend
|
||||
output logic killprodE; // ReadData3E >> product
|
||||
output logic xzeroE;
|
||||
output logic yzeroE;
|
||||
output logic zzeroE;
|
||||
output logic xdenormE;
|
||||
output logic ydenormE;
|
||||
output logic zdenormE;
|
||||
output logic xinfE;
|
||||
output logic yinfE;
|
||||
output logic zinfE;
|
||||
output logic xnanE;
|
||||
output logic ynanE;
|
||||
output logic znanE;
|
||||
output logic nanE;
|
||||
output logic prodinfE;
|
||||
output logic [8:0] sumshiftE;
|
||||
output logic sumshiftzeroE;
|
||||
input logic [63:0] ReadData1E,
|
||||
input logic [63:0] ReadData2E,
|
||||
input logic [63:0] ReadData3E,
|
||||
output logic [105:0] ProdManE,
|
||||
output logic [161:0] AlignedAddendE,
|
||||
output logic [12:0] ProdExpE,
|
||||
output logic AddendStickyE,
|
||||
output logic KillProdE,
|
||||
output logic XZeroE, YZeroE, ZZeroE,
|
||||
output logic XInfE, YInfE, ZInfE,
|
||||
output logic XNaNE, YNaNE, ZNaNE);
|
||||
|
||||
// Internal nodes
|
||||
|
||||
// output logic [12:0] aligncntE; // shift count for alignment
|
||||
logic [51:0] XMan,YMan,ZMan;
|
||||
logic [10:0] XExp,YExp,ZExp;
|
||||
logic XSgn,YSgn,ZSgn;
|
||||
logic [12:0] AlignCnt;
|
||||
logic [211:0] Shift;
|
||||
logic XDenormE, YDenormE, ZDenormE;
|
||||
|
||||
|
||||
logic prodof; // ReadData1E*ReadData2E out of range
|
||||
// split inputs into the sign bit, mantissa, and exponent for readability
|
||||
assign XSgn = ReadData1E[63];
|
||||
assign YSgn = ReadData2E[63];
|
||||
assign ZSgn = ReadData3E[63];
|
||||
|
||||
assign XExp = ReadData1E[62:52];
|
||||
assign YExp = ReadData2E[62:52];
|
||||
assign ZExp = ReadData3E[62:52];
|
||||
|
||||
assign XMan = ReadData1E[51:0];
|
||||
assign YMan = ReadData2E[51:0];
|
||||
assign ZMan = ReadData3E[51:0];
|
||||
|
||||
|
||||
|
||||
// determine if an input is a special value
|
||||
assign XNaNE = &ReadData1E[62:52] && |ReadData1E[51:0];
|
||||
assign YNaNE = &ReadData2E[62:52] && |ReadData2E[51:0];
|
||||
assign ZNaNE = &ReadData3E[62:52] && |ReadData3E[51:0];
|
||||
|
||||
assign XDenormE = ~(|ReadData1E[62:52]) && |ReadData1E[51:0];
|
||||
assign YDenormE = ~(|ReadData2E[62:52]) && |ReadData2E[51:0];
|
||||
assign ZDenormE = ~(|ReadData3E[62:52]) && |ReadData3E[51:0];
|
||||
|
||||
assign XInfE = &ReadData1E[62:52] && ~(|ReadData1E[51:0]);
|
||||
assign YInfE = &ReadData2E[62:52] && ~(|ReadData2E[51:0]);
|
||||
assign ZInfE = &ReadData3E[62:52] && ~(|ReadData3E[51:0]);
|
||||
|
||||
assign XZeroE = ~(|ReadData1E[62:0]);
|
||||
assign YZeroE = ~(|ReadData2E[62:0]);
|
||||
assign ZZeroE = ~(|ReadData3E[62:0]);
|
||||
|
||||
|
||||
|
||||
|
||||
// Calculate the product's exponent
|
||||
// - When multiplying two fp numbers, add the exponents
|
||||
// - Subtract 3ff to remove one of the biases (XExp + YExp has two biases, one from each exponent)
|
||||
// - Denormal numbers have an exponent value of 1, however they are
|
||||
// represented with an exponent of 0. add one if there is a denormal number
|
||||
assign ProdExpE = (XZeroE|YZeroE) ? 13'b0 :
|
||||
{2'b0, XExp} + {2'b0, YExp} - 13'h3ff + XDenormE + YDenormE;
|
||||
|
||||
// Calculate the product's mantissa
|
||||
// - Add the assumed one. If the number is denormalized or zero, it does not have an assumed one.
|
||||
assign ProdManE = {53'b0,~(XDenormE|XZeroE),XMan} * {53'b0,~(YDenormE|YZeroE),YMan};
|
||||
|
||||
|
||||
|
||||
|
||||
// determine the shift count for alignment
|
||||
// - negative means Z is larger, so shift Z left
|
||||
// - positive means the product is larger, so shift Z right
|
||||
// - Denormal numbers have an exponent value of 1, however they are
|
||||
// represented with an exponent of 0. add one to the exponent if it is a denormal number
|
||||
assign AlignCnt = ProdExpE - ZExp - ZDenormE;
|
||||
|
||||
// Alignment shifter
|
||||
|
||||
// Default addition without shifting
|
||||
// | 55'b0 | 106'b(product) | 2'b0 |
|
||||
// |1'b0| addnend |
|
||||
|
||||
// the 1'b0 before the addend is because the product's mantissa has two bits before the binary point (xx.xxxxxxxxxx...)
|
||||
|
||||
always_comb
|
||||
begin
|
||||
|
||||
// Set default values
|
||||
AddendStickyE = 0;
|
||||
KillProdE = 0;
|
||||
|
||||
// If the product is too small to affect the sum, kill the product
|
||||
|
||||
// | 55'b0 | 106'b(product) | 2'b0 |
|
||||
// | addnend |
|
||||
if ($signed(AlignCnt) <= $signed(-56)) begin
|
||||
KillProdE = 1;
|
||||
AlignedAddendE = {55'b0, ~(ZZeroE|ZDenormE),ZMan,2'b0};
|
||||
AddendStickyE = ~(XZeroE|YZeroE);
|
||||
|
||||
// If the Addend is shifted left (negative AlignCnt)
|
||||
|
||||
// | 55'b0 | 106'b(product) | 2'b0 |
|
||||
// | addnend |
|
||||
end else if($signed(AlignCnt) <= $signed(0)) begin
|
||||
Shift = {55'b0, ~(ZZeroE|ZDenormE),ZMan, 104'b0} << -AlignCnt;
|
||||
AlignedAddendE = Shift[211:50];
|
||||
AddendStickyE = |(Shift[49:0]);
|
||||
|
||||
// If the Addend is shifted right (positive AlignCnt)
|
||||
|
||||
// | 55'b0 | 106'b(product) | 2'b0 |
|
||||
// | addnend |
|
||||
end else if ($signed(AlignCnt)<=$signed(105)) begin
|
||||
Shift = {55'b0, ~(ZZeroE|ZDenormE),ZMan, 104'b0} >> AlignCnt;
|
||||
AlignedAddendE = Shift[211:50];
|
||||
AddendStickyE = |(Shift[49:0]);
|
||||
|
||||
// If the addend is too small to affect the addition
|
||||
// - The addend has to shift two past the end of the addend to be considered too small
|
||||
// - The 2 extra bits are needed for rounding
|
||||
|
||||
// | 55'b0 | 106'b(product) | 2'b0 |
|
||||
// | addnend |
|
||||
end else begin
|
||||
AlignedAddendE = 162'b0;
|
||||
AddendStickyE = ~ZZeroE;
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// Instantiate fraction datapath
|
||||
|
||||
multiply multiply(.xman(ReadData1E[51:0]), .yman(ReadData2E[51:0]), .*);
|
||||
align align(.zman(ReadData3E[51:0]),.*);
|
||||
|
||||
// Instantiate exponent datapath
|
||||
|
||||
expgen1 expgen1(.xexp(ReadData1E[62:52]),.yexp(ReadData2E[62:52]),.zexp(ReadData3E[62:52]),.*);
|
||||
// Instantiate special case detection across datapath & exponent path
|
||||
|
||||
special special(.*);
|
||||
|
||||
|
||||
// Instantiate control output logic
|
||||
|
||||
flag1 flag1(.*);
|
||||
end
|
||||
end
|
||||
|
||||
endmodule
|
||||
|
||||
|
@ -1,104 +1,107 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Block Name: fmac.v
|
||||
// Author: David Harris
|
||||
// Date: 11/2/1995
|
||||
//
|
||||
// Block Description:
|
||||
// This is the top level block of a floating-point multiply/accumulate
|
||||
// unit(FMAC). It instantiates the following sub-blocks:
|
||||
//
|
||||
// array Booth encoding, partial product generation, product summation
|
||||
// expgen Exponent summation, compare, and adjust
|
||||
// align Alignment shifter
|
||||
// add Carry-save adder for accumulate, carry propagate adder
|
||||
// lza Leading zero anticipator to control normalization shifter
|
||||
// normalize Normalization shifter
|
||||
// round Rounding of result
|
||||
// exception Handles exceptional cases
|
||||
// bypass Handles bypass of result to ReadData1M or ReadData3M inputs
|
||||
// sign One bit sign handling block
|
||||
// special Catch special cases (inputs = 0 / infinity / etc.)
|
||||
//
|
||||
// The FMAC computes FmaResultM=ReadData1M*ReadData2M+ReadData3M, rounded with the mode specified by
|
||||
// RN, RZ, RM, or RP. The result is optionally bypassed back to
|
||||
// the ReadData1M or ReadData3M inputs for use on the next cycle. In addition, four signals
|
||||
// are produced: trap, overflow, underflow, and inexact. Trap indicates
|
||||
// an infinity, NaN, or denormalized number to be handled in software;
|
||||
// the other three signals are IEEE flags.
|
||||
//
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
module fma2(ReadData1M, ReadData2M, ReadData3M, FrmM,
|
||||
FmaResultM, FmaFlagsM, aligncntM, rM, sM,
|
||||
tM, normcntM, aeM, bsM,killprodM,
|
||||
xzeroM, yzeroM,zzeroM,xdenormM,ydenormM,
|
||||
zdenormM,xinfM,yinfM,zinfM,xnanM,ynanM,znanM,
|
||||
nanM,sumshiftM,sumshiftzeroM,prodinfM
|
||||
|
||||
);
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
module fma2(
|
||||
|
||||
input logic [63:0] ReadData1M; // input logic 1
|
||||
input logic [63:0] ReadData2M; // input logic 2
|
||||
input logic [63:0] ReadData3M; // input logic 3
|
||||
input logic [2:0] FrmM; // Rounding mode
|
||||
input logic [12:0] aligncntM; // status flags
|
||||
input logic [105:0] rM; // one result of partial product sum
|
||||
input logic [105:0] sM; // other result of partial products
|
||||
input logic [163:0] tM; // output of alignment shifter
|
||||
input logic [8:0] normcntM; // shift count for normalizer
|
||||
input logic [12:0] aeM; // multiplier expoent
|
||||
input logic bsM; // sticky bit of addend
|
||||
input logic killprodM; // ReadData3M >> product
|
||||
input logic prodinfM;
|
||||
input logic xzeroM;
|
||||
input logic yzeroM;
|
||||
input logic zzeroM;
|
||||
input logic xdenormM;
|
||||
input logic ydenormM;
|
||||
input logic zdenormM;
|
||||
input logic xinfM;
|
||||
input logic yinfM;
|
||||
input logic zinfM;
|
||||
input logic xnanM;
|
||||
input logic ynanM;
|
||||
input logic znanM;
|
||||
input logic nanM;
|
||||
input logic [8:0] sumshiftM;
|
||||
input logic sumshiftzeroM;
|
||||
|
||||
|
||||
output logic [63:0] FmaResultM; // output FmaResultM=ReadData1M*ReadData2M+ReadData3M
|
||||
output logic [4:0] FmaFlagsM; // status flags
|
||||
input logic [63:0] ReadData1M,
|
||||
input logic [63:0] ReadData2M,
|
||||
input logic [63:0] ReadData3M,
|
||||
input logic [2:0] FrmM,
|
||||
input logic [105:0] ProdManM,
|
||||
input logic [161:0] AlignedAddendM,
|
||||
input logic [12:0] ProdExpM,
|
||||
input logic AddendStickyM,
|
||||
input logic KillProdM,
|
||||
input logic [3:0] FOpCtrlM,
|
||||
input logic XZeroM, YZeroM, ZZeroM,
|
||||
input logic XInfM, YInfM, ZInfM,
|
||||
input logic XNaNM, YNaNM, ZNaNM,
|
||||
output logic [63:0] FmaResultM,
|
||||
output logic [4:0] FmaFlagsM);
|
||||
|
||||
|
||||
// Internal nodes
|
||||
logic [163:0] sum; // output of carry prop adder
|
||||
logic [53:0] v; // normalized sum, R, S bits
|
||||
// logic [12:0] aligncnt; // shift count for alignment
|
||||
logic [8:0] normcnt; // shift count for normalizer
|
||||
logic negsum; // negate sum
|
||||
logic invz; // invert addend
|
||||
logic selsum1; // select +1 mode of sum
|
||||
logic negsum0; // sum +0 < 0
|
||||
logic negsum1; // sum +1 < 0
|
||||
logic sumzero; // sum = 0
|
||||
logic infinity; // generate infinity on overflow
|
||||
logic sumof; // result out of range
|
||||
logic zexpsel;
|
||||
logic denorm0;
|
||||
logic resultdenorm;
|
||||
logic inf;
|
||||
logic specialsel;
|
||||
logic expplus1;
|
||||
logic sumuf;
|
||||
logic psign;
|
||||
logic sticky;
|
||||
logic [12:0] de0;
|
||||
logic isAdd;
|
||||
|
||||
assign isAdd = 1;
|
||||
logic [51:0] XMan, YMan, ZMan, WMan;
|
||||
logic [10:0] XExp, YExp, ZExp, WExp;
|
||||
logic XSgn, YSgn, ZSgn, WSgn, PSgn;
|
||||
logic IsSub;
|
||||
logic [105:0] ProdMan2;
|
||||
logic [162:0] AlignedAddend2;
|
||||
logic [161:0] Sum;
|
||||
logic [162:0] SumTmp;
|
||||
logic [12:0] SumExp;
|
||||
logic [12:0] SumExpMinus1;
|
||||
logic [12:0] SumExpTmp, WExpTmp;
|
||||
logic [53:0] NormSum;
|
||||
logic [161:0] NormSumTmp;
|
||||
logic [8:0] NormCnt;
|
||||
logic NormSumSticky;
|
||||
logic SumZero;
|
||||
logic NegSum;
|
||||
logic InvZ;
|
||||
logic ResultDenorm;
|
||||
logic Sticky;
|
||||
logic Plus1, Minus1, Plus1Tmp, Minus1Tmp;
|
||||
logic Invalid,Underflow,Overflow,Inexact;
|
||||
logic [8:0] DenormShift;
|
||||
logic ProdInf, ProdOf, ProdUf;
|
||||
logic [63:0] FmaResultTmp;
|
||||
logic SubBySmallNum;
|
||||
|
||||
|
||||
// split inputs into the sign bit, mantissa, and exponent for readability
|
||||
assign XSgn = ReadData1M[63];
|
||||
assign YSgn = ReadData2M[63];
|
||||
assign ZSgn = ReadData3M[63];
|
||||
|
||||
assign XExp = ReadData1M[62:52];
|
||||
assign YExp = ReadData2M[62:52];
|
||||
assign ZExp = ReadData3M[62:52];
|
||||
|
||||
assign XMan = ReadData1M[51:0];
|
||||
assign YMan = ReadData2M[51:0];
|
||||
assign ZMan = ReadData3M[51:0];
|
||||
|
||||
|
||||
|
||||
// is it an FMSUB or FNMSUB instruction
|
||||
assign IsSub = FOpCtrlM[0];
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// Addition
|
||||
|
||||
// Negate Z when doing one of the following operations:
|
||||
// -prod + Z
|
||||
// prod - Z
|
||||
assign InvZ = IsSub ? ~(ZSgn ^ PSgn) : (ZSgn ^ PSgn);
|
||||
|
||||
// Choose an inverted or non-inverted addend - the one is added later
|
||||
assign AlignedAddend2 = InvZ ? ~{2'b0,AlignedAddendM} : {2'b0,AlignedAddendM};
|
||||
// Kill the product if the product is too small to affect the addition (determined in fma1.sv)
|
||||
assign ProdMan2 = KillProdM ? 106'b0 : ProdManM;
|
||||
|
||||
// Do the addition
|
||||
// - add one to negate if the addend was inverted
|
||||
// - the 2 extra bits at the beginning and end are needed for rounding
|
||||
assign SumTmp = AlignedAddend2 + {55'b0, ProdMan2,2'b0} + InvZ;
|
||||
|
||||
// Is the sum negative
|
||||
assign NegSum = SumTmp[162];
|
||||
// If the sum is negative, negate the sum.
|
||||
assign Sum = NegSum ? -SumTmp[161:0] : SumTmp[161:0];
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// Leading one detector
|
||||
logic [8:0] i;
|
||||
always_comb begin
|
||||
i = 0;
|
||||
while (~Sum[161-i] && $unsigned(i) <= $unsigned(9'd161)) i = i+1; // search for leading one
|
||||
NormCnt = i+1; // compute shift count
|
||||
end
|
||||
|
||||
|
||||
|
||||
@ -110,25 +113,163 @@ module fma2(ReadData1M, ReadData2M, ReadData3M, FrmM,
|
||||
|
||||
|
||||
|
||||
// Normalization
|
||||
|
||||
|
||||
// Determine if the sum is zero
|
||||
assign SumZero = ~(|Sum);
|
||||
|
||||
// Determine if the result is denormal
|
||||
assign ResultDenorm = $signed(SumExpTmp)<=0 & ($signed(SumExpTmp+13'd52)>=0);
|
||||
|
||||
// Determine the shift needed for denormal results
|
||||
assign DenormShift = ResultDenorm ? SumExpTmp-1 : 6'b0;
|
||||
|
||||
// Normalize the sum
|
||||
assign NormSumTmp = SumZero ? 162'b0 : Sum << NormCnt+DenormShift;
|
||||
assign NormSum = NormSumTmp[161:108];
|
||||
// Calculate the sticky bit
|
||||
assign NormSumSticky = (|NormSumTmp[107:0]);
|
||||
assign Sticky = AddendStickyM | NormSumSticky;
|
||||
|
||||
// Determine sum's exponent
|
||||
assign SumExpTmp = KillProdM ? ZExp : ProdExpM + -({5'b0, NormCnt} - 13'd56);
|
||||
assign SumExp = SumZero ? 12'b0 :
|
||||
ResultDenorm ? 12'b0 :
|
||||
SumExpTmp;
|
||||
|
||||
|
||||
|
||||
// Instantiate fraction datapath
|
||||
|
||||
add add(.*);
|
||||
lza lza(.*);
|
||||
normalize normalize(.zexp(ReadData3M[62:52]),.*);
|
||||
round round(.xman(ReadData1M[51:0]), .yman(ReadData2M[51:0]),.zman(ReadData3M[51:0]), .wman(FmaResultM[51:0]),.wsign(FmaResultM[63]),.*);
|
||||
|
||||
// Instantiate exponent datapath
|
||||
|
||||
expgen2 expgen2(.xexp(ReadData1M[62:52]),.yexp(ReadData2M[62:52]),.zexp(ReadData3M[62:52]),.wexp(FmaResultM[62:52]),.*);
|
||||
|
||||
|
||||
// Instantiate control logic
|
||||
|
||||
|
||||
|
||||
|
||||
// Rounding
|
||||
|
||||
// round to nearest even
|
||||
// {NormSum[1], NormSum[0], Sticky}
|
||||
// 0xx - do nothing
|
||||
// 100 - tie - Plus1 if NormSum[2] = 1
|
||||
// - don't add 1 if there was supposed to be a subtraction by a small number that didn't happen
|
||||
// 101/110/111 - Plus1
|
||||
|
||||
// round to zero - do nothing
|
||||
// - subtract 1 if a small number was supposed to be subtracted from the positive result
|
||||
|
||||
// round to -infinity - Plus1 if negative
|
||||
// - don't add 1 if there was supposed to be a subtraction by a small number that didn't happen
|
||||
// - subtract 1 if a small number was supposed to be subtracted from the positive result
|
||||
|
||||
// round to infinity - Plus1 if positive
|
||||
|
||||
// - don't add 1 if there was supposed to be a subtraction by a small number that didn't happen
|
||||
// - subtract 1 if a small number was supposed to be subtracted from the negitive result
|
||||
|
||||
// round to nearest max magnitude
|
||||
// {NormSum[1], NormSum[0], Sticky}
|
||||
// 0xx - do nothing
|
||||
// 100 - tie - Plus1
|
||||
// - don't add 1 if there was supposed to be a subtraction by a small number that didn't happen
|
||||
// 101/110/111 - Plus1
|
||||
|
||||
// Determine if the result was supposed to be subtracted by a small number
|
||||
assign SubBySmallNum = AddendStickyM&InvZ&~NormSumSticky;
|
||||
|
||||
always_comb begin
|
||||
// Determine if you add 1
|
||||
case (FrmM)
|
||||
3'b000: Plus1Tmp = NormSum[1] & (NormSum[0] | (Sticky&~(~NormSum[0]&SubBySmallNum)) | (~NormSum[0]&~Sticky&NormSum[2]));//round to nearest even
|
||||
3'b001: Plus1Tmp = 0;//round to zero
|
||||
3'b010: Plus1Tmp = WSgn & ~(SubBySmallNum);//round down
|
||||
3'b011: Plus1Tmp = ~WSgn & ~(SubBySmallNum);//round up
|
||||
3'b100: Plus1Tmp = (NormSum[1] & (NormSum[0] | (Sticky&~(~NormSum[0]&SubBySmallNum)) | (~NormSum[0]&~Sticky)));//round to nearest max magnitude
|
||||
default: Plus1Tmp = 1'bx;
|
||||
endcase
|
||||
// Determine if you subtract 1
|
||||
case (FrmM)
|
||||
3'b000: Minus1Tmp = 0;//round to nearest even
|
||||
3'b001: Minus1Tmp = SubBySmallNum;//round to zero
|
||||
3'b010: Minus1Tmp = ~WSgn & SubBySmallNum;//round down
|
||||
3'b011: Minus1Tmp = WSgn & SubBySmallNum;//round up
|
||||
3'b100: Minus1Tmp = 0;//round to nearest max magnitude
|
||||
default: Minus1Tmp = 1'bx;
|
||||
endcase
|
||||
|
||||
end
|
||||
|
||||
// If an answer is exact don't round
|
||||
assign Plus1 = Sticky | (|NormSum[1:0]) ? Plus1Tmp : 0;
|
||||
assign Minus1 = Sticky | (|NormSum[1:0]) ? Minus1Tmp : 0;
|
||||
// Compute rounded result
|
||||
assign {WExpTmp, WMan} = {SumExp, NormSum[53:2]} + Plus1 - Minus1;
|
||||
assign WExp = WExpTmp[10:0];
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// Sign calculation
|
||||
|
||||
// Calculate the product's sign
|
||||
assign PSgn = XSgn ^ YSgn;
|
||||
|
||||
// Determine the sign if the sum is zero
|
||||
// if product underflows then use psign
|
||||
// otherwise
|
||||
// if cancelation then 0 unless round to -inf
|
||||
// otherwise psign
|
||||
assign zerosign = Underflow ? PSgn :
|
||||
(IsSub ? (PSgn^ZSgn ? PSgn : FrmM == 3'b010) :
|
||||
(PSgn^ZSgn ? FrmM == 3'b010 : PSgn));
|
||||
|
||||
// is the result negitive
|
||||
// if p - z is the Sum negitive
|
||||
// if -p + z is the Sum positive
|
||||
// if -p - z then the Sum is negitive
|
||||
assign resultsgn = InvZ&ZSgn&NegSum | InvZ&PSgn&~NegSum | (ZSgn&PSgn);
|
||||
assign WSgn = SumZero ? zerosign : resultsgn;
|
||||
|
||||
sign sign(.xsign(ReadData1M[63]),.ysign(ReadData2M[63]),.zsign(ReadData3M[63]),.wsign(FmaResultM[63]),.*);
|
||||
flag2 flag2(.xsign(ReadData1M[63]),.ysign(ReadData2M[63]),.zsign(ReadData3M[63]),.vbits(v[1:0]),.*);
|
||||
// Select the result
|
||||
assign FmaResultTmp = XNaNM ? {XSgn, XExp, 1'b1,XMan[50:0]} :
|
||||
YNaNM ? {YSgn, YExp, 1'b1,YMan[50:0]} :
|
||||
ZNaNM ? {ZSgn, ZExp, 1'b1,ZMan[50:0]} :
|
||||
Invalid ? {WSgn, 11'h7ff, 1'b1, 51'b0} : // has to be before inf
|
||||
XInfM ? {PSgn, XExp, XMan} :
|
||||
YInfM ? {PSgn, YExp, YMan} :
|
||||
ZInfM ? {ZSgn^IsSub, ZExp, ZMan} :
|
||||
Overflow ? {WSgn, 11'h7ff, 52'b0} :
|
||||
Underflow ? {WSgn, 63'b0} :
|
||||
KillProdM ? ReadData3M - (Minus1&AddendStickyM) + (Plus1&AddendStickyM): // has to be after Underflow
|
||||
{WSgn,WExp,WMan};
|
||||
|
||||
// Negate the result if FNMADD or FNMSUB instruction
|
||||
assign FmaResultM[63] = FOpCtrlM[1] ? ~FmaResultTmp[63] : FmaResultTmp[63];
|
||||
assign FmaResultM[62:0] = FmaResultTmp[62:0];
|
||||
|
||||
// Set Invalid flag for following cases:
|
||||
// 1) Inf - Inf
|
||||
// 2) 0 * Inf
|
||||
// 3) any input is a signaling NaN
|
||||
assign ProdOf = (ProdExpM >= 2047 && ~ProdExpM[12]);
|
||||
assign ProdInf = ProdOf && ~XNaNM && ~YNaNM;
|
||||
assign Invalid = (XNaNM&~XMan[51]) | (YNaNM&~YMan[51]) | (ZNaNM&~ZMan[51]) | ((XInfM || YInfM || ProdInf) & ZInfM & (XSgn ^ YSgn ^ ZSgn)) | (XZeroM & YInfM) | (YZeroM & XInfM);
|
||||
|
||||
// Set Overflow flag if the number is too big to be represented
|
||||
assign Overflow = WExpTmp >= 2047 & ~WExpTmp[12];
|
||||
|
||||
// Set Underflow flag if the number is too small to be represented and isn't denormalized
|
||||
assign ProdUf = KillProdM & ZZeroM;
|
||||
assign Underflow = (WExpTmp[12] & ~ResultDenorm) | ProdUf;
|
||||
|
||||
// Set Inexact flag if the result is different from what would be output given infinite precision
|
||||
assign Inexact = Sticky|Overflow|Underflow | (|NormSum[1:0]);
|
||||
|
||||
// Combine flags - FMA can't set the Divide by zero flag
|
||||
assign FmaFlagsM = {Invalid, 1'b0, Overflow, Underflow, Inexact};
|
||||
|
||||
endmodule
|
||||
|
||||
|
@ -1 +1,170 @@
|
||||
c3f000200003fffe 0000000000000001 001ffffffffffffe 80cffc400007fffd 80cffc400007fffc Wrong FmaResultM= -64 ydenorm 1119653
|
||||
cce008007fffffff 7fe6e0fac3dc6e26 401ffffffffffffe fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 28027
|
||||
c03fffffffffc800 7fdfffffffffe000 37f07ffffffffffc fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 44043
|
||||
c7f000ffffffffef 7fefffffffffde00 4e1ffffffffffe7f fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 107106
|
||||
c7f00000dffffffe 7fe0000000000000 8000000000000000 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 238237
|
||||
ffdf0000001fffff 7feffffffffffffe 7fe0000000000000 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 310309
|
||||
c79ff80003fffffe 7feffc0000003ffe 2bd0020000000001 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 426425
|
||||
ffeffffeffc00000 3fffffffffffffff 8000000000000000 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 436435
|
||||
d16ff800007fffff 7fe0000000000000 c000000000000000 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 517516
|
||||
d10ffffffff3fffe 7feffffffffffffe b9d07f0000000000 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 519518
|
||||
442ff9fffffffffe ffefffffffffffff 3ff0000000000000 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 553552
|
||||
c34f24b48d2af3e7 7fef7fe000000000 800ffffffffffffe fff0000000000000 ffefffffffffffff Wrong FmaResultM= z zdenorm ovrflw FmaResultM=-inf 577576
|
||||
7fdfffffff8000ff c3f0100000000002 39300dfffffffffe fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 593592
|
||||
ffe00007fffffdfe 4340000000000001 ffd34131592163f6 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 654653
|
||||
4b98eba3e512fb7b ffe84639040d967a 42c00000010001fe fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 683682
|
||||
ffed83a6b2e656b1 7fe0000000000001 0010000000000000 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 796795
|
||||
7fd5220b51609cf6 c030000000001020 7fdfbfffffffffdf fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 903902
|
||||
c3d6eb6dede43198 7feffffffffffffe 3a6008000000000f fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 1078076
|
||||
c1f02000001fffff 7fe0000000000001 e8f000000040000f fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 1285283
|
||||
c1cdfffbffffffff 7fe0000000000001 bca0000000000000 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 1355353
|
||||
43447336acaf7bd8 ffeffffffffffffe 0010000000000000 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 1391389
|
||||
4010000000fff7ff ffe0000000000000 7fdfffc000003ffe fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 1528526
|
||||
ffe0000002000003 47fffc00000007ff 93b0040000002000 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 1597595
|
||||
4060000200000400 ffe0000000000000 7fe0000000000000 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 1598596
|
||||
fe7007fffdffffff 7fdffffffffff03e 001ffffffffffffe fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 1631629
|
||||
4000000000000000 ffe0000000000001 3fdffffffffffffe fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 1738736
|
||||
4000000000000000 ffeffffffffffffe 4263dd4adb450db9 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 1740738
|
||||
40200001ffc00000 ffe0000000000000 3fdfcfffffffffff fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 1807805
|
||||
400ffffffffffffe ffd00013fffffffe 40200000100001ff fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 1941939
|
||||
400ffffffffffffe ffe0000000000001 c00fffe003ffffff fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 1947945
|
||||
7fe00000080000fe bfffffffffffffff 3fd002000000003f fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2006003
|
||||
4010000000000000 ffe0000000000001 7feffffffffffffe fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2018015
|
||||
4010000000000000 ffeffffffffffffe bf7ffffffff80001 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2020017
|
||||
43ffffd000000000 ffe0000000000000 613ffffffffffe1e fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2087084
|
||||
c1fb6efe117a3ae3 7fefffffffffffff 43c0000001effffe fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2123120
|
||||
ffdfffffc0000000 7fe0000002002000 3fffffffffbfff80 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2147144
|
||||
401ffffffffffffe ffe0000000000001 7c300040000000ff fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2227224
|
||||
4340000000000000 ffe0000000000001 bfeffffffffffffe fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2297294
|
||||
c0f0000000203fff 7fefffffffffffff c921fffffffffefe fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2402399
|
||||
7fedffffffdfffff c7f0400000000008 401ffffffffffffe fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2406403
|
||||
434fffffffffffff ffd0000008fffffe c03fffffffffffff fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2419416
|
||||
41dfffffffe00003 ffe0000000000001 3ff0000000000000 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2436433
|
||||
c1f0000000037fff 7fdffffffff7ffc0 3fdffffffffffffe fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2451448
|
||||
ffebfffffffffbff 4010000000000001 bf20001fffffffe0 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2465462
|
||||
ffe000020001ffff 7fdfdffff7ffffff 41d000083fffffff fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2471468
|
||||
434ffffffffffffe ffe0000000000001 bf1fffffc00003ff fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2506503
|
||||
7fe0000000000000 c1c0000001ffffbf 0000000000000001 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z zdenorm ovrflw FmaResultM=-inf 2538535
|
||||
7fe0000000000000 c1d264933e9e988c 3ca0000000000001 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2565562
|
||||
7fe0000000000000 c00fffffffffffff bcaffffffffffffe fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2567564
|
||||
7fe0000000000000 c010000000000001 403400003fffffff fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2569566
|
||||
7fe0000000000001 c3d0bfffffffffff a9817e19c25e6ffe fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2590587
|
||||
7fe0000000000001 c1c01feffffffffe 3fe0000000000001 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2592589
|
||||
7fe0000000000001 f860000ffbfffffe 4000000000000001 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2619616
|
||||
7fe0000000000001 c1e29f751d0db106 41dff88000000000 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2626623
|
||||
7fe0000000000001 c010000000000001 800ffffffffffffe fff0000000000000 ffefffffffffffff Wrong FmaResultM= z zdenorm ovrflw FmaResultM=-inf 2639636
|
||||
7fe0000000000001 c340000000000000 41e9bfbd1705ab74 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2641638
|
||||
7fe0000000000001 c1ffffc0007fffff c0e00000003f8000 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2644641
|
||||
7fefffffffffffff c3cfff000003ffff c01fffffefbfffff fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2653650
|
||||
c00000ffc0000000 7fefffffffff81ff 00199d0888644678 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2660657
|
||||
7fefffffffffffff c01fffe00000003e 3cdedfffffffffff fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2671668
|
||||
7fefffffffffffff c7e00800ffffffff c010000000000001 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2682679
|
||||
7fefffffffffffff c3f50270323fdbca 3fe0000000000001 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2691688
|
||||
7fefffffffffffff c06f000000000006 8010000000000001 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2700697
|
||||
7fefffffffffffff bff0000000000001 001ffffffffffffe fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2702699
|
||||
7fefffffffffffff bffffffffffffffe 47edd848c981ea6a fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2704701
|
||||
7fefffffffffffff d6f0007fbfffffff 380ff8000000001f fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2707704
|
||||
7fefffffffffffff c167c6ca402625fe ffe0000000000001 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2709706
|
||||
7fefffffffffffff c340000000000000 7feffffffffffffe fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2711708
|
||||
7fefffffffffffff c34fffffffffffff c1a3cdb48240da83 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2713710
|
||||
7feffffffffffffe c01580f1a3e9c31d 3d258f8ba280bed4 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2725722
|
||||
7feffffffffffffe ffd800001fffffff bfd0000000000001 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2727724
|
||||
7feffffffffffffe c27a98a4d75fad64 0000000000000001 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z zdenorm ovrflw FmaResultM=-inf 2736733
|
||||
c01ffffffe03ffff 7fd00000000c0000 c00ffffffffffffe fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2739736
|
||||
7feffffffffffffe c3f01ffffff00000 4340000000000001 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2745742
|
||||
7feffffffffffffe c0550d69ccececd4 403ffffff83fffff fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2761758
|
||||
7feffffffffffffe c00fffffffffffff b81080ffffffffff fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2776773
|
||||
7feffffffffffffe c0020ec4bd7f8123 403894684b0415af fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2779776
|
||||
7feffffffffffffe c34ffffffffffffe 401ffffffffffffe fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2783780
|
||||
7feffffffffffffe ffe0000000000001 43c0000000000bfe fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2785782
|
||||
7feffffffffffffe c1f000000003ff7f 40017ffffffffffe fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2788785
|
||||
bf9ffffffd800000 7fefffffffffffff ffefffffffbfffff fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2960957
|
||||
e8d01e2c59865900 7fe05fffffffffff c34ffffffffffffe fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 2964961
|
||||
ffd917679344f70e 401fffffffffffff c000000000000000 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 3094090
|
||||
4470000023ffffff ffe0000000000001 b802000001ffffff fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 3204200
|
||||
43627f4abb7a5c8e ffefffffffffffff 0010000000000000 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 3274270
|
||||
c1c0000820000000 7feffffffff8001f 402000100000007f fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 3332328
|
||||
c1cd41643238b450 7feffffffffffffe 3f4012189596a55a fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 3519515
|
||||
c80ea7921c438451 7fe008000000007e 424153696dc450d3 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 3552548
|
||||
4f000fffffffffff ffefffffffffffff 4010000000000000 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 3553549
|
||||
7fe1868cfb076bc1 c34000000000037f b7effffc003ffffe fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 3719715
|
||||
c3fff9fffffffffe 7fe0000000000000 3d6000008000000e fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 3726722
|
||||
43f007ffbfffffff ffefffffffffffff 43dffffeffffffbf fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 3762758
|
||||
7fdfffdfffffffbe c01fffffffffffff 3fd0000000000000 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 3895891
|
||||
ffeefffffffffff7 43e0003ffffeffff b7f000001fdfffff fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 4125120
|
||||
4800002000000007 ffe0000000000000 3ff0000000000000 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 4319314
|
||||
43f856a5096bfc0d ffeffffffffffffe 3fd0000000000000 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 4391386
|
||||
c009c2b9147e606c 7fe0000002007fff bfa004001ffffffe fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 4440435
|
||||
4030008000003fff ffe0000000000000 b810eaddea941d3f fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 4528523
|
||||
67affffff8000006 f3016e70e2a6bd2f c1edddf29e459b21 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 4548543
|
||||
ffe07ffbffffffff 5026589203bb88d1 401ffffffffffffe fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 4586581
|
||||
43dffffc00000003 ffe0000000000000 8000000000000000 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 4598593
|
||||
ffdfffffff800003 4010000000000001 c290000080000002 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 4627622
|
||||
ffd001fffffffbff 4010000000000001 8000000000000000 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 4697692
|
||||
bffffffffffffffe 7fefffffffffffff 3d30040000200000 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 4704699
|
||||
c000000000000000 7fefffffffffffff bfeffffffffffffe fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 4774769
|
||||
c000000000000000 7fe9d625d7f2ee96 380ffeffffffc000 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 4797792
|
||||
41efffffbfffdfff ffe0000000000000 bbf0000003f80000 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 4807802
|
||||
fcf00000000003e0 7fdfffffffc02000 bfeffffffffffffe fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 4892887
|
||||
c00ffffffffffffe 7fe0000000000000 001ffffffffffffe fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 4981976
|
||||
c00ffffffffffffe 7fefffffffffffff 4020e8f734a930e7 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 4983978
|
||||
ffeffffc01fffffe 43d0000000000000 3806864c983757ae fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 5030024
|
||||
41b0000000010007 ffe0000000000001 0010000000000000 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 5157151
|
||||
c3e413dc0ee29162 7fefffffffffffff 8000000000000000 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 5193187
|
||||
c01ffffffffffffe 7fe0000000000000 401ffffffffffffe fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 5261255
|
||||
c01ffffffffffffe 7fefffffffffffff c1c177d35a8a07ad fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 5263257
|
||||
c340000000000000 7feffffffffffffe 3ffffffffffffffe fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 5333327
|
||||
c34ff0000003fffe 7fefffffffffffff c0101442690e84e3 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 5402396
|
||||
c340000000000001 7fe41774eee28bfa 37efffff000000ff fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 5437431
|
||||
c34fffffffffffff 7fe0000000000000 4010008001fffffe fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 5470464
|
||||
c34ffffffffffffe 7fe0000000000000 bcaffffffffffffe fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 5540534
|
||||
c34ffffffffffffe 7feffffffffffffe c7e6b68e99fe64db fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 5542536
|
||||
ffe0000000000000 41effffff7fffffe 2a7000207fffffff fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 5590584
|
||||
ffe0000000000000 40b00000000008ff 4013ac1788ee2681 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 5599593
|
||||
ffe0000000000000 4010000000000000 3fdffffffffffffe fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 5603597
|
||||
ffe0000000000000 401fffffffffffff 0012000000000001 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 5605599
|
||||
ffe0000000000000 45e00007fff7ffff 9c80852a49e348a6 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 5608602
|
||||
ffe0000000000000 41e6d2bd893fa49f 0000000000000001 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z zdenorm ovrflw FmaResultM=-inf 5610604
|
||||
ffe0000000000000 7feffffffffffffe 800ffffffffffffe fff0000000000000 ffefffffffffffff Wrong FmaResultM= z zdenorm ovrflw FmaResultM=-inf 5612606
|
||||
ffe0000000000000 4804ecddd4dee74f 9700000101fffffe fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 5617611
|
||||
ffe0000000000000 47e0400000000100 4340000000000001 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 5619613
|
||||
ffe0000000000000 41d0000000001fff 800007ffffffdfff fff0000000000000 ffefffffffffffff Wrong FmaResultM= z zdenorm ovrflw FmaResultM=-inf 5626620
|
||||
ffe0000000000001 4c7ffffffff87fff 3fbfdffffffffff7 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 5662656
|
||||
ffe0000000000001 401ffffffffffffe 001ffffffffffffe fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 5675669
|
||||
ffe0000000000001 4340000000000001 48700003fffefffe fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 5677671
|
||||
ffe0000000000001 4000f2f5230ef1a6 382efffffeffffff fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 5689683
|
||||
ffe0000000000001 407b2a20706ca02f bcc8eea3de85c218 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 5707701
|
||||
41efdffffffbfffe ffe0000000000001 bca0000000000000 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 5715709
|
||||
ffe0000000000001 43e000000000ffff 4340000000000001 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 5718712
|
||||
ffedffffffff7fff 7f500000001fffff 469cefa7e05db8e7 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 5728722
|
||||
ffefffffffffffff 3fffffffffffffff bcaffffffffffffe fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 5738732
|
||||
ffefffffffffffff 4000000000000001 800ffffffdffe000 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z zdenorm ovrflw FmaResultM=-inf 5740734
|
||||
ffefffffffffffff 7fe0000000000000 3fdffffffffffe1f fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 5749743
|
||||
ffd44208deea7d5b 7fdffffcffffffff caf0000000007fff fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 5764758
|
||||
ffefffffffffffff 43cffff6ffffffff 47ffba85ed27c05e fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 5779773
|
||||
ffeffffffffffffe 40b0000fffffffc0 bfd0000000000001 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 5799793
|
||||
ffeffffffffffffe 43ea49f9e3cf97b4 0000000000000001 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z zdenorm ovrflw FmaResultM=-inf 5808802
|
||||
ffeffffffffffffe 4000000000000001 800ffffffffffffe fff0000000000000 ffefffffffffffff Wrong FmaResultM= z zdenorm ovrflw FmaResultM=-inf 5810804
|
||||
ffeffffffffffffe 4010000000000000 bc800001ffffffe0 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 5812806
|
||||
ffeffffffffffffe 7fe0000000000000 c34ffffffffffffe fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 5819813
|
||||
ffeffffffffffffe 7feffffffffffffe c1efff801fffffff fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 5821815
|
||||
ffdfffffc0007ffe 4340000000000001 8000000000000000 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 5886880
|
||||
c4a000001ffeffff 7fe0000000000000 b80fc03ffffffffe fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 5888882
|
||||
ffdfffff00000040 48f00001bfffffff c00ffffffffffffe fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 5910904
|
||||
c37ffffffffffbf0 7fd1800000000000 bfa7e7cad560a3d0 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 5912906
|
||||
c1700000000007f7 7feffffffffffffe 3f6ff7ffffffefff fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 6240233
|
||||
c3fffffffdfe0000 7fe0000000000000 c34fff6000000000 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 6447440
|
||||
400ffffdfffff7fe ffefffffffffffff 41de000000007ffe fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 6483476
|
||||
4030000000004020 ffe88b9c477c3a97 ffe007ffff000000 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 6575568
|
||||
7fe00807ffffffff c1e0000000007fe0 bfeffffffffffffe fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 6676669
|
||||
ffdfc00000000800 7fe0000000000000 bcffffffffffefef fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 6726719
|
||||
7feffffeffffbfff c34ffffffffffffe c000000000000000 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 6760753
|
||||
42bff00000000010 ffefffffffffffff c3003a94038a1ec3 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 6762755
|
||||
c3c00ffffffffeff 7feddda224891f86 43d0aa9335103e61 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 6782775
|
||||
c08ff80000000400 7fe0000000000001 3ff0000000000000 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 6796789
|
||||
c07fffdfffffffbe 7feffffffffffffe 474ffffffdffff80 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 6798791
|
||||
c01fffffeffff7ff 7fd0080080000000 bff26df7cf61cdd5 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 6827820
|
||||
c7effff000000004 7fe0000008000fff 4770000007ffbfff fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 6863856
|
||||
7fe85e6f4033d7dd c000000000000000 bfe0000000000000 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 7031023
|
||||
c1f732bc454b0563 7fe0000000000001 8000000000000000 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 7076068
|
||||
ffe000000fffffbe 401ffffffffffffe b80d2116944eef72 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 7141133
|
||||
ffd0002000001fff 40e00003ffffefff c03fffffffe80000 fff0000000000000 ffefffffffffffff Wrong FmaResultM= z ovrflw FmaResultM=-inf 7242234
|
||||
|
Binary file not shown.
@ -26,13 +26,13 @@ void main() {
|
||||
char ans[81];
|
||||
char flags[3];
|
||||
int FrmE;
|
||||
long stop = 1119653;
|
||||
int debug = 1;
|
||||
long stop = 5587581;
|
||||
int debug = 0;
|
||||
//my_string = (char *) malloc (nbytes + 1);
|
||||
//bytes_read = getline (&my_string, &nbytes, stdin);
|
||||
|
||||
|
||||
for(n=0; n < 305; n++) {//613 for 10000
|
||||
for(n=0; n < 1000; n++) {//613 for 10000
|
||||
if(getline(&ln,&nbytes,fp) < 0 || feof(fp)) break;
|
||||
if(k == stop && debug == 1) break;
|
||||
k++;
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -11,26 +11,25 @@ module tb;
|
||||
wire [4:0] FmaFlagsM;
|
||||
|
||||
wire [12:0] aligncntE; // status flags
|
||||
wire [105:0] rE; // one result of partial product sum
|
||||
wire [105:0] sE; // other result of partial products
|
||||
wire [163:0] tE; // wire of alignment shifter
|
||||
wire [105:0] ProdManE; // other result of partial products
|
||||
wire [161:0] AlignedAddendE; // wire of alignment shifter
|
||||
wire [8:0] normcntE; // shift count for normalizer
|
||||
wire [12:0] aeE; // multiplier expoent
|
||||
wire bsE; // sticky bit of addend
|
||||
wire killprodE; // ReadData3E >> product
|
||||
wire [12:0] ProdExpE; // multiplier expoent
|
||||
wire AddendStickyE; // sticky bit of addend
|
||||
wire KillProdE; // ReadData3E >> product
|
||||
wire prodofE; // ReadData1E*ReadData2E out of range
|
||||
wire xzeroE;
|
||||
wire XZeroE;
|
||||
wire yzeroE;
|
||||
wire zzeroE;
|
||||
wire xdenormE;
|
||||
wire ydenormE;
|
||||
wire zdenormE;
|
||||
wire xinfE;
|
||||
wire yinfE;
|
||||
wire zinfE;
|
||||
wire xnanE;
|
||||
wire ynanE;
|
||||
wire znanE;
|
||||
wire XDenormE;
|
||||
wire YDenormE;
|
||||
wire ZDenormE;
|
||||
wire XInfE;
|
||||
wire YInfE;
|
||||
wire ZInfE;
|
||||
wire XNaNE;
|
||||
wire YNaNE;
|
||||
wire ZNaNE;
|
||||
wire nanE;
|
||||
wire [8:0] sumshiftE;
|
||||
wire sumshiftzeroE;
|
||||
@ -45,16 +44,16 @@ reg ansnan;
|
||||
reg [105:0] s; // partial product 2
|
||||
reg [51:0] xnorm;
|
||||
reg [51:0] ynorm;
|
||||
wire [3:0] FOpCtrlM;
|
||||
|
||||
assign FOpCtrlM = 4'b0;
|
||||
|
||||
|
||||
localparam period = 20;
|
||||
fma1 UUT1(.*);
|
||||
fma2 UUT2(.ReadData1M(ReadData1E), .ReadData2M(ReadData2E), .ReadData3M(ReadData3E), .FrmM(FrmE),
|
||||
.aligncntM(aligncntE), .rM(rE), .sM(sE),
|
||||
.tM(tE), .normcntM(normcntE), .aeM(aeE), .bsM(bsE),.killprodM(killprodE),
|
||||
.xzeroM(xzeroE), .yzeroM(yzeroE),.zzeroM(zzeroE),.xdenormM(xdenormE),.ydenormM(ydenormE),
|
||||
.zdenormM(zdenormE),.xinfM(xinfE),.yinfM(yinfE),.zinfM(zinfE),.xnanM(xnanE),.ynanM(ynanE),.znanM(znanE),
|
||||
.nanM(nanE),.sumshiftM(sumshiftE),.sumshiftzeroM(sumshiftzeroE), .prodinfM(prodinfE), .*);
|
||||
fma2 UUT2(.ReadData1M(ReadData1E), .ReadData2M(ReadData2E), .ReadData3M(ReadData3E), .FrmM(FrmE), .ProdManM(ProdManE),
|
||||
.AlignedAddendM(AlignedAddendE), .ProdExpM(ProdExpE), .AddendStickyM(AddendStickyE),.KillProdM(KillProdE),
|
||||
.XZeroM(XZeroE),.YZeroM(YZeroE),.ZZeroM(ZZeroE),.XInfM(XInfE),.YInfM(YInfE),.ZInfM(ZInfE),.XNaNM(XNaNE),.YNaNM(YNaNE),.ZNaNM(ZNaNE), .*);
|
||||
|
||||
|
||||
initial
|
||||
|
@ -1,65 +0,0 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Block Name: add.v
|
||||
// Author: David Harris
|
||||
// Date: 11/12/1995
|
||||
//
|
||||
// Block Description:
|
||||
// This block performs the addition of the product and addend. It also
|
||||
// contains logic necessary to adjust the signs for effective subtracts
|
||||
// and negative results.
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// add: adds the two partial products (zeroed when killprodM is set) to the
// aligned addend (inverted when invz is set), computing both a "+0" and a "+1"
// sum. It reports bit 164 of each as negsum0/negsum1 and drives `sum` with the
// low 164 bits of the sum chosen by selsum1, negated when negsum is set.
module add(rM, sM, tM, sum,
|
||||
negsum, invz, selsum1, negsum0, negsum1, killprodM);
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
input logic [105:0] rM; // partial product 1
|
||||
input logic [105:0] sM; // partial product 2
|
||||
input logic [163:0] tM; // aligned addend
|
||||
input logic invz; // invert addend
|
||||
input logic selsum1; // select +1 mode of compound adder
|
||||
input logic killprodM; // z >> product: zero both partial products
|
||||
input logic negsum; // negate the selected sum
|
||||
output logic [163:0] sum; // selected (and possibly negated) sum, low 164 bits
|
||||
output logic negsum0; // sum was negative in +0 mode (bit 164 of sum0)
|
||||
output logic negsum1; // sum was negative in +1 mode (bit 164 of sum1)
|
||||
|
||||
// Internal nodes
|
||||
|
||||
wire [105:0] r2; // partial product possibly zeroed out
|
||||
wire [105:0] s2; // partial product possibly zeroed out
|
||||
wire [164:0] t2; // addend after inversion if necessary
|
||||
wire [164:0] sum0; // sum of compound adder +0 mode
|
||||
wire [164:0] sum1; // sum of compound adder +1 mode
|
||||
wire [163:0] prodshifted; // not driven here; referenced only by the commented-out code below
|
||||
wire [164:0] tmp; // not driven here; referenced only by the commented-out code below
|
||||
|
||||
// Invert the zero-extended addend when invz is set
|
||||
// (per the original note: z's sign is different from the product's sign)
|
||||
|
||||
assign t2 = invz ? ~{1'b0,tM} : {1'b0,tM};
|
||||
|
||||
// Zero out both partial products when killprodM is set (Z >> product)
|
||||
|
||||
assign r2 = killprodM ? 106'b0 : rM;
|
||||
assign s2 = killprodM ? 106'b0 : sM;
|
||||
|
||||
//***replace this with a more structural cpa that synthesizes better
|
||||
// Compound adder
|
||||
|
||||
// Consists of 3:2 CSA followed by long compound CPA
|
||||
|
||||
//assign prodshifted = killprodM ? 0 : {56'b0, r2+s2, 2'b0};
|
||||
//assign tmp = ({{57{r2[105]}},r2, 2'b0} + {{57{s2[105]}},s2, 2'b0});
|
||||
// r2+s2 is placed two bit positions up (2'b0 appended) and zero-extended by
// 57 bits to line up with the 165-bit t2.
assign sum0 = t2 + 164'b0 + {57'b0, r2+s2, 2'b0};
|
||||
assign sum1 = t2 + 164'b1 + {57'b0, r2+s2, 2'b0}; // +1 from invert of z above
|
||||
|
||||
// Check sign bits in +0/1 modes
|
||||
|
||||
assign negsum0 = sum0[164];
|
||||
assign negsum1 = sum1[164];
|
||||
|
||||
// Mux proper result (+0/1 mode and negation) using 4:1 mux
|
||||
|
||||
//assign sumzero = |sum;
|
||||
// selsum1 picks sum1 over sum0; negsum selects the two's-complement negation
// of the low 164 bits of whichever sum was picked.
assign sum = selsum1 ? (negsum ? -sum1[163:0] : sum1[163:0]) : (negsum ? -sum0[163:0] : sum0[163:0]);
|
||||
|
||||
endmodule
|
||||
|
@ -1,88 +0,0 @@
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Block Name: align.v
|
||||
// Author: David Harris
|
||||
// Date: 11/2/1995
|
||||
//
|
||||
// Block Description:
|
||||
// This block implements the alignment shifter. It is responsible for
|
||||
// adjusting the fraction portion of the addend relative to the fraction
|
||||
// produced in the multiplier array.
|
||||
//
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
// align: alignment shifter for the addend. Based on the signed alignment
// count aligncntE it picks a right-shift amount (sumshiftE), shifts the
// addend mantissa with its leading bit, and outputs the upper 164 bits as tE
// and the OR of the 52 bits shifted below that as the sticky bit bsE. It also
// produces killprodE and sumshiftzeroE.
module align(zman, aligncntE, xzeroE, yzeroE, zzeroE, zdenormE, tE, bsE,
|
||||
killprodE, sumshiftE, sumshiftzeroE);
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
input logic [51:0] zman; // Fraction of addend z;
|
||||
input logic [12:0] aligncntE; // amount to shift (compared as a signed value below)
|
||||
input logic xzeroE; // Input X = 0
|
||||
input logic yzeroE; // Input Y = 0
|
||||
input logic zzeroE; // Input Z = 0
|
||||
input logic zdenormE; // Input Z is denormalized
|
||||
output logic [163:0] tE; // aligned addend (54 bits left of bpt)
|
||||
output logic bsE; // sticky bit of addend
|
||||
output logic killprodE; // Z >> product
|
||||
output logic [8:0] sumshiftE; // right-shift amount applied to the addend
|
||||
output logic sumshiftzeroE; // set only in the saturated addend-anchored case (sumshiftE = 0)
|
||||
|
||||
// Internal nodes
|
||||
|
||||
reg [215:0] shift; // aligned addend from shifter
|
||||
logic [12:0] tmp; // scratch for 57 - aligncntE; only its low 9 bits are used
|
||||
|
||||
|
||||
|
||||
always_comb
|
||||
begin
|
||||
|
||||
// Default to clearing sticky bits
|
||||
bsE = 0;
|
||||
|
||||
// By default the product is killed only when X or Y is zero;
// the final branch below forces killprodE to 1
|
||||
killprodE = xzeroE | yzeroE;
|
||||
// d = aligncntE
|
||||
// p = 53
|
||||
//***try reducing this hardware to use one shifter
|
||||
if ($signed(aligncntE) <= $signed(-(13'd105))) begin //d<=-2p+1
|
||||
//product anchored case with saturated shift
|
||||
// NOTE(review): tmp is not assigned on this path of the always_comb block
sumshiftE = 163; // 3p+4
|
||||
sumshiftzeroE = 0;
|
||||
// NOTE(review): the leading bit is hard-coded to 1 here, while the other
// branches use ~zdenormE; confirm this is intended
shift = {1'b1,zman,163'b0} >> sumshiftE;
|
||||
tE = zzeroE ? 0 : {shift[215:52]};
|
||||
bsE = |(shift[51:0]);
|
||||
|
||||
end else if($signed(aligncntE) <= $signed(13'd2)) begin // -2p+1<d<=2
|
||||
// product anchored or cancellation
|
||||
tmp = 13'd57-aligncntE;
|
||||
sumshiftE = tmp[8:0]; // 57 - d; original comment said "p + 2 - d" — NOTE(review): with p = 53 that would be 55 - d, confirm
|
||||
sumshiftzeroE = 0;
|
||||
shift = {~zdenormE,zman,163'b0} >> sumshiftE;
|
||||
tE = zzeroE ? 0 : {shift[215:52]};
|
||||
bsE = |(shift[51:0]);
|
||||
|
||||
end else if ($signed(aligncntE)<=$signed(13'd55)) begin // 2 < d <= p+2
|
||||
// addend anchored case
|
||||
// used to be 56 \/ something doesn't seem right too many typos
|
||||
// same computation as the previous branch
tmp = 13'd57-aligncntE;
|
||||
sumshiftE = tmp[8:0];
|
||||
sumshiftzeroE = 0;
|
||||
shift = {~zdenormE,zman, 163'b0} >> sumshiftE;
|
||||
tE = zzeroE ? 0 : {shift[215:52]};
|
||||
bsE = |(shift[51:0]);
|
||||
|
||||
end else begin // d >= p+3
|
||||
// addend anchored case with saturated shift
|
||||
// NOTE(review): tmp is not assigned on this path of the always_comb block
sumshiftE = 0;
|
||||
sumshiftzeroE = 1;
|
||||
shift = {~zdenormE,zman, 163'b0} >> sumshiftE;
|
||||
tE = zzeroE ? 0 : {shift[215:52]};
|
||||
bsE = |(shift[51:0]);
|
||||
// addend dominates: always kill the product here
killprodE = 1;
|
||||
|
||||
end
|
||||
end
|
||||
|
||||
endmodule
|
||||
|
@ -1,53 +0,0 @@
|
||||
// Booth partial-product selector: decodes the 3-bit `choose` field into one
// 55-bit partial product of xExt plus the correction outputs that go with it.
module booth(
    input  logic [53:0] xExt,    // multiplicand xExt
    input  logic [2:0]  choose,  // bits needed to choose which encoding
    output logic [1:0]  add1,    // do you add 1 (2'b10 for the -2 encoding)
    output logic        e,       // set for every negative encoding (-2, -1, -0)
    output logic [54:0] pp       // the resultant encoding
);

    logic [53:0] negx;  // bitwise inverse of the multiplicand

    assign negx = ~xExt;

    // All three outputs depend on the same selector, so they are decoded in
    // one case statement. Negative multiples are formed by inverting xExt
    // here; add1 carries the amount still to be added for that encoding.
    always_comb
        case (choose)
            3'b000 : begin pp = 55'b0;         e = 1'b0; add1 = 2'b00; end //  0
            3'b001 : begin pp = {1'b0, xExt};  e = 1'b0; add1 = 2'b00; end //  1
            3'b010 : begin pp = {1'b0, xExt};  e = 1'b0; add1 = 2'b00; end //  1
            3'b011 : begin pp = {xExt, 1'b0};  e = 1'b0; add1 = 2'b00; end //  2
            3'b100 : begin pp = {negx, 1'b0};  e = 1'b1; add1 = 2'b10; end // -2
            3'b101 : begin pp = {1'b1, negx};  e = 1'b1; add1 = 2'b01; end // -1
            3'b110 : begin pp = {1'b1, negx};  e = 1'b1; add1 = 2'b01; end // -1
            3'b111 : begin pp = '1;            e = 1'b1; add1 = 2'b01; end // -0
        endcase

endmodule
|
@ -1,93 +0,0 @@
|
||||
// //***breaks lint with warnings like: %Warning-UNOPTFLAT: Example path: src/fpu/compressors.sv:37: ASSIGNW
|
||||
// //%Warning-UNOPTFLAT: Example path: src/fpu/compressors.sv:32: wallypipelinedsoc.hart.fpu.fma1.multiply.genblk5[0].add4.cout
|
||||
|
||||
// module add3comp2(a, b, c, carry, sum);
|
||||
// /////////////////////////////////////////////////////////////////////////////
|
||||
// //look into different implementations of the compressors?
|
||||
|
||||
// parameter BITS = 4;
|
||||
// input logic [BITS-1:0] a;
|
||||
// input logic [BITS-1:0] b;
|
||||
// input logic [BITS-1:0] c;
|
||||
// output logic [BITS-1:0] carry;
|
||||
// output logic [BITS-1:0] sum;
|
||||
// genvar i;
|
||||
|
||||
// generate
|
||||
// for(i= 0; i<BITS; i=i+1) begin
|
||||
// sng3comp2 add0(a[i], b[i], c[i], carry[i], sum[i]);
|
||||
// end
|
||||
// endgenerate
|
||||
|
||||
// endmodule
|
||||
|
||||
// module add4comp2(a, b, c, d, carry, sum);
|
||||
// /////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// parameter BITS = 4;
|
||||
// input logic [BITS-1:0] a;
|
||||
// input logic [BITS-1:0] b;
|
||||
// input logic [BITS-1:0] c;
|
||||
// input logic [BITS-1:0] d;
|
||||
// output logic [BITS:0] carry;
|
||||
// output logic [BITS-1:0] sum;
|
||||
|
||||
// logic [BITS-1:0] cout;
|
||||
// logic carryTmp;
|
||||
// genvar i;
|
||||
|
||||
|
||||
// sng4comp2 add0(a[0], b[0], c[0], d[0], 1'b0, cout[0], carry[0], sum[0]);
|
||||
|
||||
// generate
|
||||
// for(i= 1; i<BITS-1; i=i+1) begin
|
||||
// sng4comp2 add1(a[i], b[i], c[i], d[i], cout[i-1], cout[i], carry[i], sum[i]);
|
||||
// end
|
||||
// endgenerate
|
||||
|
||||
|
||||
// sng4comp2 add2(a[BITS-1], b[BITS-1], c[BITS-1], d[BITS-1], cout[BITS-2], cout[BITS-1], carryTmp, sum[BITS-1]);
|
||||
|
||||
// assign carry[BITS-1] = carryTmp & cout[BITS-1];
|
||||
// assign carry[BITS] = carryTmp ^ cout[BITS-1];
|
||||
|
||||
// endmodule
|
||||
|
||||
// module sng3comp2(a, b, c, carry, sum);
|
||||
// /////////////////////////////////////////////////////////////////////////////
|
||||
// //look into different implementations of the compressors?
|
||||
|
||||
// input logic a;
|
||||
// input logic b;
|
||||
// input logic c;
|
||||
// output logic carry;
|
||||
// output logic sum;
|
||||
|
||||
// logic axorb;
|
||||
|
||||
// assign axorb = a ^ b;
|
||||
// assign sum = axorb ^ c;
|
||||
|
||||
// assign carry = axorb ? c : a;
|
||||
|
||||
// endmodule
|
||||
|
||||
// module sng4comp2(a, b, c, d, cin, cout, carry, sum);
|
||||
// /////////////////////////////////////////////////////////////////////////////
|
||||
// //look into pass gate 4:2 counters?
|
||||
|
||||
// input logic a;
|
||||
// input logic b;
|
||||
// input logic c;
|
||||
// input logic d;
|
||||
// input logic cin;
|
||||
// output logic cout;
|
||||
// output logic carry;
|
||||
// output logic sum;
|
||||
|
||||
// logic TmpSum;
|
||||
|
||||
// sng3comp2 add1(.carry(cout), .sum(TmpSum),.*);
|
||||
// sng3comp2 add2(.a(TmpSum), .b(d), .c(cin), .*);
|
||||
|
||||
// endmodule
|
@ -1,90 +0,0 @@
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Block Name: expgen.v
|
||||
// Author: David Harris
|
||||
// Date: 11/2/1995
|
||||
//
|
||||
// Block Description:
|
||||
// This block implements the exponent path of the FMAC. It performs the
|
||||
// following operations:
|
||||
//
|
||||
// 1) Compute exponent of multiply.
|
||||
// 2) Compare multiply and add exponents to generate alignment shift count
|
||||
// 3) Adjust exponent based on normalization
|
||||
// 4) Increment exponent based on postrounding renormalization
|
||||
//
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
// First-stage exponent logic: computes the biased exponent of the product
// X*Y, flags a product exponent above the representable range, and derives
// the shift count handed to the alignment shifter.
module expgen1(
    input  logic [62:52] xexp,       // Exponent of multiplicand x
    input  logic [62:52] yexp,       // Exponent of multiplicand y
    input  logic [62:52] zexp,       // Exponent of addend z
    input  logic         xzeroE,     // X is zero
    input  logic         yzeroE,     // Y is zero
    input  logic         xdenormE,   // X is denorm
    input  logic         ydenormE,   // Y is denorm
    input  logic         zdenormE,   // Z is denorm
    output logic [12:0]  aligncntE,  // shift count for alignment shifter
    output logic         prodof,     // X*Y exponent out of bounds
    output logic [12:0]  aeE         // exponent of multiply
);

    // Compute exponent of multiply: the sum of the two biased exponents
    // with one bias (1023) removed. Forced to zero when either factor is
    // zero. Two extra upper bits leave room for the sum to go negative
    // (two's complement, bit 12 set).
    assign aeE = xzeroE|yzeroE ? 0 : {2'b0,xexp} + {2'b0,yexp} - 13'd1023;

    // Out of bounds only when the exponent is above 2046 and not negative.
    assign prodof = (aeE > 2046 && ~aeE[12]);

    // Compute alignment shift count: addend exponent minus product
    // exponent, adjusted by one for each operand according to whether it
    // is denormalized.
    assign aligncntE = {2'b0,zexp} -aeE - 1 + {12'b0,~xdenormE} + {12'b0,~ydenormE} - {12'b0,~zdenormE};

endmodule
|
||||
|
||||
|
@ -1,108 +0,0 @@
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Block Name: expgen.v
|
||||
// Author: David Harris
|
||||
// Date: 11/2/1995
|
||||
//
|
||||
// Block Description:
|
||||
// This block implements the exponent path of the FMAC. It performs the
|
||||
// following operations:
|
||||
//
|
||||
// 1) Compute exponent of multiply.
|
||||
// 2) Compare multiply and add exponents to generate alignment shift count
|
||||
// 3) Adjust exponent based on normalization
|
||||
// 4) Increment exponent based on postrounding renormalization
|
||||
//
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
// Second-stage exponent logic: range-checks the normalized exponent de0,
// builds the exponent used for exceptional results, and selects the
// exponent field of the final result.
module expgen2(
    input  logic [62:52] xexp,          // Exponent of multiplicand x
    input  logic [62:52] yexp,          // Exponent of multiplicand y
    input  logic [62:52] zexp,          // Exponent of addend z
    input  logic         sumzero,       // sum exactly equals zero
    input  logic         resultdenorm,  // postnormalize rounded result
    input  logic         infinity,      // generate infinity on overflow
    input  logic [4:0]   FmaFlagsM,     // flags; [4], [2] and [1] are read here as invalid, overflow, underflow
    input  logic         inf,           // Some input is infinity
    input  logic         expplus1,      // add one to the selected exponent
    input  logic         nanM,          // Some input is NaN
    input  logic [12:0]  de0,           // normalized exponent before the zero/denorm override
    input  logic         xnanM,         // X is NaN
    input  logic         ynanM,         // Y is NaN
    input  logic         znanM,         // Z is NaN
    input  logic         specialsel,    // Select special result
    output logic [62:52] wexp,          // Exponent of result
    output logic         sumof,         // X*Y+Z exponent out of bounds
    output logic         sumuf          // X*Y+Z exponent underflows
);

    // Internal nodes
    wire [12:0] de;           // Normalized exponent
    wire [10:0] infinityres;  // Infinity or max number
    wire [10:0] nanres;       // Nan propagated or generated
    wire [10:0] specialres;   // Exceptional case result

    // check for exponent out of bounds after add
    // A zero or denormalized result uses an exponent field of zero.
    assign de = resultdenorm | sumzero ? 0 : de0;
    assign sumof = ~de[12] && de > 2046;
    assign sumuf = de == 0 && ~sumzero && ~resultdenorm;

    // In a non-critical special mux, we combine the results of exceptional
    // conditions. Overflow produces either infinity or the largest finite
    // number, depending on the rounding mode. NaNs are propagated or
    // generated. If none of the conditions holds the value is don't-care.
    assign specialres = FmaFlagsM[4] | nanM ? nanres : // invalid
                        FmaFlagsM[2] ? infinityres :   // overflow
                        inf ? 11'b11111111111 :
                        FmaFlagsM[1] ? 11'b0 : 11'bx;  // underflow

    assign infinityres = infinity ? 11'b11111111111 : 11'b11111111110;

    // IEEE 754-2008 section 6.2.3 states:
    // "If two or more inputs are NaN, then the payload of the resulting NaN should be
    // identical to the payload of one of the input NaNs if representable in the destination
    // format. This standard does not specify which of the input NaNs will provide the payload."
    // Priority here is X, then Y, then Z.
    assign nanres = xnanM ? xexp : (ynanM ? yexp : (znanM? zexp : 11'b11111111111));

    // A mux selects the special-case exponent or the normalized FMAC
    // exponent (optionally incremented by expplus1).
    assign wexp = specialsel ? specialres[10:0] : de[10:0] + {10'b0,expplus1};

endmodule
|
||||
|
||||
|
@ -168,8 +168,8 @@ module fctrl (
|
||||
//fma/mult
|
||||
// fmadd = ?000
|
||||
// fmsub = ?001
|
||||
// fnmadd = ?010
|
||||
// fnmsub = ?011
|
||||
// fnmsub = ?010 -(a*b)+c
|
||||
// fnmadd = ?011 -(a*b)-c
|
||||
// fmul = ?100
|
||||
// {?, is mul, is negative, is sub}
|
||||
3'b010 : begin FOpCtrlD = {1'b0, OpD[4:2]}; FInput2UsedD = 1'b1; FInput3UsedD = ~OpD[4]; end
|
||||
|
@ -1,34 +0,0 @@
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Block Name: flag.v
|
||||
// Author: David Harris
|
||||
// Date: 12/6/1995
|
||||
//
|
||||
// Block Description:
|
||||
// This block generates the flags: invalid, overflow, underflow, inexact.
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
// First-stage flag logic: reports whether any operand is a NaN and whether
// the product exponent overflowed with neither factor being a NaN.
module flag1(
    input  logic xnanE,     // X is NaN
    input  logic ynanE,     // Y is NaN
    input  logic znanE,     // Z is NaN
    input  logic prodof,    // X*Y overflows exponent
    output logic prodinfE,  // X*Y larger than max possible
    output logic nanE       // Some source is NaN
);

    // Any NaN operand makes the whole operation a NaN case.
    assign nanE = |{xnanE, ynanE, znanE};

    // An overflowing product exponent counts as infinity only when neither
    // factor is a NaN.
    assign prodinfE = prodof & ~(xnanE | ynanE);

endmodule
|
@ -1,80 +0,0 @@
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Block Name: flag.v
|
||||
// Author: David Harris
|
||||
// Date: 12/6/1995
|
||||
//
|
||||
// Block Description:
|
||||
// This block generates the flags: invalid, overflow, underflow, inexact.
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
// Second-stage flag logic: generates the five FMA status flags
// {invalid, divide-by-zero, overflow, underflow, inexact} and the `inf`
// indication used to select an infinite result.
module flag2(
    input  logic       xsign,      // Sign of x
    input  logic       ysign,      // Sign of y
    input  logic       zsign,      // Sign of z
    input  logic       xnanM,      // X is NaN
    input  logic       ynanM,      // Y is NaN
    input  logic       znanM,      // Z is NaN
    input  logic       xinfM,      // X is Inf
    input  logic       yinfM,      // Y is Inf
    input  logic       zinfM,      // Z is Inf
    input  logic       sumof,      // X*Y + z overflows exponent
    input  logic       sumuf,      // X*Y + z underflows exponent
    input  logic       xzeroM,     // x = 0
    input  logic       yzeroM,     // y = 0
    input  logic       zzeroM,     // z = 0
    input  logic [1:0] vbits,      // R and S bits of result
    input  logic       killprodM,  // product killed during alignment
    output logic       inf,        // Some source is Inf, or the sum overflowed
    input  logic       nanM,       // Some source is NaN
    output logic [4:0] FmaFlagsM,  // {invalid, div-by-zero, overflow, underflow, inexact}
    input  logic       sticky,     // sticky bit of the result
    input  logic       prodinfM    // X*Y larger than max possible
);

    // Internal nodes
    logic suminf;    // sum exponent overflowed and no input is a NaN
    logic inputinf;  // at least one input operand is an infinity

    assign inputinf = xinfM || yinfM || zinfM;
    assign suminf   = sumof && ~xnanM && ~ynanM && ~znanM;

    // Same with infinity (inf - inf and 0 * inf don't propagate inf
    // but it's ok because illegal op takes higher precedence)
    assign inf = inputinf || suminf; // KEP added suminf

    // Set the overflow flag when the rounded result would be out of bounds
    // and no input was already an infinity.
    // The original gated this with ~inf; because inf itself contains
    // suminf, the flag could never be set. Gate on the input infinities.
    // NOTE(review): expgen2 selects infinityres when FmaFlagsM[2] is set, so
    // the result exponent on overflow now follows its `infinity` input.
    assign FmaFlagsM[2] = suminf && ~inputinf;

    // Set the underflow flag when the sum exponent underflows without an
    // infinity or NaN being involved, or when the product was killed with a
    // zero addend although neither factor is zero.
    assign FmaFlagsM[1] = (sumuf && ~inf && ~prodinfM && ~nanM) || (killprodM & zzeroM & ~(yzeroM | xzeroM));

    // Set the inexact flag when the R or S bit or the sticky bit is set, or
    // when the sum overflowed. Suppressed when an input is Inf or NaN.
    // Gated on inputinf rather than inf for the same reason as the overflow
    // flag; otherwise the suminf term is dead.
    assign FmaFlagsM[0] = (vbits[0] || vbits[1] || sticky || suminf) && ~(inputinf || nanM);

    // Set invalid flag for following cases:
    //   1) Inf - Inf
    //   2) 0 * Inf
    assign FmaFlagsM[4] = (xinfM || yinfM || prodinfM) && zinfM && (xsign ^ ysign ^ zsign) ||
                          xzeroM && yinfM || yzeroM && xinfM;

    assign FmaFlagsM[3] = 0; // divide by zero flag

endmodule
|
@ -1,103 +1,141 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Block Name: fmac.v
|
||||
// Author: David Harris
|
||||
// Date: 11/2/1995
|
||||
//
|
||||
// Block Description:
|
||||
// This is the top level block of a floating-point multiply/accumulate
|
||||
// unit(FMAC). It instantiates the following sub-blocks:
|
||||
//
|
||||
// array Booth encoding, partial product generation, product summation
|
||||
// expgen Exponent summation, compare, and adjust
|
||||
// align Alignment shifter
|
||||
// add Carry-save adder for accumulate, carry propagate adder
|
||||
// lza Leading zero anticipator to control normalization shifter
|
||||
// normalize Normalization shifter
|
||||
// round Rounding of result
|
||||
// exception Handles exceptional cases
|
||||
// bypass Handles bypass of result to FInput1E or FInput3E inputs
|
||||
// sign One bit sign handling block
|
||||
// special Catch special cases (inputs = 0 / infinity / etc.)
|
||||
//
|
||||
// The FMAC computes FmaResultM=FInput1E*FInput2E+FInput3E, rounded with the mode specified by
|
||||
// RN, RZ, RM, or RP. The result is optionally bypassed back to
|
||||
// the FInput1E or FInput3E inputs for use on the next cycle. In addition, four signals
|
||||
// are produced: trap, overflow, underflow, and inexact. Trap indicates
|
||||
// an infinity, NaN, or denormalized number to be handled in software;
|
||||
// the other three signals are IEEE flags.
|
||||
//
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
// First stage of the fused multiply-add: unpacks the three operands,
// classifies them, computes the product exponent and mantissa, and aligns
// the addend mantissa against the product.
module fma1(
    input  logic [63:0]  FInput1E,        // X operand (multiplicand)
    input  logic [63:0]  FInput2E,        // Y operand (multiplier)
    input  logic [63:0]  FInput3E,        // Z operand (addend)
    input  logic [3:0]   FOpCtrlE,        // operation select; bit 2 set forces the addend to zero
    output logic [105:0] ProdManE,        // mantissa of X*Y
    output logic [161:0] AlignedAddendE,  // addend mantissa aligned to the product
    output logic [12:0]  ProdExpE,        // exponent of X*Y
    output logic         AddendStickyE,   // set when the smaller term is dropped or bits are shifted out
    output logic         KillProdE,       // product is too small to affect the sum
    output logic         XZeroE, YZeroE, ZZeroE,
    output logic         XInfE, YInfE, ZInfE,
    output logic         XNaNE, YNaNE, ZNaNE);

    // Internal nodes
    logic [51:0]  XMan, YMan, ZMan;
    logic [10:0]  XExp, YExp, ZExp;
    logic [12:0]  AlignCnt;
    logic [211:0] Shift;
    logic         XDenormE, YDenormE, ZDenormE;
    logic [63:0]  FInput3E2;

    // Set addend to zero if FMUL instruction
    assign FInput3E2 = FOpCtrlE[2] ? 64'b0 : FInput3E;

    // split inputs into the mantissa and exponent for readability
    assign XExp = FInput1E[62:52];
    assign YExp = FInput2E[62:52];
    assign ZExp = FInput3E2[62:52];

    assign XMan = FInput1E[51:0];
    assign YMan = FInput2E[51:0];
    assign ZMan = FInput3E2[51:0];

    // determine if an input is a special value
    assign XNaNE = &FInput1E[62:52] && |FInput1E[51:0];
    assign YNaNE = &FInput2E[62:52] && |FInput2E[51:0];
    assign ZNaNE = &FInput3E2[62:52] && |FInput3E2[51:0];

    assign XDenormE = ~(|FInput1E[62:52]) && |FInput1E[51:0];
    assign YDenormE = ~(|FInput2E[62:52]) && |FInput2E[51:0];
    assign ZDenormE = ~(|FInput3E2[62:52]) && |FInput3E2[51:0];

    assign XInfE = &FInput1E[62:52] && ~(|FInput1E[51:0]);
    assign YInfE = &FInput2E[62:52] && ~(|FInput2E[51:0]);
    assign ZInfE = &FInput3E2[62:52] && ~(|FInput3E2[51:0]);

    assign XZeroE = ~(|FInput1E[62:0]);
    assign YZeroE = ~(|FInput2E[62:0]);
    assign ZZeroE = ~(|FInput3E2[62:0]);

    // Calculate the product's exponent
    //   - When multiplying two fp numbers, add the exponents
    //   - Subtract 3ff to remove one of the biases (XExp + YExp has two biases, one from each exponent)
    //   - Denormal numbers have an exponent value of 1, however they are
    //     represented with an exponent of 0. Add one if there is a denormal number
    assign ProdExpE = (XZeroE|YZeroE) ? 13'b0 :
                      {2'b0, XExp} + {2'b0, YExp} - 13'h3ff + {12'b0, XDenormE} + {12'b0, YDenormE};

    // Calculate the product's mantissa
    //   - Add the assumed one. If the number is denormalized or zero, it does not have an assumed one.
    assign ProdManE = {53'b0,~(XDenormE|XZeroE),XMan} * {53'b0,~(YDenormE|YZeroE),YMan};

    // determine the shift count for alignment
    //   - negative means Z is larger, so shift Z left
    //   - positive means the product is larger, so shift Z right
    //   - Denormal numbers have an exponent value of 1, however they are
    //     represented with an exponent of 0. Add one to the exponent if it is a denormal number
    assign AlignCnt = ProdExpE - {2'b0, ZExp} - {12'b0, ZDenormE};

    // Alignment shifter

    // Default addition without shifting
    //          | 55'b0 | 106'b(product) | 2'b0 |
    //          |1'b0| addend |
    // the 1'b0 before the addend is because the product's mantissa has two
    // bits before the binary point (xx.xxxxxxxxxx...)

    always_comb
        begin

        // Set default values. Shift is driven on every path so that this
        // always_comb does not infer a latch: the original left it
        // unassigned in the kill-product and addend-too-small branches.
        Shift = '0;
        AddendStickyE = 0;
        KillProdE = 0;

        // If the product is too small to affect the sum, kill the product
        if ($signed(AlignCnt) <= $signed(-13'd56)) begin
            KillProdE = 1;
            AlignedAddendE = {107'b0, ~(ZZeroE|ZDenormE),ZMan,2'b0};
            AddendStickyE = ~(XZeroE|YZeroE);

        // If the addend is shifted left (negative AlignCnt)
        end else if($signed(AlignCnt) <= $signed(13'd0))  begin
            Shift = {55'b0, ~(ZZeroE|ZDenormE),ZMan, 104'b0} << -AlignCnt;
            AlignedAddendE = Shift[211:50];
            AddendStickyE = |(Shift[49:0]);

        // If the addend is shifted right (positive AlignCnt)
        end else if ($signed(AlignCnt)<=$signed(13'd105))  begin
            Shift = {55'b0, ~(ZZeroE|ZDenormE),ZMan, 104'b0} >> AlignCnt;
            AlignedAddendE = Shift[211:50];
            AddendStickyE = |(Shift[49:0]);

        // If the addend is too small to affect the addition
        //   - The addend has to shift two past the end of the product to be considered too small
        //   - The 2 extra bits are needed for rounding
        end else begin
            AlignedAddendE = 162'b0;
            AddendStickyE = ~ZZeroE;

        end
    end

endmodule
|
||||
|
||||
|
@ -1,107 +1,110 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Block Name: fmac.v
|
||||
// Author: David Harris
|
||||
// Date: 11/2/1995
|
||||
//
|
||||
// Block Description:
|
||||
// This is the top level block of a floating-point multiply/accumulate
|
||||
// unit(FMAC). It instantiates the following sub-blocks:
|
||||
//
|
||||
// array Booth encoding, partial product generation, product summation
|
||||
// expgen Exponent summation, compare, and adjust
|
||||
// align Alignment shifter
|
||||
// add Carry-save adder for accumulate, carry propagate adder
|
||||
// lza Leading zero anticipator to control normalization shifter
|
||||
// normalize Normalization shifter
|
||||
// round Rounding of result
|
||||
// exception Handles exceptional cases
|
||||
// bypass Handles bypass of result to FInput1M or FInput3M input logics
|
||||
// sign One bit sign handling block
|
||||
// special Catch special cases (input logics = 0 / infinity / etc.)
|
||||
//
|
||||
// The FMAC computes FmaResultM=FInput1M*FInput2M+FInput3M, rounded with the mode specified by
|
||||
// RN, RZ, RM, or RP. The result is optionally bypassed back to
|
||||
// the FInput1M or FInput3M input logics for use on the next cycle. In addition, four signals
|
||||
// are produced: trap, overflow, underflow, and inexact. Trap indicates
|
||||
// an infinity, NaN, or denormalized number to be handled in software;
|
||||
// the other three signals are IEEE flags.
|
||||
//
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
module fma2(FInput1M, FInput2M, FInput3M, FrmM,
|
||||
FmaResultM, FmaFlagsM, aligncntM, rM, sM,
|
||||
tM, normcntM, aeM, bsM,killprodM,
|
||||
xzeroM, yzeroM,zzeroM,xdenormM,ydenormM,
|
||||
zdenormM,xinfM,yinfM,zinfM,xnanM,ynanM,znanM,
|
||||
nanM,sumshiftM,sumshiftzeroM,prodinfM
|
||||
|
||||
);
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
module fma2(
|
||||
|
||||
input logic [63:0] FInput1M; // input logic 1
|
||||
input logic [63:0] FInput2M; // input logic 2
|
||||
input logic [63:0] FInput3M; // input logic 3
|
||||
input logic [2:0] FrmM; // Rounding mode
|
||||
input logic [12:0] aligncntM; // status flags
|
||||
input logic [105:0] rM; // one result of partial product sum
|
||||
input logic [105:0] sM; // other result of partial products
|
||||
input logic [163:0] tM; // output of alignment shifter
|
||||
input logic [8:0] normcntM; // shift count for normalizer
|
||||
input logic [12:0] aeM; // multiplier expoent
|
||||
input logic bsM; // sticky bit of addend
|
||||
input logic killprodM; // FInput3M >> product
|
||||
input logic prodinfM;
|
||||
input logic xzeroM;
|
||||
input logic yzeroM;
|
||||
input logic zzeroM;
|
||||
input logic xdenormM;
|
||||
input logic ydenormM;
|
||||
input logic zdenormM;
|
||||
input logic xinfM;
|
||||
input logic yinfM;
|
||||
input logic zinfM;
|
||||
input logic xnanM;
|
||||
input logic ynanM;
|
||||
input logic znanM;
|
||||
input logic nanM;
|
||||
input logic [8:0] sumshiftM;
|
||||
input logic sumshiftzeroM;
|
||||
|
||||
|
||||
output logic [63:0] FmaResultM; // output FmaResultM=FInput1M*FInput2M+FInput3M
|
||||
output logic [4:0] FmaFlagsM; // status flags
|
||||
input logic [63:0] FInput1M,
|
||||
input logic [63:0] FInput2M,
|
||||
input logic [63:0] FInput3M,
|
||||
input logic [2:0] FrmM,
|
||||
input logic [105:0] ProdManM,
|
||||
input logic [161:0] AlignedAddendM,
|
||||
input logic [12:0] ProdExpM,
|
||||
input logic AddendStickyM,
|
||||
input logic KillProdM,
|
||||
input logic [3:0] FOpCtrlM,
|
||||
input logic XZeroM, YZeroM, ZZeroM,
|
||||
input logic XInfM, YInfM, ZInfM,
|
||||
input logic XNaNM, YNaNM, ZNaNM,
|
||||
output logic [63:0] FmaResultM,
|
||||
output logic [4:0] FmaFlagsM);
|
||||
|
||||
|
||||
// Internal nodes
|
||||
logic [163:0] sum; // output of carry prop adder
|
||||
logic [53:0] v; // normalized sum, R, S bits
|
||||
// logic [12:0] aligncnt; // shift count for alignment
|
||||
logic [8:0] normcnt; // shift count for normalizer
|
||||
logic negsum; // negate sum
|
||||
logic invz; // invert addend
|
||||
logic selsum1; // select +1 mode of sum
|
||||
logic negsum0; // sum +0 < 0
|
||||
logic negsum1; // sum +1 < 0
|
||||
logic sumzero; // sum = 0
|
||||
logic infinity; // generate infinity on overflow
|
||||
logic sumof; // result out of range
|
||||
logic zexpsel;
|
||||
logic denorm0;
|
||||
logic resultdenorm;
|
||||
logic inf;
|
||||
logic specialsel;
|
||||
logic expplus1;
|
||||
logic sumuf;
|
||||
logic psign;
|
||||
logic sticky;
|
||||
logic [12:0] de0;
|
||||
logic isAdd;
|
||||
logic wsign;
|
||||
logic [51:0] wman;
|
||||
logic [10:0] wexp;
|
||||
|
||||
assign isAdd = 1;
|
||||
logic [51:0] XMan, YMan, ZMan, WMan;
|
||||
logic [10:0] XExp, YExp, ZExp, WExp;
|
||||
logic XSgn, YSgn, ZSgn, WSgn, PSgn;
|
||||
logic [105:0] ProdMan2;
|
||||
logic [162:0] AlignedAddend2;
|
||||
logic [161:0] Sum;
|
||||
logic [162:0] SumTmp;
|
||||
logic [12:0] SumExp;
|
||||
logic [12:0] SumExpMinus1;
|
||||
logic [12:0] SumExpTmp, SumExpTmpMinus1, WExpTmp;
|
||||
logic [53:0] NormSum;
|
||||
logic [161:0] NormSumTmp;
|
||||
logic [8:0] NormCnt;
|
||||
logic NormSumSticky;
|
||||
logic SumZero;
|
||||
logic NegSum;
|
||||
logic InvZ;
|
||||
logic ResultDenorm;
|
||||
logic Sticky;
|
||||
logic Plus1, Minus1, Plus1Tmp, Minus1Tmp;
|
||||
logic Invalid,Underflow,Overflow,Inexact;
|
||||
logic [8:0] DenormShift;
|
||||
logic ProdInf, ProdOf, ProdUf;
|
||||
logic [63:0] FmaResultTmp;
|
||||
logic SubBySmallNum;
|
||||
logic [63:0] FInput3M2;
|
||||
logic ZeroSgn, ResultSgn;
|
||||
|
||||
// Set addend to zero if FMUL instruction
|
||||
assign FInput3M2 = FOpCtrlM[2] ? 64'b0 : FInput3M;
|
||||
|
||||
// split inputs into the sign bit, mantissa, and exponent for readability
|
||||
assign XSgn = FInput1M[63];
|
||||
assign YSgn = FInput2M[63];
|
||||
assign ZSgn = FInput3M2[63]^FOpCtrlM[0]; //Negate Z if subtraction
|
||||
|
||||
assign XExp = FInput1M[62:52];
|
||||
assign YExp = FInput2M[62:52];
|
||||
assign ZExp = FInput3M2[62:52];
|
||||
|
||||
assign XMan = FInput1M[51:0];
|
||||
assign YMan = FInput2M[51:0];
|
||||
assign ZMan = FInput3M2[51:0];
|
||||
|
||||
|
||||
|
||||
// Calculate the product's sign
|
||||
// Negate product's sign if FNMADD or FNMSUB
|
||||
assign PSgn = XSgn ^ YSgn ^ FOpCtrlM[1];
|
||||
|
||||
|
||||
|
||||
|
||||
// Addition
|
||||
|
||||
// Negate Z when doing one of the following operations:
|
||||
// -prod + Z
|
||||
// prod - Z
|
||||
assign InvZ = ZSgn ^ PSgn;
|
||||
|
||||
// Choose an inverted or non-inverted addend - the one is added later
|
||||
assign AlignedAddend2 = InvZ ? ~{1'b0,AlignedAddendM} : {1'b0,AlignedAddendM};
|
||||
// Kill the product if the product is too small to affect the addition (determined in fma1.sv)
|
||||
assign ProdMan2 = KillProdM ? 106'b0 : ProdManM;
|
||||
|
||||
// Do the addition
|
||||
// - add one to complete the negation if the addend was inverted
|
||||
// - the 2 extra bits at the beginning and end are needed for rounding
|
||||
assign SumTmp = AlignedAddend2 + {55'b0, ProdMan2,2'b0} + {162'b0, InvZ};
|
||||
|
||||
// Is the sum negative
|
||||
assign NegSum = SumTmp[162];
|
||||
// If the sum is negative, negate the sum.
|
||||
assign Sum = NegSum ? -SumTmp[161:0] : SumTmp[161:0];
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// Leading one detector
|
||||
logic [8:0] i;
|
||||
always_comb begin
|
||||
i = 0;
|
||||
while (~Sum[161-i] && $unsigned(i) <= $unsigned(9'd161)) i = i+1; // search for leading one
|
||||
NormCnt = i+1; // compute shift count
|
||||
end
|
||||
|
||||
|
||||
|
||||
@ -113,27 +116,160 @@ module fma2(FInput1M, FInput2M, FInput3M, FrmM,
|
||||
|
||||
|
||||
|
||||
// Normalization
|
||||
|
||||
|
||||
// Determine if the sum is zero
|
||||
assign SumZero = ~(|Sum);
|
||||
|
||||
// Determine if the result is denormal
|
||||
assign ResultDenorm = $signed(SumExpTmp)<=0 & ($signed(SumExpTmp+13'd52)>=0);
|
||||
|
||||
// Determine the shift needed for denormal results
|
||||
assign SumExpTmpMinus1 = SumExpTmp-1;
|
||||
assign DenormShift = ResultDenorm ? SumExpTmpMinus1[8:0] : 9'b0;
|
||||
|
||||
// Normalize the sum
|
||||
assign NormSumTmp = SumZero ? 162'b0 : Sum << NormCnt+DenormShift;
|
||||
assign NormSum = NormSumTmp[161:108];
|
||||
// Calculate the sticky bit
|
||||
assign NormSumSticky = (|NormSumTmp[107:0]);
|
||||
assign Sticky = AddendStickyM | NormSumSticky;
|
||||
|
||||
// Determine sum's exponent
|
||||
assign SumExpTmp = KillProdM ? {2'b0, ZExp} : ProdExpM + -({4'b0, NormCnt} - 13'd56);
|
||||
assign SumExp = SumZero ? 13'b0 :
|
||||
ResultDenorm ? 13'b0 :
|
||||
SumExpTmp;
|
||||
|
||||
|
||||
|
||||
// Instantiate fraction datapath
|
||||
|
||||
add add(.*);
|
||||
lza lza(.*);
|
||||
normalize normalize(.zexp(FInput3M[62:52]),.*);
|
||||
round round(.xman(FInput1M[51:0]), .yman(FInput2M[51:0]),.zman(FInput3M[51:0]),.*);
|
||||
|
||||
// Instantiate exponent datapath
|
||||
|
||||
expgen2 expgen2(.xexp(FInput1M[62:52]),.yexp(FInput2M[62:52]),.zexp(FInput3M[62:52]),.*);
|
||||
|
||||
|
||||
// Instantiate control logic
|
||||
|
||||
|
||||
|
||||
|
||||
// Rounding
|
||||
|
||||
// round to nearest even
|
||||
// {NormSum[1], NormSum[0], Sticky}
|
||||
// 0xx - do nothing
|
||||
// 100 - tie - Plus1 if NormSum[2] = 1
|
||||
// - don't add 1 if there was supposed to be a subtraction by a small number that didn't happen
|
||||
// 101/110/111 - Plus1
|
||||
|
||||
// round to zero - do nothing
|
||||
// - subtract 1 if a small number was supposed to be subtracted from the positive result
|
||||
|
||||
// round to -infinity - Plus1 if negative
|
||||
// - don't add 1 if there was supposed to be a subtraction by a small number that didn't happen
|
||||
// - subtract 1 if a small number was supposed to be subtracted from the positive result
|
||||
|
||||
// round to infinity - Plus1 if positive
|
||||
|
||||
// - don't add 1 if there was supposed to be a subtraction by a small number that didn't happen
|
||||
// - subtract 1 if a small number was supposed to be subtracted from the negative result
|
||||
|
||||
// round to nearest max magnitude
|
||||
// {NormSum[1], NormSum[0], Sticky}
|
||||
// 0xx - do nothing
|
||||
// 100 - tie - Plus1
|
||||
// - don't add 1 if there was supposed to be a subtraction by a small number that didn't happen
|
||||
// 101/110/111 - Plus1
|
||||
|
||||
// Determine if a small number was supposed to be subtracted from the result
|
||||
assign SubBySmallNum = AddendStickyM&InvZ&~NormSumSticky;
|
||||
|
||||
always_comb begin
|
||||
// Determine if you add 1
|
||||
case (FrmM)
|
||||
3'b000: Plus1Tmp = NormSum[1] & (NormSum[0] | (Sticky&~(~NormSum[0]&SubBySmallNum)) | (~NormSum[0]&~Sticky&NormSum[2]));//round to nearest even
|
||||
3'b001: Plus1Tmp = 0;//round to zero
|
||||
3'b010: Plus1Tmp = WSgn & ~(SubBySmallNum);//round down
|
||||
3'b011: Plus1Tmp = ~WSgn & ~(SubBySmallNum);//round up
|
||||
3'b100: Plus1Tmp = (NormSum[1] & (NormSum[0] | (Sticky&~(~NormSum[0]&SubBySmallNum)) | (~NormSum[0]&~Sticky)));//round to nearest max magnitude
|
||||
default: Plus1Tmp = 1'bx;
|
||||
endcase
|
||||
// Determine if you subtract 1
|
||||
case (FrmM)
|
||||
3'b000: Minus1Tmp = 0;//round to nearest even
|
||||
3'b001: Minus1Tmp = SubBySmallNum;//round to zero
|
||||
3'b010: Minus1Tmp = ~WSgn & SubBySmallNum;//round down
|
||||
3'b011: Minus1Tmp = WSgn & SubBySmallNum;//round up
|
||||
3'b100: Minus1Tmp = 0;//round to nearest max magnitude
|
||||
default: Minus1Tmp = 1'bx;
|
||||
endcase
|
||||
|
||||
end
|
||||
|
||||
// If an answer is exact don't round
|
||||
assign Plus1 = Sticky | (|NormSum[1:0]) ? Plus1Tmp : 1'b0;
|
||||
assign Minus1 = Sticky | (|NormSum[1:0]) ? Minus1Tmp : 1'b0;
|
||||
// Compute rounded result
|
||||
assign {WExpTmp, WMan} = {SumExp, NormSum[53:2]} - {64'b0, Minus1} + {64'b0, Plus1};
|
||||
assign WExp = WExpTmp[10:0];
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// Sign calculation
|
||||
|
||||
|
||||
// Determine the sign if the sum is zero
|
||||
// if product underflows then use psign
|
||||
// otherwise
|
||||
// if cancelation then 0 unless round to -inf
|
||||
// otherwise psign
|
||||
assign ZeroSgn = Underflow & ~ResultDenorm ? PSgn :
|
||||
(PSgn^ZSgn ? FrmM == 3'b010 : PSgn);
|
||||
|
||||
// is the result negative
|
||||
// if p - z is the Sum negative
|
||||
// if -p + z is the Sum positive
|
||||
// if -p - z then the Sum is negative
|
||||
assign ResultSgn = InvZ&(ZSgn)&NegSum | InvZ&PSgn&~NegSum | ((ZSgn)&PSgn);
|
||||
assign WSgn = SumZero ? ZeroSgn : ResultSgn;
|
||||
|
||||
sign sign(.xsign(FInput1M[63]),.ysign(FInput2M[63]),.zsign(FInput3M[63]),.*);
|
||||
flag2 flag2(.xsign(FInput1M[63]),.ysign(FInput2M[63]),.zsign(FInput3M[63]),.vbits(v[1:0]),.*);
|
||||
// Select the result
|
||||
assign FmaResultM = XNaNM ? {XSgn, XExp, 1'b1,XMan[50:0]} :
|
||||
YNaNM ? {YSgn, YExp, 1'b1,YMan[50:0]} :
|
||||
ZNaNM ? {ZSgn, ZExp, 1'b1,ZMan[50:0]} :
|
||||
Invalid ? {WSgn, 11'h7ff, 1'b1, 51'b0} : // has to be before inf
|
||||
XInfM ? {PSgn, XExp, XMan} :
|
||||
YInfM ? {PSgn, YExp, YMan} :
|
||||
ZInfM ? {ZSgn, ZExp, ZMan} :
|
||||
Overflow ? {WSgn, 11'h7ff, 52'b0} :
|
||||
Underflow & ~ResultDenorm ? {WSgn, 63'b0} - {63'b0, (Minus1&AddendStickyM)} + {63'b0, (Plus1&AddendStickyM)} :
|
||||
KillProdM ? {ZSgn, ZExp, ZMan} - {63'b0, (Minus1&AddendStickyM)} + {63'b0, (Plus1&AddendStickyM)}: // has to be after Underflow
|
||||
{WSgn,WExp,WMan};
|
||||
|
||||
|
||||
assign FmaResultM = {wsign,wexp,wman};
|
||||
// Set Invalid flag for following cases:
|
||||
// 1) Inf - Inf
|
||||
// 2) 0 * Inf
|
||||
// 3) any input is a signaling NaN
|
||||
assign ProdOf = (ProdExpM >= 2047 && ~ProdExpM[12]);
|
||||
assign ProdInf = ProdOf && ~XNaNM && ~YNaNM;
|
||||
assign Invalid = (XNaNM&~XMan[51]) | (YNaNM&~YMan[51]) | (ZNaNM&~ZMan[51]) | ((XInfM || YInfM || ProdInf) & ZInfM & (XSgn ^ YSgn ^ ZSgn)) | (XZeroM & YInfM) | (YZeroM & XInfM);
|
||||
|
||||
// Set Overflow flag if the number is too big to be represented
|
||||
assign Overflow = WExpTmp >= 2047 & ~WExpTmp[12];
|
||||
|
||||
// Set Underflow flag if the number is too small to be represented in normal numbers
|
||||
assign ProdUf = KillProdM & ZZeroM;
|
||||
assign Underflow = SumExp[12] | ProdUf;
|
||||
|
||||
// Set Inexact flag if the result is different from what would be output given infinite precision
|
||||
assign Inexact = Sticky|Overflow| (|NormSum[1:0]);
|
||||
|
||||
// Combine flags
|
||||
// - FMA can't set the Divide by zero flag
|
||||
// - Don't set the underflow flag if the result is exact
|
||||
assign FmaFlagsM = {Invalid, 1'b0, Overflow, Underflow & Inexact, Inexact};
|
||||
|
||||
endmodule
|
||||
|
||||
|
@ -80,34 +80,17 @@ module fpu (
|
||||
logic [4:0] FDivFlagsM, FDivFlagsW;
|
||||
|
||||
// FMA signals
|
||||
logic [12:0] aligncntE, aligncntM;
|
||||
logic [105:0] rE, rM;
|
||||
logic [105:0] sE, sM;
|
||||
logic [163:0] tE, tM;
|
||||
logic [8:0] normcntE, normcntM;
|
||||
logic [12:0] aeE, aeM;
|
||||
logic bsE, bsM;
|
||||
logic killprodE, killprodM;
|
||||
logic prodofE, prodofM;
|
||||
logic xzeroE, xzeroM;
|
||||
logic yzeroE, yzeroM;
|
||||
logic zzeroE, zzeroM;
|
||||
logic xdenormE, xdenormM;
|
||||
logic ydenormE, ydenormM;
|
||||
logic zdenormE, zdenormM;
|
||||
logic xinfE, xinfM;
|
||||
logic yinfE, yinfM;
|
||||
logic zinfE, zinfM;
|
||||
logic xnanE, xnanM;
|
||||
logic ynanE, ynanM;
|
||||
logic znanE, znanM;
|
||||
logic nanE, nanM;
|
||||
logic [8:0] sumshiftE, sumshiftM;
|
||||
logic sumshiftzeroE, sumshiftzeroM;
|
||||
logic prodinfE, prodinfM;
|
||||
logic [63:0] FmaResultM, FmaResultW;
|
||||
logic [4:0] FmaFlagsM, FmaFlagsW;
|
||||
|
||||
logic [105:0] ProdManE, ProdManM;
|
||||
logic [161:0] AlignedAddendE, AlignedAddendM;
|
||||
logic [12:0] ProdExpE, ProdExpM;
|
||||
logic AddendStickyE, AddendStickyM;
|
||||
logic KillProdE, KillProdM;
|
||||
logic XZeroE, YZeroE, ZZeroE, XZeroM, YZeroM, ZZeroM;
|
||||
logic XInfE, YInfE, ZInfE, XInfM, YInfM, ZInfM;
|
||||
logic XNaNE, YNaNE, ZNaNE, XNaNM, YNaNM, ZNaNM;
|
||||
logic [63:0] FmaResultM, FmaResultW;
|
||||
logic [4:0] FmaFlagsM, FmaFlagsW;
|
||||
|
||||
// add/cvt signals
|
||||
logic [63:0] AddSumE, AddSumTcE;
|
||||
logic [3:0] AddSelInvE;
|
||||
@ -241,7 +224,7 @@ module fpu (
|
||||
.CLK(clk),
|
||||
.ECLK(fpdivClk));
|
||||
|
||||
fpdiv fpdivsqrt (.DivOpType(FOpCtrlE[0]), .clk(fpdivClk));
|
||||
fpdiv fpdivsqrt (.DivOpType(FOpCtrlE[0]), .clk(fpdivClk), .*);
|
||||
|
||||
// first of two-stage instance of floating-point add/cvt unit
|
||||
fpuaddcvt1 fpadd1 (.*);
|
||||
@ -265,31 +248,20 @@ module fpu (
|
||||
//*****************
|
||||
// fma E/M pipe registers
|
||||
//*****************
|
||||
flopenrc #(13) EMRegFma1(clk, reset, PipeClearEM, PipeEnableEM, aligncntE, aligncntM);
|
||||
flopenrc #(106) EMRegFma2(clk, reset, PipeClearEM, PipeEnableEM, rE, rM);
|
||||
flopenrc #(106) EMRegFma3(clk, reset, PipeClearEM, PipeEnableEM, sE, sM);
|
||||
flopenrc #(164) EMRegFma4(clk, reset, PipeClearEM, PipeEnableEM, tE, tM);
|
||||
flopenrc #(9) EMRegFma5(clk, reset, PipeClearEM, PipeEnableEM, normcntE, normcntM);
|
||||
flopenrc #(13) EMRegFma6(clk, reset, PipeClearEM, PipeEnableEM, aeE, aeM);
|
||||
flopenrc #(1) EMRegFma7(clk, reset, PipeClearEM, PipeEnableEM, bsE, bsM);
|
||||
flopenrc #(1) EMRegFma8(clk, reset, PipeClearEM, PipeEnableEM, killprodE, killprodM);
|
||||
flopenrc #(1) EMRegFma9(clk, reset, PipeClearEM, PipeEnableEM, prodofE, prodofM);
|
||||
flopenrc #(1) EMRegFma10(clk, reset, PipeClearEM, PipeEnableEM, xzeroE, xzeroM);
|
||||
flopenrc #(1) EMRegFma11(clk, reset, PipeClearEM, PipeEnableEM, yzeroE, yzeroM);
|
||||
flopenrc #(1) EMRegFma12(clk, reset, PipeClearEM, PipeEnableEM, zzeroE, zzeroM);
|
||||
flopenrc #(1) EMRegFma13(clk, reset, PipeClearEM, PipeEnableEM, xdenormE, xdenormM);
|
||||
flopenrc #(1) EMRegFma14(clk, reset, PipeClearEM, PipeEnableEM, ydenormE, ydenormM);
|
||||
flopenrc #(1) EMRegFma15(clk, reset, PipeClearEM, PipeEnableEM, zdenormE, zdenormM);
|
||||
flopenrc #(1) EMRegFma16(clk, reset, PipeClearEM, PipeEnableEM, xinfE, xinfM);
|
||||
flopenrc #(1) EMRegFma17(clk, reset, PipeClearEM, PipeEnableEM, yinfE, yinfM);
|
||||
flopenrc #(1) EMRegFma18(clk, reset, PipeClearEM, PipeEnableEM, zinfE, zinfM);
|
||||
flopenrc #(1) EMRegFma19(clk, reset, PipeClearEM, PipeEnableEM, xnanE, xnanM);
|
||||
flopenrc #(1) EMRegFma20(clk, reset, PipeClearEM, PipeEnableEM, ynanE, ynanM);
|
||||
flopenrc #(1) EMRegFma21(clk, reset, PipeClearEM, PipeEnableEM, znanE, znanM);
|
||||
flopenrc #(1) EMRegFma22(clk, reset, PipeClearEM, PipeEnableEM, nanE, nanM);
|
||||
flopenrc #(9) EMRegFma23(clk, reset, PipeClearEM, PipeEnableEM, sumshiftE, sumshiftM);
|
||||
flopenrc #(1) EMRegFma24(clk, reset, PipeClearEM, PipeEnableEM, sumshiftzeroE, sumshiftzeroM);
|
||||
flopenrc #(1) EMRegFma25(clk, reset, PipeClearEM, PipeEnableEM, prodinfE, prodinfM);
|
||||
flopenrc #(106) EMRegFma3(clk, reset, PipeClearEM, PipeEnableEM, ProdManE, ProdManM);
|
||||
flopenrc #(162) EMRegFma4(clk, reset, PipeClearEM, PipeEnableEM, AlignedAddendE, AlignedAddendM);
|
||||
flopenrc #(13) EMRegFma6(clk, reset, PipeClearEM, PipeEnableEM, ProdExpE, ProdExpM);
|
||||
flopenrc #(1) EMRegFma7(clk, reset, PipeClearEM, PipeEnableEM, AddendStickyE, AddendStickyM);
|
||||
flopenrc #(1) EMRegFma8(clk, reset, PipeClearEM, PipeEnableEM, KillProdE, KillProdM);
|
||||
flopenrc #(1) EMRegFma10(clk, reset, PipeClearEM, PipeEnableEM, XZeroE, XZeroM);
|
||||
flopenrc #(1) EMRegFma11(clk, reset, PipeClearEM, PipeEnableEM, YZeroE, YZeroM);
|
||||
flopenrc #(1) EMRegFma12(clk, reset, PipeClearEM, PipeEnableEM, ZZeroE, ZZeroM);
|
||||
flopenrc #(1) EMRegFma16(clk, reset, PipeClearEM, PipeEnableEM, XInfE, XInfM);
|
||||
flopenrc #(1) EMRegFma17(clk, reset, PipeClearEM, PipeEnableEM, YInfE, YInfM);
|
||||
flopenrc #(1) EMRegFma18(clk, reset, PipeClearEM, PipeEnableEM, ZInfE, ZInfM);
|
||||
flopenrc #(1) EMRegFma19(clk, reset, PipeClearEM, PipeEnableEM, XNaNE, XNaNM);
|
||||
flopenrc #(1) EMRegFma20(clk, reset, PipeClearEM, PipeEnableEM, YNaNE, YNaNM);
|
||||
flopenrc #(1) EMRegFma21(clk, reset, PipeClearEM, PipeEnableEM, ZNaNE, ZNaNM);
|
||||
|
||||
//*****************
|
||||
// fpadd E/M pipe registers
|
||||
|
@ -1,40 +0,0 @@
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Block Name: lop.v
|
||||
// Author: David Harris
|
||||
// Date: 11/2/1995
|
||||
//
|
||||
// Block Description:
|
||||
// This block implements a Leading One Predictor used to determine
|
||||
// the normalization shift count.
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
module lza(sum, normcnt, sumzero);
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
input logic [163:0] sum; // sum
|
||||
output logic [8:0] normcnt; // normalization shift count
|
||||
output logic sumzero; // sum = 0
|
||||
|
||||
// Internal nodes
|
||||
|
||||
reg [8:0] i; // loop index
|
||||
|
||||
// A real LOP uses a fast carry chain to find only the first 0.
|
||||
// It is an example of a parallel prefix algorithm. For the sake
|
||||
// of simplicity, this model is behavioral instead.
|
||||
// A real LOP would also operate on the sources of the adder, not
|
||||
// the result!
|
||||
|
||||
always_comb
|
||||
begin
|
||||
i = 0;
|
||||
while (~sum[163-i] && i <= 163) i = i+1; // search for leading one
|
||||
normcnt = i; // compute shift count
|
||||
end
|
||||
|
||||
// Also check if sum is zero
|
||||
assign sumzero = ~(|sum);
|
||||
|
||||
endmodule
|
||||
|
@ -1,138 +0,0 @@
|
||||
|
||||
module multiply(xman, yman, xdenormE, ydenormE, xzeroE, yzeroE, rE, sE);
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
input logic [51:0] xman; // Fraction of multiplicand x
|
||||
input logic [51:0] yman; // Fraction of multiplicand y
|
||||
input logic xdenormE; // is x denormalized
|
||||
input logic ydenormE; // is y denormalized
|
||||
input logic xzeroE; // Z is denorm
|
||||
input logic yzeroE; // Z is denorm
|
||||
output logic [105:0] rE; // partial product 1
|
||||
output logic [105:0] sE; // partial product 2
|
||||
|
||||
wire [54:0] yExt; //y with appended 0 and assumed 1
|
||||
wire [53:0] xExt; //y with assumed 1
|
||||
wire [26:0][1:0] add1;
|
||||
wire [26:0][54:0] pp;
|
||||
wire [26:0] e;
|
||||
logic [106:0] tmpsE;
|
||||
logic [17:0][106:0] lv1add;
|
||||
logic [11:0][106:0] lv2add;
|
||||
logic [7:0][106:0] lv3add;
|
||||
logic [3:0][106:0] lv4add;
|
||||
logic [21:0][107:0] carryTmp;
|
||||
wire [26:0][106:0] acc;
|
||||
// wire [105:0] acc
|
||||
genvar i;
|
||||
|
||||
// assign xExt = {1'b0,~(xdenormE|xzeroE),xman};
|
||||
// assign yExt = {1'b0,~(ydenormE|yzeroE),yman, 1'b0};
|
||||
|
||||
// generate
|
||||
// for(i=0; i<27; i=i+1) begin
|
||||
// booth booth(.xExt(xExt), .choose(yExt[(i*2)+2:i*2]), .add1(add1[i]), .e(e[i]), .pp(pp[i]));
|
||||
// end
|
||||
// endgenerate
|
||||
|
||||
// assign acc[0] = {49'b0,~e[0],e[0],e[0],pp[0]};
|
||||
// assign acc[1] = {49'b01,~e[1],pp[1],add1[0]};
|
||||
// assign acc[2] = {47'b01,~e[2],pp[2],add1[1], 2'b0};
|
||||
// assign acc[3] = {45'b01,~e[3],pp[3],add1[2], 4'b0};
|
||||
// assign acc[4] = {43'b01,~e[4],pp[4],add1[3], 6'b0};
|
||||
// assign acc[5] = {41'b01,~e[5],pp[5],add1[4], 8'b0};
|
||||
// assign acc[6] = {39'b01,~e[6],pp[6],add1[5], 10'b0};
|
||||
// assign acc[7] = {37'b01,~e[7],pp[7],add1[6], 12'b0};
|
||||
// assign acc[8] = {35'b01,~e[8],pp[8],add1[7], 14'b0};
|
||||
// assign acc[9] = {33'b01,~e[9],pp[9],add1[8], 16'b0};
|
||||
// assign acc[10] = {31'b01,~e[10],pp[10],add1[9], 18'b0};
|
||||
// assign acc[11] = {29'b01,~e[11],pp[11],add1[10], 20'b0};
|
||||
// assign acc[12] = {27'b01,~e[12],pp[12],add1[11], 22'b0};
|
||||
// assign acc[13] = {25'b01,~e[13],pp[13],add1[12], 24'b0};
|
||||
// assign acc[14] = {23'b01,~e[14],pp[14],add1[13], 26'b0};
|
||||
// assign acc[15] = {21'b01,~e[15],pp[15],add1[14], 28'b0};
|
||||
// assign acc[16] = {19'b01,~e[16],pp[16],add1[15], 30'b0};
|
||||
// assign acc[17] = {17'b01,~e[17],pp[17],add1[16], 32'b0};
|
||||
// assign acc[18] = {15'b01,~e[18],pp[18],add1[17], 34'b0};
|
||||
// assign acc[19] = {13'b01,~e[19],pp[19],add1[18], 36'b0};
|
||||
// assign acc[20] = {11'b01,~e[20],pp[20],add1[19], 38'b0};
|
||||
// assign acc[21] = {9'b01,~e[21],pp[21],add1[20], 40'b0};
|
||||
// assign acc[22] = {7'b01,~e[22],pp[22],add1[21], 42'b0};
|
||||
// assign acc[23] = {5'b01,~e[23],pp[23],add1[22], 44'b0};
|
||||
// assign acc[24] = {3'b01,~e[24],pp[24],add1[23], 46'b0};
|
||||
// assign acc[25] = {1'b0, ~e[25],pp[25],add1[24], 48'b0};
|
||||
// assign acc[26] = {pp[26],add1[25], 50'b0};
|
||||
|
||||
//***breaks lint with warnings like: %Warning-UNOPTFLAT: Example path: src/fpu/multiply.sv:86: ASSIGNW
|
||||
// %Warning-UNOPTFLAT: Example path: src/fpu/multiply.sv:22: wallypipelinedsoc.hart.fpu.fma1.multiply.lv3add
|
||||
//*** resize adders
|
||||
// generate
|
||||
// for(i=0; i<9; i=i+1) begin
|
||||
// add3comp2 #(.BITS(107)) add1(.a(acc[i*3]), .b(acc[i*3+1]), .c(acc[i*3+2]),
|
||||
// .carry(carryTmp[i][106:0]), .sum(lv1add[i*2+1]));
|
||||
// assign lv1add[i*2] = {carryTmp[i][105:0], 1'b0};
|
||||
// end
|
||||
// endgenerate
|
||||
|
||||
// generate
|
||||
// for(i=0; i<6; i=i+1) begin
|
||||
// add3comp2 #(.BITS(107)) add2(.a(lv1add[i*3]), .b(lv1add[i*3+1]), .c(lv1add[i*3+2]),
|
||||
// .carry(carryTmp[i+9][106:0]), .sum(lv2add[i*2+1]));
|
||||
// assign lv2add[i*2] = {carryTmp[i+9][105:0], 1'b0};
|
||||
// end
|
||||
// endgenerate
|
||||
|
||||
// generate
|
||||
// for(i=0; i<4; i=i+1) begin
|
||||
// add3comp2 #(.BITS(107)) add3(.a(lv2add[i*3]), .b(lv2add[i*3+1]), .c(lv2add[i*3+2]),
|
||||
// .carry(carryTmp[i+15][106:0]), .sum(lv3add[i*2+1]));
|
||||
// assign lv3add[i*2] = {carryTmp[i+15][105:0], 1'b0};
|
||||
// end
|
||||
// endgenerate
|
||||
|
||||
|
||||
// generate
|
||||
// for(i=0; i<2; i=i+1) begin
|
||||
// add4comp2 #(.BITS(107)) add4(.a(lv3add[i*4]), .b(lv3add[i*4+1]), .c(lv3add[i*4+2]), .d(lv3add[i*4+3]),
|
||||
// .carry(carryTmp[i+19]), .sum(lv4add[i*2+1]));
|
||||
// assign lv4add[i*2] = {carryTmp[i+19][105:0], 1'b0};
|
||||
// end
|
||||
// endgenerate
|
||||
|
||||
// add4comp2 #(.BITS(107)) add5(.a(lv4add[0]), .b(lv4add[1]), .c(lv4add[2]), .d(lv4add[3]) ,
|
||||
// .carry(carryTmp[21]), .sum(tmpsE));
|
||||
// assign sE = tmpsE[105:0];
|
||||
// assign rE = {carryTmp[21][104:0], 1'b0};
|
||||
// assign rE = 0;
|
||||
// assign sE = acc[0] +
|
||||
// acc[1] +
|
||||
// acc[2] +
|
||||
// acc[3] +
|
||||
// acc[4] +
|
||||
// acc[5] +
|
||||
// acc[6] +
|
||||
// acc[7] +
|
||||
// acc[8] +
|
||||
// acc[9] +
|
||||
// acc[10] +
|
||||
// acc[11] +
|
||||
// acc[12] +
|
||||
// acc[13] +
|
||||
// acc[14] +
|
||||
// acc[15] +
|
||||
// acc[16] +
|
||||
// acc[17] +
|
||||
// acc[18] +
|
||||
// acc[19] +
|
||||
// acc[20] +
|
||||
// acc[21] +
|
||||
// acc[22] +
|
||||
// acc[23] +
|
||||
// acc[24] +
|
||||
// acc[25] +
|
||||
// acc[26];
|
||||
|
||||
assign sE = {53'b0,~(xdenormE|xzeroE),xman} * {53'b0,~(ydenormE|yzeroE),yman};
|
||||
assign rE = 0;
|
||||
endmodule
|
||||
|
@ -1,147 +0,0 @@
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Block Name: normalize.v
|
||||
// Author: David Harris
|
||||
// Date: 11/2/1995
|
||||
//
|
||||
// Block Description:
|
||||
// This block performs the normalization shift. It also
|
||||
// generates the R and S bits for rounding. Finally, it
|
||||
// handles the special case of a zero sum.
|
||||
//
|
||||
// v[53:2] is the fraction component of the prerounded result.
|
||||
// It can be bypassed back to the X or Z inputs of the FMAC
|
||||
// for back-to-back operations.
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
module normalize(sum, zexp, normcnt, aeM, aligncntM, sumshiftM, sumshiftzeroM, sumzero,
|
||||
xzeroM, zzeroM, yzeroM, bsM, xdenormM, ydenormM, zdenormM, sticky, de0, resultdenorm, v);
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
input logic [163:0] sum; // sum
|
||||
input logic [62:52] zexp; // sum
|
||||
input logic [8:0] normcnt; // normalization shift count
|
||||
input logic [12:0] aeM; // normalization shift count
|
||||
input logic [12:0] aligncntM; // normalization shift count
|
||||
input logic [8:0] sumshiftM; // normalization shift count
|
||||
input logic sumshiftzeroM;
|
||||
input logic sumzero; // sum is zero
|
||||
input logic bsM; // sticky bit for addend
|
||||
input logic xdenormM; // Input Z is denormalized
|
||||
input logic ydenormM; // Input Z is denormalized
|
||||
input logic zdenormM; // Input Z is denormalized
|
||||
input logic xzeroM;
|
||||
input logic yzeroM;
|
||||
input logic zzeroM;
|
||||
output logic sticky; //sticky bit
|
||||
output logic [12:0] de0;
|
||||
output logic resultdenorm; // Input Z is denormalized
|
||||
output logic [53:0] v; // normalized sum, R, S bits
|
||||
|
||||
// Internal nodes
|
||||
|
||||
logic [163:0] sumshifted; // shifted sum
|
||||
logic [9:0] sumshifttmp;
|
||||
logic [163:0] sumshiftedtmp; // shifted sum
|
||||
logic isShiftLeft1;
|
||||
logic tmp,tmp1,tmp2,tmp3,tmp4, tmp5;
|
||||
|
||||
// When the sum is zero, normalization does not apply and only the
|
||||
// sticky bit must be computed. Otherwise, the sum is right-shifted
|
||||
// and the R and S bits (v[1] and v[0], respectively) are assigned.
|
||||
|
||||
// The R bit is also set on denormalized numbers where the exponent
|
||||
// was computed to be exactly -1023 and the L bit was set. This
|
||||
// is required for correct rounding up of multiplication results.
|
||||
|
||||
// The sticky bit calculation is actually built into the shifter and
|
||||
// does not require a true subtraction shown in the model.
|
||||
|
||||
assign isShiftLeft1 = (aligncntM == 13'b1 ||aligncntM == 13'b0 || $signed(aligncntM) == $signed(-(13'b1)))&& zexp == 11'h2;
|
||||
// assign tmp = ($signed(aeM-normcnt+2) >= $signed(-1022));
|
||||
always_comb
|
||||
begin
|
||||
// d = aligncntM
|
||||
// l = normcnt
|
||||
// p = 53
|
||||
// ea + eb = aeM
|
||||
// set d<=2 to d<=0
|
||||
if ($signed(aligncntM)<=$signed(13'd2)) begin //d<=2
|
||||
// product anchored or cancellation
|
||||
if ($signed(aeM-{{4{normcnt[8]}},normcnt}+13'd2) >= $signed(-(13'd1022))) begin //ea+eb-l+2 >= emin
|
||||
//normal result
|
||||
de0 = xzeroM|yzeroM ? {2'b0,zexp} : aeM-{{4{normcnt[8]}},normcnt}+{12'b0,xdenormM}+{12'b0,ydenormM}+13'd57;
|
||||
resultdenorm = |sum & ~|de0 | de0[12];
|
||||
// if z is zero then there was a 56 bit shift of the product
|
||||
sumshifted = resultdenorm ? sum << sumshiftM-{8'b0,zzeroM}+{8'b0,isShiftLeft1} : sum << normcnt; // p+2+l
|
||||
v = sumshifted[162:109];
|
||||
sticky = (|sumshifted[108:0]) | bsM;
|
||||
//de0 = aeM-normcnt+2-1023;
|
||||
end else begin
|
||||
sumshifted = sum << (13'd1080+aeM);
|
||||
v = sumshifted[162:109];
|
||||
sticky = (|sumshifted[108:0]) | bsM;
|
||||
resultdenorm = 1;
|
||||
de0 = 0;
|
||||
end
|
||||
|
||||
end else begin // extract normalized bits
|
||||
sumshifttmp = {1'b0,sumshiftM} - 2;
|
||||
sumshifted = sumshifttmp[9] ? sum : sum << sumshifttmp;
|
||||
tmp1 = (sumshifted[163] & ~sumshifttmp[9]);
|
||||
tmp2 = ((sumshifttmp[9] & sumshiftM[0]) || sumshifted[162]);
|
||||
tmp3 = (sumshifted[161] || (sumshifttmp[9] & sumshiftM[1]));
|
||||
tmp4 = sumshifted[160];
|
||||
tmp5 = sumshifted[159];
|
||||
// for some reason use exp = zexp + {0,1,2}
|
||||
// the book says exp = zexp + {-1,0,1}
|
||||
if(sumshiftzeroM) begin
|
||||
v = sum[162:109];
|
||||
sticky = (|sum[108:0]) | bsM;
|
||||
de0 = {2'b0,zexp};
|
||||
end else if(sumshifted[163] & ~sumshifttmp[9])begin
|
||||
v = sumshifted[162:109];
|
||||
sticky = (|sumshifted[108:0]) | bsM;
|
||||
de0 = {2'b0,zexp} +13'd2;
|
||||
end else if ((sumshifttmp[9] & sumshiftM[0]) || sumshifted[162]) begin
|
||||
v = sumshifted[161:108];
|
||||
sticky = (|sumshifted[107:0]) | bsM;
|
||||
de0 = {2'b0,zexp}+13'd1;
|
||||
end else if (sumshifted[161] || (sumshifttmp[9] & sumshiftM[1])) begin
|
||||
v = sumshifted[160:107];
|
||||
sticky = (|sumshifted[106:0]) | bsM;
|
||||
//de0 = zexp-1;
|
||||
de0 = {2'b0,zexp}+{12'b0,zdenormM};
|
||||
end else if(sumshifted[160]& ~zdenormM) begin
|
||||
de0 = {2'b0,zexp}-13'b1;
|
||||
v = ~|de0&~sumzero ? sumshifted[160:107] : sumshifted[159:106];
|
||||
sticky = (|sumshifted[105:0]) | bsM;
|
||||
//de0 = zexp-1;
|
||||
end else if(sumshifted[159]& ~zdenormM) begin
|
||||
//v = sumshifted[158:105];
|
||||
de0 = {2'b0,zexp}-13'd2;
|
||||
v = (~|de0 | de0[12])&~sumzero ? sumshifted[161:108] : sumshifted[158:105];
|
||||
sticky = (|sumshifted[104:0]) | bsM;
|
||||
//de0 = zexp-1;
|
||||
end else if(zdenormM) begin
|
||||
v = sumshifted[160:107];
|
||||
sticky = (|sumshifted[106:0]) | bsM;
|
||||
//de0 = zexp-1;
|
||||
de0 = {{2{zexp[62]}},zexp};
|
||||
end else begin
|
||||
de0 = 0;
|
||||
sumshifted = sum << sumshiftM-1; // p+2+l
|
||||
v = sumshifted[162:109];
|
||||
sticky = (|sumshifted[108:0]) | bsM;
|
||||
end
|
||||
|
||||
resultdenorm = (~|de0 | de0[12]);
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
// shift sum left by normcnt, filling the right with zeros
|
||||
//assign sumshifted = sum << normcnt;
|
||||
|
||||
endmodule
|
||||
|
||||
|
@ -1,122 +0,0 @@
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
// Block Name: round.v
|
||||
// Author: David Harris
|
||||
// Date: 11/2/1995
|
||||
//
|
||||
// Block Description:
|
||||
// This block is responsible for rounding the normalized result of the FMAC. Because prenormalized results may be bypassed back to the FMAC X and Z inputs, rounding does not appear in the critical path of most floating point code. This is good because rounding requires an entire 52 bit carry-propagate half-adder delay.
|
||||
//
|
||||
// The results from other FPU blocks (e.g. FCVT, FDIV, etc) are also
|
||||
// muxed in to form the actual result for register file writeback. This
|
||||
// saves a mux from the writeback path.
|
||||
//
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
module round(v, sticky, FrmM, wsign,
             FmaFlagsM, inf, nanM, xnanM, ynanM, znanM,
             xman, yman, zman,
             wman, infinity, specialsel, expplus1);
/////////////////////////////////////////////////////////////////////////////
// Rounds the normalized FMA sum to a 52-bit fraction and muxes in the
// special results (NaN, overflow, infinity, underflow).
//
// Rounding-mode encodings handled by this block (FrmM):
//   3'b000  round to nearest, ties to even
//   3'b001  round toward zero
//   3'b010  round down (toward -infinity)
//   3'b011  round up   (toward +infinity)
//   3'b100  round to nearest, ties to max magnitude
// Any other encoding drives plus1 to x.
//
// Flag bits read from FmaFlagsM: [4] invalid, [2] overflow, [1] underflow.
/////////////////////////////////////////////////////////////////////////////

    input  logic [53:0] v;          // normalized sum; v[53:2] is the kept fraction,
                                    // v[1:0] are the two bits just below it
    input  logic        sticky;     // sticky bit (OR of everything below v[0])
    input  logic [2:0]  FrmM;       // rounding mode, see table above
    input  logic        wsign;      // sign of result (1 = negative)
    input  logic [4:0]  FmaFlagsM;  // exception flags, see header
    input  logic        inf;        // some input is infinity
    input  logic        nanM;       // some input is NaN
    input  logic        xnanM;      // X is NaN
    input  logic        ynanM;      // Y is NaN
    input  logic        znanM;      // Z is NaN
    input  logic [51:0] xman;       // fraction of X
    input  logic [51:0] yman;       // fraction of Y
    input  logic [51:0] zman;       // fraction of Z
    output logic [51:0] wman;       // rounded (or special) result fraction
    output logic        infinity;   // overflow produces infinity (1) or largest finite (0)
    output logic        specialsel; // a special result is selected instead of the rounded sum
    output logic        expplus1;   // rounding carried out of the fraction; exponent must be bumped

    // Internal nodes
    logic        plus1;        // round by adding one ulp
    logic        inexact;      // any discarded bit is set
    wire  [52:0] v1;           // kept fraction + 1, with carry-out in bit 52
    wire  [51:0] specialres;   // result of exceptional case
    wire  [51:0] infinityres;  // infinity or largest finite fraction
    wire  [51:0] nanres;       // propagated or generated quiet NaN fraction

    assign inexact = v[1] | v[0] | sticky;

    // Decide whether to increment the kept fraction.
    //   {v[1], v[0], sticky}
    //     0xx          - below half an ulp
    //     100          - exact tie
    //     101/110/111  - above half an ulp
    // Directed modes must only move the result when something was actually
    // discarded; an exact result is never incremented.
    // NOTE: the original source recorded a Verilator UNOPTFLAT warning on
    // plus1 for this always_comb block.
    always_comb begin
        case (FrmM)
            3'b000:  plus1 = v[1] & (v[0] | sticky | v[2]); // nearest even: tie goes up only if LSB is 1
            3'b001:  plus1 = 1'b0;                          // toward zero: truncate
            3'b010:  plus1 = wsign  & inexact;              // down: grow magnitude of negative results
            3'b011:  plus1 = ~wsign & inexact;              // up: grow magnitude of positive results
            3'b100:  plus1 = v[1];                          // nearest max magnitude: ties go away from zero for either sign
            default: plus1 = 1'bx;
        endcase
    end

    // Compute rounded result; bit 52 is the carry out of the fraction.
    assign v1 = v[53:2] + 1;

    // Special result selection. Written as a priority chain, but per the
    // original design notes it is intended as a non-critical 4:1 mux.
    assign specialsel = FmaFlagsM[2] || FmaFlagsM[1] || FmaFlagsM[4] || // overflow, underflow, invalid
                        nanM || inf;

    assign specialres = (FmaFlagsM[4] | nanM) ? nanres      : // invalid or NaN operand
                        FmaFlagsM[2]          ? infinityres : // overflow
                        inf                   ? 52'b0       : // infinite operand
                        FmaFlagsM[1]          ? 52'b0       : // underflow
                                                52'bx;

    // On overflow the result is infinity when the rounding direction points
    // away from zero for this sign, otherwise the largest finite number.
    assign infinity = (FrmM == 3'b000) |
                      (FrmM == 3'b100) |
                      ((FrmM == 3'b011) & ~wsign) |
                      ((FrmM == 3'b010) &  wsign);
    assign infinityres = infinity ? 52'b0 : {52{1'b1}};

    // IEEE 754-2008 section 6.2.3: when two or more inputs are NaN the
    // payload of the result should match one of them; which one is not
    // specified. Priority here is X, then Y, then Z, else the default NaN.
    // The top fraction bit is forced to 1 so the result is always quiet.
    assign nanres = xnanM ? {1'b1, xman[50:0]} :
                    ynanM ? {1'b1, yman[50:0]} :
                    znanM ? {1'b1, zman[50:0]} :
                            {1'b1, 51'b0};

    // Final selection between special result, incremented and truncated sum.
    assign expplus1 = v1[52] & ~specialsel & plus1;
    assign wman     = specialsel ? specialres : (plus1 ? v1[51:0] : v[53:2]);

endmodule
|
||||
|
@ -1,112 +0,0 @@
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Block Name: sign.v
|
||||
// Author: David Harris
|
||||
// Date: 12/1/1995
|
||||
//
|
||||
// Block Description:
|
||||
// This block manages the signs of the numbers.
|
||||
// 1 = negative
|
||||
//
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
module sign(xsign, ysign, zsign, negsum0, negsum1, bsM, FrmM, FmaFlagsM,
            sumzero, zinfM, inf, wsign, invz, negsum, selsum1, isAdd);
/////////////////////////////////////////////////////////////////////////////
// Sign handling for the FMA: sign of the product, whether the addend must
// be inverted, whether the adder output must be negated, and the sign of
// the final result. 1 = negative throughout.
//
// Flag bits read from FmaFlagsM: [4] invalid, [1] underflow.
/////////////////////////////////////////////////////////////////////////////

    input  logic       xsign;      // sign of X
    input  logic       ysign;      // sign of Y
    input  logic       zsign;      // sign of Z
    input  logic       isAdd;      // selects which zero-sign rule applies (see zerosign)
    input  logic       negsum0;    // sum in +0 mode is negative
    input  logic       negsum1;    // sum in +1 mode is negative
    input  logic       bsM;        // sticky bit from addend (not read by the active logic)
    input  logic [2:0] FrmM;       // rounding mode; only 3'b010 is tested here
    input  logic [4:0] FmaFlagsM;  // exception flags, see header
    input  logic       sumzero;    // sum = 0
    input  logic       zinfM;      // Z is infinity
    input  logic       inf;        // some input is infinity
    output logic       wsign;      // sign of W
    output logic       invz;       // invert addend into adder
    output logic       negsum;     // negate result of adder
    output logic       selsum1;    // select +1 mode from compound adder

    // Internal nodes
    wire  zerosign;   // sign used when the sum is zero
    wire  sumneg;     // sign used for an ordinary non-zero sum
    wire  infsign;    // sign used when the result is infinity
    wire  normalsign; // result sign before the invalid override
    logic tmp;        // alternative result sign; not consumed inside this module
    logic psign;      // sign of the product X*Y

    // Sign of product
    assign psign = xsign ^ ysign;

    // The addend is inverted whenever its sign differs from the product's,
    // and the +1 output of the compound adder is selected in that same case.
    assign invz    = psign ^ zsign;
    assign selsum1 = invz;

    // Pick the negative indicator that matches the selected adder mode.
    assign negsum = (invz & negsum1) | (~invz & negsum0);

    // Sign of a non-zero sum:
    //   signs differ -> follows Z when the +1 sum is negative, else the product
    //   both negative -> negative
    assign sumneg = (invz & ((zsign & negsum1) | (psign & ~negsum1))) |
                    (psign & zsign);

    // Sign of a zero result.
    // IEEE 754-2008 section 6.3: an exact zero sum of operands with opposite
    // signs is +0 in every rounding direction except roundTowardNegative,
    // where it is -0; x + x keeps the sign of x even when x is zero.
    //   underflow flag set             -> product sign
    //   isAdd,  signs differ           -> 1 only when FrmM is 3'b010
    //   isAdd,  signs equal            -> product sign
    //   ~isAdd, signs differ           -> product sign
    //   ~isAdd, signs equal            -> 1 only when FrmM is 3'b010
    assign zerosign = FmaFlagsM[1] ? psign :
                      (isAdd ? (psign ^ zsign ? FrmM == 3'b010 : psign)
                             : (psign ^ zsign ? psign : FrmM == 3'b010));

    // An infinite Z dictates the sign, otherwise the product does.
    assign infsign = zinfM ? zsign : psign;

    // Invalid results are forced positive; then infinity, zero and the
    // ordinary case are considered in that order.
    assign tmp        = FmaFlagsM[4] ? 1'b0 :
                        (inf ? infsign : (sumzero ? zerosign : psign ^ negsum));
    assign normalsign = inf ? infsign : (sumzero ? zerosign : sumneg);
    assign wsign      = FmaFlagsM[4] ? 1'b0 : normalsign;

endmodule
|
@ -1,67 +0,0 @@
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Block Name: special.v
|
||||
// Author: David Harris
|
||||
// Date: 12/2/1995
|
||||
//
|
||||
// Block Description:
|
||||
// This block implements special case handling for unusual operands (e.g.
|
||||
// 0, NaN, denormalized, infinity). The block consists of zero/one detectors.
|
||||
//
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
module special(FInput1E, FInput2E, FInput3E, xzeroE, yzeroE, zzeroE,
               xnanE, ynanE, znanE, xdenormE, ydenormE, zdenormE, xinfE, yinfE, zinfE);
/////////////////////////////////////////////////////////////////////////////
// Classifies each 64-bit operand as zero, NaN, denormalized or infinity by
// inspecting bits 62:52 and bits 51:0. Bit 63 is ignored.
/////////////////////////////////////////////////////////////////////////////

    input  logic [63:0] FInput1E;  // operand X
    input  logic [63:0] FInput2E;  // operand Y
    input  logic [63:0] FInput3E;  // operand Z
    output logic        xzeroE;    // X = 0
    output logic        yzeroE;    // Y = 0
    output logic        zzeroE;    // Z = 0
    output logic        xnanE;     // X is NaN
    output logic        ynanE;     // Y is NaN
    output logic        znanE;     // Z is NaN
    output logic        xdenormE;  // X is denormalized
    output logic        ydenormE;  // Y is denormalized
    output logic        zdenormE;  // Z is denormalized
    output logic        xinfE;     // X is infinity
    output logic        yinfE;     // Y is infinity
    output logic        zinfE;     // Z is infinity

    // Shared detectors: one all-ones, one any-ones over bits 62:52, and one
    // any-ones over bits 51:0 per operand. Every output below is a
    // combination of these.
    logic xexpones, yexpones, zexpones;  // bits 62:52 all ones
    logic xexpany,  yexpany,  zexpany;   // bits 62:52 not all zero
    logic xfracany, yfracany, zfracany;  // bits 51:0 not all zero

    assign xexpones = &FInput1E[62:52];
    assign yexpones = &FInput2E[62:52];
    assign zexpones = &FInput3E[62:52];

    assign xexpany  = |FInput1E[62:52];
    assign yexpany  = |FInput2E[62:52];
    assign zexpany  = |FInput3E[62:52];

    assign xfracany = |FInput1E[51:0];
    assign yfracany = |FInput2E[51:0];
    assign zfracany = |FInput3E[51:0];

    // NaN: bits 62:52 all ones with any of bits 51:0 set
    assign xnanE = xexpones & xfracany;
    assign ynanE = yexpones & yfracany;
    assign znanE = zexpones & zfracany;

    // Denormalized: bits 62:52 all zero with any of bits 51:0 set
    assign xdenormE = ~xexpany & xfracany;
    assign ydenormE = ~yexpany & yfracany;
    assign zdenormE = ~zexpany & zfracany;

    // Infinity: bits 62:52 all ones with bits 51:0 all zero
    assign xinfE = xexpones & ~xfracany;
    assign yinfE = yexpones & ~yfracany;
    assign zinfE = zexpones & ~zfracany;

    // Zero: bits 62:0 all zero. Denormalized inputs are deliberately NOT
    // reported as zero, so computations on them do not collapse to zero.
    assign xzeroE = ~xexpany & ~xfracany;
    assign yzeroE = ~yexpany & ~yfracany;
    assign zzeroE = ~zexpany & ~zfracany;

endmodule
|
@ -122,6 +122,9 @@ string tests32f[] = '{
|
||||
};
|
||||
|
||||
string tests64d[] = '{
|
||||
"rv64d/I-FNMADD-D-01", "2000",
|
||||
"rv64d/I-FNMSUB-D-01", "2000",
|
||||
"rv64d/I-FMSUB-D-01", "2000",
|
||||
"rv64d/I-FMAX-D-01", "2000",
|
||||
"rv64d/I-FMIN-D-01", "2000",
|
||||
"rv64d/I-FLE-D-01", "2000",
|
||||
@ -143,12 +146,9 @@ string tests32f[] = '{
|
||||
"rv64d/I-FSD-01", "2000",
|
||||
"rv64d/I-FLD-01", "2420",
|
||||
"rv64d/I-FMADD-D-01", "2000",
|
||||
// "rv64d/I-FMSUB-D-01", "2000",
|
||||
// "rv64d/I-FMUL-D-01", "2000",
|
||||
"rv64d/I-FMV-D-X-01", "2000",
|
||||
"rv64d/I-FMV-X-D-01", "2000",
|
||||
// "rv64d/I-FNMADD-D-01", "2000",
|
||||
// "rv64d/I-FNMSUB-D-01", "2000",
|
||||
"rv64d/I-FMUL-D-01", "2000",
|
||||
// "rv64d/I-FMV-D-X-01", "2000",
|
||||
// "rv64d/I-FMV-X-D-01", "2000",
|
||||
"rv64d/I-FSGNJ-D-01", "2000",
|
||||
"rv64d/I-FSGNJN-D-01", "2000",
|
||||
"rv64d/I-FSGNJX-D-01", "2000",
|
||||
|
Loading…
Reference in New Issue
Block a user