diff --git a/addins/riscv-arch-test b/addins/riscv-arch-test index 307c77b26..be67c99bd 160000 --- a/addins/riscv-arch-test +++ b/addins/riscv-arch-test @@ -1 +1 @@ -Subproject commit 307c77b26e070ae85ffea665ad9b642b40e33c86 +Subproject commit be67c99bd461742aa1c100bcc0732657faae2230 diff --git a/pipelined/config/rv64fp/wally-config.vh b/pipelined/config/rv64fp/wally-config.vh index c6f80d497..36cda4d91 100644 --- a/pipelined/config/rv64fp/wally-config.vh +++ b/pipelined/config/rv64fp/wally-config.vh @@ -38,12 +38,13 @@ `define IEEE754 1 // MISA RISC-V configuration per specification -`define MISA (32'h00000104 | 1 << 5 | 0 << 3 | 1 << 18 | 1 << 20 | 1 << 12 | 1 << 0 ) +//16 - quad 3 - double 5 - single +`define MISA (32'h00000104 | 1 << 5 | 1 << 3 | 1 << 16 | 1 << 18 | 1 << 20 | 1 << 12 | 1 << 0 ) `define ZICSR_SUPPORTED 1 `define ZIFENCEI_SUPPORTED 1 `define COUNTERS 32 `define ZICOUNTERS_SUPPORTED 1 -`define ZFH_SUPPORTED 0 +`define ZFH_SUPPORTED 1 /// Microarchitectural Features `define UARCH_PIPELINED 1 diff --git a/pipelined/regression/fp.do b/pipelined/regression/fp.do new file mode 100644 index 000000000..208118fc6 --- /dev/null +++ b/pipelined/regression/fp.do @@ -0,0 +1,52 @@ +# wally-pipelined.do +# +# Modification by Oklahoma State University & Harvey Mudd College +# Use with Testbench +# James Stine, 2008; David Harris 2021 +# Go Cowboys!!!!!! +# +# Takes 1:10 to run RV64IC tests using gui + +# run with vsim -do "do wally-pipelined.do rv64ic riscvarchtest-64m" + +# Use this wally-pipelined.do file to run this example. 
+# Either bring up ModelSim and type the following at the "ModelSim>" prompt: +# do wally-pipelined.do +# or, to run from a shell, type the following at the shell prompt: +# vsim -do wally-pipelined.do -c +# (omit the "-c" to see the GUI while running from the shell) + +onbreak {resume} + +# create library +if [file exists work] { + vdel -all +} +vlib work + +# compile source files +# suppress spurious warnngs about +# "Extra checking for conflicts with always_comb done at vopt time" +# because vsim will run vopt + +# start and run simulation +# remove +acc flag for faster sim during regressions if there is no need to access internal signals +# $num = the added words after the call +vlog +incdir+../config/$1 +incdir+../config/shared ../testbench/testbench-fp.sv ../src/fpu/*.sv -suppress 2583,7063,8607,2697 + +vsim -voptargs=+acc work.testbenchfp -G TEST=$2 + +view wave +#-- display input and output signals as hexidecimal values +#do ./wave-dos/peripheral-waves.do +#add log -recursive /* +#do wave.do deal with when ready + +do wave-fpu.do + +#-- Run the Simulation +#run 3600 +run -all +noview testbench-fp.sv +view wave + diff --git a/pipelined/regression/sim-fp b/pipelined/regression/sim-fp new file mode 100755 index 000000000..1d6425425 --- /dev/null +++ b/pipelined/regression/sim-fp @@ -0,0 +1,11 @@ + +# cvtint - test integer conversion unit (fcvtint) +# cvtfp - test floating-point conversion unit (fcvtfp) +# cmp - test comparison unit's LT, LE, EQ opperations (fcmp) +# add - test addition +# sub - test subtraction +# div - test division +# sqrt - test square root +# all - test everything + +vsim -do "do fp.do rv64fp mul" diff --git a/pipelined/regression/sim-fp-batch b/pipelined/regression/sim-fp-batch new file mode 100755 index 000000000..26085239d --- /dev/null +++ b/pipelined/regression/sim-fp-batch @@ -0,0 +1,10 @@ +# cvtint - test integer conversion unit (fcvtint) +# cvtfp - test floating-point conversion unit (fcvtfp) +# cmp - test comparison unit's LT, LE, 
EQ opperations (fcmp) +# add - test addition +# sub - test subtraction +# div - test division +# sqrt - test square root +# all - test everything + +vsim -c -do "do fp.do rv64fp mul" \ No newline at end of file diff --git a/pipelined/regression/wave-fpu.do b/pipelined/regression/wave-fpu.do new file mode 100644 index 000000000..d2ea6d486 --- /dev/null +++ b/pipelined/regression/wave-fpu.do @@ -0,0 +1,102 @@ + +add wave -noupdate /testbenchfp/clk +add wave -noupdate -radix decimal /testbenchfp/VectorNum +add wave -group Other -noupdate /testbenchfp/FrmNum +add wave -group Other -noupdate /testbenchfp/X +add wave -group Other -noupdate /testbenchfp/Y +add wave -group Other -noupdate /testbenchfp/Z +add wave -group Other -noupdate /testbenchfp/Res +add wave -group Other -noupdate /testbenchfp/Ans + +add wave -group Rne -noupdate /testbenchfp/FmaRneX +add wave -group Rne -noupdate /testbenchfp/FmaRneY +add wave -group Rne -noupdate /testbenchfp/FmaRneZ +add wave -group Rne -noupdate /testbenchfp/FmaRneRes +add wave -group Rne -noupdate /testbenchfp/FmaRneAns +add wave -group Rz -noupdate /testbenchfp/FmaRzX +add wave -group Rz -noupdate /testbenchfp/FmaRzY +add wave -group Rz -noupdate /testbenchfp/FmaRzZ +add wave -group Rz -noupdate /testbenchfp/FmaRzRes +add wave -group Rz -noupdate /testbenchfp/FmaRzAns +add wave -group Ru -noupdate /testbenchfp/FmaRuX +add wave -group Ru -noupdate /testbenchfp/FmaRuY +add wave -group Ru -noupdate /testbenchfp/FmaRuZ +add wave -group Ru -noupdate /testbenchfp/FmaRuRes +add wave -group Ru -noupdate /testbenchfp/FmaRuAns +add wave -group Rd -noupdate /testbenchfp/FmaRdX +add wave -group Rd -noupdate /testbenchfp/FmaRdY +add wave -group Rd -noupdate /testbenchfp/FmaRdZ +add wave -group Rd -noupdate /testbenchfp/FmaRdRes +add wave -group Rd -noupdate /testbenchfp/FmaRdAns +add wave -group Rnm -noupdate /testbenchfp/FmaRnmX +add wave -group Rnm -noupdate /testbenchfp/FmaRnmY +add wave -group Rnm -noupdate /testbenchfp/FmaRnmZ +add wave 
-group Rnm -noupdate /testbenchfp/FmaRnmRes +add wave -group Rnm -noupdate /testbenchfp/FmaRnmAns +add wave -group AllSignals -noupdate /* +add wave -group AllSignals -noupdate /testbenchfp/* +add wave -group AllSignals -noupdate /testbenchfp/fma1rne/* +add wave -group AllSignals -noupdate /testbenchfp/fma1rne/expadd/* +add wave -group AllSignals -noupdate /testbenchfp/fma1rne/mult/* +add wave -group AllSignals -noupdate /testbenchfp/fma1rne/align/* +add wave -group AllSignals -noupdate /testbenchfp/fma1rne/sign/* +add wave -group AllSignals -noupdate /testbenchfp/fma1rne/add/* +add wave -group AllSignals -noupdate /testbenchfp/fma1rne/loa/* +add wave -group AllSignals -noupdate /testbenchfp/fma2rne/* +add wave -group AllSignals -noupdate /testbenchfp/fma2rne/normalize/* +add wave -group AllSignals -noupdate /testbenchfp/fma2rne/fmaround/* +add wave -group AllSignals -noupdate /testbenchfp/fma2rne/resultsign/* +add wave -group AllSignals -noupdate /testbenchfp/fma2rne/fmaflags/* +add wave -group AllSignals -noupdate /testbenchfp/fma2rne/resultselect/* +add wave -group AllSignals -noupdate /testbenchfp/fma1rz/* +add wave -group AllSignals -noupdate /testbenchfp/fma1rz/expadd/* +add wave -group AllSignals -noupdate /testbenchfp/fma1rz/mult/* +add wave -group AllSignals -noupdate /testbenchfp/fma1rz/align/* +add wave -group AllSignals -noupdate /testbenchfp/fma1rz/sign/* +add wave -group AllSignals -noupdate /testbenchfp/fma1rz/add/* +add wave -group AllSignals -noupdate /testbenchfp/fma1rz/loa/* +add wave -group AllSignals -noupdate /testbenchfp/fma2rz/* +add wave -group AllSignals -noupdate /testbenchfp/fma2rz/normalize/* +add wave -group AllSignals -noupdate /testbenchfp/fma2rz/fmaround/* +add wave -group AllSignals -noupdate /testbenchfp/fma2rz/resultsign/* +add wave -group AllSignals -noupdate /testbenchfp/fma2rz/fmaflags/* +add wave -group AllSignals -noupdate /testbenchfp/fma2rz/resultselect/* +add wave -group AllSignals -noupdate /testbenchfp/fma1ru/* +add 
wave -group AllSignals -noupdate /testbenchfp/fma1ru/expadd/* +add wave -group AllSignals -noupdate /testbenchfp/fma1ru/mult/* +add wave -group AllSignals -noupdate /testbenchfp/fma1ru/align/* +add wave -group AllSignals -noupdate /testbenchfp/fma1ru/sign/* +add wave -group AllSignals -noupdate /testbenchfp/fma1ru/add/* +add wave -group AllSignals -noupdate /testbenchfp/fma1ru/loa/* +add wave -group AllSignals -noupdate /testbenchfp/fma2ru/* +add wave -group AllSignals -noupdate /testbenchfp/fma2ru/normalize/* +add wave -group AllSignals -noupdate /testbenchfp/fma2ru/fmaround/* +add wave -group AllSignals -noupdate /testbenchfp/fma2ru/resultsign/* +add wave -group AllSignals -noupdate /testbenchfp/fma2ru/fmaflags/* +add wave -group AllSignals -noupdate /testbenchfp/fma2ru/resultselect/* +add wave -group AllSignals -noupdate /testbenchfp/fma1rd/* +add wave -group AllSignals -noupdate /testbenchfp/fma1rd/expadd/* +add wave -group AllSignals -noupdate /testbenchfp/fma1rd/mult/* +add wave -group AllSignals -noupdate /testbenchfp/fma1rd/align/* +add wave -group AllSignals -noupdate /testbenchfp/fma1rd/sign/* +add wave -group AllSignals -noupdate /testbenchfp/fma1rd/add/* +add wave -group AllSignals -noupdate /testbenchfp/fma1rd/loa/* +add wave -group AllSignals -noupdate /testbenchfp/fma2rd/* +add wave -group AllSignals -noupdate /testbenchfp/fma2rd/normalize/* +add wave -group AllSignals -noupdate /testbenchfp/fma2rd/fmaround/* +add wave -group AllSignals -noupdate /testbenchfp/fma2rd/resultsign/* +add wave -group AllSignals -noupdate /testbenchfp/fma2rd/fmaflags/* +add wave -group AllSignals -noupdate /testbenchfp/fma2rd/resultselect/* +add wave -group AllSignals -noupdate /testbenchfp/fma1rnm/* +add wave -group AllSignals -noupdate /testbenchfp/fma1rnm/expadd/* +add wave -group AllSignals -noupdate /testbenchfp/fma1rnm/mult/* +add wave -group AllSignals -noupdate /testbenchfp/fma1rnm/align/* +add wave -group AllSignals -noupdate /testbenchfp/fma1rnm/sign/* +add wave 
-group AllSignals -noupdate /testbenchfp/fma1rnm/add/* +add wave -group AllSignals -noupdate /testbenchfp/fma1rnm/loa/* +add wave -group AllSignals -noupdate /testbenchfp/fma2rnm/* +add wave -group AllSignals -noupdate /testbenchfp/fma2rnm/normalize/* +add wave -group AllSignals -noupdate /testbenchfp/fma2rnm/fmaround/* +add wave -group AllSignals -noupdate /testbenchfp/fma2rnm/resultsign/* +add wave -group AllSignals -noupdate /testbenchfp/fma2rnm/fmaflags/* +add wave -group AllSignals -noupdate /testbenchfp/fma2rnm/resultselect/* \ No newline at end of file diff --git a/pipelined/src/fpu/fcvtfp.sv b/pipelined/src/fpu/fcvtfp.sv index fb8e1ad9a..f43d15661 100644 --- a/pipelined/src/fpu/fcvtfp.sv +++ b/pipelined/src/fpu/fcvtfp.sv @@ -1,6 +1,6 @@ `include "wally-config.vh" -module cvtfp ( +module fcvtfp ( input logic [10:0] XExpE, // input's exponent input logic [52:0] XManE, // input's mantissa input logic XSgnE, // input's sign diff --git a/pipelined/src/fpu/fcvtint.sv b/pipelined/src/fpu/fcvtint.sv index 6a6686993..97007d660 100644 --- a/pipelined/src/fpu/fcvtint.sv +++ b/pipelined/src/fpu/fcvtint.sv @@ -2,7 +2,7 @@ `include "wally-config.vh" // `include "../../config/rv64icfd/wally-config.vh" // `define XLEN 64 -module fcvt ( +module fcvtint ( input logic XSgnE, // X's sign input logic [10:0] XExpE, // X's exponent input logic [52:0] XManE, // X's fraction diff --git a/pipelined/src/fpu/fma.sv b/pipelined/src/fpu/fma.sv index 69d6fc8ee..71d990371 100644 --- a/pipelined/src/fpu/fma.sv +++ b/pipelined/src/fpu/fma.sv @@ -43,6 +43,7 @@ module fma( input logic XSgnM, YSgnM, // input signs - memory stage input logic [`NE-1:0] ZExpM, // input exponents - memory stage input logic [`NF:0] XManM, YManM, ZManM, // input mantissa - memory stage + input logic ZOrigDenormE, // is the original precision denormalized input logic XDenormE, YDenormE, ZDenormE, // is denorm input logic XZeroE, YZeroE, ZZeroE, // is zero - execute stage input logic XNaNM, YNaNM, ZNaNM, // is NaN @@ 
-72,6 +73,7 @@ module fma( logic PSgnE, PSgnM; logic [$clog2(3*`NF+7)-1:0] NormCntE, NormCntM; logic Mult; + logic ZOrigDenormM; fma1 fma1 (.XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE, .XDenormE, .YDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, @@ -81,11 +83,11 @@ module fma( // E/M pipeline registers flopenrc #(3*`NF+6) EMRegFma2(clk, reset, FlushM, ~StallM, SumE, SumM); flopenrc #(13) EMRegFma3(clk, reset, FlushM, ~StallM, ProdExpE, ProdExpM); - flopenrc #($clog2(3*`NF+7)+7) EMRegFma4(clk, reset, FlushM, ~StallM, - {AddendStickyE, KillProdE, InvZE, NormCntE, NegSumE, ZSgnEffE, PSgnE, FOpCtrlE[2]&~FOpCtrlE[1]&~FOpCtrlE[0]}, - {AddendStickyM, KillProdM, InvZM, NormCntM, NegSumM, ZSgnEffM, PSgnM, Mult}); + flopenrc #($clog2(3*`NF+7)+8) EMRegFma4(clk, reset, FlushM, ~StallM, + {AddendStickyE, KillProdE, InvZE, NormCntE, NegSumE, ZSgnEffE, PSgnE, FOpCtrlE[2]&~FOpCtrlE[1]&~FOpCtrlE[0], ZOrigDenormE}, + {AddendStickyM, KillProdM, InvZM, NormCntM, NegSumM, ZSgnEffM, PSgnM, Mult, ZOrigDenormM}); - fma2 fma2(.XSgnM, .YSgnM, .ZExpM, .XManM, .YManM, .ZManM, + fma2 fma2(.XSgnM, .YSgnM, .ZExpM, .XManM, .YManM, .ZManM, .ZOrigDenormM, .FrmM, .FmtM, .ProdExpM, .AddendStickyM, .KillProdM, .SumM, .NegSumM, .InvZM, .NormCntM, .ZSgnEffM, .PSgnM, .XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, .ZInfM, .XNaNM, .YNaNM, .ZNaNM, .XSNaNM, .YSNaNM, .ZSNaNM, .Mult, .FMAResM, .FMAFlgM); @@ -448,6 +450,7 @@ module fma2( input logic [3*`NF+5:0] SumM, // the positive sum input logic NegSumM, // was the sum negitive input logic InvZM, // do you invert Z + input logic ZOrigDenormM, // is the original precision denormalized input logic ZSgnEffM, // the modified Z sign - depends on instruction input logic PSgnM, // the product's sign input logic Mult, // multiply opperation @@ -530,7 +533,7 @@ module fma2( // Select the result /////////////////////////////////////////////////////////////////////////////// - resultselect resultselect(.XSgnM, .YSgnM, .ZExpM, .XManM, .YManM, 
.ZManM, + resultselect resultselect(.XSgnM, .YSgnM, .ZExpM, .XManM, .YManM, .ZManM, .ZOrigDenormM, .FrmM, .FmtM, .AddendStickyM, .KillProdM, .XInfM, .YInfM, .ZInfM, .XNaNM, .YNaNM, .ZNaNM, .RoundAdd, .ZSgnEffM, .PSgnM, .ResultSgn, .CalcPlus1, .Invalid, .Overflow, .Underflow, .ResultDenorm, .ResultExp, .ResultFrac, .FMAResM); @@ -1103,6 +1106,7 @@ module resultselect( input logic KillProdM, // set the product to zero before addition if the product is too small to matter input logic XInfM, YInfM, ZInfM, // inputs are infinity input logic XNaNM, YNaNM, ZNaNM, // inputs are NaN + input logic ZOrigDenormM, // is the original precision denormalized input logic ZSgnEffM, // the modified Z sign - depends on instruction input logic PSgnM, // the product's sign input logic ResultSgn, // the result's sign @@ -1122,7 +1126,7 @@ module resultselect( assign XNaNResult = {XSgnM, {`NE{1'b1}}, 1'b1, XManM[`NF-2:0]}; assign YNaNResult = {YSgnM, {`NE{1'b1}}, 1'b1, YManM[`NF-2:0]}; assign ZNaNResult = {ZSgnEffM, {`NE{1'b1}}, 1'b1, ZManM[`NF-2:0]}; - assign InvalidResult = {ResultSgn, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}}; + assign InvalidResult = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}}; end else begin assign XNaNResult = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}}; end @@ -1138,7 +1142,7 @@ module resultselect( assign XNaNResult = FmtM ? {XSgnM, {`NE{1'b1}}, 1'b1, XManM[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, XSgnM, {`NE1{1'b1}}, 1'b1, XManM[`NF-2:`NF-`NF1]}; assign YNaNResult = FmtM ? {YSgnM, {`NE{1'b1}}, 1'b1, YManM[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, YSgnM, {`NE1{1'b1}}, 1'b1, YManM[`NF-2:`NF-`NF1]}; assign ZNaNResult = FmtM ? {ZSgnEffM, {`NE{1'b1}}, 1'b1, ZManM[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, ZSgnEffM, {`NE1{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`NF1]}; - assign InvalidResult = FmtM ? {ResultSgn, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, ResultSgn, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)}; + assign InvalidResult = FmtM ? 
{1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)}; end else begin assign XNaNResult = FmtM ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)}; end @@ -1147,7 +1151,7 @@ module resultselect( {ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}} : ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`LEN1{1'b1}}, ResultSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} : {{`FLEN-`LEN1{1'b1}}, ResultSgn, {`NE1{1'b1}}, (`NF1)'(0)}; - assign KillProdResult = FmtM ? {ResultSgn, {ZExpM, ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})} : {{`FLEN-`LEN1{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE1-2:0], ZManM[`NF-1:`NF-`NF1]} + (RoundAdd[`NF-`NF1+`LEN1-2:`NF-`NF1]&{`LEN1-1{AddendStickyM}})}; + assign KillProdResult = FmtM ? {ResultSgn, {ZExpM, ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})} : {{`FLEN-`LEN1{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE1-2:1], ZExpM[0]&~ZOrigDenormM, ZManM[`NF-1:`NF-`NF1]} + (RoundAdd[`NF-`NF1+`LEN1-2:`NF-`NF1]&{`LEN1-1{AddendStickyM}})}; assign UnderflowResult = FmtM ? {ResultSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),(CalcPlus1&(AddendStickyM|FrmM[1]))} : {{`FLEN-`LEN1{1'b1}}, {ResultSgn, (`LEN1-1)'(0)} + {(`LEN1-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}}; assign InfResult = FmtM ? {InfSgn, {`NE{1'b1}}, (`NF)'(0)} : {{`FLEN-`LEN1{1'b1}}, InfSgn, {`NE1{1'b1}}, (`NF1)'(0)}; assign NormResult = FmtM ? 
{ResultSgn, ResultExp, ResultFrac} : {{`FLEN-`LEN1{1'b1}}, ResultSgn, ResultExp[`NE1-1:0], ResultFrac[`NF-1:`NF-`NF1]}; @@ -1160,7 +1164,7 @@ module resultselect( XNaNResult = {XSgnM, {`NE{1'b1}}, 1'b1, XManM[`NF-2:0]}; YNaNResult = {YSgnM, {`NE{1'b1}}, 1'b1, YManM[`NF-2:0]}; ZNaNResult = {ZSgnEffM, {`NE{1'b1}}, 1'b1, ZManM[`NF-2:0]}; - InvalidResult = {ResultSgn, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}}; + InvalidResult = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}}; end else begin XNaNResult = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}}; end @@ -1177,13 +1181,13 @@ module resultselect( XNaNResult = {{`FLEN-`LEN1{1'b1}}, XSgnM, {`NE1{1'b1}}, 1'b1, XManM[`NF-2:`NF-`NF1]}; YNaNResult = {{`FLEN-`LEN1{1'b1}}, YSgnM, {`NE1{1'b1}}, 1'b1, YManM[`NF-2:`NF-`NF1]}; ZNaNResult = {{`FLEN-`LEN1{1'b1}}, ZSgnEffM, {`NE1{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`NF1]}; - InvalidResult = {{`FLEN-`LEN1{1'b1}}, ResultSgn, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)}; + InvalidResult = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)}; end else begin XNaNResult = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)}; end OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? 
{{`FLEN-`LEN1{1'b1}}, ResultSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} : {{`FLEN-`LEN1{1'b1}}, ResultSgn, {`NE1{1'b1}}, (`NF1)'(0)}; - KillProdResult = {{`FLEN-`LEN1{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE1-2:0], ZManM[`NF-1:`NF-`NF1]} + (RoundAdd[`NF-`NF1+`LEN1-2:`NF-`NF1]&{`LEN1-1{AddendStickyM}})}; + KillProdResult = {{`FLEN-`LEN1{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE1-2:1], ZExpM[0]&~ZOrigDenormM, ZManM[`NF-1:`NF-`NF1]} + (RoundAdd[`NF-`NF1+`LEN1-2:`NF-`NF1]&{`LEN1-1{AddendStickyM}})}; UnderflowResult = {{`FLEN-`LEN1{1'b1}}, {ResultSgn, (`LEN1-1)'(0)} + {(`LEN1-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}}; InfResult = {{`FLEN-`LEN1{1'b1}}, InfSgn, {`NE1{1'b1}}, (`NF1)'(0)}; NormResult = {{`FLEN-`LEN1{1'b1}}, ResultSgn, ResultExp[`NE1-1:0], ResultFrac[`NF-1:`NF-`NF1]}; @@ -1193,14 +1197,14 @@ module resultselect( XNaNResult = {{`FLEN-`LEN2{1'b1}}, XSgnM, {`NE2{1'b1}}, 1'b1, XManM[`NF-2:`NF-`NF2]}; YNaNResult = {{`FLEN-`LEN2{1'b1}}, YSgnM, {`NE2{1'b1}}, 1'b1, YManM[`NF-2:`NF-`NF2]}; ZNaNResult = {{`FLEN-`LEN2{1'b1}}, ZSgnEffM, {`NE2{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`NF2]}; - InvalidResult = {{`FLEN-`LEN2{1'b1}}, ResultSgn, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)}; + InvalidResult = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)}; end else begin XNaNResult = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)}; end OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? 
{{`FLEN-`LEN2{1'b1}}, ResultSgn, {`NE2-1{1'b1}}, 1'b0, {`NF2{1'b1}}} : {{`FLEN-`LEN2{1'b1}}, ResultSgn, {`NE2{1'b1}}, (`NF2)'(0)}; - KillProdResult = {{`FLEN-`LEN2{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE2-2:0], ZManM[`NF-1:`NF-`NF2]} + (RoundAdd[`NF-`NF2+`LEN2-2:`NF-`NF2]&{`LEN2-1{AddendStickyM}})}; + KillProdResult = {{`FLEN-`LEN2{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE2-2:1], ZExpM[0]&~ZOrigDenormM, ZManM[`NF-1:`NF-`NF2]} + (RoundAdd[`NF-`NF2+`LEN2-2:`NF-`NF2]&{`LEN2-1{AddendStickyM}})}; UnderflowResult = {{`FLEN-`LEN2{1'b1}}, {ResultSgn, (`LEN2-1)'(0)} + {(`LEN2-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}}; InfResult = {{`FLEN-`LEN2{1'b1}}, InfSgn, {`NE2{1'b1}}, (`NF2)'(0)}; NormResult = {{`FLEN-`LEN2{1'b1}}, ResultSgn, ResultExp[`NE2-1:0], ResultFrac[`NF-1:`NF-`NF2]}; @@ -1231,7 +1235,7 @@ module resultselect( XNaNResult = {XSgnM, {`NE{1'b1}}, 1'b1, XManM[`NF-2:0]}; YNaNResult = {YSgnM, {`NE{1'b1}}, 1'b1, YManM[`NF-2:0]}; ZNaNResult = {ZSgnEffM, {`NE{1'b1}}, 1'b1, ZManM[`NF-2:0]}; - InvalidResult = {ResultSgn, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}}; + InvalidResult = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}}; end else begin XNaNResult = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}}; end @@ -1248,13 +1252,13 @@ module resultselect( XNaNResult = {{`FLEN-`D_LEN{1'b1}}, XSgnM, {`D_NE{1'b1}}, 1'b1, XManM[`NF-2:`NF-`D_NF]}; YNaNResult = {{`FLEN-`D_LEN{1'b1}}, YSgnM, {`D_NE{1'b1}}, 1'b1, YManM[`NF-2:`NF-`D_NF]}; ZNaNResult = {{`FLEN-`D_LEN{1'b1}}, ZSgnEffM, {`D_NE{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`D_NF]}; - InvalidResult = {{`FLEN-`D_LEN{1'b1}}, ResultSgn, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)}; + InvalidResult = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)}; end else begin XNaNResult = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)}; end OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? 
{{`FLEN-`D_LEN{1'b1}}, ResultSgn, {`D_NE-1{1'b1}}, 1'b0, {`D_NF{1'b1}}} : {{`FLEN-`D_LEN{1'b1}}, ResultSgn, {`D_NE{1'b1}}, (`D_NF)'(0)}; - KillProdResult = {{`FLEN-`D_LEN{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`D_NE-2:0], ZManM[`NF-1:`NF-`D_NF]} + (RoundAdd[`NF-`D_NF+`D_LEN-2:`NF-`D_NF]&{`D_LEN-1{AddendStickyM}})}; + KillProdResult = {{`FLEN-`D_LEN{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`D_NE-2:1], ZExpM[0]&~ZOrigDenormM, ZManM[`NF-1:`NF-`D_NF]} + (RoundAdd[`NF-`D_NF+`D_LEN-2:`NF-`D_NF]&{`D_LEN-1{AddendStickyM}})}; UnderflowResult = {{`FLEN-`D_LEN{1'b1}}, {ResultSgn, (`D_LEN-1)'(0)} + {(`D_LEN-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}}; InfResult = {{`FLEN-`D_LEN{1'b1}}, InfSgn, {`D_NE{1'b1}}, (`D_NF)'(0)}; NormResult = {{`FLEN-`D_LEN{1'b1}}, ResultSgn, ResultExp[`D_NE-1:0], ResultFrac[`NF-1:`NF-`D_NF]}; @@ -1264,14 +1268,14 @@ module resultselect( XNaNResult = {{`FLEN-`S_LEN{1'b1}}, XSgnM, {`S_NE{1'b1}}, 1'b1, XManM[`NF-2:`NF-`S_NF]}; YNaNResult = {{`FLEN-`S_LEN{1'b1}}, YSgnM, {`S_NE{1'b1}}, 1'b1, YManM[`NF-2:`NF-`S_NF]}; ZNaNResult = {{`FLEN-`S_LEN{1'b1}}, ZSgnEffM, {`S_NE{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`S_NF]}; - InvalidResult = {{`FLEN-`S_LEN{1'b1}}, ResultSgn, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)}; + InvalidResult = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)}; end else begin XNaNResult = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)}; end OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? 
{{`FLEN-`S_LEN{1'b1}}, ResultSgn, {`S_NE-1{1'b1}}, 1'b0, {`S_NF{1'b1}}} : {{`FLEN-`S_LEN{1'b1}}, ResultSgn, {`S_NE{1'b1}}, (`S_NF)'(0)}; - KillProdResult = {{`FLEN-`S_LEN{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE2-2:0], ZManM[`NF-1:`NF-`S_NF]} + (RoundAdd[`NF-`S_NF+`S_LEN-2:`NF-`S_NF]&{`S_LEN-1{AddendStickyM}})}; + KillProdResult = {{`FLEN-`S_LEN{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`S_NE-2:1], ZExpM[0]&~ZOrigDenormM, ZManM[`NF-1:`NF-`S_NF]} + (RoundAdd[`NF-`S_NF+`S_LEN-2:`NF-`S_NF]&{`S_LEN-1{AddendStickyM}})}; UnderflowResult = {{`FLEN-`S_LEN{1'b1}}, {ResultSgn, (`S_LEN-1)'(0)} + {(`S_LEN-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}}; InfResult = {{`FLEN-`S_LEN{1'b1}}, InfSgn, {`S_NE{1'b1}}, (`S_NF)'(0)}; NormResult = {{`FLEN-`S_LEN{1'b1}}, ResultSgn, ResultExp[`S_NE-1:0], ResultFrac[`NF-1:`NF-`S_NF]}; @@ -1289,7 +1293,7 @@ module resultselect( OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`H_LEN{1'b1}}, ResultSgn, {`H_NE-1{1'b1}}, 1'b0, {`H_NF{1'b1}}} : {{`FLEN-`H_LEN{1'b1}}, ResultSgn, {`H_NE{1'b1}}, (`H_NF)'(0)}; - KillProdResult = {{`FLEN-`H_LEN{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`H_NE-2:0], ZManM[`NF-1:`NF-`H_NF]} + (RoundAdd[`NF-`H_NF+`H_LEN-2:`NF-`H_NF]&{`H_LEN-1{AddendStickyM}})}; + KillProdResult = {{`FLEN-`H_LEN{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`H_NE-2:1],ZExpM[0]&~ZOrigDenormM, ZManM[`NF-1:`NF-`H_NF]} + (RoundAdd[`NF-`H_NF+`H_LEN-2:`NF-`H_NF]&{`H_LEN-1{AddendStickyM}})}; UnderflowResult = {{`FLEN-`H_LEN{1'b1}}, {ResultSgn, (`H_LEN-1)'(0)} + {(`H_LEN-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}}; InfResult = {{`FLEN-`H_LEN{1'b1}}, InfSgn, {`H_NE{1'b1}}, (`H_NF)'(0)}; NormResult = {{`FLEN-`H_LEN{1'b1}}, ResultSgn, ResultExp[`H_NE-1:0], ResultFrac[`NF-1:`NF-`H_NF]}; diff --git a/pipelined/src/fpu/fpu.sv b/pipelined/src/fpu/fpu.sv index 9a78a36b2..b4b5a2e98 100755 --- a/pipelined/src/fpu/fpu.sv +++ b/pipelined/src/fpu/fpu.sv @@ -104,6 +104,7 @@ module fpu ( logic XInfQ, YInfQ; 
// is the input infinity - divide logic XExpMaxE; // is the exponent all ones (max value) logic XNormE; // is normal + logic ZOrigDenormE; logic FmtQ; logic FOpCtrlQ; @@ -176,7 +177,7 @@ module fpu ( // unpack unit // - splits FP inputs into their various parts // - does some classifications (SNaN, NaN, Denorm, Norm, Zero, Infifnity) - unpack unpack (.X(FSrcXE), .Y(FSrcYE), .Z(FSrcZE), .FmtE, + unpack unpack (.X(FSrcXE), .Y(FSrcYE), .Z(FSrcZE), .FmtE, .ZOrigDenormE, .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE, .XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE, .XDenormE, .YDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE, .XExpMaxE, .XNormE); @@ -192,7 +193,7 @@ module fpu ( .XSgnM, .YSgnM, .ZExpM, .XManM, .YManM, .ZManM, .XNaNM, .YNaNM, .ZNaNM, .XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, .ZInfM, .XSNaNM, .YSNaNM, .ZSNaNM, - .FOpCtrlE, + .FOpCtrlE, .ZOrigDenormE, .FmtE, .FmtM, .FrmM, .FMAFlgM, .FMAResM); @@ -213,12 +214,12 @@ module fpu ( .FDivBusyE, .done(FDivSqrtDoneE), .AS_Result(FDivResM), .Flags(FDivFlgM)); // other FP execution units - cvtfp cvtfp (.XExpE, .XManE, .XSgnE, .XZeroE, .XDenormE, .XInfE, .XNaNE, .XSNaNE, .FrmE, .FmtE, .CvtFpResE, .CvtFpFlgE); + fcvtfp fcvtfp (.XExpE, .XManE, .XSgnE, .XZeroE, .XDenormE, .XInfE, .XNaNE, .XSNaNE, .FrmE, .FmtE, .CvtFpResE, .CvtFpFlgE); fcmp fcmp (.FmtE, .FOpCtrlE, .XSgnE, .YSgnE, .XExpE, .YExpE, .XManE, .YManE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .XSNaNE, .YSNaNE, .FSrcXE, .FSrcYE, .CmpNVE, .CmpResE); fsgn fsgn (.SgnOpCodeE(FOpCtrlE[1:0]), .XSgnE, .YSgnE, .FSrcXE, .FmtE, .SgnResE); fclassify fclassify (.XSgnE, .XDenormE, .XZeroE, .XNaNE, .XInfE, .XNormE, .XSNaNE, .ClassResE); - fcvt fcvt (.XSgnE, .XExpE, .XManE, .XZeroE, .XNaNE, .XInfE, .XDenormE, .ForwardedSrcAE, .FOpCtrlE, .FmtE, .FrmE, + fcvtint fcvtint (.XSgnE, .XExpE, .XManE, .XZeroE, .XNaNE, .XInfE, .XDenormE, .ForwardedSrcAE, .FOpCtrlE, .FmtE, .FrmE, .CvtResE, .CvtFlgE); // data to be stored in memory - to IEU diff 
--git a/pipelined/src/fpu/unpack.sv b/pipelined/src/fpu/unpack.sv index 1c0589e10..eadaa7f2b 100644 --- a/pipelined/src/fpu/unpack.sv +++ b/pipelined/src/fpu/unpack.sv @@ -2,7 +2,7 @@ module unpack ( input logic [`FLEN-1:0] X, Y, Z, // inputs from register file - input logic [`FPSIZES/3:0] FmtE, // format signal 00 - single 10 - double 11 - quad 10 - half + input logic [`FPSIZES/3:0] FmtE, // format signal 00 - single 01 - double 11 - quad 10 - half output logic XSgnE, YSgnE, ZSgnE, // sign bits of XYZ output logic [`NE-1:0] XExpE, YExpE, ZExpE, // exponents of XYZ (converted to largest supported precision) output logic [`NF:0] XManE, YManE, ZManE, // mantissas of XYZ (converted to largest supported precision) @@ -12,6 +12,7 @@ module unpack ( output logic XDenormE, YDenormE, ZDenormE, // is XYZ denormalized output logic XZeroE, YZeroE, ZZeroE, // is XYZ zero output logic XInfE, YInfE, ZInfE, // is XYZ infinity + output logic ZOrigDenormE, // is the original precision denormalized output logic XExpMaxE // does X have the maximum exponent (NaN or Inf) ); @@ -47,10 +48,11 @@ module unpack ( assign XExpMaxE = &XExpE; assign YExpMaxE = &YExpE; assign ZExpMaxE = &ZExpE; + + assign ZOrigDenormE = 1'b0; end else if (`FPSIZES == 2) begin // if there are 2 floating point formats supported - //***need better names for these constants // largest format | smaller format //---------------------------------- @@ -70,7 +72,8 @@ module unpack ( // quad and half // double and half - logic [`LEN1-1:0] XLen1, YLen1, ZLen1; // Remove NaN boxing or NaN, if not properly NaN boxed + logic [`LEN1-1:0] XLen1, YLen1, ZLen1; // Remove NaN boxing or NaN, if not properly NaN boxed + logic XOrigDenormE, YOrigDenormE; // the original value of XYZ is denormalized // Check NaN boxing, If the value is not properly NaN boxed, set the value to a quiet NaN assign XLen1 = &X[`FLEN-1:`LEN1] ? 
X[`LEN1-1:0] : {1'b0, {`NE1+1{1'b1}}, (`NF1-1)'(0)}; @@ -91,9 +94,15 @@ module unpack ( // also need to take into account possible zero/denorm/inf/NaN values // extract the exponent, converting the smaller exponent into the larger precision if nessisary - assign XExpE = FmtE ? X[`FLEN-2:`NF] : {XLen1[`LEN1-2], {`NE-`NE1{~XLen1[`LEN1-2]&~XExpZero|XExpMaxE}}, XLen1[`LEN1-3:`NF1]}; - assign YExpE = FmtE ? Y[`FLEN-2:`NF] : {YLen1[`LEN1-2], {`NE-`NE1{~YLen1[`LEN1-2]&~YExpZero|YExpMaxE}}, YLen1[`LEN1-3:`NF1]}; - assign ZExpE = FmtE ? Z[`FLEN-2:`NF] : {ZLen1[`LEN1-2], {`NE-`NE1{~ZLen1[`LEN1-2]&~ZExpZero|ZExpMaxE}}, ZLen1[`LEN1-3:`NF1]}; + // - if the original precision had a denormal number convert the exponent value 1 + assign XExpE = FmtE ? X[`FLEN-2:`NF] : XOrigDenormE ? {1'b0, {`NE-`NE1{1'b1}}, (`NE1-1)'(1)} : {XLen1[`LEN1-2], {`NE-`NE1{~XLen1[`LEN1-2]&~XExpZero|XExpMaxE}}, XLen1[`LEN1-3:`NF1]}; + assign YExpE = FmtE ? Y[`FLEN-2:`NF] : YOrigDenormE ? {1'b0, {`NE-`NE1{1'b1}}, (`NE1-1)'(1)} : {YLen1[`LEN1-2], {`NE-`NE1{~YLen1[`LEN1-2]&~YExpZero|YExpMaxE}}, YLen1[`LEN1-3:`NF1]}; + assign ZExpE = FmtE ? Z[`FLEN-2:`NF] : ZOrigDenormE ? {1'b0, {`NE-`NE1{1'b1}}, (`NE1-1)'(1)} : {ZLen1[`LEN1-2], {`NE-`NE1{~ZLen1[`LEN1-2]&~ZExpZero|ZExpMaxE}}, ZLen1[`LEN1-3:`NF1]}; + + // is the input (in it's original format) denormalized + assign XOrigDenormE = (FmtE ? 0 : |XLen1[`LEN1-2:`NF1]) & ~XFracZero; + assign YOrigDenormE = (FmtE ? 0 : |YLen1[`LEN1-2:`NF1]) & ~YFracZero; + assign ZOrigDenormE = (FmtE ? 0 : |ZLen1[`LEN1-2:`NF1]) & ~ZFracZero; // extract the fraction, add trailing zeroes to the mantissa if nessisary assign XFracE = FmtE ? 
X[`NF-1:0] : {XLen1[`NF1-1:0], (`NF-`NF1)'(0)}; @@ -130,8 +139,9 @@ module unpack ( // quad and double and half // quad and single and half - logic [`LEN1-1:0] XLen1, YLen1, ZLen1; // Remove NaN boxing or NaN, if not properly NaN boxed for larger percision - logic [`LEN2-1:0] XLen2, YLen2, ZLen2; // Remove NaN boxing or NaN, if not properly NaN boxed for smallest precision + logic [`LEN1-1:0] XLen1, YLen1, ZLen1; // Remove NaN boxing or NaN, if not properly NaN boxed for larger percision + logic [`LEN2-1:0] XLen2, YLen2, ZLen2; // Remove NaN boxing or NaN, if not properly NaN boxed for smallest precision + logic XOrigDenormE, YOrigDenormE; // the original value of XYZ is denormalized // Check NaN boxing, If the value is not properly NaN boxed, set the value to a quiet NaN - for larger precision assign XLen1 = &X[`FLEN-1:`LEN1] ? X[`LEN1-1:0] : {1'b0, {`NE1+1{1'b1}}, (`NF1-1)'(0)}; @@ -143,6 +153,75 @@ module unpack ( assign YLen2 = &Y[`FLEN-1:`LEN2] ? Y[`LEN2-1:0] : {1'b0, {`NE2+1{1'b1}}, (`NF2-1)'(0)}; assign ZLen2 = &Z[`FLEN-1:`LEN2] ? 
Z[`LEN2-1:0] : {1'b0, {`NE2+1{1'b1}}, (`NF2-1)'(0)}; + // There are 2 case statements + // - one for other singals and one for sgn/exp/frac + // - need two for the dependencies in the expoenent calculation + always_comb begin + case (FmtE) + `FMT: begin // if input is largest precision (`FLEN - ie quad or double) + + // This is the original format so set OrigDenorm to 0 + XOrigDenormE = 1'b0; + YOrigDenormE = 1'b0; + ZOrigDenormE = 1'b0; + + // is the exponent non-zero + XExpNonzero = |X[`FLEN-2:`NF]; + YExpNonzero = |Y[`FLEN-2:`NF]; + ZExpNonzero = |Z[`FLEN-2:`NF]; + + // is the exponent all 1's + XExpMaxE = &X[`FLEN-2:`NF]; + YExpMaxE = &Y[`FLEN-2:`NF]; + ZExpMaxE = &Z[`FLEN-2:`NF]; + end + `FMT1: begin // if input is larger precsion (`LEN1 - double or single) + + // is the input (in it's original format) denormalized + XOrigDenormE = ~|XLen1[`LEN1-2:`NF1] & ~XFracZero; + YOrigDenormE = ~|YLen1[`LEN1-2:`NF1] & ~YFracZero; + ZOrigDenormE = ~|ZLen1[`LEN1-2:`NF1] & ~ZFracZero; + + // is the exponent non-zero + XExpNonzero = |XLen1[`LEN1-2:`NF1]; + YExpNonzero = |YLen1[`LEN1-2:`NF1]; + ZExpNonzero = |ZLen1[`LEN1-2:`NF1]; + + // is the exponent all 1's + XExpMaxE = &XLen1[`LEN1-2:`NF1]; + YExpMaxE = &YLen1[`LEN1-2:`NF1]; + ZExpMaxE = &ZLen1[`LEN1-2:`NF1]; + end + `FMT2: begin // if input is smallest precsion (`LEN2 - single or half) + + // is the input (in it's original format) denormalized + XOrigDenormE = ~|XLen2[`LEN2-2:`NF2] & ~XFracZero; + YOrigDenormE = ~|YLen2[`LEN2-2:`NF2] & ~YFracZero; + ZOrigDenormE = ~|ZLen2[`LEN2-2:`NF2] & ~ZFracZero; + + // is the exponent non-zero + XExpNonzero = |XLen2[`LEN2-2:`NF2]; + YExpNonzero = |YLen2[`LEN2-2:`NF2]; + ZExpNonzero = |ZLen2[`LEN2-2:`NF2]; + + // is the exponent all 1's + XExpMaxE = &XLen2[`LEN2-2:`NF2]; + YExpMaxE = &YLen2[`LEN2-2:`NF2]; + ZExpMaxE = &ZLen2[`LEN2-2:`NF2]; + end + default: begin + XOrigDenormE = 0; + YOrigDenormE = 0; + ZOrigDenormE = 0; + XExpNonzero = 0; + YExpNonzero = 0; + ZExpNonzero = 0; + 
XExpMaxE = 0; + YExpMaxE = 0; + ZExpMaxE = 0; + end + endcase + end always_comb begin case (FmtE) `FMT: begin // if input is largest precision (`FLEN - ie quad or double) @@ -160,16 +239,6 @@ module unpack ( XFracE = X[`NF-1:0]; YFracE = Y[`NF-1:0]; ZFracE = Z[`NF-1:0]; - - // is the exponent non-zero - XExpNonzero = |X[`FLEN-2:`NF]; - YExpNonzero = |Y[`FLEN-2:`NF]; - ZExpNonzero = |Z[`FLEN-2:`NF]; - - // is the exponent all 1's - XExpMaxE = &X[`FLEN-2:`NF]; - YExpMaxE = &Y[`FLEN-2:`NF]; - ZExpMaxE = &Z[`FLEN-2:`NF]; end `FMT1: begin // if input is larger precsion (`LEN1 - double or single) @@ -187,24 +256,14 @@ module unpack ( // also need to take into account possible zero/denorm/inf/NaN values // convert the larger precision's exponent to use the largest precision's bias - XExpE = {XLen1[`LEN1-2], {`NE-`NE1{~XLen1[`LEN1-2]&~XExpZero|XExpMaxE}}, XLen1[`LEN1-3:`NF1]}; - YExpE = {YLen1[`LEN1-2], {`NE-`NE1{~YLen1[`LEN1-2]&~YExpZero|YExpMaxE}}, YLen1[`LEN1-3:`NF1]}; - ZExpE = {ZLen1[`LEN1-2], {`NE-`NE1{~ZLen1[`LEN1-2]&~ZExpZero|ZExpMaxE}}, ZLen1[`LEN1-3:`NF1]}; + XExpE = XOrigDenormE ? {1'b0, {`NE-`NE1{1'b1}}, (`NE1-1)'(1)} : {XLen1[`LEN1-2], {`NE-`NE1{~XLen1[`LEN1-2]&~XExpZero|XExpMaxE}}, XLen1[`LEN1-3:`NF1]}; + YExpE = YOrigDenormE ? {1'b0, {`NE-`NE1{1'b1}}, (`NE1-1)'(1)} : {YLen1[`LEN1-2], {`NE-`NE1{~YLen1[`LEN1-2]&~YExpZero|YExpMaxE}}, YLen1[`LEN1-3:`NF1]}; + ZExpE = ZOrigDenormE ? 
{1'b0, {`NE-`NE1{1'b1}}, (`NE1-1)'(1)} : {ZLen1[`LEN1-2], {`NE-`NE1{~ZLen1[`LEN1-2]&~ZExpZero|ZExpMaxE}}, ZLen1[`LEN1-3:`NF1]}; // extract the fraction and add the nessesary trailing zeros XFracE = {XLen1[`NF1-1:0], (`NF-`NF1)'(0)}; YFracE = {YLen1[`NF1-1:0], (`NF-`NF1)'(0)}; ZFracE = {ZLen1[`NF1-1:0], (`NF-`NF1)'(0)}; - - // is the exponent non-zero - XExpNonzero = |XLen1[`LEN1-2:`NF1]; - YExpNonzero = |YLen1[`LEN1-2:`NF1]; - ZExpNonzero = |ZLen1[`LEN1-2:`NF1]; - - // is the exponent all 1's - XExpMaxE = &XLen1[`LEN1-2:`NF1]; - YExpMaxE = &YLen1[`LEN1-2:`NF1]; - ZExpMaxE = &ZLen1[`LEN1-2:`NF1]; end `FMT2: begin // if input is smallest precsion (`LEN2 - single or half) @@ -222,24 +281,14 @@ module unpack ( // also need to take into account possible zero/denorm/inf/NaN values // convert the smallest precision's exponent to use the largest precision's bias - XExpE = {XLen2[`LEN2-2], {`NE-`NE2{~XLen2[`LEN2-2]&~XExpZero|XExpMaxE}}, XLen2[`LEN2-3:`NF2]}; - YExpE = {YLen2[`LEN2-2], {`NE-`NE2{~YLen2[`LEN2-2]&~YExpZero|YExpMaxE}}, YLen2[`LEN2-3:`NF2]}; - ZExpE = {ZLen2[`LEN2-2], {`NE-`NE2{~ZLen2[`LEN2-2]&~ZExpZero|ZExpMaxE}}, ZLen2[`LEN2-3:`NF2]}; + XExpE = XOrigDenormE ? {1'b0, {`NE-`NE2{1'b1}}, (`NE2-1)'(1)} : {XLen2[`LEN2-2], {`NE-`NE2{~XLen2[`LEN2-2]&~XExpZero|XExpMaxE}}, XLen2[`LEN2-3:`NF2]}; + YExpE = YOrigDenormE ? {1'b0, {`NE-`NE2{1'b1}}, (`NE2-1)'(1)} : {YLen2[`LEN2-2], {`NE-`NE2{~YLen2[`LEN2-2]&~YExpZero|YExpMaxE}}, YLen2[`LEN2-3:`NF2]}; + ZExpE = ZOrigDenormE ? 
{1'b0, {`NE-`NE2{1'b1}}, (`NE2-1)'(1)} : {ZLen2[`LEN2-2], {`NE-`NE2{~ZLen2[`LEN2-2]&~ZExpZero|ZExpMaxE}}, ZLen2[`LEN2-3:`NF2]}; // extract the fraction and add the nessesary trailing zeros XFracE = {XLen2[`NF2-1:0], (`NF-`NF2)'(0)}; YFracE = {YLen2[`NF2-1:0], (`NF-`NF2)'(0)}; ZFracE = {ZLen2[`NF2-1:0], (`NF-`NF2)'(0)}; - - // is the exponent non-zero - XExpNonzero = |XLen2[`LEN2-2:`NF2]; - YExpNonzero = |YLen2[`LEN2-2:`NF2]; - ZExpNonzero = |ZLen2[`LEN2-2:`NF2]; - - // is the exponent all 1's - XExpMaxE = &XLen2[`LEN2-2:`NF2]; - YExpMaxE = &YLen2[`LEN2-2:`NF2]; - ZExpMaxE = &ZLen2[`LEN2-2:`NF2]; end default: begin XSgnE = 0; @@ -251,12 +300,6 @@ module unpack ( XFracE = 0; YFracE = 0; ZFracE = 0; - XExpNonzero = 0; - YExpNonzero = 0; - ZExpNonzero = 0; - XExpMaxE = 0; - YExpMaxE = 0; - ZExpMaxE = 0; end endcase end @@ -272,9 +315,10 @@ module unpack ( // `Q_FMT | `D_FMT | `S_FMT | `H_FMT precision's format value - Q=11 D=01 S=00 H=10 - logic [`LEN1-1:0] XLen1, YLen1, ZLen1; // Remove NaN boxing or NaN, if not properly NaN boxed for double percision - logic [`LEN2-1:0] XLen2, YLen2, ZLen2; // Remove NaN boxing or NaN, if not properly NaN boxed for single percision - logic [`LEN2-1:0] XLen3, YLen3, ZLen3; // Remove NaN boxing or NaN, if not properly NaN boxed for half percision + logic [`D_LEN-1:0] XLen1, YLen1, ZLen1; // Remove NaN boxing or NaN, if not properly NaN boxed for double percision + logic [`S_LEN-1:0] XLen2, YLen2, ZLen2; // Remove NaN boxing or NaN, if not properly NaN boxed for single percision + logic [`H_LEN-1:0] XLen3, YLen3, ZLen3; // Remove NaN boxing or NaN, if not properly NaN boxed for half percision + logic XOrigDenormE, YOrigDenormE; // the original value of XYZ is denormalized // Check NaN boxing, If the value is not properly NaN boxed, set the value to a quiet NaN - for double precision assign XLen1 = &X[`Q_LEN-1:`D_LEN] ? 
X[`D_LEN-1:0] : {1'b0, {`D_NE+1{1'b1}}, (`D_NF-1)'(0)}; @@ -291,6 +335,83 @@ module unpack ( assign YLen3 = &Y[`Q_LEN-1:`H_LEN] ? Y[`H_LEN-1:0] : {1'b0, {`H_NE+1{1'b1}}, (`H_NF-1)'(0)}; assign ZLen3 = &Z[`Q_LEN-1:`H_LEN] ? Z[`H_LEN-1:0] : {1'b0, {`H_NE+1{1'b1}}, (`H_NF-1)'(0)}; + + // There are 2 case statements + // - one for other singals and one for sgn/exp/frac + // - need two for the dependencies in the expoenent calculation + always_comb begin + case (FmtE) + 2'b11: begin // if input is quad percision + + // This is the original format so set OrigDenorm to 0 + XOrigDenormE = 1'b0; + YOrigDenormE = 1'b0; + ZOrigDenormE = 1'b0; + + // is the exponent non-zero + XExpNonzero = |X[`Q_LEN-2:`Q_NF]; + YExpNonzero = |Y[`Q_LEN-2:`Q_NF]; + ZExpNonzero = |Z[`Q_LEN-2:`Q_NF]; + + // is the exponent all 1's + XExpMaxE = &X[`Q_LEN-2:`Q_NF]; + YExpMaxE = &Y[`Q_LEN-2:`Q_NF]; + ZExpMaxE = &Z[`Q_LEN-2:`Q_NF]; + end + 2'b01: begin // if input is double percision + + // is the exponent all 1's + XExpMaxE = &XLen1[`D_LEN-2:`D_NF]; + YExpMaxE = &YLen1[`D_LEN-2:`D_NF]; + ZExpMaxE = &ZLen1[`D_LEN-2:`D_NF]; + + // is the input (in it's original format) denormalized + XOrigDenormE = ~|XLen1[`D_LEN-2:`D_NF] & ~XFracZero; + YOrigDenormE = ~|YLen1[`D_LEN-2:`D_NF] & ~YFracZero; + ZOrigDenormE = ~|ZLen1[`D_LEN-2:`D_NF] & ~ZFracZero; + + // is the exponent non-zero + XExpNonzero = |XLen1[`D_LEN-2:`D_NF]; + YExpNonzero = |YLen1[`D_LEN-2:`D_NF]; + ZExpNonzero = |ZLen1[`D_LEN-2:`D_NF]; + end + 2'b00: begin // if input is single percision + + // is the exponent all 1's + XExpMaxE = &XLen2[`S_LEN-2:`S_NF]; + YExpMaxE = &YLen2[`S_LEN-2:`S_NF]; + ZExpMaxE = &ZLen2[`S_LEN-2:`S_NF]; + + // is the input (in it's original format) denormalized + XOrigDenormE = ~|XLen2[`S_LEN-2:`S_NF] & ~XFracZero; + YOrigDenormE = ~|YLen2[`S_LEN-2:`S_NF] & ~YFracZero; + ZOrigDenormE = ~|ZLen2[`S_LEN-2:`S_NF] & ~ZFracZero; + + // is the exponent non-zero + XExpNonzero = |XLen2[`S_LEN-2:`S_NF]; + YExpNonzero = 
|YLen2[`S_LEN-2:`S_NF]; + ZExpNonzero = |ZLen2[`S_LEN-2:`S_NF]; + end + 2'b10: begin // if input is half percision + + // is the exponent all 1's + XExpMaxE = &XLen3[`H_LEN-2:`H_NF]; + YExpMaxE = &YLen3[`H_LEN-2:`H_NF]; + ZExpMaxE = &ZLen3[`H_LEN-2:`H_NF]; + + // is the input (in it's original format) denormalized + XOrigDenormE = ~|XLen3[`H_LEN-2:`H_NF] & ~XFracZero; + YOrigDenormE = ~|YLen3[`H_LEN-2:`H_NF] & ~YFracZero; + ZOrigDenormE = ~|ZLen3[`H_LEN-2:`H_NF] & ~ZFracZero; + + // is the exponent non-zero + XExpNonzero = |XLen3[`H_LEN-2:`H_NF]; + YExpNonzero = |YLen3[`H_LEN-2:`H_NF]; + ZExpNonzero = |ZLen3[`H_LEN-2:`H_NF]; + end + endcase + end + always_comb begin case (FmtE) 2'b11: begin // if input is quad percision @@ -308,16 +429,6 @@ module unpack ( XFracE = X[`Q_NF-1:0]; YFracE = Y[`Q_NF-1:0]; ZFracE = Z[`Q_NF-1:0]; - - // is the exponent non-zero - XExpNonzero = |X[`Q_LEN-2:`Q_NF]; - YExpNonzero = |Y[`Q_LEN-2:`Q_NF]; - ZExpNonzero = |Z[`Q_LEN-2:`Q_NF]; - - // is the exponent all 1's - XExpMaxE = &X[`Q_LEN-2:`Q_NF]; - YExpMaxE = &Y[`Q_LEN-2:`Q_NF]; - ZExpMaxE = &Z[`Q_LEN-2:`Q_NF]; end 2'b01: begin // if input is double percision // extract sign bit @@ -334,24 +445,15 @@ module unpack ( // also need to take into account possible zero/denorm/inf/NaN values // convert the double precsion exponent into quad precsion - XExpE = {XLen1[`D_LEN-2], {`Q_NE-`D_NE{~XLen1[`D_LEN-2]&~XExpZero|XExpMaxE}}, XLen1[`D_LEN-3:`D_NF]}; - YExpE = {YLen1[`D_LEN-2], {`Q_NE-`D_NE{~YLen1[`D_LEN-2]&~YExpZero|YExpMaxE}}, YLen1[`D_LEN-3:`D_NF]}; - ZExpE = {ZLen1[`D_LEN-2], {`Q_NE-`D_NE{~ZLen1[`D_LEN-2]&~ZExpZero|ZExpMaxE}}, ZLen1[`D_LEN-3:`D_NF]}; + + XExpE = XOrigDenormE ? {1'b0, {`Q_NE-`D_NE{1'b1}}, (`D_NE-1)'(1)} : {XLen1[`D_LEN-2], {`Q_NE-`D_NE{~XLen1[`D_LEN-2]&~XExpZero|XExpMaxE}}, XLen1[`D_LEN-3:`D_NF]}; + YExpE = YOrigDenormE ? 
{1'b0, {`Q_NE-`D_NE{1'b1}}, (`D_NE-1)'(1)} : {YLen1[`D_LEN-2], {`Q_NE-`D_NE{~YLen1[`D_LEN-2]&~YExpZero|YExpMaxE}}, YLen1[`D_LEN-3:`D_NF]}; + ZExpE = ZOrigDenormE ? {1'b0, {`Q_NE-`D_NE{1'b1}}, (`D_NE-1)'(1)} : {ZLen1[`D_LEN-2], {`Q_NE-`D_NE{~ZLen1[`D_LEN-2]&~ZExpZero|ZExpMaxE}}, ZLen1[`D_LEN-3:`D_NF]}; // extract the fraction and add the nessesary trailing zeros - XFracE = {XLen1[`D_NE-1:0], (`Q_NF-`D_NE)'(0)}; - YFracE = {YLen1[`D_NE-1:0], (`Q_NF-`D_NE)'(0)}; - ZFracE = {ZLen1[`D_NE-1:0], (`Q_NF-`D_NE)'(0)}; - - // is the exponent non-zero - XExpNonzero = |XLen1[`D_LEN-2:`D_NE]; - YExpNonzero = |YLen1[`D_LEN-2:`D_NE]; - ZExpNonzero = |ZLen1[`D_LEN-2:`D_NE]; - - // is the exponent all 1's - XExpMaxE = &XLen1[`D_LEN-2:`D_NE]; - YExpMaxE = &YLen1[`D_LEN-2:`D_NE]; - ZExpMaxE = &ZLen1[`D_LEN-2:`D_NE]; + XFracE = {XLen1[`D_NF-1:0], (`Q_NF-`D_NF)'(0)}; + YFracE = {YLen1[`D_NF-1:0], (`Q_NF-`D_NF)'(0)}; + ZFracE = {ZLen1[`D_NF-1:0], (`Q_NF-`D_NF)'(0)}; end 2'b00: begin // if input is single percision // extract sign bit @@ -368,24 +470,14 @@ module unpack ( // also need to take into account possible zero/denorm/inf/NaN values // convert the single precsion exponent into quad precsion - XExpE = {XLen2[`S_LEN-2], {`Q_NE-`S_NE{~XLen2[`S_LEN-2]&~XExpZero|XExpMaxE}}, XLen2[`S_LEN-3:`S_NF]}; - YExpE = {YLen2[`S_LEN-2], {`Q_NE-`S_NE{~YLen2[`S_LEN-2]&~YExpZero|YExpMaxE}}, YLen2[`S_LEN-3:`S_NF]}; - ZExpE = {ZLen2[`S_LEN-2], {`Q_NE-`S_NE{~ZLen2[`S_LEN-2]&~ZExpZero|ZExpMaxE}}, ZLen2[`S_LEN-3:`S_NF]}; + XExpE = XOrigDenormE ? {1'b0, {`Q_NE-`S_NE{1'b1}}, (`S_NE-1)'(1)} : {XLen2[`S_LEN-2], {`Q_NE-`S_NE{~XLen2[`S_LEN-2]&~XExpZero|XExpMaxE}}, XLen2[`S_LEN-3:`S_NF]}; + YExpE = YOrigDenormE ? {1'b0, {`Q_NE-`S_NE{1'b1}}, (`S_NE-1)'(1)} : {YLen2[`S_LEN-2], {`Q_NE-`S_NE{~YLen2[`S_LEN-2]&~YExpZero|YExpMaxE}}, YLen2[`S_LEN-3:`S_NF]}; + ZExpE = ZOrigDenormE ? 
{1'b0, {`Q_NE-`S_NE{1'b1}}, (`S_NE-1)'(1)} : {ZLen2[`S_LEN-2], {`Q_NE-`S_NE{~ZLen2[`S_LEN-2]&~ZExpZero|ZExpMaxE}}, ZLen2[`S_LEN-3:`S_NF]}; // extract the fraction and add the nessesary trailing zeros XFracE = {XLen2[`S_NF-1:0], (`Q_NF-`S_NF)'(0)}; YFracE = {YLen2[`S_NF-1:0], (`Q_NF-`S_NF)'(0)}; ZFracE = {ZLen2[`S_NF-1:0], (`Q_NF-`S_NF)'(0)}; - - // is the exponent non-zero - XExpNonzero = |XLen2[`S_LEN-2:`S_NF]; - YExpNonzero = |YLen2[`S_LEN-2:`S_NF]; - ZExpNonzero = |ZLen2[`S_LEN-2:`S_NF]; - - // is the exponent all 1's - XExpMaxE = &XLen2[`S_LEN-2:`S_NF]; - YExpMaxE = &YLen2[`S_LEN-2:`S_NF]; - ZExpMaxE = &ZLen2[`S_LEN-2:`S_NF]; end 2'b10: begin // if input is half percision // extract sign bit @@ -400,26 +492,16 @@ module unpack ( // sexp = 0000 bbbb bbbb (add this) b = bit d = ~b // dexp = 0bdd dbbb bbbb // also need to take into account possible zero/denorm/inf/NaN values - + // convert the half precsion exponent into quad precsion - XExpE = {XLen3[`H_LEN-2], {`Q_NE-`H_NE{~XLen3[`H_LEN-2]&~XExpZero|XExpMaxE}}, XLen3[`H_LEN-3:`H_NF]}; - YExpE = {YLen3[`H_LEN-2], {`Q_NE-`H_NE{~YLen3[`H_LEN-2]&~YExpZero|YExpMaxE}}, YLen3[`H_LEN-3:`H_NF]}; - ZExpE = {ZLen3[`H_LEN-2], {`Q_NE-`H_NE{~ZLen3[`H_LEN-2]&~ZExpZero|ZExpMaxE}}, ZLen3[`H_LEN-3:`H_NF]}; + XExpE = XOrigDenormE ? {1'b0, {`Q_NE-`H_NE{1'b1}}, (`H_NE-1)'(1)} : {XLen3[`H_LEN-2], {`Q_NE-`H_NE{~XLen3[`H_LEN-2]&~XExpZero|XExpMaxE}}, XLen3[`H_LEN-3:`H_NF]}; + YExpE = YOrigDenormE ? {1'b0, {`Q_NE-`H_NE{1'b1}}, (`H_NE-1)'(1)} : {YLen3[`H_LEN-2], {`Q_NE-`H_NE{~YLen3[`H_LEN-2]&~YExpZero|YExpMaxE}}, YLen3[`H_LEN-3:`H_NF]}; + ZExpE = ZOrigDenormE ? 
{1'b0, {`Q_NE-`H_NE{1'b1}}, (`H_NE-1)'(1)} : {ZLen3[`H_LEN-2], {`Q_NE-`H_NE{~ZLen3[`H_LEN-2]&~ZExpZero|ZExpMaxE}}, ZLen3[`H_LEN-3:`H_NF]}; // extract the fraction and add the nessesary trailing zeros XFracE = {XLen3[`H_NF-1:0], (`Q_NF-`H_NF)'(0)}; YFracE = {YLen3[`H_NF-1:0], (`Q_NF-`H_NF)'(0)}; ZFracE = {ZLen3[`H_NF-1:0], (`Q_NF-`H_NF)'(0)}; - - // is the exponent non-zero - XExpNonzero = |XLen3[`H_LEN-2:`H_NF]; - YExpNonzero = |YLen3[`H_LEN-2:`H_NF]; - ZExpNonzero = |ZLen3[`H_LEN-2:`H_NF]; - - // is the exponent all 1's - XExpMaxE = &XLen3[`H_LEN-2:`H_NF]; - YExpMaxE = &YLen3[`H_LEN-2:`H_NF]; - ZExpMaxE = &ZLen3[`H_LEN-2:`H_NF]; end endcase end diff --git a/pipelined/src/ppa/ppa.sv b/pipelined/src/ppa/ppa.sv index 0207c99f7..32fc45e29 100644 --- a/pipelined/src/ppa/ppa.sv +++ b/pipelined/src/ppa/ppa.sv @@ -313,6 +313,7 @@ module ppa_shifter #(parameter WIDTH=32) ( assign Y = zshift[WIDTH-1:0]; endmodule +// just report one hot module ppa_prioritythermometer #(parameter N = 8) ( input logic [N-1:0] a, output logic [N-1:0] y); @@ -338,7 +339,7 @@ module ppa_priorityonehot #(parameter N = 8) ( assign y = a & nolower; endmodule -module ppa_prioriyencoder #(parameter N = 8) ( +module ppa_priorityencoder #(parameter N = 8) ( input logic [N-1:0] a, output logic [$clog2(N)-1:0] y); // Carefully crafted so design compiler will synthesize into a fast tree structure diff --git a/pipelined/testbench/testbench-fp.sv b/pipelined/testbench/testbench-fp.sv new file mode 100644 index 000000000..d79bc6d19 --- /dev/null +++ b/pipelined/testbench/testbench-fp.sv @@ -0,0 +1,1543 @@ + +`include "wally-config.vh" +`include "tests-fp.vh" + +// steps to run FMA Tests +// 1) create test vectors in riscv-wally/Tests/fp with: ./run-all.sh +// 2) go to riscv-wally/pipelined/testbench/fp/Tests +// 3) run ./sim-fma-batch +//*** drop the any constants in each file and figure out a way to do them without the code +module testbenchfp; + parameter TEST="none"; + + string Tests[]; + logic [2:0] 
OpCtrl[]; + logic [2:0] Unit[]; + string FmaRneTests[]; + string FmaRuTests[]; + string FmaRdTests[]; + string FmaRzTests[]; + string FmaRnmTests[]; + logic [2:0] Frm[4:0] = {3'b100, 3'b010, 3'b011, 3'b001, 3'b000}; // rne, rz, ru, rd, rnm + logic [1:0] Fmt[]; + logic [1:0] FmaFmt[]; + + + logic clk=0; + logic [31:0] TestNum=0; + logic [31:0] OpCtrlNum=0; + logic [31:0] errors=0; + logic [31:0] VectorNum=0; + logic [31:0] FrmNum=0; + logic [31:0] FmaNum=0; + logic [`FLEN*4+7:0] TestVectors[46464:0]; + logic [`FLEN*4+7:0] FmaRneVectors[6133248:0]; + logic [`FLEN*4+7:0] FmaRuVectors[6133248:0]; + logic [`FLEN*4+7:0] FmaRdVectors[6133248:0]; + logic [`FLEN*4+7:0] FmaRzVectors[6133248:0]; + logic [`FLEN*4+7:0] FmaRnmVectors[6133248:0]; + + logic [1:0] FmaFmtVal, FmtVal; + logic [2:0] UnitVal, OpCtrlVal, FrmVal; + logic NaNGood; + logic ZOrigDenorm, FmaRneZOrigDenorm, FmaRzZOrigDenorm, FmaRuZOrigDenorm, FmaRdZOrigDenorm, FmaRnmZOrigDenorm; + logic FmaRneNaNGood, FmaRzNaNGood, FmaRuNaNGood, FmaRdNaNGood, FmaRnmNaNGood; + logic [`FLEN-1:0] X, Y, Z; // inputs read from TestFloat + logic [`FLEN-1:0] FmaRneX, FmaRneY, FmaRneZ; // inputs read from TestFloat + logic [`FLEN-1:0] FmaRzX, FmaRzY, FmaRzZ; // inputs read from TestFloat + logic [`FLEN-1:0] FmaRuX, FmaRuY, FmaRuZ; // inputs read from TestFloat + logic [`FLEN-1:0] FmaRdX, FmaRdY, FmaRdZ; // inputs read from TestFloat + logic [`FLEN-1:0] FmaRnmX, FmaRnmY, FmaRnmZ; // inputs read from TestFloat + logic [`XLEN-1:0] SrcA; // integer input + logic [`FLEN-1:0] Ans; // result from TestFloat + logic [`FLEN-1:0] FmaRneAns, FmaRzAns, FmaRuAns, FmaRdAns, FmaRnmAns; // flags read form testfloat + logic [`FLEN-1:0] Res; + logic [`FLEN-1:0] FmaRneRes, FmaRzRes, FmaRuRes, FmaRdRes, FmaRnmRes; // result from Units + logic [4:0] AnsFlags; // flags read form testfloat + logic [4:0] FmaRneAnsFlags, FmaRzAnsFlags, FmaRuAnsFlags, FmaRdAnsFlags, FmaRnmAnsFlags; // flags read form testfloat + logic [4:0] ResFlags; // Res's flags + logic 
[4:0] FmaRneResFlags, FmaRzResFlags, FmaRuResFlags, FmaRdResFlags, FmaRnmResFlags; // flags read form testfloat + logic [2:0] FrmE; // rounding mode + logic [`FPSIZES/3:0] ModFmt, FmaModFmt; // format - 10 = half, 00 = single, 01 = double, 11 = quad + logic [3:0] FpuUnit; // Which unit is being tested + logic [`FLEN-1:0] FMAResM, DivResM, CmpResE, CvtResE, CvtFpResE; // Ress + logic [4:0] FMAFlgM, CvtFpFlgM, DivFlgM, CvtIntFlgM, CmpFlgM; // FMA's outputed flags + logic CmpNVE; + logic ResNaN, FmaRneResNaN, FmaRzResNaN, FmaRuResNaN, FmaRdResNaN, FmaRnmResNaN; // is the outputed result NaN + logic AnsNaN, FmaRneAnsNaN, FmaRzAnsNaN, FmaRuAnsNaN, FmaRdAnsNaN, FmaRnmAnsNaN; // is the correct answer NaN + logic [`NE+1:0] ProdExpE, FmaRneProdExp, FmaRzProdExp, FmaRuProdExp, FmaRdProdExp, FmaRnmProdExp; + logic AddendStickyE, FmaRneAddendSticky, FmaRzAddendSticky, FmaRuAddendSticky, FmaRdAddendSticky, FmaRnmAddendSticky; + logic KillProdE, FmaRneKillProd, FmaRzKillProd, FmaRuKillProd, FmaRdKillProd, FmaRnmKillProd; + logic XSgn, YSgn, ZSgn; + logic FmaRneXSgn, FmaRneYSgn, FmaRneZSgn; + logic FmaRzXSgn, FmaRzYSgn, FmaRzZSgn; + logic FmaRuXSgn, FmaRuYSgn, FmaRuZSgn; + logic FmaRdXSgn, FmaRdYSgn, FmaRdZSgn; + logic FmaRnmXSgn, FmaRnmYSgn, FmaRnmZSgn; + logic [`NE-1:0] XExp, YExp, ZExp; + logic [`NE-1:0] FmaRneXExp, FmaRneYExp, FmaRneZExp; + logic [`NE-1:0] FmaRzXExp, FmaRzYExp, FmaRzZExp; + logic [`NE-1:0] FmaRuXExp, FmaRuYExp, FmaRuZExp; + logic [`NE-1:0] FmaRdXExp, FmaRdYExp, FmaRdZExp; + logic [`NE-1:0] FmaRnmXExp, FmaRnmYExp, FmaRnmZExp; + logic [`NF:0] XMan, YMan, ZMan; + logic [`NF:0] FmaRneXMan, FmaRneYMan, FmaRneZMan; + logic [`NF:0] FmaRzXMan, FmaRzYMan, FmaRzZMan; + logic [`NF:0] FmaRuXMan, FmaRuYMan, FmaRuZMan; + logic [`NF:0] FmaRdXMan, FmaRdYMan, FmaRdZMan; + logic [`NF:0] FmaRnmXMan, FmaRnmYMan, FmaRnmZMan; + logic XNorm; + logic XExpMaxE; + logic XNaN, YNaN, ZNaN; + logic FmaRneXNaN, FmaRneYNaN, FmaRneZNaN; + logic FmaRzXNaN, FmaRzYNaN, FmaRzZNaN; + logic 
FmaRuXNaN, FmaRuYNaN, FmaRuZNaN; + logic FmaRdXNaN, FmaRdYNaN, FmaRdZNaN; + logic FmaRnmXNaN, FmaRnmYNaN, FmaRnmZNaN; + logic XSNaN, YSNaN, ZSNaN; + logic FmaRneXSNaN, FmaRneYSNaN, FmaRneZSNaN; + logic FmaRzXSNaN, FmaRzYSNaN, FmaRzZSNaN; + logic FmaRuXSNaN, FmaRuYSNaN, FmaRuZSNaN; + logic FmaRdXSNaN, FmaRdYSNaN, FmaRdZSNaN; + logic FmaRnmXSNaN, FmaRnmYSNaN, FmaRnmZSNaN; + logic XDenorm, YDenorm, ZDenorm; + logic FmaRneXDenorm, FmaRneYDenorm, FmaRneZDenorm; + logic FmaRzXDenorm, FmaRzYDenorm, FmaRzZDenorm; + logic FmaRuXDenorm, FmaRuYDenorm, FmaRuZDenorm; + logic FmaRdXDenorm, FmaRdYDenorm, FmaRdZDenorm; + logic FmaRnmXDenorm, FmaRnmYDenorm, FmaRnmZDenorm; + logic XInf, YInf, ZInf; + logic FmaRneXInf, FmaRneYInf, FmaRneZInf; + logic FmaRzXInf, FmaRzYInf, FmaRzZInf; + logic FmaRuXInf, FmaRuYInf, FmaRuZInf; + logic FmaRdXInf, FmaRdYInf, FmaRdZInf; + logic FmaRnmXInf, FmaRnmYInf, FmaRnmZInf; + logic XZero, YZero, ZZero; + logic FmaRneXZero, FmaRneYZero, FmaRneZZero; + logic FmaRzXZero, FmaRzYZero, FmaRzZZero; + logic FmaRuXZero, FmaRuYZero, FmaRuZZero; + logic FmaRdXZero, FmaRdYZero, FmaRdZZero; + logic FmaRnmXZero, FmaRnmYZero, FmaRnmZZero; + logic XExpMax, YExpMax, ZExpMax, Mult; + logic [3*`NF+5:0] SumE, FmaRneSum, FmaRzSum, FmaRuSum, FmaRdSum, FmaRnmSum; + logic InvZE, FmaRneInvZ, FmaRzInvZ, FmaRuInvZ, FmaRdInvZ, FmaRnmInvZ; + logic NegSumE, FmaRneNegSum, FmaRzNegSum, FmaRuNegSum, FmaRdNegSum, FmaRnmNegSum; + logic ZSgnEffE, FmaRneZSgnEff, FmaRzZSgnEff, FmaRuZSgnEff, FmaRdZSgnEff, FmaRnmZSgnEff; + logic PSgnE, FmaRnePSgn, FmaRzPSgn, FmaRuPSgn, FmaRdPSgn, FmaRnmPSgn; + logic [$clog2(3*`NF+7)-1:0] NormCntE, FmaRneNormCnt, FmaRzNormCnt, FmaRuNormCnt, FmaRdNormCnt, FmaRnmNormCnt; + + + + /////////////////////////////////////////////////////////////////////////////////////////////// + + // ||||||||| |||||||| ||||||| ||||||||| ||||||| |||||||| ||| + // ||| ||| ||| ||| ||| ||| ||| + // ||| |||||||| ||||||| ||| ||||||| |||||||| ||| + // ||| ||| ||| ||| ||| ||| ||| + // ||| 
|||||||| ||||||| ||| ||||||| |||||||| ||||||||| + + /////////////////////////////////////////////////////////////////////////////////////////////// + + // select tests relevent to the specified configuration + // cvtint - test integer conversion unit (fcvtint) + // cvtfp - test floating-point conversion unit (fcvtfp) + // cmp - test comparison unit's LT, LE, EQ opperations (fcmp) + // add - test addition + // sub - test subtraction + // div - test division + // sqrt - test square root + // all - test all of the above + initial begin + $display("TEST is %s", TEST); + if (`Q_SUPPORTED) begin // if Quad percision is supported + if (TEST === "cvtint"| TEST === "all") begin // if testing integer conversion + // add the 128-bit cvtint tests to the to-be-tested list + Tests = {Tests, f128rv32cvtint}; + // add the op-codes for these tests to the op-code list + OpCtrl = {OpCtrl, `FROM_UI_OPCTRL, `FROM_I_OPCTRL, `TO_UI_OPCTRL, `TO_I_OPCTRL}; + // add what unit is used and the fmt to their lists (one for each test) + for(int i = 0; i<20; i++) begin + Unit = {Unit, `CVTINTUNIT}; + Fmt = {Fmt, 2'b11}; + end + if (`XLEN == 64) begin // if 64-bit integers are supported add their conversions + Tests = {Tests, f128rv64cvtint}; + // add the op-codes for these tests to the op-code list + OpCtrl = {OpCtrl, `FROM_UL_OPCTRL, `FROM_L_OPCTRL, `TO_UL_OPCTRL, `TO_L_OPCTRL}; + // add what unit is used and the fmt to their lists (one for each test) + for(int i = 0; i<20; i++) begin + Unit = {Unit, `CVTINTUNIT}; + Fmt = {Fmt, 2'b11}; + end + end + end + if (TEST === "cvtfp" | TEST === "all") begin // if the floating-point conversions are being tested + if(`D_SUPPORTED) begin // if double precision is supported + // add the 128 <-> 64 bit conversions to the to-be-tested list + Tests = {Tests, f128f64cvt}; + // add the op-ctrls (i.e. 
the format of the result) + OpCtrl = {OpCtrl, 3'b01, 3'b11}; + // add the unit being tested and fmt (input format) + for(int i = 0; i<10; i++) begin + Unit = {Unit, `CVTFPUNIT}; + Fmt = {Fmt, 2'b11}; + end + end + if(`F_SUPPORTED) begin // if single precision is supported + // add the 128 <-> 32 bit conversions to the to-be-tested list + Tests = {Tests, f128f32cvt}; + // add the op-ctrls (i.e. the format of the result) + OpCtrl = {OpCtrl, 3'b00, 3'b11}; + // add the unit being tested and fmt (input format) + for(int i = 0; i<10; i++) begin + Unit = {Unit, `CVTFPUNIT}; + Fmt = {Fmt, 2'b11}; + end + end + if(`ZFH_SUPPORTED) begin // if half precision is supported + // add the 128 <-> 16 bit conversions to the to-be-tested list + Tests = {Tests, f128f16cvt}; + // add the op-ctrls (i.e. the format of the result) + OpCtrl = {OpCtrl, 3'b10, 3'b11}; + // add the unit being tested and fmt (input format) + for(int i = 0; i<10; i++) begin + Unit = {Unit, `CVTFPUNIT}; + Fmt = {Fmt, 2'b11}; + end + end + end + if (TEST === "cmp" | TEST === "all") begin// if comparisons are being tested + // add the compare tests/op-ctrls/unit/fmt + Tests = {Tests, f128cmp}; + OpCtrl = {OpCtrl, `EQ_OPCTRL, `LE_OPCTRL, `LT_OPCTRL}; + for(int i = 0; i<15; i++) begin + Unit = {Unit, `CMPUNIT}; + Fmt = {Fmt, 2'b11}; + end + end + if (TEST === "add" | TEST === "all") begin // if addition is being tested + // add the addition tests/op-ctrls/unit/fmt + Tests = {Tests, f128add}; + OpCtrl = {OpCtrl, `ADD_OPCTRL}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `FMAUNIT}; + Fmt = {Fmt, 2'b11}; + end + end + if (TEST === "sub" | TEST === "all") begin // if subtraction is being tested + // add the subtraction tests/op-ctrls/unit/fmt + Tests = {Tests, f128sub}; + OpCtrl = {OpCtrl, `SUB_OPCTRL}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `FMAUNIT}; + Fmt = {Fmt, 2'b11}; + end + end + if (TEST === "mul" | TEST === "all") begin // if multiplication is being tested + // add the multiply 
tests/op-ctrls/unit/fmt + Tests = {Tests, f128mul}; + OpCtrl = {OpCtrl, `MUL_OPCTRL}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `FMAUNIT}; + Fmt = {Fmt, 2'b11}; + end + end + if (TEST === "div" | TEST === "all") begin // if division is being tested + // add the divide tests/op-ctrls/unit/fmt + Tests = {Tests, f128div}; + OpCtrl = {OpCtrl, `DIV_OPCTRL}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `DIVUNIT}; + Fmt = {Fmt, 2'b11}; + end + end + if (TEST === "sqrt" | TEST === "all") begin // if square-root is being tested + // add the square-root tests/op-ctrls/unit/fmt + Tests = {Tests, f128sqrt}; + OpCtrl = {OpCtrl, `SQRT_OPCTRL}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `DIVUNIT}; + Fmt = {Fmt, 2'b11}; + end + end + if (TEST === "fma" | TEST === "all") begin // if fused-mutliply-add is being tested + // add each rounding mode to it's own list of tests + // - fma tests are very long, so run all rounding modes in parallel + FmaRneTests = {FmaRneTests, "f128_mulAdd_rne.tv"}; + FmaRzTests = {FmaRzTests, "f128_mulAdd_rz.tv"}; + FmaRuTests = {FmaRuTests, "f128_mulAdd_ru.tv"}; + FmaRdTests = {FmaRdTests, "f128_mulAdd_rd.tv"}; + FmaRnmTests = {FmaRnmTests, "f128_mulAdd_rnm.tv"}; + // add the format for the Fma + for(int i = 0; i<5; i++) begin + FmaFmt = {FmaFmt, 2'b11}; + end + end + end + if (`D_SUPPORTED) begin // if double precision is supported + if (TEST === "cvtint"| TEST === "all") begin // if integer conversion is being tested + Tests = {Tests, f64rv32cvtint}; + // add the op-codes for these tests to the op-code list + OpCtrl = {OpCtrl, `FROM_UI_OPCTRL, `FROM_I_OPCTRL, `TO_UI_OPCTRL, `TO_I_OPCTRL}; + // add what unit is used and the fmt to their lists (one for each test) + for(int i = 0; i<20; i++) begin + Unit = {Unit, `CVTINTUNIT}; + Fmt = {Fmt, 2'b01}; + end + if (`XLEN == 64) begin // if 64-bit integers are being supported + Tests = {Tests, f64rv64cvtint}; + // add the op-codes for these tests to the op-code list + OpCtrl = {OpCtrl, 
`FROM_UL_OPCTRL, `FROM_L_OPCTRL, `TO_UL_OPCTRL, `TO_L_OPCTRL}; + // add what unit is used and the fmt to their lists (one for each test) + for(int i = 0; i<20; i++) begin + Unit = {Unit, `CVTINTUNIT}; + Fmt = {Fmt, 2'b01}; + end + end + end + if (TEST === "cvtfp" | TEST === "all") begin // if floating point conversions are being tested + if(`F_SUPPORTED) begin // if single precision is supported + // add the 64 <-> 32 bit conversions to the to-be-tested list + Tests = {Tests, f64f32cvt}; + // add the op-ctrls (i.e. the format of the result) + OpCtrl = {OpCtrl, 3'b00, 3'b01}; + // add the unit being tested and fmt (input format) + for(int i = 0; i<10; i++) begin + Unit = {Unit, `CVTFPUNIT}; + Fmt = {Fmt, 2'b01}; + end + end + if(`ZFH_SUPPORTED) begin // if half precision is supported + // add the 64 <-> 16 bit conversions to the to-be-tested list + Tests = {Tests, f64f16cvt}; + // add the op-ctrls (i.e. the format of the result) + OpCtrl = {OpCtrl, 3'b10, 3'b01}; + // add the unit being tested and fmt (input format) + for(int i = 0; i<10; i++) begin + Unit = {Unit, `CVTFPUNIT}; + Fmt = {Fmt, 2'b01}; + end + end + end + if (TEST === "cmp" | TEST === "all") begin // if comparisions are being tested + // add the correct tests/op-ctrls/unit/fmt to their lists + Tests = {Tests, f64cmp}; + OpCtrl = {OpCtrl, `EQ_OPCTRL, `LE_OPCTRL, `LT_OPCTRL}; + for(int i = 0; i<15; i++) begin + Unit = {Unit, `CMPUNIT}; + Fmt = {Fmt, 2'b01}; + end + end + if (TEST === "add" | TEST === "all") begin // if addition is being tested + // add the correct tests/op-ctrls/unit/fmt to their lists + Tests = {Tests, f64add}; + OpCtrl = {OpCtrl, `ADD_OPCTRL}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `FMAUNIT}; + Fmt = {Fmt, 2'b01}; + end + end + if (TEST === "sub" | TEST === "all") begin // if subtration is being tested + // add the correct tests/op-ctrls/unit/fmt to their lists + Tests = {Tests, f64sub}; + OpCtrl = {OpCtrl, `SUB_OPCTRL}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, 
`FMAUNIT}; + Fmt = {Fmt, 2'b01}; + end + end + if (TEST === "mul" | TEST === "all") begin // if multiplication is being tested + // add the correct tests/op-ctrls/unit/fmt to their lists + Tests = {Tests, f64mul}; + OpCtrl = {OpCtrl, `MUL_OPCTRL}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `FMAUNIT}; + Fmt = {Fmt, 2'b01}; + end + end + if (TEST === "div" | TEST === "all") begin // if division is being tested + // add the correct tests/op-ctrls/unit/fmt to their lists + Tests = {Tests, f64div}; + OpCtrl = {OpCtrl, `DIV_OPCTRL}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `DIVUNIT}; + Fmt = {Fmt, 2'b01}; + end + end + if (TEST === "sqrt" | TEST === "all") begin // if square-root is being tessted + // add the correct tests/op-ctrls/unit/fmt to their lists + Tests = {Tests, f64sqrt}; + OpCtrl = {OpCtrl, `SQRT_OPCTRL}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `DIVUNIT}; + Fmt = {Fmt, 2'b01}; + end + end + if (TEST === "fma" | TEST === "all") begin // if the fused multiply add is being tested + // add each rounding mode to it's own list of tests + // - fma tests are very long, so run all rounding modes in parallel + FmaRneTests = {FmaRneTests, "f64_mulAdd_rne.tv"}; + FmaRzTests = {FmaRzTests, "f64_mulAdd_rz.tv"}; + FmaRuTests = {FmaRuTests, "f64_mulAdd_ru.tv"}; + FmaRdTests = {FmaRdTests, "f64_mulAdd_rd.tv"}; + FmaRnmTests = {FmaRnmTests, "f64_mulAdd_rnm.tv"}; + for(int i = 0; i<5; i++) begin + FmaFmt = {FmaFmt, 2'b01}; + end + end + end + if (`F_SUPPORTED) begin // if single precision being supported + if (TEST === "cvtint"| TEST === "all") begin // if integer conversion is being tested + Tests = {Tests, f32rv32cvtint}; + // add the op-codes for these tests to the op-code list + OpCtrl = {OpCtrl, `FROM_UI_OPCTRL, `FROM_I_OPCTRL, `TO_UI_OPCTRL, `TO_I_OPCTRL}; + // add what unit is used and the fmt to their lists (one for each test) + for(int i = 0; i<20; i++) begin + Unit = {Unit, `CVTINTUNIT}; + Fmt = {Fmt, 2'b00}; + end + if (`XLEN == 64) begin // if 
64-bit integers are supported + Tests = {Tests, f32rv64cvtint}; + // add the op-codes for these tests to the op-code list + OpCtrl = {OpCtrl, `FROM_UL_OPCTRL, `FROM_L_OPCTRL, `TO_UL_OPCTRL, `TO_L_OPCTRL}; + // add what unit is used and the fmt to their lists (one for each test) + for(int i = 0; i<20; i++) begin + Unit = {Unit, `CVTINTUNIT}; + Fmt = {Fmt, 2'b00}; + end + end + end + if (TEST === "cvtfp" | TEST === "all") begin // if floating point conversion is being tested + if(`ZFH_SUPPORTED) begin + // add the 32 <-> 16 bit conversions to the to-be-tested list + Tests = {Tests, f32f16cvt}; + // add the op-ctrls (i.e. the format of the result) + OpCtrl = {OpCtrl, 3'b10, 3'b00}; + // add the unit being tested and fmt (input format) + for(int i = 0; i<10; i++) begin + Unit = {Unit, `CVTFPUNIT}; + Fmt = {Fmt, 2'b00}; + end + end + end + if (TEST === "cmp" | TEST === "all") begin // if comparision is being tested + // add the correct tests/op-ctrls/unit/fmt to their lists + Tests = {Tests, f32cmp}; + OpCtrl = {OpCtrl, `EQ_OPCTRL, `LE_OPCTRL, `LT_OPCTRL}; + for(int i = 0; i<15; i++) begin + Unit = {Unit, `CMPUNIT}; + Fmt = {Fmt, 2'b00}; + end + end + if (TEST === "add" | TEST === "all") begin // if addition is being tested + // add the correct tests/op-ctrls/unit/fmt to their lists + Tests = {Tests, f32add}; + OpCtrl = {OpCtrl, `ADD_OPCTRL}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `FMAUNIT}; + Fmt = {Fmt, 2'b00}; + end + end + if (TEST === "sub" | TEST === "all") begin // if subtration is being tested + // add the correct tests/op-ctrls/unit/fmt to their lists + Tests = {Tests, f32sub}; + OpCtrl = {OpCtrl, `SUB_OPCTRL}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `FMAUNIT}; + Fmt = {Fmt, 2'b00}; + end + end + if (TEST === "mul" | TEST === "all") begin // if multiply is being tested + // add the correct tests/op-ctrls/unit/fmt to their lists + Tests = {Tests, f32mul}; + OpCtrl = {OpCtrl, `MUL_OPCTRL}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, 
`FMAUNIT}; + Fmt = {Fmt, 2'b00}; + end + end + if (TEST === "div" | TEST === "all") begin // if division is being tested + // add the correct tests/op-ctrls/unit/fmt to their lists + Tests = {Tests, f32div}; + OpCtrl = {OpCtrl, `DIV_OPCTRL}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `DIVUNIT}; + Fmt = {Fmt, 2'b00}; + end + end + if (TEST === "sqrt" | TEST === "all") begin // if sqrt is being tested + // add the correct tests/op-ctrls/unit/fmt to their lists + Tests = {Tests, f32sqrt}; + OpCtrl = {OpCtrl, `SQRT_OPCTRL}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `DIVUNIT}; + Fmt = {Fmt, 2'b00}; + end + end + if (TEST === "fma" | TEST === "all") begin // if fma is being tested + // add each rounding mode to its own list of tests + // - fma tests are very long, so run all rounding modes in parallel + FmaRneTests = {FmaRneTests, "f32_mulAdd_rne.tv"}; + FmaRzTests = {FmaRzTests, "f32_mulAdd_rz.tv"}; + FmaRuTests = {FmaRuTests, "f32_mulAdd_ru.tv"}; + FmaRdTests = {FmaRdTests, "f32_mulAdd_rd.tv"}; + FmaRnmTests = {FmaRnmTests, "f32_mulAdd_rnm.tv"}; + for(int i = 0; i<5; i++) begin + FmaFmt = {FmaFmt, 2'b00}; + end + end + end + if (`ZFH_SUPPORTED) begin // if half precision supported + if (TEST === "cvtint"| TEST === "all") begin // if integer conversions are being tested + Tests = {Tests, f16rv32cvtint}; + // add the op-codes for these tests to the op-code list + OpCtrl = {OpCtrl, `FROM_UI_OPCTRL, `FROM_I_OPCTRL, `TO_UI_OPCTRL, `TO_I_OPCTRL}; + // add what unit is used and the fmt to their lists (one for each test) + for(int i = 0; i<20; i++) begin + Unit = {Unit, `CVTINTUNIT}; + Fmt = {Fmt, 2'b10}; + end + if (`XLEN == 64) begin // if 64-bit integers are supported + Tests = {Tests, f16rv64cvtint}; + // add the op-codes for these tests to the op-code list + OpCtrl = {OpCtrl, `FROM_UL_OPCTRL, `FROM_L_OPCTRL, `TO_UL_OPCTRL, `TO_L_OPCTRL}; + // add what unit is used and the fmt to their lists (one for each test) + for(int i = 0; i<20; i++) begin +
Unit = {Unit, `CVTINTUNIT}; + Fmt = {Fmt, 2'b10}; + end + end + end + if (TEST === "cmp" | TEST === "all") begin // if comparisions are being tested + // add the correct tests/op-ctrls/unit/fmt to their lists + Tests = {Tests, f16cmp}; + OpCtrl = {OpCtrl, `EQ_OPCTRL, `LE_OPCTRL, `LT_OPCTRL}; + for(int i = 0; i<15; i++) begin + Unit = {Unit, `CMPUNIT}; + Fmt = {Fmt, 2'b10}; + end + end + if (TEST === "add" | TEST === "all") begin // if addition is being tested + // add the correct tests/op-ctrls/unit/fmt to their lists + Tests = {Tests, f16add}; + OpCtrl = {OpCtrl, `ADD_OPCTRL}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `FMAUNIT}; + Fmt = {Fmt, 2'b10}; + end + end + if (TEST === "sub" | TEST === "all") begin // if subtraction is being tested + // add the correct tests/op-ctrls/unit/fmt to their lists + Tests = {Tests, f16sub}; + OpCtrl = {OpCtrl, `SUB_OPCTRL}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `FMAUNIT}; + Fmt = {Fmt, 2'b10}; + end + end + if (TEST === "mul" | TEST === "all") begin // if multiplication is being tested + // add the correct tests/op-ctrls/unit/fmt to their lists + Tests = {Tests, f16mul}; + OpCtrl = {OpCtrl, `MUL_OPCTRL}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `FMAUNIT}; + Fmt = {Fmt, 2'b10}; + end + end + if (TEST === "div" | TEST === "all") begin // if division is being tested + // add the correct tests/op-ctrls/unit/fmt to their lists + Tests = {Tests, f16div}; + OpCtrl = {OpCtrl, `DIV_OPCTRL}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `DIVUNIT}; + Fmt = {Fmt, 2'b10}; + end + end + if (TEST === "sqrt" | TEST === "all") begin // if sqrt is being tested + // add the correct tests/op-ctrls/unit/fmt to their lists + Tests = {Tests, f16sqrt}; + OpCtrl = {OpCtrl, `SQRT_OPCTRL}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `DIVUNIT}; + Fmt = {Fmt, 2'b10}; + end + end + if (TEST === "fma" | TEST === "all") begin // if fma is being tested + // add each rounding mode to it's own list of tests + // - fma tests are 
very long, so run all rounding modes in parallel
+        FmaRneTests = {FmaRneTests, "f16_mulAdd_rne.tv"};
+        FmaRzTests = {FmaRzTests, "f16_mulAdd_rz.tv"};
+        FmaRuTests = {FmaRuTests, "f16_mulAdd_ru.tv"};
+        FmaRdTests = {FmaRdTests, "f16_mulAdd_rd.tv"};
+        FmaRnmTests = {FmaRnmTests, "f16_mulAdd_rnm.tv"};
+        for(int i = 0; i<5; i++) begin
+          FmaFmt = {FmaFmt, 2'b10};
+        end
+      end
+    end
+
+    // check if nothing is being tested
+    if (Tests.size() == 0 & FmaRneTests.size() == 0 & FmaRuTests.size() == 0 & FmaRdTests.size() == 0 & FmaRzTests.size() == 0 & FmaRnmTests.size() == 0) begin
+      $display("TEST %s not supported in this configuration", TEST);
+      $stop;
+    end
+  end
+
+  ///////////////////////////////////////////////////////////////////////////////////////////////
+
+  // ||||||||| |||||||| ||||||||| ||||||| ||||||||| |||||||| ||||||| |||||||||
+  // ||| ||| ||| ||| ||| || || ||| ||| ||| |||
+  // |||||||| |||||||| ||||||||| || || ||| |||||||| ||||||| |||
+  // ||| || ||| ||| ||| || || ||| ||| ||| |||
+  // ||| ||| |||||||| ||| ||| ||||||| ||| |||||||| ||||||| |||
+
+  ///////////////////////////////////////////////////////////////////////////////////////////////
+
+  // Read the first test
+  //    - the test index is set to 0 before it is used to look up any file name,
+  //      so the lookups below do not depend on TestNum having a declaration initializer
+  //    - a list that is empty for the selected TEST is skipped, so $readmemh is
+  //      never asked to open `PATH with no file name appended
+  initial begin
+    // set the test index to 0
+    TestNum = 0;
+    $display("\n\nRunning %s vectors", Tests[TestNum]);
+    if (Tests.size() != 0)       $readmemh({`PATH, Tests[TestNum]}, TestVectors);
+    if (FmaRneTests.size() != 0) $readmemh({`PATH, FmaRneTests[TestNum]}, FmaRneVectors);
+    if (FmaRuTests.size() != 0)  $readmemh({`PATH, FmaRuTests[TestNum]}, FmaRuVectors);
+    if (FmaRdTests.size() != 0)  $readmemh({`PATH, FmaRdTests[TestNum]}, FmaRdVectors);
+    if (FmaRzTests.size() != 0)  $readmemh({`PATH, FmaRzTests[TestNum]}, FmaRzVectors);
+    if (FmaRnmTests.size() != 0) $readmemh({`PATH, FmaRnmTests[TestNum]}, FmaRnmVectors);
+  end
+
+  // set the signals for all tests
+  //    - each value is the entry of its list selected by the current index
+  always_comb FmaFmtVal = FmaFmt[FmaNum];
+  always_comb UnitVal = Unit[TestNum];
+  always_comb FmtVal = Fmt[TestNum];
+  always_comb OpCtrlVal = OpCtrl[OpCtrlNum];
+  always_comb FrmVal = Frm[FrmNum];
+  // Mult is high only when the current op-ctrl is exactly 3'b100
+  assign Mult = OpCtrlVal === 3'b100;
+
+  // modify the format signal if only 2 precisions supported
+  //    - 1
for the larger precision + // - 0 for the smaller precision + always_comb begin + if(`FPSIZES/3 === 1) ModFmt = FmtVal; + else ModFmt = FmtVal === `FMT; + if(`FPSIZES/3 === 1) FmaModFmt = FmaFmtVal; + else FmaModFmt = FmaFmtVal === `FMT; + end + + // extract the inputs (X, Y, Z, SrcA) and the output (Ans, AnsFlags) from the current test vector + readfmavectors readfmarnevectors (.clk, .Frm(`RNE), .TestVector(FmaRneVectors[VectorNum]), .VectorNum, .Ans(FmaRneAns), .AnsFlags(FmaRneAnsFlags), + .XSgnE(FmaRneXSgn), .YSgnE(FmaRneYSgn), .ZSgnE(FmaRneZSgn), .FmaNum, + .XExpE(FmaRneXExp), .YExpE(FmaRneYExp), .ZExpE(FmaRneZExp), + .XManE(FmaRneXMan), .YManE(FmaRneYMan), .ZManE(FmaRneZMan), + .XNaNE(FmaRneXNaN), .YNaNE(FmaRneYNaN), .ZNaNE(FmaRneZNaN), .ZOrigDenormE(FmaRneZOrigDenorm), + .XSNaNE(FmaRneXSNaN), .YSNaNE(FmaRneYSNaN), .ZSNaNE(FmaRneZSNaN), + .XDenormE(FmaRneXDenorm), .YDenormE(FmaRneYDenorm), .ZDenormE(FmaRneZDenorm), + .XZeroE(FmaRneXZero), .YZeroE(FmaRneYZero), .ZZeroE(FmaRneZZero), + .XInfE(FmaRneXInf), .YInfE(FmaRneYInf), .ZInfE(FmaRneZInf), .FmaModFmt, .FmaFmt(FmaFmtVal), + .X(FmaRneX), .Y(FmaRneY), .Z(FmaRneZ)); + readfmavectors readfmarzvectors (.clk, .Frm(`RZ), .TestVector(FmaRzVectors[VectorNum]), .VectorNum, .Ans(FmaRzAns), .AnsFlags(FmaRzAnsFlags), + .XSgnE(FmaRzXSgn), .YSgnE(FmaRzYSgn), .ZSgnE(FmaRzZSgn), .FmaNum, .FmaModFmt, + .XExpE(FmaRzXExp), .YExpE(FmaRzYExp), .ZExpE(FmaRzZExp), + .XManE(FmaRzXMan), .YManE(FmaRzYMan), .ZManE(FmaRzZMan), + .XNaNE(FmaRzXNaN), .YNaNE(FmaRzYNaN), .ZNaNE(FmaRzZNaN), .ZOrigDenormE(FmaRzZOrigDenorm), + .XSNaNE(FmaRzXSNaN), .YSNaNE(FmaRzYSNaN), .ZSNaNE(FmaRzZSNaN), + .XDenormE(FmaRzXDenorm), .YDenormE(FmaRzYDenorm), .ZDenormE(FmaRzZDenorm), + .XZeroE(FmaRzXZero), .YZeroE(FmaRzYZero), .ZZeroE(FmaRzZZero), + .XInfE(FmaRzXInf), .YInfE(FmaRzYInf), .ZInfE(FmaRzZInf), .FmaFmt(FmaFmtVal), + .X(FmaRzX), .Y(FmaRzY), .Z(FmaRzZ)); + readfmavectors readfmaruvectors (.clk, .Frm(`RU), .TestVector(FmaRuVectors[VectorNum]), .VectorNum, 
.Ans(FmaRuAns), .AnsFlags(FmaRuAnsFlags), + .XSgnE(FmaRuXSgn), .YSgnE(FmaRuYSgn), .ZSgnE(FmaRuZSgn), .FmaNum, .FmaModFmt, + .XExpE(FmaRuXExp), .YExpE(FmaRuYExp), .ZExpE(FmaRuZExp), + .XManE(FmaRuXMan), .YManE(FmaRuYMan), .ZManE(FmaRuZMan), + .XNaNE(FmaRuXNaN), .YNaNE(FmaRuYNaN), .ZNaNE(FmaRuZNaN), .ZOrigDenormE(FmaRuZOrigDenorm), + .XSNaNE(FmaRuXSNaN), .YSNaNE(FmaRuYSNaN), .ZSNaNE(FmaRuZSNaN), + .XDenormE(FmaRuXDenorm), .YDenormE(FmaRuYDenorm), .ZDenormE(FmaRuZDenorm), + .XZeroE(FmaRuXZero), .YZeroE(FmaRuYZero), .ZZeroE(FmaRuZZero), + .XInfE(FmaRuXInf), .YInfE(FmaRuYInf), .ZInfE(FmaRuZInf), .FmaFmt(FmaFmtVal), + .X(FmaRuX), .Y(FmaRuY), .Z(FmaRuZ)); + readfmavectors readfmardvectors (.clk, .Frm(`RD), .TestVector(FmaRdVectors[VectorNum]), .VectorNum, .Ans(FmaRdAns), .AnsFlags(FmaRdAnsFlags), + .XSgnE(FmaRdXSgn), .YSgnE(FmaRdYSgn), .ZSgnE(FmaRdZSgn), .FmaNum, .FmaModFmt, + .XExpE(FmaRdXExp), .YExpE(FmaRdYExp), .ZExpE(FmaRdZExp), + .XManE(FmaRdXMan), .YManE(FmaRdYMan), .ZManE(FmaRdZMan), + .XNaNE(FmaRdXNaN), .YNaNE(FmaRdYNaN), .ZNaNE(FmaRdZNaN), .ZOrigDenormE(FmaRdZOrigDenorm), + .XSNaNE(FmaRdXSNaN), .YSNaNE(FmaRdYSNaN), .ZSNaNE(FmaRdZSNaN), + .XDenormE(FmaRdXDenorm), .YDenormE(FmaRdYDenorm), .ZDenormE(FmaRdZDenorm), + .XZeroE(FmaRdXZero), .YZeroE(FmaRdYZero), .ZZeroE(FmaRdZZero), + .XInfE(FmaRdXInf), .YInfE(FmaRdYInf), .ZInfE(FmaRdZInf), .FmaFmt(FmaFmtVal), + .X(FmaRdX), .Y(FmaRdY), .Z(FmaRdZ)); + readfmavectors readfmarnmvectors (.clk, .Frm(`RNM), .TestVector(FmaRnmVectors[VectorNum]), .VectorNum, .Ans(FmaRnmAns), .AnsFlags(FmaRnmAnsFlags), + .XSgnE(FmaRnmXSgn), .YSgnE(FmaRnmYSgn), .ZSgnE(FmaRnmZSgn), .FmaNum, .FmaModFmt, + .XExpE(FmaRnmXExp), .YExpE(FmaRnmYExp), .ZExpE(FmaRnmZExp), + .XManE(FmaRnmXMan), .YManE(FmaRnmYMan), .ZManE(FmaRnmZMan), .ZOrigDenormE(FmaRnmZOrigDenorm), + .XNaNE(FmaRnmXNaN), .YNaNE(FmaRnmYNaN), .ZNaNE(FmaRnmZNaN), + .XSNaNE(FmaRnmXSNaN), .YSNaNE(FmaRnmYSNaN), .ZSNaNE(FmaRnmZSNaN), + .XDenormE(FmaRnmXDenorm), .YDenormE(FmaRnmYDenorm), 
.ZDenormE(FmaRnmZDenorm), + .XZeroE(FmaRnmXZero), .YZeroE(FmaRnmYZero), .ZZeroE(FmaRnmZZero), + .XInfE(FmaRnmXInf), .YInfE(FmaRnmYInf), .ZInfE(FmaRnmZInf), .FmaFmt(FmaFmtVal), + .X(FmaRnmX), .Y(FmaRnmY), .Z(FmaRnmZ)); + readvectors readvectors (.clk, .Fmt(FmtVal), .ModFmt, .TestVector(TestVectors[VectorNum]), .VectorNum, .Ans(Ans), .AnsFlags(AnsFlags), .SrcA, + .XSgnE(XSgn), .YSgnE(YSgn), .ZSgnE(ZSgn), .Unit (UnitVal), + .XExpE(XExp), .YExpE(YExp), .ZExpE(ZExp), .TestNum, .OpCtrl(OpCtrlVal), + .XManE(XMan), .YManE(YMan), .ZManE(ZMan), .ZOrigDenormE(ZOrigDenorm), + .XNaNE(XNaN), .YNaNE(YNaN), .ZNaNE(ZNaN), + .XSNaNE(XSNaN), .YSNaNE(YSNaN), .ZSNaNE(ZSNaN), + .XDenormE(XDenorm), .YDenormE(YDenorm), .ZDenormE(ZDenorm), + .XZeroE(XZero), .YZeroE(YZero), .ZZeroE(ZZero), + .XInfE(XInf), .YInfE(YInf), .ZInfE(ZInf),.XNormE(XNorm), .XExpMaxE(XExpMax), + .X, .Y, .Z); + + + + /////////////////////////////////////////////////////////////////////////////////////////////// + + // ||||||| ||| ||| ||||||||| + // ||| ||| ||| ||| ||| + // ||| ||| ||| ||| ||| + // ||| ||| ||| ||| ||| + // ||||||| ||||||||| ||| + + /////////////////////////////////////////////////////////////////////////////////////////////// + + // instantiate devices under test + // - one fma for each precison + // - all the units for the other tests (including fma for add/sub/mul) + fma1 fma1rne(.XSgnE(FmaRneXSgn), .YSgnE(FmaRneYSgn), .ZSgnE(FmaRneZSgn), + .XExpE(FmaRneXExp), .YExpE(FmaRneYExp), .ZExpE(FmaRneZExp), + .XManE(FmaRneXMan), .YManE(FmaRneYMan), .ZManE(FmaRneZMan), + .XDenormE(FmaRneXDenorm), .YDenormE(FmaRneYDenorm), .ZDenormE(FmaRneZDenorm), + .XZeroE(FmaRneXZero), .YZeroE(FmaRneYZero), .ZZeroE(FmaRneZZero), + .FOpCtrlE(3'b0), .FmtE(FmaModFmt), .SumE(FmaRneSum), .NegSumE(FmaRneNegSum), .InvZE(FmaRneInvZ), + .NormCntE(FmaRneNormCnt), .ZSgnEffE(FmaRneZSgnEff), .PSgnE(FmaRnePSgn), + .ProdExpE(FmaRneProdExp), .AddendStickyE(FmaRneAddendSticky), .KillProdE(FmaRneSumKillProd)); + fma2 
fma2rne(.XSgnM(FmaRneXSgn), .YSgnM(FmaRneYSgn), + .ZExpM(FmaRneZExp), .ZOrigDenormM(FmaRneZOrigDenorm), + .XManM(FmaRneXMan), .YManM(FmaRneYMan), .ZManM(FmaRneZMan), + .XNaNM(FmaRneXNaN), .YNaNM(FmaRneYNaN), .ZNaNM(FmaRneZNaN), + .XZeroM(FmaRneXZero), .YZeroM(FmaRneYZero), .ZZeroM(FmaRneZZero), + .XInfM(FmaRneXInf), .YInfM(FmaRneYInf), .ZInfM(FmaRneZInf), + .XSNaNM(FmaRneXSNaN), .YSNaNM(FmaRneYSNaN), .ZSNaNM(FmaRneZSNaN), + .KillProdM(FmaRneSumKillProd), .AddendStickyM(FmaRneAddendSticky), .ProdExpM(FmaRneProdExp), + .SumM((FmaRneSum)), .NegSumM(FmaRneNegSum), .InvZM(FmaRneInvZ), .NormCntM(FmaRneNormCnt), .ZSgnEffM(FmaRneZSgnEff), + .PSgnM(FmaRnePSgn), .FmtM(FmaModFmt), .FrmM(`RNE), + .FMAFlgM(FmaRneResFlags), .FMAResM(FmaRneRes), .Mult(1'b0)); + fma1 fma1rz(.XSgnE(FmaRzXSgn), .YSgnE(FmaRzYSgn), .ZSgnE(FmaRzZSgn), + .XExpE(FmaRzXExp), .YExpE(FmaRzYExp), .ZExpE(FmaRzZExp), + .XManE(FmaRzXMan), .YManE(FmaRzYMan), .ZManE(FmaRzZMan), + .XDenormE(FmaRzXDenorm), .YDenormE(FmaRzYDenorm), .ZDenormE(FmaRzZDenorm), + .XZeroE(FmaRzXZero), .YZeroE(FmaRzYZero), .ZZeroE(FmaRzZZero), + .FOpCtrlE(3'b0), .FmtE(FmaModFmt), .SumE(FmaRzSum), .NegSumE(FmaRzNegSum), .InvZE(FmaRzInvZ), + .NormCntE(FmaRzNormCnt), .ZSgnEffE(FmaRzZSgnEff), .PSgnE(FmaRzPSgn), + .ProdExpE(FmaRzProdExp), .AddendStickyE(FmaRzAddendSticky), .KillProdE(FmaRzSumKillProd)); + fma2 fma2rz(.XSgnM(FmaRzXSgn), .YSgnM(FmaRzYSgn), + .ZExpM(FmaRzZExp), .ZOrigDenormM(FmaRzZOrigDenorm), + .XManM(FmaRzXMan), .YManM(FmaRzYMan), .ZManM(FmaRzZMan), + .XNaNM(FmaRzXNaN), .YNaNM(FmaRzYNaN), .ZNaNM(FmaRzZNaN), + .XZeroM(FmaRzXZero), .YZeroM(FmaRzYZero), .ZZeroM(FmaRzZZero), + .XInfM(FmaRzXInf), .YInfM(FmaRzYInf), .ZInfM(FmaRzZInf), + .XSNaNM(FmaRzXSNaN), .YSNaNM(FmaRzYSNaN), .ZSNaNM(FmaRzZSNaN), + .KillProdM(FmaRzSumKillProd), .AddendStickyM(FmaRzAddendSticky), .ProdExpM(FmaRzProdExp), + .SumM((FmaRzSum)), .NegSumM(FmaRzNegSum), .InvZM(FmaRzInvZ), .NormCntM(FmaRzNormCnt), .ZSgnEffM(FmaRzZSgnEff), + .PSgnM(FmaRzPSgn), 
.FmtM(FmaModFmt), .FrmM(`RZ), + .FMAFlgM(FmaRzResFlags), .FMAResM(FmaRzRes), .Mult(1'b0)); + fma1 fma1ru(.XSgnE(FmaRuXSgn), .YSgnE(FmaRuYSgn), .ZSgnE(FmaRuZSgn), + .XExpE(FmaRuXExp), .YExpE(FmaRuYExp), .ZExpE(FmaRuZExp), + .XManE(FmaRuXMan), .YManE(FmaRuYMan), .ZManE(FmaRuZMan), + .XDenormE(FmaRuXDenorm), .YDenormE(FmaRuYDenorm), .ZDenormE(FmaRuZDenorm), + .XZeroE(FmaRuXZero), .YZeroE(FmaRuYZero), .ZZeroE(FmaRuZZero), + .FOpCtrlE(3'b0), .FmtE(FmaModFmt), .SumE(FmaRuSum), .NegSumE(FmaRuNegSum), .InvZE(FmaRuInvZ), + .NormCntE(FmaRuNormCnt), .ZSgnEffE(FmaRuZSgnEff), .PSgnE(FmaRuPSgn), + .ProdExpE(FmaRuProdExp), .AddendStickyE(FmaRuAddendSticky), .KillProdE(FmaRuSumKillProd)); + fma2 fma2ru(.XSgnM(FmaRuXSgn), .YSgnM(FmaRuYSgn), + .ZExpM(FmaRuZExp), .ZOrigDenormM(FmaRuZOrigDenorm), + .XManM(FmaRuXMan), .YManM(FmaRuYMan), .ZManM(FmaRuZMan), + .XNaNM(FmaRuXNaN), .YNaNM(FmaRuYNaN), .ZNaNM(FmaRuZNaN), + .XZeroM(FmaRuXZero), .YZeroM(FmaRuYZero), .ZZeroM(FmaRuZZero), + .XInfM(FmaRuXInf), .YInfM(FmaRuYInf), .ZInfM(FmaRuZInf), + .XSNaNM(FmaRuXSNaN), .YSNaNM(FmaRuYSNaN), .ZSNaNM(FmaRuZSNaN), + .KillProdM(FmaRuSumKillProd), .AddendStickyM(FmaRuAddendSticky), .ProdExpM(FmaRuProdExp), + .SumM((FmaRuSum)), .NegSumM(FmaRuNegSum), .InvZM(FmaRuInvZ), .NormCntM(FmaRuNormCnt), .ZSgnEffM(FmaRuZSgnEff), + .PSgnM(FmaRuPSgn), .FmtM(FmaModFmt), .FrmM(`RU), + .FMAFlgM(FmaRuResFlags), .FMAResM(FmaRuRes), .Mult(1'b0)); + fma1 fma1rd(.XSgnE(FmaRdXSgn), .YSgnE(FmaRdYSgn), .ZSgnE(FmaRdZSgn), + .XExpE(FmaRdXExp), .YExpE(FmaRdYExp), .ZExpE(FmaRdZExp), + .XManE(FmaRdXMan), .YManE(FmaRdYMan), .ZManE(FmaRdZMan), + .XDenormE(FmaRdXDenorm), .YDenormE(FmaRdYDenorm), .ZDenormE(FmaRdZDenorm), + .XZeroE(FmaRdXZero), .YZeroE(FmaRdYZero), .ZZeroE(FmaRdZZero), + .FOpCtrlE(3'b0), .FmtE(FmaModFmt), .SumE(FmaRdSum), .NegSumE(FmaRdNegSum), .InvZE(FmaRdInvZ), + .NormCntE(FmaRdNormCnt), .ZSgnEffE(FmaRdZSgnEff), .PSgnE(FmaRdPSgn), + .ProdExpE(FmaRdProdExp), .AddendStickyE(FmaRdAddendSticky), 
.KillProdE(FmaRdSumKillProd));
+  // second FMA stage for the round-down (RD) vectors: consumes the fma1rd outputs
+  fma2 fma2rd(.XSgnM(FmaRdXSgn), .YSgnM(FmaRdYSgn),
+              .ZExpM(FmaRdZExp), .ZOrigDenormM(FmaRdZOrigDenorm),
+              .XManM(FmaRdXMan), .YManM(FmaRdYMan), .ZManM(FmaRdZMan),
+              .XNaNM(FmaRdXNaN), .YNaNM(FmaRdYNaN), .ZNaNM(FmaRdZNaN),
+              .XZeroM(FmaRdXZero), .YZeroM(FmaRdYZero), .ZZeroM(FmaRdZZero),
+              .XInfM(FmaRdXInf), .YInfM(FmaRdYInf), .ZInfM(FmaRdZInf),
+              .XSNaNM(FmaRdXSNaN), .YSNaNM(FmaRdYSNaN), .ZSNaNM(FmaRdZSNaN),
+              .KillProdM(FmaRdSumKillProd), .AddendStickyM(FmaRdAddendSticky), .ProdExpM(FmaRdProdExp),
+              .SumM((FmaRdSum)), .NegSumM(FmaRdNegSum), .InvZM(FmaRdInvZ), .NormCntM(FmaRdNormCnt), .ZSgnEffM(FmaRdZSgnEff),
+              .PSgnM(FmaRdPSgn), .FmtM(FmaModFmt), .FrmM(`RD),
+              .FMAFlgM(FmaRdResFlags), .FMAResM(FmaRdRes), .Mult(1'b0));
+  // first FMA stage for the RNM vectors (FOpCtrlE tied to 3'b0, like the other per-rounding-mode FMAs)
+  fma1 fma1rnm(.XSgnE(FmaRnmXSgn), .YSgnE(FmaRnmYSgn), .ZSgnE(FmaRnmZSgn),
+              .XExpE(FmaRnmXExp), .YExpE(FmaRnmYExp), .ZExpE(FmaRnmZExp),
+              .XManE(FmaRnmXMan), .YManE(FmaRnmYMan), .ZManE(FmaRnmZMan),
+              .XDenormE(FmaRnmXDenorm), .YDenormE(FmaRnmYDenorm), .ZDenormE(FmaRnmZDenorm),
+              .XZeroE(FmaRnmXZero), .YZeroE(FmaRnmYZero), .ZZeroE(FmaRnmZZero),
+              .FOpCtrlE(3'b0), .FmtE(FmaModFmt), .SumE(FmaRnmSum), .NegSumE(FmaRnmNegSum), .InvZE(FmaRnmInvZ),
+              .NormCntE(FmaRnmNormCnt), .ZSgnEffE(FmaRnmZSgnEff), .PSgnE(FmaRnmPSgn),
+              .ProdExpE(FmaRnmProdExp), .AddendStickyE(FmaRnmAddendSticky), .KillProdE(FmaRnmSumKillProd));
+  // second FMA stage for the RNM vectors
+  //    - ZOrigDenormM is connected to FmaRnmZOrigDenorm, the net driven by
+  //      readfmarnmvectors; the previous name "FmaRmeZOrigDenorm" matched no
+  //      other signal in this testbench and left the port without a driver
+  fma2 fma2rnm(.XSgnM(FmaRnmXSgn), .YSgnM(FmaRnmYSgn),
+              .ZExpM(FmaRnmZExp), .ZOrigDenormM(FmaRnmZOrigDenorm),
+              .XManM(FmaRnmXMan), .YManM(FmaRnmYMan), .ZManM(FmaRnmZMan),
+              .XNaNM(FmaRnmXNaN), .YNaNM(FmaRnmYNaN), .ZNaNM(FmaRnmZNaN),
+              .XZeroM(FmaRnmXZero), .YZeroM(FmaRnmYZero), .ZZeroM(FmaRnmZZero),
+              .XInfM(FmaRnmXInf), .YInfM(FmaRnmYInf), .ZInfM(FmaRnmZInf),
+              .XSNaNM(FmaRnmXSNaN), .YSNaNM(FmaRnmYSNaN), .ZSNaNM(FmaRnmZSNaN),
+              .KillProdM(FmaRnmSumKillProd), .AddendStickyM(FmaRnmAddendSticky), .ProdExpM(FmaRnmProdExp),
+              .SumM((FmaRnmSum)), .NegSumM(FmaRnmNegSum), .InvZM(FmaRnmInvZ), .NormCntM(FmaRnmNormCnt),
.ZSgnEffM(FmaRnmZSgnEff), + .PSgnM(FmaRnmPSgn), .FmtM(FmaModFmt), .FrmM(`RNM), + .FMAFlgM(FmaRnmResFlags), .FMAResM(FmaRnmRes), .Mult(1'b0)); + fma1 fma1(.XSgnE(XSgn), .YSgnE(YSgn), .ZSgnE(ZSgn), + .XExpE(XExp), .YExpE(YExp), .ZExpE(ZExp), + .XManE(XMan), .YManE(YMan), .ZManE(ZMan), + .XDenormE(XDenorm), .YDenormE(YDenorm), .ZDenormE(ZDenorm), + .XZeroE(XZero), .YZeroE(YZero), .ZZeroE(ZZero), + .FOpCtrlE(OpCtrlVal), .FmtE(ModFmt), .SumE, .NegSumE, .InvZE, .NormCntE, .ZSgnEffE, .PSgnE, + .ProdExpE, .AddendStickyE, .KillProdE); + fma2 fma2(.XSgnM(XSgn), .YSgnM(YSgn), + .ZExpM(ZExp), .ZOrigDenormM(ZOrigDenorm), + .XManM(XMan), .YManM(YMan), .ZManM(ZMan), + .XNaNM(XNaN), .YNaNM(YNaN), .ZNaNM(ZNaN), + .XZeroM(XZero), .YZeroM(YZero), .ZZeroM(ZZero), + .XInfM(XInf), .YInfM(YInf), .ZInfM(ZInf), + .XSNaNM(XSNaN), .YSNaNM(YSNaN), .ZSNaNM(ZSNaN), + .KillProdM(KillProdE), .AddendStickyM(AddendStickyE), .ProdExpM(ProdExpE), + .SumM(SumE), .NegSumM(NegSumE), .InvZM(InvZE), .NormCntM(NormCntE), .ZSgnEffM(ZSgnEffE), .PSgnM(PSgnE), .FmtM(ModFmt), .FrmM(FrmVal), + .FMAFlgM, .FMAResM, .Mult); + // fcvtfp fcvtfp (.XExpE(XExp), .XManE(XMan), .XSgnE(XSgn), .XZeroE(XZero), .XDenormE(XDenorm), .XInfE(XInf), + // .XNaNE(XNaN), .XSNaNE(XSNaN), .FrmE(Frmal), .FmtE(ModFmt), .CvtFpResE, .CvtFpFlgE); + // fcmp fcmp (.FmtE(ModFmt), .FOpCtrlE, .XSgnE(XSgn), .YSgnE(YSgn), .XExpE(XExp), .YExpE(YExp), + // .XManE(XMan), .YManE(YMan), .XZeroE(XZero), .YZeroE(YZero), + // .XNaNE(XNaN), .YNaNE(YNaN), .XSNaNE(XSNaN), .YSNaNE(YSNaN), .FSrcXE(X), .FSrcYE(Y), .CmpNVE, .CmpResE); + // fcvtint fcvtint (.XSgnE(XSgn), .XExpE(XExp), .XManE(XMan), .XZeroE(XZero), .XNaNE(XNaN), .XInfE(XInf), + // .XDenormE(XDenorm), .ForwardedSrcAE(SrcA), .FOpCtrlE, .FmtE(ModFmt), .FrmE(Frmal), + // .CvtResE, .CvtFlgE); + // *** integrade divide and squareroot + // fpdiv_pipe fdivsqrt (.op1(DivInput1E), .op2(DivInput2E), .rm(FrmVal[1:0]), .op_type(FOpCtrlQ), + // .reset, .clk(clk), .start(FDivStartE), .P(~FmtQ), .OvEn(1'b1), 
.UnEn(1'b1), + // .XNaNQ, .YNaNQ, .XInfQ, .YInfQ, .XZeroQ, .YZeroQ, .load_preload, + // .FDivBusyE, .done(FDivSqrtDoneE), .AS_Res(FDivResM), .Flags(FDivFlgM)); + + // produce clock + always begin + clk = 1; #5; clk = 0; #5; + end + +/////////////////////////////////////////////////////////////////////////////////////////////// + +// ||||| ||| |||||||||| ||||| ||| +// ||||||| ||| ||| ||| ||||||| ||| +// |||| ||| ||| |||||||||| |||| ||| ||| +// |||| ||| ||| ||| ||| |||| ||| ||| +// |||| ||| ||| ||| ||| |||| ||| ||| +// |||| |||||| ||| ||| |||| |||||| + +/////////////////////////////////////////////////////////////////////////////////////////////// + + //Check if answer is a NaN + always_comb begin + case (FmaFmtVal) + 4'b11: begin // quad + FmaRneAnsNaN = &FmaRneAns[`Q_LEN-2:`Q_NF]&(|FmaRneAns[`Q_NF-1:0]); + FmaRneResNaN = &FmaRneRes[`Q_LEN-2:`Q_NF]&(|FmaRneRes[`Q_NF-1:0]); + FmaRzAnsNaN = &FmaRzAns[`Q_LEN-2:`Q_NF]&(|FmaRzAns[`Q_NF-1:0]); + FmaRzResNaN = &FmaRzRes[`Q_LEN-2:`Q_NF]&(|FmaRzRes[`Q_NF-1:0]); + FmaRuAnsNaN = &FmaRuAns[`Q_LEN-2:`Q_NF]&(|FmaRuAns[`Q_NF-1:0]); + FmaRuResNaN = &FmaRuRes[`Q_LEN-2:`Q_NF]&(|FmaRuRes[`Q_NF-1:0]); + FmaRdAnsNaN = &FmaRdAns[`Q_LEN-2:`Q_NF]&(|FmaRdAns[`Q_NF-1:0]); + FmaRdResNaN = &FmaRdRes[`Q_LEN-2:`Q_NF]&(|FmaRdRes[`Q_NF-1:0]); + FmaRnmAnsNaN = &FmaRnmAns[`Q_LEN-2:`Q_NF]&(|FmaRnmAns[`Q_NF-1:0]); + FmaRnmResNaN = &FmaRnmRes[`Q_LEN-2:`Q_NF]&(|FmaRnmRes[`Q_NF-1:0]); + end + 4'b01: begin // double + FmaRneAnsNaN = &FmaRneAns[`D_LEN-2:`D_NF]&(|FmaRneAns[`D_NF-1:0]); + FmaRneResNaN = &FmaRneRes[`D_LEN-2:`D_NF]&(|FmaRneRes[`D_NF-1:0]); + FmaRzAnsNaN = &FmaRzAns[`D_LEN-2:`D_NF]&(|FmaRzAns[`D_NF-1:0]); + FmaRzResNaN = &FmaRzRes[`D_LEN-2:`D_NF]&(|FmaRzRes[`D_NF-1:0]); + FmaRuAnsNaN = &FmaRuAns[`D_LEN-2:`D_NF]&(|FmaRuAns[`D_NF-1:0]); + FmaRuResNaN = &FmaRuRes[`D_LEN-2:`D_NF]&(|FmaRuRes[`D_NF-1:0]); + FmaRdAnsNaN = &FmaRdAns[`D_LEN-2:`D_NF]&(|FmaRdAns[`D_NF-1:0]); + FmaRdResNaN = &FmaRdRes[`D_LEN-2:`D_NF]&(|FmaRdRes[`D_NF-1:0]); + FmaRnmAnsNaN 
= &FmaRnmAns[`D_LEN-2:`D_NF]&(|FmaRnmAns[`D_NF-1:0]); + FmaRnmResNaN = &FmaRnmRes[`D_LEN-2:`D_NF]&(|FmaRnmRes[`D_NF-1:0]); + end + 4'b00: begin // single + FmaRneAnsNaN = &FmaRneAns[`S_LEN-2:`S_NF]&(|FmaRneAns[`S_NF-1:0]); + FmaRneResNaN = &FmaRneRes[`S_LEN-2:`S_NF]&(|FmaRneRes[`S_NF-1:0]); + FmaRzAnsNaN = &FmaRzAns[`S_LEN-2:`S_NF]&(|FmaRzAns[`S_NF-1:0]); + FmaRzResNaN = &FmaRzRes[`S_LEN-2:`S_NF]&(|FmaRzRes[`S_NF-1:0]); + FmaRuAnsNaN = &FmaRuAns[`S_LEN-2:`S_NF]&(|FmaRuAns[`S_NF-1:0]); + FmaRuResNaN = &FmaRuRes[`S_LEN-2:`S_NF]&(|FmaRuRes[`S_NF-1:0]); + FmaRdAnsNaN = &FmaRdAns[`S_LEN-2:`S_NF]&(|FmaRdAns[`S_NF-1:0]); + FmaRdResNaN = &FmaRdRes[`S_LEN-2:`S_NF]&(|FmaRdRes[`S_NF-1:0]); + FmaRnmAnsNaN = &FmaRnmAns[`S_LEN-2:`S_NF]&(|FmaRnmAns[`S_NF-1:0]); + FmaRnmResNaN = &FmaRnmRes[`S_LEN-2:`S_NF]&(|FmaRnmRes[`S_NF-1:0]); + end + 4'b10: begin // half + FmaRneAnsNaN = &FmaRneAns[`H_LEN-2:`H_NF]&(|FmaRneAns[`H_NF-1:0]); + FmaRneResNaN = &FmaRneRes[`H_LEN-2:`H_NF]&(|FmaRneRes[`H_NF-1:0]); + FmaRzAnsNaN = &FmaRzAns[`H_LEN-2:`H_NF]&(|FmaRzAns[`H_NF-1:0]); + FmaRzResNaN = &FmaRzRes[`H_LEN-2:`H_NF]&(|FmaRzRes[`H_NF-1:0]); + FmaRuAnsNaN = &FmaRuAns[`H_LEN-2:`H_NF]&(|FmaRuAns[`H_NF-1:0]); + FmaRuResNaN = &FmaRuRes[`H_LEN-2:`H_NF]&(|FmaRuRes[`H_NF-1:0]); + FmaRdAnsNaN = &FmaRdAns[`H_LEN-2:`H_NF]&(|FmaRdAns[`H_NF-1:0]); + FmaRdResNaN = &FmaRdRes[`H_LEN-2:`H_NF]&(|FmaRdRes[`H_NF-1:0]); + FmaRnmAnsNaN = &FmaRnmAns[`H_LEN-2:`H_NF]&(|FmaRnmAns[`H_NF-1:0]); + FmaRnmResNaN = &FmaRnmRes[`H_LEN-2:`H_NF]&(|FmaRnmRes[`H_NF-1:0]); + end + endcase + end + always_comb begin //***need for other units??? 
+ if(UnitVal === `CVTINTUNIT | UnitVal === `CMPUNIT) begin + AnsNaN = 1'b0; + ResNaN = 1'b0; + end + else begin + case (FmtVal) + 4'b11: begin // quad + AnsNaN = &Ans[`FLEN-2:`NF]&(|Ans[`NF-1:0]); + ResNaN = &FMAResM[`FLEN-2:`NF]&(|FMAResM[`NF-1:0]); + end + 4'b01: begin // double + AnsNaN = &Ans[`LEN1-2:`NF1]&(|Ans[`NF1-1:0]); + ResNaN = &FMAResM[`LEN1-2:`NF1]&(|FMAResM[`NF1-1:0]); + end + 4'b00: begin // single + AnsNaN = &Ans[`LEN2-2:`NF2]&(|Ans[`NF2-1:0]); + ResNaN = &FMAResM[`LEN2-2:`NF2]&(|FMAResM[`NF2-1:0]); + end + 4'b10: begin // half + AnsNaN = &Ans[`H_LEN-2:`H_NF]&(|Ans[`H_NF-1:0]); + ResNaN = &FMAResM[`H_LEN-2:`H_NF]&(|FMAResM[`H_NF-1:0]); + end + endcase + end + end + + // check results on falling edge of clk + always @(negedge clk) begin + case (UnitVal) + `FMAUNIT: Res = FMAResM; + `DIVUNIT: Res = DivResM; + `CMPUNIT: Res = CmpResE; + `CVTINTUNIT: Res = CvtResE; + `CVTFPUNIT: Res = CvtFpResE; + endcase + case (UnitVal) + `FMAUNIT: ResFlags = FMAFlgM; + `DIVUNIT: ResFlags = DivFlgM; + `CMPUNIT: ResFlags = CmpFlgM; + `CVTINTUNIT: ResFlags = CvtIntFlgM; + `CVTFPUNIT: ResFlags = CvtFpFlgM; + endcase + + // check if the NaN value is good. 
IEEE754-2019 sections 6.3 and 6.2.3 specify: + // - the sign of the NaN does not matter for the opperations being tested + // - when 2 or more NaNs are inputed the NaN that is propigated doesn't matter + case (FmaFmtVal) + 4'b11: FmaRneNaNGood =((FmaRneAnsFlags[4]&(FmaRneRes[`Q_LEN-2:0] === {{`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) | + (FmaRneXNaN&(FmaRneRes[`Q_LEN-2:0] === {FmaRneX[`Q_LEN-2:`Q_NF],1'b1,FmaRneX[`Q_NF-2:0]})) | + (FmaRneYNaN&(FmaRneRes[`Q_LEN-2:0] === {FmaRneY[`Q_LEN-2:`Q_NF],1'b1,FmaRneY[`Q_NF-2:0]})) | + (FmaRneZNaN&(FmaRneRes[`Q_LEN-2:0] === {FmaRneZ[`Q_LEN-2:`Q_NF],1'b1,FmaRneZ[`Q_NF-2:0]}))); + 4'b01: FmaRneNaNGood =((FmaRneAnsFlags[4]&(FmaRneRes[`D_LEN-2:0] === {{`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) | + (FmaRneXNaN&(FmaRneRes[`D_LEN-2:0] === {FmaRneX[`D_LEN-2:`D_NF],1'b1,FmaRneX[`D_NF-2:0]})) | + (FmaRneYNaN&(FmaRneRes[`D_LEN-2:0] === {FmaRneY[`D_LEN-2:`D_NF],1'b1,FmaRneY[`D_NF-2:0]})) | + (FmaRneZNaN&(FmaRneRes[`D_LEN-2:0] === {FmaRneZ[`D_LEN-2:`D_NF],1'b1,FmaRneZ[`D_NF-2:0]}))); + 4'b00: FmaRneNaNGood =((FmaRneAnsFlags[4]&(FmaRneRes[`S_LEN-2:0] === {{`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) | + (FmaRneXNaN&(FmaRneRes[`S_LEN-2:0] === {FmaRneX[`S_LEN-2:`S_NF],1'b1,FmaRneX[`S_NF-2:0]})) | + (FmaRneYNaN&(FmaRneRes[`S_LEN-2:0] === {FmaRneY[`S_LEN-2:`S_NF],1'b1,FmaRneY[`S_NF-2:0]})) | + (FmaRneZNaN&(FmaRneRes[`S_LEN-2:0] === {FmaRneZ[`S_LEN-2:`S_NF],1'b1,FmaRneZ[`S_NF-2:0]}))); + 4'b10: FmaRneNaNGood =((FmaRneAnsFlags[4]&(FmaRneRes[`H_LEN-2:0] === {{`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) | + (FmaRneXNaN&(FmaRneRes[`H_LEN-2:0] === {FmaRneX[`H_LEN-2:`H_NF],1'b1,FmaRneX[`H_NF-2:0]})) | + (FmaRneYNaN&(FmaRneRes[`H_LEN-2:0] === {FmaRneY[`H_LEN-2:`H_NF],1'b1,FmaRneY[`H_NF-2:0]})) | + (FmaRneZNaN&(FmaRneRes[`H_LEN-2:0] === {FmaRneZ[`H_LEN-2:`H_NF],1'b1,FmaRneZ[`H_NF-2:0]}))); + endcase + case (FmaFmtVal) + 4'b11: FmaRzNaNGood = ((FmaRzAnsFlags[4]&(FmaRzRes[`Q_LEN-2:0] === {{`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) | + (FmaRzXNaN&(FmaRzRes[`Q_LEN-2:0] === 
{FmaRzX[`Q_LEN-2:`Q_NF],1'b1,FmaRzX[`Q_NF-2:0]})) |
+                      (FmaRzYNaN&(FmaRzRes[`Q_LEN-2:0] === {FmaRzY[`Q_LEN-2:`Q_NF],1'b1,FmaRzY[`Q_NF-2:0]})) |
+                      (FmaRzZNaN&(FmaRzRes[`Q_LEN-2:0] === {FmaRzZ[`Q_LEN-2:`Q_NF],1'b1,FmaRzZ[`Q_NF-2:0]})));
+      4'b01: FmaRzNaNGood = ((FmaRzAnsFlags[4]&(FmaRzRes[`D_LEN-2:0] === {{`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) |
+                      (FmaRzXNaN&(FmaRzRes[`D_LEN-2:0] === {FmaRzX[`D_LEN-2:`D_NF],1'b1,FmaRzX[`D_NF-2:0]})) |
+                      (FmaRzYNaN&(FmaRzRes[`D_LEN-2:0] === {FmaRzY[`D_LEN-2:`D_NF],1'b1,FmaRzY[`D_NF-2:0]})) |
+                      (FmaRzZNaN&(FmaRzRes[`D_LEN-2:0] === {FmaRzZ[`D_LEN-2:`D_NF],1'b1,FmaRzZ[`D_NF-2:0]})));
+      4'b00: FmaRzNaNGood = ((FmaRzAnsFlags[4]&(FmaRzRes[`S_LEN-2:0] === {{`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) |
+                      (FmaRzXNaN&(FmaRzRes[`S_LEN-2:0] === {FmaRzX[`S_LEN-2:`S_NF],1'b1,FmaRzX[`S_NF-2:0]})) |
+                      (FmaRzYNaN&(FmaRzRes[`S_LEN-2:0] === {FmaRzY[`S_LEN-2:`S_NF],1'b1,FmaRzY[`S_NF-2:0]})) |
+                      (FmaRzZNaN&(FmaRzRes[`S_LEN-2:0] === {FmaRzZ[`S_LEN-2:`S_NF],1'b1,FmaRzZ[`S_NF-2:0]})));
+      4'b10: FmaRzNaNGood = ((FmaRzAnsFlags[4]&(FmaRzRes[`H_LEN-2:0] === {{`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) |
+                      (FmaRzXNaN&(FmaRzRes[`H_LEN-2:0] === {FmaRzX[`H_LEN-2:`H_NF],1'b1,FmaRzX[`H_NF-2:0]})) |
+                      (FmaRzYNaN&(FmaRzRes[`H_LEN-2:0] === {FmaRzY[`H_LEN-2:`H_NF],1'b1,FmaRzY[`H_NF-2:0]})) |
+                      (FmaRzZNaN&(FmaRzRes[`H_LEN-2:0] === {FmaRzZ[`H_LEN-2:`H_NF],1'b1,FmaRzZ[`H_NF-2:0]})));
+    endcase
+    // RU vectors: a NaN result is accepted (sign bit excluded) when either
+    //    - expected flag bit 4 is set and the result is all-ones exponent plus only the top fraction bit, or
+    //    - an input is a NaN and the result equals that input with its top fraction bit forced to 1
+    // Every format uses the same four terms as the other rounding modes. The double arm
+    // used to carry a fifth term comparing the `Q_LEN-wide result with all-ones exponent
+    // and an all-zero fraction (an infinity encoding, not a NaN); it has been removed so
+    // that RU is checked as strictly as RNE/RZ/RD/RNM.
+    case (FmaFmtVal)
+      4'b11: FmaRuNaNGood = ((FmaRuAnsFlags[4]&(FmaRuRes[`Q_LEN-2:0] === {{`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
+                      (FmaRuXNaN&(FmaRuRes[`Q_LEN-2:0] === {FmaRuX[`Q_LEN-2:`Q_NF],1'b1,FmaRuX[`Q_NF-2:0]})) |
+                      (FmaRuYNaN&(FmaRuRes[`Q_LEN-2:0] === {FmaRuY[`Q_LEN-2:`Q_NF],1'b1,FmaRuY[`Q_NF-2:0]})) |
+                      (FmaRuZNaN&(FmaRuRes[`Q_LEN-2:0] === {FmaRuZ[`Q_LEN-2:`Q_NF],1'b1,FmaRuZ[`Q_NF-2:0]})));
+      4'b01: FmaRuNaNGood = ((FmaRuAnsFlags[4]&(FmaRuRes[`D_LEN-2:0] === {{`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) |
+
(FmaRuXNaN&(FmaRuRes[`D_LEN-2:0] === {FmaRuX[`D_LEN-2:`D_NF],1'b1,FmaRuX[`D_NF-2:0]})) | + (FmaRuYNaN&(FmaRuRes[`D_LEN-2:0] === {FmaRuY[`D_LEN-2:`D_NF],1'b1,FmaRuY[`D_NF-2:0]})) | + (FmaRuZNaN&(FmaRuRes[`D_LEN-2:0] === {FmaRuZ[`D_LEN-2:`D_NF],1'b1,FmaRuZ[`D_NF-2:0]}))); + 4'b00: FmaRuNaNGood = ((FmaRuAnsFlags[4]&(FmaRuRes[`S_LEN-2:0] === {{`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) | + (FmaRuXNaN&(FmaRuRes[`S_LEN-2:0] === {FmaRuX[`S_LEN-2:`S_NF],1'b1,FmaRuX[`S_NF-2:0]})) | + (FmaRuYNaN&(FmaRuRes[`S_LEN-2:0] === {FmaRuY[`S_LEN-2:`S_NF],1'b1,FmaRuY[`S_NF-2:0]})) | + (FmaRuZNaN&(FmaRuRes[`S_LEN-2:0] === {FmaRuZ[`S_LEN-2:`S_NF],1'b1,FmaRuZ[`S_NF-2:0]}))); + 4'b10: FmaRuNaNGood = ((FmaRuAnsFlags[4]&(FmaRuRes[`H_LEN-2:0] === {{`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) | + (FmaRuXNaN&(FmaRuRes[`H_LEN-2:0] === {FmaRuX[`H_LEN-2:`H_NF],1'b1,FmaRuX[`H_NF-2:0]})) | + (FmaRuYNaN&(FmaRuRes[`H_LEN-2:0] === {FmaRuY[`H_LEN-2:`H_NF],1'b1,FmaRuY[`H_NF-2:0]})) | + (FmaRuZNaN&(FmaRuRes[`H_LEN-2:0] === {FmaRuZ[`H_LEN-2:`H_NF],1'b1,FmaRuZ[`H_NF-2:0]}))); + endcase + case (FmaFmtVal) + 4'b11: FmaRdNaNGood = ((FmaRdAnsFlags[4]&(FmaRdRes[`Q_LEN-2:0] === {{`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) | + (FmaRdXNaN&(FmaRdRes[`Q_LEN-2:0] === {FmaRdX[`Q_LEN-2:`Q_NF],1'b1,FmaRdX[`Q_NF-2:0]})) | + (FmaRdYNaN&(FmaRdRes[`Q_LEN-2:0] === {FmaRdY[`Q_LEN-2:`Q_NF],1'b1,FmaRdY[`Q_NF-2:0]})) | + (FmaRdZNaN&(FmaRdRes[`Q_LEN-2:0] === {FmaRdZ[`Q_LEN-2:`Q_NF],1'b1,FmaRdZ[`Q_NF-2:0]}))); + 4'b01: FmaRdNaNGood = ((FmaRdAnsFlags[4]&(FmaRdRes[`D_LEN-2:0] === {{`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) | + (FmaRdXNaN&(FmaRdRes[`D_LEN-2:0] === {FmaRdX[`D_LEN-2:`D_NF],1'b1,FmaRdX[`D_NF-2:0]})) | + (FmaRdYNaN&(FmaRdRes[`D_LEN-2:0] === {FmaRdY[`D_LEN-2:`D_NF],1'b1,FmaRdY[`D_NF-2:0]})) | + (FmaRdZNaN&(FmaRdRes[`D_LEN-2:0] === {FmaRdZ[`D_LEN-2:`D_NF],1'b1,FmaRdZ[`D_NF-2:0]}))); + 4'b00: FmaRdNaNGood = ((FmaRdAnsFlags[4]&(FmaRdRes[`S_LEN-2:0] === {{`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) | + (FmaRdXNaN&(FmaRdRes[`S_LEN-2:0] === 
{FmaRdX[`S_LEN-2:`S_NF],1'b1,FmaRdX[`S_NF-2:0]})) | + (FmaRdYNaN&(FmaRdRes[`S_LEN-2:0] === {FmaRdY[`S_LEN-2:`S_NF],1'b1,FmaRdY[`S_NF-2:0]})) | + (FmaRdZNaN&(FmaRdRes[`S_LEN-2:0] === {FmaRdZ[`S_LEN-2:`S_NF],1'b1,FmaRdZ[`S_NF-2:0]}))); + 4'b10: FmaRdNaNGood = ((FmaRdAnsFlags[4]&(FmaRdRes[`H_LEN-2:0] === {{`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) | + (FmaRdXNaN&(FmaRdRes[`H_LEN-2:0] === {FmaRdX[`H_LEN-2:`H_NF],1'b1,FmaRdX[`H_NF-2:0]})) | + (FmaRdYNaN&(FmaRdRes[`H_LEN-2:0] === {FmaRdY[`H_LEN-2:`H_NF],1'b1,FmaRdY[`H_NF-2:0]})) | + (FmaRdZNaN&(FmaRdRes[`H_LEN-2:0] === {FmaRdZ[`H_LEN-2:`H_NF],1'b1,FmaRdZ[`H_NF-2:0]}))); + endcase + case (FmaFmtVal) + 4'b11: FmaRnmNaNGood =((FmaRnmAnsFlags[4]&(FmaRnmRes[`Q_LEN-2:0] === {{`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) | + (FmaRnmXNaN&(FmaRnmRes[`Q_LEN-2:0] === {FmaRnmX[`Q_LEN-2:`Q_NF],1'b1,FmaRnmX[`Q_NF-2:0]})) | + (FmaRnmYNaN&(FmaRnmRes[`Q_LEN-2:0] === {FmaRnmY[`Q_LEN-2:`Q_NF],1'b1,FmaRnmY[`Q_NF-2:0]})) | + (FmaRnmZNaN&(FmaRnmRes[`Q_LEN-2:0] === {FmaRnmZ[`Q_LEN-2:`Q_NF],1'b1,FmaRnmZ[`Q_NF-2:0]}))); + 4'b01: FmaRnmNaNGood =((FmaRnmAnsFlags[4]&(FmaRnmRes[`D_LEN-2:0] === {{`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) | + (FmaRnmXNaN&(FmaRnmRes[`D_LEN-2:0] === {FmaRnmX[`D_LEN-2:`D_NF],1'b1,FmaRnmX[`D_NF-2:0]})) | + (FmaRnmYNaN&(FmaRnmRes[`D_LEN-2:0] === {FmaRnmY[`D_LEN-2:`D_NF],1'b1,FmaRnmY[`D_NF-2:0]})) | + (FmaRnmZNaN&(FmaRnmRes[`D_LEN-2:0] === {FmaRnmZ[`D_LEN-2:`D_NF],1'b1,FmaRnmZ[`D_NF-2:0]}))); + 4'b00: FmaRnmNaNGood =((FmaRnmAnsFlags[4]&(FmaRnmRes[`S_LEN-2:0] === {{`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) | + (FmaRnmXNaN&(FmaRnmRes[`S_LEN-2:0] === {FmaRnmX[`S_LEN-2:`S_NF],1'b1,FmaRnmX[`S_NF-2:0]})) | + (FmaRnmYNaN&(FmaRnmRes[`S_LEN-2:0] === {FmaRnmY[`S_LEN-2:`S_NF],1'b1,FmaRnmY[`S_NF-2:0]})) | + (FmaRnmZNaN&(FmaRnmRes[`S_LEN-2:0] === {FmaRnmZ[`S_LEN-2:`S_NF],1'b1,FmaRnmZ[`S_NF-2:0]}))); + 4'b10: FmaRnmNaNGood =((FmaRnmAnsFlags[4]&(FmaRnmRes[`H_LEN-2:0] === {{`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) | + (FmaRnmXNaN&(FmaRnmRes[`H_LEN-2:0] === 
{FmaRnmX[`H_LEN-2:`H_NF],1'b1,FmaRnmX[`H_NF-2:0]})) | + (FmaRnmYNaN&(FmaRnmRes[`H_LEN-2:0] === {FmaRnmY[`H_LEN-2:`H_NF],1'b1,FmaRnmY[`H_NF-2:0]})) | + (FmaRnmZNaN&(FmaRnmRes[`H_LEN-2:0] === {FmaRnmZ[`H_LEN-2:`H_NF],1'b1,FmaRnmZ[`H_NF-2:0]}))); + endcase + if (UnitVal !== `CVTFPUNIT & UnitVal !== `CVTINTUNIT) + case (FmtVal) + 4'b11: NaNGood = ((AnsFlags[4]&(Res[`Q_LEN-2:0] === {{`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) | + (XNaN&(Res[`Q_LEN-2:0] === {X[`Q_LEN-2:`Q_NF],1'b1,X[`Q_NF-2:0]})) | + (YNaN&(Res[`Q_LEN-2:0] === {Y[`Q_LEN-2:`Q_NF],1'b1,Y[`Q_NF-2:0]})) | + (ZNaN&(Res[`Q_LEN-2:0] === {Z[`Q_LEN-2:`Q_NF],1'b1,Z[`Q_NF-2:0]}))); + 4'b01: NaNGood = ((AnsFlags[4]&(Res[`D_LEN-2:0] === {{`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) | + (XNaN&(Res[`D_LEN-2:0] === {X[`D_LEN-2:`D_NF],1'b1,X[`D_NF-2:0]})) | + (YNaN&(Res[`D_LEN-2:0] === {Y[`D_LEN-2:`D_NF],1'b1,Y[`D_NF-2:0]})) | + (ZNaN&(Res[`D_LEN-2:0] === {Z[`D_LEN-2:`D_NF],1'b1,Z[`D_NF-2:0]}))); + 4'b00: NaNGood = ((AnsFlags[4]&(Res[`S_LEN-2:0] === {{`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) | + (XNaN&(Res[`S_LEN-2:0] === {X[`S_LEN-2:`S_NF],1'b1,X[`S_NF-2:0]})) | + (YNaN&(Res[`S_LEN-2:0] === {Y[`S_LEN-2:`S_NF],1'b1,Y[`S_NF-2:0]})) | + (ZNaN&(Res[`S_LEN-2:0] === {Z[`S_LEN-2:`S_NF],1'b1,Z[`S_NF-2:0]}))); + 4'b10: NaNGood = ((AnsFlags[4]&(Res[`H_LEN-2:0] === {{`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) | + (XNaN&(Res[`H_LEN-2:0] === {X[`H_LEN-2:`H_NF],1'b1,X[`H_NF-2:0]})) | + (YNaN&(Res[`H_LEN-2:0] === {Y[`H_LEN-2:`H_NF],1'b1,Y[`H_NF-2:0]})) | + (ZNaN&(Res[`H_LEN-2:0] === {Z[`H_LEN-2:`H_NF],1'b1,Z[`H_NF-2:0]}))); + endcase + else if (UnitVal === `CVTFPUNIT) // if converting from floating point to floating point OpCtrl contains the final FP format + case (OpCtrlVal[1:0]) + 2'b11: NaNGood = ((AnsFlags[4]&(Res[`Q_LEN-2:0] === {{`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) | + (XNaN&(Res[`Q_LEN-2:0] === {X[`Q_LEN-2:`Q_NF],1'b1,X[`Q_NF-2:0]})) | + (YNaN&(Res[`Q_LEN-2:0] === {Y[`Q_LEN-2:`Q_NF],1'b1,Y[`Q_NF-2:0]}))); + 2'b01: NaNGood = ((AnsFlags[4]&(Res[`D_LEN-2:0] === 
{{`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) | + (XNaN&(Res[`D_LEN-2:0] === {X[`D_LEN-2:`D_NF],1'b1,X[`D_NF-2:0]})) | + (YNaN&(Res[`D_LEN-2:0] === {Y[`D_LEN-2:`D_NF],1'b1,Y[`D_NF-2:0]}))); + 2'b00: NaNGood = ((AnsFlags[4]&(Res[`S_LEN-2:0] === {{`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) | + (XNaN&(Res[`S_LEN-2:0] === {X[`S_LEN-2:`S_NF],1'b1,X[`S_NF-2:0]})) | + (YNaN&(Res[`S_LEN-2:0] === {Y[`S_LEN-2:`S_NF],1'b1,Y[`S_NF-2:0]}))); + 2'b10: NaNGood = ((AnsFlags[4]&(Res[`H_LEN-2:0] === {{`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) | + (XNaN&(Res[`H_LEN-2:0] === {X[`H_LEN-2:`H_NF],1'b1,X[`H_NF-2:0]})) | + (YNaN&(Res[`H_LEN-2:0] === {Y[`H_LEN-2:`H_NF],1'b1,Y[`H_NF-2:0]}))); + endcase + else NaNGood = 1'b0; + + + /////////////////////////////////////////////////////////////////////////////////////////////// + + // ||||||| ||| ||| ||||||| ||||||| ||| ||| + // ||| ||| ||| ||| ||| ||| ||| + // ||| |||||||||| ||||||| ||| |||||| + // ||| ||| ||| ||| ||| ||| ||| + // ||||||| ||| ||| ||||||| ||||||| ||| ||| + + /////////////////////////////////////////////////////////////////////////////////////////////// + + if(~((Res === Ans | NaNGood | NaNGood === 1'bx) & (ResFlags === AnsFlags | AnsFlags === 5'bx))) begin + errors += 1; + $display("There is an error in %s", Tests[TestNum]); + $display("inputs: %h %h %h\nSrcA: %h\n Res: %h %h\n Ans: %h %h", X, Y, Z, SrcA, Res, ResFlags, Ans, AnsFlags); + $stop; + end + if(~((FmaRneRes === FmaRneAns | FmaRneNaNGood | FmaRneNaNGood === 1'bx) & (FmaRneResFlags === FmaRneAnsFlags | FmaRneAnsFlags === 5'bx))) begin + errors += 1; + $display("There is an error in FMA - RNE"); + $display("inputs: %h %h %h\n Res: %h %h\n Ans: %h %h", FmaRneX, FmaRneY, FmaRneZ, FmaRneRes, FmaRneResFlags, FmaRneAns, FmaRneAnsFlags); + $stop; + end + if(~((FmaRzRes === FmaRzAns | FmaRzNaNGood | FmaRzNaNGood === 1'bx) & (FmaRzResFlags === FmaRzAnsFlags | FmaRzAnsFlags === 5'bx))) begin + errors += 1; + $display("There is an error in FMA - RZ"); + $display("inputs: %h %h %h\n Res: %h %h\n Ans: %h 
%h", FmaRzX, FmaRzY, FmaRzZ, FmaRzRes, FmaRzResFlags, FmaRzAns, FmaRzAnsFlags); + $stop; + end + if(~((FmaRuRes === FmaRuAns | FmaRuNaNGood | FmaRuNaNGood === 1'bx) & (FmaRuResFlags === FmaRuAnsFlags | FmaRuAnsFlags === 5'bx))) begin + errors += 1; + $display("There is an error in FMA - RU"); + $display("inputs: %h %h %h\n Res: %h %h\n Ans: %h %h", FmaRuX, FmaRuY, FmaRuZ, FmaRuRes, FmaRuResFlags, FmaRuAns, FmaRuAnsFlags); + $stop; + end + if(~((FmaRdRes === FmaRdAns | FmaRdNaNGood | FmaRdNaNGood === 1'bx) & (FmaRdResFlags === FmaRdAnsFlags | FmaRdAnsFlags === 5'bx))) begin + errors += 1; + $display("There is an error in FMA - RD"); + $display("inputs: %h %h %h\n Res: %h %h\n Ans: %h %h", FmaRdX, FmaRdY, FmaRdZ, FmaRdRes, FmaRdResFlags, FmaRdAns, FmaRdAnsFlags); + $stop; + end + if(~((FmaRnmRes === FmaRnmAns | FmaRnmNaNGood | FmaRnmNaNGood === 1'bx) & (FmaRnmResFlags === FmaRnmAnsFlags | FmaRnmAnsFlags === 5'bx))) begin + errors += 1; + $display("There is an error in FMA - RNM"); + $display("inputs: %h %h %h\n Res: %h %h\n Ans: %h %h", FmaRnmX, FmaRnmY, FmaRnmZ, FmaRnmRes, FmaRnmResFlags, FmaRnmAns, FmaRnmAnsFlags); + $stop; + end + VectorNum += 1; // increment test + if (TestVectors[VectorNum][0] === 1'bx & + FmaRneVectors[VectorNum][0] === 1'bx & + FmaRzVectors[VectorNum][0] === 1'bx & + FmaRuVectors[VectorNum][0] === 1'bx & + FmaRdVectors[VectorNum][0] === 1'bx & + FmaRnmVectors[VectorNum][0] === 1'bx) begin // if reached the end of file + if (errors) begin // if there were errors + $display("%s completed with %d Tests and %d errors", Tests[VectorNum], VectorNum, errors); + $stop; + end + + TestNum += 1; + // read next files + $readmemh({`PATH, Tests[TestNum]}, TestVectors); + $readmemh({`PATH, FmaRneTests[TestNum]}, FmaRneVectors); + $readmemh({`PATH, FmaRuTests[TestNum]}, FmaRuVectors); + $readmemh({`PATH, FmaRdTests[TestNum]}, FmaRdVectors); + $readmemh({`PATH, FmaRzTests[TestNum]}, FmaRzVectors); + $readmemh({`PATH, FmaRnmTests[TestNum]}, FmaRnmVectors); + 
FmaNum += 1; + VectorNum = 0; + if(FrmNum === 4) OpCtrlNum += 1; + if(FrmNum < 4) FrmNum += 1; + else FrmNum = 0; + // if no more Tests - finish + if(Tests[TestNum] === "" & + FmaRneTests[TestNum] === "" & + FmaRzTests[TestNum] === "" & + FmaRuTests[TestNum] === "" & + FmaRdTests[TestNum] === "" & + FmaRnmTests[TestNum] === "") begin + $display("\nAll Tests completed with %d errors\n", errors); + $stop; + end + + $display("Running %s vectors", Tests[TestNum]); + end + end +endmodule + + + + + + + + + + + + + +module readfmavectors ( + input logic clk, + input logic [2:0] Frm, + input logic [`FPSIZES/3:0] FmaModFmt, + input logic [1:0] FmaFmt, + input logic [`FLEN*4+7:0] TestVector, + input logic [31:0] VectorNum, + input logic [31:0] FmaNum, + output logic [`FLEN-1:0] Ans, + output logic ZOrigDenormE, + output logic [4:0] AnsFlags, + output logic XSgnE, YSgnE, ZSgnE, // sign bits of XYZ + output logic [`NE-1:0] XExpE, YExpE, ZExpE, // exponents of XYZ (converted to largest supported precision) + output logic [`NF:0] XManE, YManE, ZManE, // mantissas of XYZ (converted to largest supported precision) + output logic XNaNE, YNaNE, ZNaNE, // is XYZ a NaN + output logic XSNaNE, YSNaNE, ZSNaNE, // is XYZ a signaling NaN + output logic XDenormE, YDenormE, ZDenormE, // is XYZ denormalized + output logic XZeroE, YZeroE, ZZeroE, // is XYZ zero + output logic XInfE, YInfE, ZInfE, // is XYZ infinity + output logic [`FLEN-1:0] X, Y, Z +); + + logic XNormE, XExpMaxE; // signals the unpacker outputs but isn't used in FMA + // apply test vectors on rising edge of clk + // Format of vectors Inputs(1/2/3)_AnsFlags + always @(posedge clk) begin + #1; + AnsFlags = TestVector[4:0]; + case (FmaFmt) + 2'b11: begin // quad + X = TestVector[8+4*(`Q_LEN)-1:8+3*(`Q_LEN)]; + Y = TestVector[8+3*(`Q_LEN)-1:8+2*(`Q_LEN)]; + Z = TestVector[8+2*(`Q_LEN)-1:8+`Q_LEN]; + Ans = TestVector[8+(`Q_LEN-1):8]; + end + 2'b01: begin // double + X = {{`FLEN-`D_LEN{1'b1}}, 
TestVector[8+4*(`D_LEN)-1:8+3*(`D_LEN)]}; + Y = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+3*(`D_LEN)-1:8+2*(`D_LEN)]}; + Z = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+2*(`D_LEN)-1:8+`D_LEN]}; + Ans = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+(`D_LEN-1):8]}; + end + 2'b00: begin // single + X = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+4*(`S_LEN)-1:8+3*(`S_LEN)]}; + Y = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+3*(`S_LEN)-1:8+2*(`S_LEN)]}; + Z = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+2*(`S_LEN)-1:8+`S_LEN]}; + Ans = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+(`S_LEN-1):8]}; + end + 2'b10: begin // half + X = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+4*(`H_LEN)-1:8+3*(`H_LEN)]}; + Y = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+3*(`H_LEN)-1:8+2*(`H_LEN)]}; + Z = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+2*(`H_LEN)-1:8+`H_LEN]}; + Ans = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+(`H_LEN-1):8]}; + end + endcase + end + + unpack unpack(.X, .Y, .Z, .FmtE(FmaModFmt), .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, + .XManE, .YManE, .ZManE, .XNormE, .XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE, + .XDenormE, .YDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE, + .XExpMaxE, .ZOrigDenormE); +endmodule + + + + + + + + + + + + + + + + + +module readvectors ( + input logic clk, + input logic [`FLEN*4+7:0] TestVector, + input logic [`FPSIZES/3:0] ModFmt, + input logic [1:0] Fmt, + input logic [2:0] Unit, + input logic [31:0] VectorNum, + input logic [31:0] TestNum, + input logic [2:0] OpCtrl, + output logic [`FLEN-1:0] Ans, + output logic [`XLEN-1:0] SrcA, + output logic [4:0] AnsFlags, + output logic XSgnE, YSgnE, ZSgnE, // sign bits of XYZ + output logic [`NE-1:0] XExpE, YExpE, ZExpE, // exponents of XYZ (converted to largest supported precision) + output logic [`NF:0] XManE, YManE, ZManE, // mantissas of XYZ (converted to largest supported precision) + output logic XNaNE, YNaNE, ZNaNE, // is XYZ a NaN + output logic XSNaNE, YSNaNE, ZSNaNE, // is XYZ a signaling NaN + output logic XDenormE, YDenormE, ZDenormE, // is 
XYZ denormalized + output logic XZeroE, YZeroE, ZZeroE, // is XYZ zero + output logic XInfE, YInfE, ZInfE, // is XYZ infinity + output logic XNormE, XExpMaxE, + output logic ZOrigDenormE, + output logic [`FLEN-1:0] X, Y, Z +); + + // apply test vectors on rising edge of clk + // Format of vectors Inputs(1/2/3)_AnsFlags + always @(posedge clk) begin + #1; + AnsFlags = TestVector[4:0]; + case (Unit) + `FMAUNIT: + case (Fmt) + 2'b11: begin // quad + X = TestVector[8+3*(`Q_LEN)-1:8+2*(`Q_LEN)]; + if(OpCtrl === `MUL_OPCTRL) Y = TestVector[8+2*(`Q_LEN)-1:8+(`Q_LEN)]; else Y = {2'b0, {`Q_NE-1{1'b1}}, `Q_NF'h0}; + if(OpCtrl === `MUL_OPCTRL) Z = 0; else Z = TestVector[8+2*(`Q_LEN)-1:8+(`Q_LEN)]; + Ans = TestVector[8+(`Q_LEN-1):8]; + end + 2'b01: begin // double + X = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+3*(`D_LEN)-1:8+2*(`D_LEN)]}; + if(OpCtrl === `MUL_OPCTRL) Y = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+2*(`D_LEN)-1:8+(`D_LEN)]}; + else Y = {{`FLEN-`D_LEN{1'b1}}, 2'b0, {`D_NE-1{1'b1}}, `D_NF'h0}; + if(OpCtrl === `MUL_OPCTRL) Z = {{`FLEN-`D_LEN{1'b1}}, {`D_LEN{1'b0}}}; + else Z = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+2*(`D_LEN)-1:8+(`D_LEN)]}; + Ans = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+(`D_LEN-1):8]}; + end + 2'b00: begin // single + X = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+3*(`S_LEN)-1:8+2*(`S_LEN)]}; + if(OpCtrl === `MUL_OPCTRL) Y = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+2*(`S_LEN)-1:8+(`S_LEN)]}; + else Y = {{`FLEN-`S_LEN{1'b1}}, 2'b0, {`S_NE-1{1'b1}}, `S_NF'h0}; + if(OpCtrl === `MUL_OPCTRL) Z = {{`FLEN-`S_LEN{1'b1}}, {`S_LEN{1'b0}}}; + else Z = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+2*(`S_LEN)-1:8+(`S_LEN)]}; + Ans = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+(`S_LEN-1):8]}; + end + 2'b10: begin // half + X = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+3*(`H_LEN)-1:8+2*(`H_LEN)]}; + if(OpCtrl === `MUL_OPCTRL) Y = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+2*(`H_LEN)-1:8+(`H_LEN)]}; + else Y = {{`FLEN-`H_LEN{1'b1}}, 2'b0, {`H_NE-1{1'b1}}, `H_NF'h0}; + if(OpCtrl === `MUL_OPCTRL) Z = {{`FLEN-`H_LEN{1'b1}}, 
{`H_LEN{1'b0}}}; + else Z = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+2*(`H_LEN)-1:8+(`H_LEN)]}; + Ans = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+(`H_LEN-1):8]}; + end + endcase + `DIVUNIT: // vector fields from the top: X, Y, Ans, then the low byte holding the flags + case (Fmt) + 2'b11: begin // quad + X = TestVector[8+3*(`Q_LEN)-1:8+2*(`Q_LEN)]; + Y = TestVector[8+2*(`Q_LEN)-1:8+(`Q_LEN)]; + Ans = TestVector[8+(`Q_LEN-1):8]; + end + 2'b01: begin // double + X = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+3*(`D_LEN)-1:8+2*(`D_LEN)]}; + Y = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+2*(`D_LEN)-1:8+(`D_LEN)]}; + Ans = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+(`D_LEN-1):8]}; + end + 2'b00: begin // single + X = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+3*(`S_LEN)-1:8+2*(`S_LEN)]}; + Y = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+2*(`S_LEN)-1:8+1*(`S_LEN)]}; + Ans = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+(`S_LEN-1):8]}; + end + 2'b10: begin // half + X = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+3*(`H_LEN)-1:8+2*(`H_LEN)]}; + Y = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+2*(`H_LEN)-1:8+(`H_LEN)]}; + Ans = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+(`H_LEN-1):8]}; + end + endcase + `CMPUNIT: // vector fields from the top: X, Y, a single result bit at [8], then the flags + case (Fmt) + 2'b11: begin // quad; NOTE(review): every Y slice in this unit stops at bit 9, so it is one bit narrower than X - confirm against the cmp vector layout + X = TestVector[8+2*(`Q_LEN)-1:8+(`Q_LEN)]; + Y = TestVector[8+(`Q_LEN)-1:9]; + Ans = TestVector[8]; + end + 2'b01: begin // double + X = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+2*(`D_LEN)-1:8+(`D_LEN)]}; + Y = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+(`D_LEN)-1:9]}; + Ans = {{`FLEN-`D_LEN{1'b1}}, TestVector[8]}; + end + 2'b00: begin // single + X = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+2*(`S_LEN)-1:8+(`S_LEN)]}; + Y = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+(`S_LEN)-1:9]}; + Ans = {{`FLEN-`S_LEN{1'b1}}, TestVector[8]}; + end + 2'b10: begin // half: X is one H_LEN-wide field, sliced the same way as the other three formats + X = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+2*(`H_LEN)-1:8+(`H_LEN)]}; + Y = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+(`H_LEN)-1:9]}; + Ans = {{`FLEN-`H_LEN{1'b1}}, TestVector[8]}; + end + endcase + `CVTFPUNIT: + case (Fmt) + 2'b11: begin // quad + case (OpCtrl[1:0]) + 2'b11: begin // quad + X = {{`FLEN-`Q_LEN{1'b1}}, TestVector[8+`Q_LEN+`Q_LEN-1:8+(`Q_LEN)]}; + 
Ans = TestVector[8+(`Q_LEN-1):8]; + end + 2'b01: begin // double + X = {{`FLEN-`Q_LEN{1'b1}}, TestVector[8+`Q_LEN+`D_LEN-1:8+(`D_LEN)]}; + Ans = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+(`D_LEN-1):8]}; + end + 2'b00: begin // single + X = {{`FLEN-`Q_LEN{1'b1}}, TestVector[8+`Q_LEN+`S_LEN-1:8+(`S_LEN)]}; + Ans = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+(`S_LEN-1):8]}; + end + 2'b10: begin // half + X = {{`FLEN-`Q_LEN{1'b1}}, TestVector[8+`Q_LEN+`H_LEN-1:8+(`H_LEN)]}; + Ans = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+(`H_LEN-1):8]}; + end + endcase + end + 2'b01: begin // double + case (OpCtrl[1:0]) + 2'b11: begin // quad + X = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+`D_LEN+`Q_LEN-1:8+(`Q_LEN)]}; + Ans = TestVector[8+(`Q_LEN-1):8]; + end + 2'b01: begin // double + X = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+`D_LEN+`D_LEN-1:8+(`D_LEN)]}; + Ans = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+(`D_LEN-1):8]}; + end + 2'b00: begin // single + X = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+`D_LEN+`S_LEN-1:8+(`S_LEN)]}; + Ans = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+(`S_LEN-1):8]}; + end + 2'b10: begin // half + X = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+`D_LEN+`H_LEN-1:8+(`H_LEN)]}; + Ans = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+(`H_LEN-1):8]}; + end + endcase + end + 2'b00: begin // single + case (OpCtrl[1:0]) + 2'b11: begin // quad + X = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+`S_LEN+`Q_LEN-1:8+(`Q_LEN)]}; + Ans = TestVector[8+(`Q_LEN-1):8]; + end + 2'b01: begin // double + X = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+`S_LEN+`D_LEN-1:8+(`D_LEN)]}; + Ans = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+(`D_LEN-1):8]}; + end + 2'b00: begin // single + X = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+`S_LEN+`S_LEN-1:8+(`S_LEN)]}; + Ans = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+(`S_LEN-1):8]}; + end + 2'b10: begin // half + X = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+`S_LEN+`H_LEN-1:8+(`H_LEN)]}; + Ans = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+(`H_LEN-1):8]}; + end + endcase + end + 2'b10: begin // half + case (OpCtrl[1:0]) + 2'b11: begin // quad + X = 
{{`FLEN-`H_LEN{1'b1}}, TestVector[8+`H_LEN+`Q_LEN-1:8+(`Q_LEN)]}; + Ans = TestVector[8+(`Q_LEN-1):8]; + end + 2'b01: begin // double + X = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+`H_LEN+`D_LEN-1:8+(`D_LEN)]}; + Ans = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+(`D_LEN-1):8]}; + end + 2'b00: begin // single + X = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+`H_LEN+`S_LEN-1:8+(`S_LEN)]}; + Ans = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+(`S_LEN-1):8]}; + end + 2'b10: begin // half + X = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+`H_LEN+`H_LEN-1:8+(`H_LEN)]}; + Ans = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+(`H_LEN-1):8]}; + end + endcase + end + endcase + + `CVTINTUNIT: + case (Fmt) + 2'b11: begin // quad + // {is the integer a long, is the operation to an integer}; NOTE(review): the arms with OpCtrl[0]=1 load SrcA (the integer input), while the TO_*_OPCTRL defines in tests-fp.vh have bit 0 set - confirm which encoding the testbench passes in + casex ({OpCtrl[2], OpCtrl[0]}) + 2'b11: begin // long -> quad + SrcA = TestVector[8+`Q_LEN+`XLEN-1:8+(`Q_LEN)]; + Ans = TestVector[8+(`Q_LEN-1):8]; + end + 2'b01: begin // int -> quad + // correctly sign extend the integer depending on if it's a signed/unsigned test; NOTE(review): the replicated bits sit above an XLEN-wide slice and SrcA is XLEN bits wide, so they appear to be truncated away - confirm the 32-bit vector layout + SrcA = {{`XLEN-32{TestVector[8+`Q_LEN+`XLEN]&~OpCtrl[1]}}, TestVector[8+`Q_LEN+`XLEN-1:8+(`Q_LEN)]}; + Ans = TestVector[8+(`Q_LEN-1):8]; + end + 2'b10: begin // quad -> long + X = {{`FLEN-`Q_LEN{1'b1}}, TestVector[8+`XLEN+`Q_LEN-1:8+(`XLEN)]}; + Ans = {TestVector[8+(`XLEN-1):8]}; + end + 2'b00: begin // quad -> int + X = {{`FLEN-`Q_LEN{1'b1}}, TestVector[8+`XLEN+`Q_LEN-1:8+(`XLEN)]}; + Ans = {{`XLEN-32{TestVector[8+`XLEN]&~OpCtrl[1]}},TestVector[8+(`XLEN-1):8]}; + end + endcase + end + 2'b01: begin // double + // {is the integer a long, is the operation to an integer} + casex ({OpCtrl[2], OpCtrl[0]}) + 2'b11: begin // long -> double + SrcA = TestVector[8+`D_LEN+`XLEN-1:8+(`D_LEN)]; + Ans = TestVector[8+(`D_LEN-1):8]; + end + 2'b01: begin // int -> double + // correctly sign extend the integer depending on if it's a signed/unsigned test + SrcA = {{`XLEN-32{TestVector[8+`D_LEN+`XLEN]&~OpCtrl[1]}}, TestVector[8+`D_LEN+`XLEN-1:8+(`D_LEN)]}; + Ans = TestVector[8+(`D_LEN-1):8]; + 
end + 2'b10: begin // double -> long + X = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+`XLEN+`D_LEN-1:8+(`XLEN)]}; + Ans = {TestVector[8+(`XLEN-1):8]}; + end + 2'b00: begin // double -> int + X = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+`XLEN+`D_LEN-1:8+(`XLEN)]}; + Ans = {{`XLEN-32{TestVector[8+`XLEN]&~OpCtrl[1]}},TestVector[8+(`XLEN-1):8]}; + end + endcase + end + 2'b00: begin // single + // {is the integer a long, is the operation to an integer}; NOTE(review): the arms with OpCtrl[0]=1 load SrcA (the integer input), while the TO_*_OPCTRL defines in tests-fp.vh have bit 0 set - confirm which encoding the testbench passes in + casex ({OpCtrl[2], OpCtrl[0]}) + 2'b11: begin // long -> single + SrcA = TestVector[8+`S_LEN+`XLEN-1:8+(`S_LEN)]; + Ans = TestVector[8+(`S_LEN-1):8]; + end + 2'b01: begin // int -> single + // correctly sign extend the integer depending on if it's a signed/unsigned test; NOTE(review): the replicated bits sit above an XLEN-wide slice and SrcA is XLEN bits wide, so they appear to be truncated away - confirm the 32-bit vector layout + SrcA = {{`XLEN-32{TestVector[8+`S_LEN+`XLEN]&~OpCtrl[1]}}, TestVector[8+`S_LEN+`XLEN-1:8+(`S_LEN)]}; + Ans = TestVector[8+(`S_LEN-1):8]; + end + 2'b10: begin // single -> long + X = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+`XLEN+`S_LEN-1:8+(`XLEN)]}; + Ans = {TestVector[8+(`XLEN-1):8]}; + end + 2'b00: begin // single -> int + X = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+`XLEN+`S_LEN-1:8+(`XLEN)]}; + Ans = {{`XLEN-32{TestVector[8+`XLEN]&~OpCtrl[1]}},TestVector[8+(`XLEN-1):8]}; + end + endcase + end + 2'b10: begin // half + // {is the integer a long, is the operation to an integer} + casex ({OpCtrl[2], OpCtrl[0]}) + 2'b11: begin // long -> half + SrcA = TestVector[8+`H_LEN+`XLEN-1:8+(`H_LEN)]; + Ans = TestVector[8+(`H_LEN-1):8]; + end + 2'b01: begin // int -> half + // correctly sign extend the integer depending on if it's a signed/unsigned test + SrcA = {{`XLEN-32{TestVector[8+`H_LEN+`XLEN]&~OpCtrl[1]}}, TestVector[8+`H_LEN+`XLEN-1:8+(`H_LEN)]}; + Ans = TestVector[8+(`H_LEN-1):8]; + end + 2'b10: begin // half -> long + X = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+`XLEN+`H_LEN-1:8+(`XLEN)]}; + Ans = {TestVector[8+(`XLEN-1):8]}; + end + 2'b00: begin // half -> int + X = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+`XLEN+`H_LEN-1:8+(`XLEN)]}; + Ans = 
{{`XLEN-32{TestVector[8+`XLEN]&~OpCtrl[1]}}, TestVector[8+(`XLEN-1):8]}; + end + endcase + end + endcase + endcase + end + + unpack unpack(.X, .Y, .Z, .FmtE(ModFmt), .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, + .XManE, .YManE, .ZManE, .XNormE, .XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE, + .XDenormE, .YDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE, + .XExpMaxE, .ZOrigDenormE); +endmodule \ No newline at end of file diff --git a/pipelined/testbench/tests-fp.vh b/pipelined/testbench/tests-fp.vh new file mode 100644 index 000000000..d285454bb --- /dev/null +++ b/pipelined/testbench/tests-fp.vh @@ -0,0 +1,587 @@ +/////////////////////////////////////////// +// tests.vh +// +// Written: David_Harris@hmc.edu 7 October 2021 +// Modified: +// +// Purpose: List of tests to apply +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, +// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT +// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +/////////////////////////////////////////// + + +`define PATH "../../tests/fp/vectors/" +`define ADD_OPCTRL 3'b110 +`define MUL_OPCTRL 3'b100 +`define SUB_OPCTRL 3'b111 +`define FADD_OPCTRL 3'b000 +`define DIV_OPCTRL 3'b000 +`define SQRT_OPCTRL 3'b001 +`define LE_OPCTRL 3'b011 +`define LT_OPCTRL 3'b001 +`define EQ_OPCTRL 3'b010 +`define TO_UI_OPCTRL 3'b011 +`define TO_I_OPCTRL 3'b001 +`define TO_UL_OPCTRL 3'b111 +`define TO_L_OPCTRL 3'b101 +`define FROM_UI_OPCTRL 3'b010 +`define FROM_I_OPCTRL 3'b000 +`define FROM_UL_OPCTRL 3'b110 +`define FROM_L_OPCTRL 3'b100 +`define RNE 3'b000 +`define RZ 3'b001 +`define RU 3'b011 +`define RD 3'b010 +`define RNM 3'b100 +`define FMAUNIT 0 +`define DIVUNIT 1 +`define CVTINTUNIT 2 +`define CVTFPUNIT 3 +`define CMPUNIT 4 + +string f16rv32cvtint[] = '{ + "f16_to_i32_rne.tv", + "f16_to_i32_rz.tv", + "f16_to_i32_ru.tv", + "f16_to_i32_rd.tv", + "f16_to_i32_rnm.tv", + "f16_to_ui32_rne.tv", + "f16_to_ui32_rz.tv", + "f16_to_ui32_ru.tv", + "f16_to_ui32_rd.tv", + "f16_to_ui32_rnm.tv", + "ui32_to_f16_rne.tv", + "ui32_to_f16_rz.tv", + "ui32_to_f16_ru.tv", + "ui32_to_f16_rd.tv", + "ui32_to_f16_rnm.tv", + "i32_to_f16_rne.tv", + "i32_to_f16_rz.tv", + "i32_to_f16_ru.tv", + "i32_to_f16_rd.tv", + "i32_to_f16_rnm.tv" +}; + +string f16rv64cvtint[] = '{ + "f16_to_ui64_rne.tv", + "f16_to_ui64_rz.tv", + "f16_to_ui64_ru.tv", + "f16_to_ui64_rd.tv", + "f16_to_ui64_rnm.tv", + "f16_to_i64_rne.tv", + "f16_to_i64_rz.tv", + "f16_to_i64_ru.tv", + "f16_to_i64_rd.tv", + "f16_to_i64_rnm.tv", + "ui64_to_f16_rne.tv", + "ui64_to_f16_rz.tv", + "ui64_to_f16_ru.tv", + "ui64_to_f16_rd.tv", + "ui64_to_f16_rnm.tv", + "i64_to_f16_rne.tv", + "i64_to_f16_rz.tv", + "i64_to_f16_ru.tv", + "i64_to_f16_rd.tv", + 
"i64_to_f16_rnm.tv" +}; + +string f32rv32cvtint[] = '{ + "ui32_to_f32_rne.tv", + "ui32_to_f32_rz.tv", + "ui32_to_f32_ru.tv", + "ui32_to_f32_rd.tv", + "ui32_to_f32_rnm.tv", + "i32_to_f32_rne.tv", + "i32_to_f32_rz.tv", + "i32_to_f32_ru.tv", + "i32_to_f32_rd.tv", + "i32_to_f32_rnm.tv", + "f32_to_ui32_rne.tv", + "f32_to_ui32_rz.tv", + "f32_to_ui32_ru.tv", + "f32_to_ui32_rd.tv", + "f32_to_ui32_rnm.tv", + "f32_to_i32_rne.tv", + "f32_to_i32_rz.tv", + "f32_to_i32_ru.tv", + "f32_to_i32_rd.tv", + "f32_to_i32_rnm.tv" +}; + +string f32rv64cvtint[] = '{ + "ui64_to_f32_rne.tv", + "ui64_to_f32_rz.tv", + "ui64_to_f32_ru.tv", + "ui64_to_f32_rd.tv", + "ui64_to_f32_rnm.tv", + "i64_to_f32_rne.tv", + "i64_to_f32_rz.tv", + "i64_to_f32_ru.tv", + "i64_to_f32_rd.tv", + "i64_to_f32_rnm.tv", + "f32_to_ui64_rne.tv", + "f32_to_ui64_rz.tv", + "f32_to_ui64_ru.tv", + "f32_to_ui64_rd.tv", + "f32_to_ui64_rnm.tv", + "f32_to_i64_rne.tv", + "f32_to_i64_rz.tv", + "f32_to_i64_ru.tv", + "f32_to_i64_rd.tv", + "f32_to_i64_rnm.tv" +}; + + +string f64rv32cvtint[] = '{ + "ui32_to_f64_rne.tv", + "ui32_to_f64_rz.tv", + "ui32_to_f64_ru.tv", + "ui32_to_f64_rd.tv", + "ui32_to_f64_rnm.tv", + "i32_to_f64_rne.tv", + "i32_to_f64_rz.tv", + "i32_to_f64_ru.tv", + "i32_to_f64_rd.tv", + "i32_to_f64_rnm.tv", + "f64_to_ui32_rne.tv", + "f64_to_ui32_rz.tv", + "f64_to_ui32_ru.tv", + "f64_to_ui32_rd.tv", + "f64_to_ui32_rnm.tv", + "f64_to_i32_rne.tv", + "f64_to_i32_rz.tv", + "f64_to_i32_ru.tv", + "f64_to_i32_rd.tv", + "f64_to_i32_rnm.tv" +}; + +string f64rv64cvtint[] = '{ + "ui64_to_f64_rne.tv", + "ui64_to_f64_rz.tv", + "ui64_to_f64_ru.tv", + "ui64_to_f64_rd.tv", + "ui64_to_f64_rnm.tv", + "i64_to_f64_rne.tv", + "i64_to_f64_rz.tv", + "i64_to_f64_ru.tv", + "i64_to_f64_rd.tv", + "i64_to_f64_rnm.tv", + "f64_to_ui64_rne.tv", + "f64_to_ui64_rz.tv", + "f64_to_ui64_ru.tv", + "f64_to_ui64_rd.tv", + "f64_to_ui64_rnm.tv", + "f64_to_i64_rne.tv", + "f64_to_i64_rz.tv", + "f64_to_i64_ru.tv", + "f64_to_i64_rd.tv", + "f64_to_i64_rnm.tv" +}; + 
+string f128rv64cvtint[] = '{ + "ui64_to_f128_rne.tv", + "ui64_to_f128_rz.tv", + "ui64_to_f128_ru.tv", + "ui64_to_f128_rd.tv", + "ui64_to_f128_rnm.tv", + "i64_to_f128_rne.tv", + "i64_to_f128_rz.tv", + "i64_to_f128_ru.tv", + "i64_to_f128_rd.tv", + "i64_to_f128_rnm.tv", + "f128_to_ui64_rne.tv", + "f128_to_ui64_rz.tv", + "f128_to_ui64_ru.tv", + "f128_to_ui64_rd.tv", + "f128_to_ui64_rnm.tv", + "f128_to_i64_rne.tv", + "f128_to_i64_rz.tv", + "f128_to_i64_ru.tv", + "f128_to_i64_rd.tv", + "f128_to_i64_rnm.tv" +}; + +string f128rv32cvtint[] = '{ + "ui32_to_f128_rne.tv", + "ui32_to_f128_rz.tv", + "ui32_to_f128_ru.tv", + "ui32_to_f128_rd.tv", + "ui32_to_f128_rnm.tv", + "i32_to_f128_rne.tv", + "i32_to_f128_rz.tv", + "i32_to_f128_ru.tv", + "i32_to_f128_rd.tv", + "i32_to_f128_rnm.tv", + "f128_to_ui32_rne.tv", + "f128_to_ui32_rz.tv", + "f128_to_ui32_ru.tv", + "f128_to_ui32_rd.tv", + "f128_to_ui32_rnm.tv", + "f128_to_i32_rne.tv", + "f128_to_i32_rz.tv", + "f128_to_i32_ru.tv", + "f128_to_i32_rd.tv", + "f128_to_i32_rnm.tv" +}; + + +string f32f16cvt[] = '{ + "f32_to_f16_rne.tv", + "f32_to_f16_rz.tv", + "f32_to_f16_ru.tv", + "f32_to_f16_rd.tv", + "f32_to_f16_rnm.tv", + "f16_to_f32_rne.tv", + "f16_to_f32_rz.tv", + "f16_to_f32_ru.tv", + "f16_to_f32_rd.tv", + "f16_to_f32_rnm.tv" +}; + +string f64f16cvt[] = '{ + "f64_to_f16_rne.tv", + "f64_to_f16_rz.tv", + "f64_to_f16_ru.tv", + "f64_to_f16_rd.tv", + "f64_to_f16_rnm.tv", + "f16_to_f64_rne.tv", + "f16_to_f64_rz.tv", + "f16_to_f64_ru.tv", + "f16_to_f64_rd.tv", + "f16_to_f64_rnm.tv" +}; + +string f128f16cvt[] = '{ + "f128_to_f16_rne.tv", + "f128_to_f16_rz.tv", + "f128_to_f16_ru.tv", + "f128_to_f16_rd.tv", + "f128_to_f16_rnm.tv", + "f16_to_f128_rne.tv", + "f16_to_f128_rz.tv", + "f16_to_f128_ru.tv", + "f16_to_f128_rd.tv", + "f16_to_f128_rnm.tv" +}; + +string f64f32cvt[] = '{ + "f64_to_f32_rne.tv", + "f64_to_f32_rz.tv", + "f64_to_f32_ru.tv", + "f64_to_f32_rd.tv", + "f64_to_f32_rnm.tv", + "f32_to_f64_rne.tv", + "f32_to_f64_rz.tv", + 
"f32_to_f64_ru.tv", + "f32_to_f64_rd.tv", + "f32_to_f64_rnm.tv" +}; + + +string f128f32cvt[] = '{ + "f128_to_f32_rne.tv", + "f128_to_f32_rz.tv", + "f128_to_f32_ru.tv", + "f128_to_f32_rd.tv", + "f128_to_f32_rnm.tv", + "f32_to_f128_rne.tv", + "f32_to_f128_rz.tv", + "f32_to_f128_ru.tv", + "f32_to_f128_rd.tv", + "f32_to_f128_rnm.tv" +}; + + +string f128f64cvt[] = '{ + "f64_to_f128_rne.tv", + "f64_to_f128_rz.tv", + "f64_to_f128_ru.tv", + "f64_to_f128_rd.tv", + "f64_to_f128_rnm.tv", + "f128_to_f64_rne.tv", + "f128_to_f64_rz.tv", + "f128_to_f64_ru.tv", + "f128_to_f64_rd.tv", + "f128_to_f64_rnm.tv" +}; + +string f16add[] = '{ + "f16_add_rne.tv", + "f16_add_rz.tv", + "f16_add_ru.tv", + "f16_add_rd.tv", + "f16_add_rnm.tv" +}; + +string f32add[] = '{ + "f32_add_rne.tv", + "f32_add_rz.tv", + "f32_add_ru.tv", + "f32_add_rd.tv", + "f32_add_rnm.tv" +}; + +string f64add[] = '{ + "f64_add_rne.tv", + "f64_add_rz.tv", + "f64_add_ru.tv", + "f64_add_rd.tv", + "f64_add_rnm.tv" +}; + +string f128add[] = '{ + "f128_add_rne.tv", + "f128_add_rz.tv", + "f128_add_ru.tv", + "f128_add_rd.tv", + "f128_add_rnm.tv" +}; + +string f16sub[] = '{ + "f16_sub_rne.tv", + "f16_sub_rz.tv", + "f16_sub_ru.tv", + "f16_sub_rd.tv", + "f16_sub_rnm.tv" +}; + +string f32sub[] = '{ + "f32_sub_rne.tv", + "f32_sub_rz.tv", + "f32_sub_ru.tv", + "f32_sub_rd.tv", + "f32_sub_rnm.tv" +}; + +string f64sub[] = '{ + "f64_sub_rne.tv", + "f64_sub_rz.tv", + "f64_sub_ru.tv", + "f64_sub_rd.tv", + "f64_sub_rnm.tv" +}; + +string f128sub[] = '{ + "f128_sub_rne.tv", + "f128_sub_rz.tv", + "f128_sub_ru.tv", + "f128_sub_rd.tv", + "f128_sub_rnm.tv" +}; + +string f16mul[] = '{ + "f16_mul_rne.tv", + "f16_mul_rz.tv", + "f16_mul_ru.tv", + "f16_mul_rd.tv", + "f16_mul_rnm.tv" +}; + +string f32mul[] = '{ + "f32_mul_rne.tv", + "f32_mul_rz.tv", + "f32_mul_ru.tv", + "f32_mul_rd.tv", + "f32_mul_rnm.tv" +}; + +string f64mul[] = '{ + "f64_mul_rne.tv", + "f64_mul_rz.tv", + "f64_mul_ru.tv", + "f64_mul_rd.tv", + "f64_mul_rnm.tv" +}; + +string f128mul[] = 
'{ + "f128_mul_rne.tv", + "f128_mul_rz.tv", + "f128_mul_ru.tv", + "f128_mul_rd.tv", + "f128_mul_rnm.tv" +}; + +string f16div[] = '{ + "f16_div_rne.tv", + "f16_div_rz.tv", + "f16_div_ru.tv", + "f16_div_rd.tv", + "f16_div_rnm.tv" +}; + +string f32div[] = '{ + "f32_div_rne.tv", + "f32_div_rz.tv", + "f32_div_ru.tv", + "f32_div_rd.tv", + "f32_div_rnm.tv" +}; + +string f64div[] = '{ + "f64_div_rne.tv", + "f64_div_rz.tv", + "f64_div_ru.tv", + "f64_div_rd.tv", + "f64_div_rnm.tv" +}; + +string f128div[] = '{ + "f128_div_rne.tv", + "f128_div_rz.tv", + "f128_div_ru.tv", + "f128_div_rd.tv", + "f128_div_rnm.tv" +}; + +string f16sqrt[] = '{ + "f16_sqrt_rne.tv", + "f16_sqrt_rz.tv", + "f16_sqrt_ru.tv", + "f16_sqrt_rd.tv", + "f16_sqrt_rnm.tv" +}; + +string f32sqrt[] = '{ + "f32_sqrt_rne.tv", + "f32_sqrt_rz.tv", + "f32_sqrt_ru.tv", + "f32_sqrt_rd.tv", + "f32_sqrt_rnm.tv" +}; + +string f64sqrt[] = '{ + "f64_sqrt_rne.tv", + "f64_sqrt_rz.tv", + "f64_sqrt_ru.tv", + "f64_sqrt_rd.tv", + "f64_sqrt_rnm.tv" +}; + +string f128sqrt[] = '{ + "f128_sqrt_rne.tv", + "f128_sqrt_rz.tv", + "f128_sqrt_ru.tv", + "f128_sqrt_rd.tv", + "f128_sqrt_rnm.tv" +}; + +string f16cmp[] = '{ + "f16_eq_rne.tv", + "f16_eq_rz.tv", + "f16_eq_ru.tv", + "f16_eq_rd.tv", + "f16_eq_rnm.tv", + "f16_le_rne.tv", + "f16_le_rz.tv", + "f16_le_ru.tv", + "f16_le_rd.tv", + "f16_le_rnm.tv", + "f16_lt_rne.tv", + "f16_lt_rz.tv", + "f16_lt_ru.tv", + "f16_lt_rd.tv", + "f16_lt_rnm.tv" +}; + +string f32cmp[] = '{ + "f32_eq_rne.tv", + "f32_eq_rz.tv", + "f32_eq_ru.tv", + "f32_eq_rd.tv", + "f32_eq_rnm.tv", + "f32_le_rne.tv", + "f32_le_rz.tv", + "f32_le_ru.tv", + "f32_le_rd.tv", + "f32_le_rnm.tv", + "f32_lt_rne.tv", + "f32_lt_rz.tv", + "f32_lt_ru.tv", + "f32_lt_rd.tv", + "f32_lt_rnm.tv" +}; + +string f64cmp[] = '{ + "f64_eq_rne.tv", + "f64_eq_rz.tv", + "f64_eq_ru.tv", + "f64_eq_rd.tv", + "f64_eq_rnm.tv", + "f64_le_rne.tv", + "f64_le_rz.tv", + "f64_le_ru.tv", + "f64_le_rd.tv", + "f64_le_rnm.tv", + "f64_lt_rne.tv", + "f64_lt_rz.tv", + 
"f64_lt_ru.tv", + "f64_lt_rd.tv", + "f64_lt_rnm.tv" +}; + +string f128cmp[] = '{ + "f128_eq_rne.tv", + "f128_eq_rz.tv", + "f128_eq_ru.tv", + "f128_eq_rd.tv", + "f128_eq_rnm.tv", + "f128_le_rne.tv", + "f128_le_rz.tv", + "f128_le_ru.tv", + "f128_le_rd.tv", + "f128_le_rnm.tv", + "f128_lt_rne.tv", + "f128_lt_rz.tv", + "f128_lt_ru.tv", + "f128_lt_rd.tv", + "f128_lt_rnm.tv" +}; + +string f16fma[] = '{ + "f16_mulAdd_rne.tv", + "f16_mulAdd_rz.tv", + "f16_mulAdd_ru.tv", + "f16_mulAdd_rd.tv", + "f16_mulAdd_rnm.tv" +}; + +string f32fma[] = '{ + "f32_mulAdd_rne.tv", + "f32_mulAdd_rz.tv", + "f32_mulAdd_ru.tv", + "f32_mulAdd_rd.tv", + "f32_mulAdd_rnm.tv" +}; + +string f64fma[] = '{ + "f64_mulAdd_rne.tv", + "f64_mulAdd_rz.tv", + "f64_mulAdd_ru.tv", + "f64_mulAdd_rd.tv", + "f64_mulAdd_rnm.tv" +}; + +string f128fma[] = '{ + "f128_mulAdd_rne.tv", + "f128_mulAdd_rz.tv", + "f128_mulAdd_ru.tv", + "f128_mulAdd_rd.tv", + "f128_mulAdd_rnm.tv" +}; + + + diff --git a/synthDC/ppaAnalyze.py b/synthDC/ppaAnalyze.py index edad94dd4..56e8b2f83 100755 --- a/synthDC/ppaAnalyze.py +++ b/synthDC/ppaAnalyze.py @@ -1,6 +1,8 @@ #!/usr/bin/python3 from distutils.log import error +from statistics import median import subprocess +import statistics import csv import re import matplotlib.pyplot as plt @@ -32,13 +34,17 @@ def getData(): for i in range(len(linesCPL)): line = linesCPL[i] mwm = wm.findall(line)[0][4:-4].split('_') + freq = int(f.findall(line)[0][1:-4]) + delay = float(cpl.findall(line)[0]) + area = float(da.findall(linesDA[i])[0]) + mod = mwm[0] + width = int(mwm[1]) + power = p.findall(linesP[i]) - oneSynth = [mwm[0], int(mwm[1])] - oneSynth += [int(f.findall(line)[0][1:-4])] - oneSynth += [float(cpl.findall(line)[0])] - oneSynth += [float(da.findall(linesDA[i])[0])] - oneSynth += [float(power[1])] - oneSynth += [float(power[2])] + lpower = float(power[2]) + denergy = float(power[1])/freq + + oneSynth = [mod, width, freq, delay, area, lpower, denergy] allSynths += [oneSynth] return allSynths 
@@ -47,133 +53,209 @@ def getVals(module, freq, var): global allSynths if (var == 'delay'): ind = 3 - units = " (ps)" + units = " (ns)" elif (var == 'area'): ind = 4 units = " (sq microns)" - elif (var == 'dpower'): - ind = 5 - units = " (mW)" elif (var == 'lpower'): - ind = 6 + ind = 5 units = " (nW)" + elif (var == 'denergy'): + ind = 6 + units = " (uJ)" #fix check math else: error widths = [] - ivar = [] + metric = [] for oneSynth in allSynths: if (oneSynth[0] == module) & (oneSynth[2] == freq): widths += [oneSynth[1]] - ivar += [oneSynth[ind]] - return widths, ivar, units + m = oneSynth[ind] + if (ind==6): m*=1000 + metric += [m] + return widths, metric, units def writeCSV(allSynths): file = open("ppaData.csv", "w") writer = csv.writer(file) - writer.writerow(['Module', 'Width', 'Target Freq', 'Delay', 'Area', 'D Power (mW)', 'L Power (nW)']) + writer.writerow(['Module', 'Width', 'Target Freq', 'Delay', 'Area', 'L Power (nW)', 'D energy (mJ)']) for one in allSynths: writer.writerow(one) file.close() -def polyfitR2(x, y, deg): - ''' from internet, check math''' - z = np.polyfit(x, y, deg) - p = np.poly1d(z) - yhat = p(x) # or [p(z) for z in x] - ybar = np.sum(y)/len(y) # or sum(y)/len(y) - ssreg = np.sum((yhat-ybar)**2) # or sum([ (yihat - ybar)**2 for yihat in yhat]) - sstot = np.sum((y - ybar)**2) # or sum([ (yi - ybar)**2 for yi in y]) - r2 = ssreg / sstot - return p, r2 +def genLegend(fits, coefs, module, r2): -def plotPPA(module, freq, var): + coefsr = [str(round(c, 3)) for c in coefs] + + eq = '' + ind = 0 + if 'c' in fits: + eq += coefsr[ind] + ind += 1 + if 'l' in fits: + eq += " + " + coefsr[ind] + "*N" + ind += 1 + if 's' in fits: + eq += " + " + coefsr[ind] + "*N^2" + ind += 1 + if 'g' in fits: + eq += " + " + coefsr[ind] + "*log2(N)" + ind += 1 + if 'n' in fits: + eq += " + " + coefsr[ind] + "*Nlog2(N)" + ind += 1 + + legend_elements = [lines.Line2D([0], [0], color='orange', label=eq), + lines.Line2D([0], [0], color='steelblue', ls='', marker='o', 
label=' R^2='+ str(round(r2, 4)))] + return legend_elements + +def plotPPA(module, freq, var, ax=None, fits='clsgn'): ''' module: string module name - freq: int freq (GHz) - var: string 'delay' or 'area' + freq: int freq (MHz) + var: string delay, area, lpower, or denergy + fits: constant, linear, square, log2, Nlog2 plots chosen variable vs width for all matching syntheses with regression ''' - - # A = np.vstack([x, np.ones(len(x))]).T - # mcresid = np.linalg.lstsq(A, y, rcond=None) - # m, c = mcresid[0] - # resid = mcresid[1] - # r2 = 1 - resid / (y.size * y.var()) - # p, r2p = polyfitR2(x, y, 2) - # zlog = np.polyfit(np.log(x), y, 1) - # plog = np.poly1d(zlog) - # xplog = np.log(xp) - # _ = plt.plot(x, m*x + c, 'r', label='Linear fit R^2='+ str(r2)[1:7]) - # _ = plt.plot(xp, p(xp), label='Quadratic fit R^2='+ str(r2p)[:6]) - # _ = plt.plot(xp, plog(xplog), label = 'Log fit') - - widths, ivar, units = getVals(module, freq, var) - coefs, r2 = regress(widths, ivar) + widths, metric, units = getVals(module, freq, var) + coefs, r2, funcArr = regress(widths, metric, fits) xp = np.linspace(8, 140, 200) - pred = [coefs[0] + x*coefs[1] + np.log(x)*coefs[2] + x*np.log(x)*coefs[3] for x in xp] + pred = [] + for x in xp: + y = [func(x) for func in funcArr] + pred += [sum(np.multiply(coefs, y))] - r2p = round(r2[0], 4) - rcoefs = [round(c, 3) for c in coefs] + if ax is None: + singlePlot = True + ax = plt.gca() + else: + singlePlot = False - l = "{} + {}*N + {}*log(N) + {}*Nlog(N)".format(*rcoefs) - legend_elements = [lines.Line2D([0], [0], color='steelblue', label=module), - lines.Line2D([0], [0], color='orange', label=l), - lines.Line2D([0], [0], ls='', label=' R^2='+ str(r2p))] + ax.scatter(widths, metric) + ax.plot(xp, pred, color='orange') - _ = plt.plot(widths, ivar, 'o', label=module, markersize=10) - _ = plt.plot(xp, pred) - _ = plt.legend(handles=legend_elements) - _ = plt.xlabel("Width (bits)") - _ = plt.ylabel(str.title(var) + units) - _ = plt.title("Target 
frequency " + str(freq) + "MHz") + legend_elements = genLegend(fits, coefs, module, r2) + ax.legend(handles=legend_elements) + + ax.set_xticks(widths) + ax.set_xlabel("Width (bits)") + ax.set_ylabel(str.title(var) + units) + + if singlePlot: + ax.set_title(module + " (target " + str(freq) + "MHz)") + plt.show() + +def makePlots(mod, freq): + fig, axs = plt.subplots(2, 2) + plotPPA(mod, freq, 'delay', ax=axs[0,0], fits='cgl') + plotPPA(mod, freq, 'area', ax=axs[0,1], fits='clg') + plotPPA(mod, freq, 'lpower', ax=axs[1,0], fits='c') + plotPPA(mod, freq, 'denergy', ax=axs[1,1], fits='glc') + plt.suptitle(mod + " (target " + str(freq) + "MHz)") plt.show() -def makePlots(mod): - plotPPA(mod, 5000, 'delay') - plotPPA(mod, 5000, 'area') - plotPPA(mod, 10, 'area') - plotPPA(mod, 5000, 'lpower') - plotPPA(mod, 5000, 'dpower') +def regress(widths, var, fits='clsgn'): -def regress(widths, var): + funcArr = genFuncs(fits) mat = [] for w in widths: - row = [1, w, np.log(w), w*np.log(w)] + row = [] + for func in funcArr: + row += [func(w)] mat += [row] y = np.array(var, dtype=np.float) coefsResid = np.linalg.lstsq(mat, y, rcond=None) coefs = coefsResid[0] - resid = coefsResid[1] + try: + resid = coefsResid[1][0] + except: + resid = 0 r2 = 1 - resid / (y.size * y.var()) - return coefs, r2 + return coefs, r2, funcArr def makeCoefTable(): file = open("ppaFitting.csv", "w") writer = csv.writer(file) - writer.writerow(['Module', 'Metric', 'Freq', '1', 'N', 'log(N)', 'Nlog(N)', 'R^2']) + writer.writerow(['Module', 'Metric', 'Freq', '1', 'N', 'N^2', 'log2(N)', 'Nlog2(N)', 'R^2']) for mod in ['add', 'mult', 'comparator', 'shifter']: for comb in [['delay', 5000], ['area', 5000], ['area', 10]]: var = comb[0] freq = comb[1] - widths, ivar, units = getVals(mod, freq, var) - coefs, r2 = regress(widths, ivar) - row = [mod] + comb + np.ndarray.tolist(coefs) + [r2[0]] + widths, metric, units = getVals(mod, freq, var) + coefs, r2, funcArr = regress(widths, metric) + row = [mod] + comb + 
np.ndarray.tolist(coefs) + [r2] writer.writerow(row) file.close() +def genFuncs(fits='clsgn'): + funcArr = [] + if 'c' in fits: + funcArr += [lambda x: 1] + if 'l' in fits: + funcArr += [lambda x: x] + if 's' in fits: + funcArr += [lambda x: x**2] + if 'g' in fits: + funcArr += [lambda x: np.log2(x)] + if 'n' in fits: + funcArr += [lambda x: x*np.log2(x)] + return funcArr + +def noOutliers(freqs, delays, areas): + med = statistics.median(freqs) + f=[] + d=[] + a=[] + for i in range(len(freqs)): + norm = freqs[i]/med + if (norm > 0.25) & (norm<1.75): + f += [freqs[i]] + d += [delays[i]] + a += [areas[i]] + return f, d, a + +def freqPlot(mod, width): + freqs = [] + delays = [] + areas = [] + for oneSynth in allSynths: + if (mod == oneSynth[0]) & (width == oneSynth[1]): + freqs += [oneSynth[2]] + delays += [oneSynth[3]] + areas += [oneSynth[4]] + + freqs, delays, areas = noOutliers(freqs, delays, areas) + + adprod = np.multiply(areas, delays) + adsq = np.multiply(adprod, delays) + + f, (ax1, ax2, ax3, ax4) = plt.subplots(4, 1, sharex=True) + ax1.scatter(freqs, delays) + ax2.scatter(freqs, areas) + ax3.scatter(freqs, adprod) + ax4.scatter(freqs, adsq) + ax4.set_xlabel("Freq (MHz)") + ax1.set_ylabel('Delay (ns)') + ax2.set_ylabel('Area (sq microns)') + ax3.set_ylabel('Area * Delay') + ax4.set_ylabel('Area * Delay^2') + ax1.set_title(mod + '_' + str(width)) + plt.show() + allSynths = getData() - writeCSV(allSynths) +# makeCoefTable() -makePlots('shifter') +freqPlot('comparator', 8) -makeCoefTable() +# makePlots('shifter', 5000) +# plotPPA('comparator', 5000, 'delay', fits='cls') \ No newline at end of file diff --git a/synthDC/ppaData.csv b/synthDC/ppaData.csv index 3ea5648b6..9e09c3403 100644 --- a/synthDC/ppaData.csv +++ b/synthDC/ppaData.csv @@ -1,67 +1,197 @@ -Module,Width,Target Freq,Delay,Area,D Power (mW),L Power (nW) -add,128,10,7.100851,1867.879976,0.00501,465.925 -add,128,5000,0.389771,7007.980119,3.309,2.77 -add,16,10,2.032906,221.479998,0.000575,55.29 
-add,16,4000,0.249839,551.74001,0.239,302.479 -add,16,5000,0.228259,924.140017,0.519,641.631 -add,16,6000,0.225754,1120.140018,0.739,1.01 -add,32,10,4.160501,456.679995,0.00118,112.161 -add,32,4000,0.280842,1730.680031,0.735,849.828 -add,32,5000,0.2505,1933.540033,1.049,1.03 -add,32,6000,0.271774,1746.36003,1.138,955.901 -add,64,10,8.474034,927.079988,0.00246,230.083 -add,64,4000,0.323267,3758.300065,1.523,1.75 -add,64,5000,0.334061,3798.480071,1.917,2.18 -add,64,6000,0.328457,3749.480066,2.346,1.77 -add,8,10,0.940062,103.879999,0.000241,24.765 -add,8,5000,0.199689,197.960003,0.113,83.576 -comparator,128,10,0.842074,1997.240039,0.00087,243.506 -comparator,128,5000,0.260142,5215.56005,3.708,6.0 -comparator,16,10,0.576329,252.840005,0.000144,31.402 -comparator,16,4000,0.249312,280.280005,0.0581,55.248 -comparator,16,5000,0.199026,313.600006,0.0859,78.893 -comparator,16,6000,0.166568,422.380007,0.255,301.506 -comparator,32,10,0.765874,495.88001,0.000226,66.41 -comparator,32,4000,0.24995,608.580012,0.168,130.613 -comparator,32,5000,0.205372,919.240014,0.43,840.47 -comparator,32,6000,0.2012,1248.520016,0.928,1.48 -comparator,64,10,0.561562,1008.42002,0.000449,127.626 -comparator,64,4000,0.249905,1437.660027,0.462,558.66 -comparator,64,5000,0.219296,2738.120023,1.989,2.95 -comparator,64,6000,0.221138,2341.220025,1.343,2.59 -comparator,8,10,0.29577,118.580002,6.83e-05,16.053 -comparator,8,5000,0.195502,129.360003,0.0358,21.443 -mult,128,10,9.334627,180734.540854,0.428,1.8 -mult,128,5000,1.78322,314617.244472,997.34,1.63 -mult,16,10,4.730546,3869.040009,0.0107,641.517 -mult,16,4000,0.821111,9132.620147,14.407,8.03 -mult,16,5000,0.820059,9583.420143,20.175,8.5 -mult,16,6000,0.831308,8594.600132,21.106,7.15 -mult,32,10,7.575772,12412.680067,0.0229,1.18 -mult,32,4000,1.091389,31262.980534,65.471,2.49 -mult,32,5000,1.092153,31497.200524,79.554,2.58 -mult,32,6000,1.084816,33519.920555,103.798,2.91 -mult,64,10,4.7933,46798.920227,0.103,5.46 
-mult,64,4000,1.411752,93087.261425,227.876,6.05 -mult,64,5000,1.404875,94040.801492,298.667,6.16 -mult,64,6000,1.415466,89931.661403,337.302,5.63 -mult,8,10,2.076433,1009.399998,0.00206,211.637 -mult,8,5000,0.552339,4261.040075,5.543,5.05 -mux2,1,10,0.060639,6.86,5.15e-06,1.19 -mux2,1,10,0.060639,6.86,5.15e-06,1.19 -shifter,128,10,2.758726,9722.580189,0.00789,720.698 -shifter,128,5000,0.401118,19106.080347,6.94,1.23 -shifter,16,10,1.237745,681.100013,0.000441,52.029 -shifter,16,5000,0.209586,2120.720031,1.025,2.15 -shifter,32,10,1.906335,1656.200032,0.00115,118.773 -shifter,32,4000,0.260606,3490.760054,1.282,2.57 -shifter,32,4000,0.260606,3490.760054,1.282,2.57 -shifter,32,4000,0.260606,3490.760054,1.282,2.57 -shifter,32,5000,0.238962,4985.260077,2.489,4.9 -shifter,32,6000,0.241742,4312.000069,2.411,3.71 -shifter,32,6000,0.241742,4312.000069,2.411,3.71 -shifter,32,6000,0.241742,4312.000069,2.411,3.71 -shifter,64,10,2.919486,4346.300085,0.00297,210.734 -shifter,64,5000,0.358993,9471.700156,4.518,6.94 -shifter,8,10,0.622998,244.020005,0.00019,26.943 -shifter,8,5000,0.198885,495.88001,0.285,300.128 +Module,Width,Target Freq,Delay,Area,L Power (nW),D energy (mJ) +add,128,10,7.100851,1867.879976,465.925,0.0005009999999999999 +add,128,1538,0.633294,4623.64009,632.254,0.00027958387516254874 +add,128,2051,0.486762,4951.940095,885.884,0.0003568990736226231 +add,128,2359,0.423881,5520.340104,1.49,0.00045146248410343363 +add,128,2410,0.414767,5600.700103,1.57,0.00045684647302904563 +add,128,2462,0.406101,5721.240105,1.77,0.0004780666125101544 +add,128,2513,0.397913,6085.800112,2.14,0.0005161161957819339 +add,128,2564,0.436395,6456.240111,2.27,0.0005503120124804992 +add,128,2615,0.390136,6662.040117,2.45,0.0006137667304015296 +add,128,2667,0.394304,7494.060127,3.58,0.00072928383952006 +add,128,2718,0.407908,7287.280117,3.35,0.0006938925680647534 +add,128,2769,0.431383,6941.340124,2.86,0.0006218851570964247 +add,128,3077,0.387515,7712.60013,2.93,0.0007572310692232694 
+add,128,3590,0.386891,6860.000114,2.62,0.0006579387186629527 +add,128,5000,0.389771,7007.980119,2.77,0.0006618 +add,16,10,2.032906,221.479998,55.29,5.75e-05 +add,16,2609,0.375085,405.720008,52.28,2.9359908010732082e-05 +add,16,3478,0.287131,443.940009,126.253,4.1978148361127085e-05 +add,16,4000,0.249839,551.74001,302.479,5.9749999999999995e-05 +add,16,4087,0.243761,503.720009,183.936,5.113775385368241e-05 +add,16,4174,0.239287,549.780011,304.811,6.013416387158601e-05 +add,16,4261,0.234402,607.60001,368.742,6.688570758038019e-05 +add,16,4348,0.22992,610.540011,364.173,6.577736890524379e-05 +add,16,4435,0.22545,666.400011,419.709,7.891770011273957e-05 +add,16,4522,0.222724,820.260016,626.379,9.022556390977442e-05 +add,16,4609,0.221986,815.360013,735.998,8.960729008461705e-05 +add,16,4696,0.227412,866.320016,645.684,9.731686541737649e-05 +add,16,5000,0.228259,924.140017,641.631,0.0001038 +add,16,5217,0.22222,824.180016,601.276,8.778991757715163e-05 +add,16,6000,0.225754,1120.140018,1.01,0.00012316666666666666 +add,16,6087,0.226225,857.500013,678.287,0.00010284212255626745 +add,32,10,4.160501,456.679995,112.161,0.00011800000000000001 +add,32,2400,0.41509,958.440019,151.083,6.875e-05 +add,32,3200,0.312424,1121.120021,296.836,0.000105625 +add,32,3680,0.271527,1465.100024,591.825,0.00015000000000000001 +add,32,3760,0.278449,1689.520028,834.387,0.00017898936170212767 +add,32,3840,0.291206,1547.420027,784.112,0.00015859375 +add,32,3920,0.273454,2044.280039,1.33,0.00022066326530612246 +add,32,4000,0.280842,1730.680031,849.828,0.00018375 +add,32,4080,0.256294,1991.360031,1.24,0.00021397058823529412 +add,32,4160,0.253175,2031.540036,1.24,0.00021995192307692308 +add,32,4240,0.268332,1829.660028,1.09,0.00019245283018867924 +add,32,4320,0.254861,1716.960028,866.723,0.0001814814814814815 +add,32,4800,0.258491,1955.100033,1.07,0.00022458333333333334 +add,32,5000,0.2505,1933.540033,1.03,0.00020979999999999998 +add,32,5600,0.254525,1871.800028,877.446,0.0001967857142857143 
+add,32,6000,0.271774,1746.36003,955.901,0.00018966666666666665 +add,64,10,8.474034,927.079988,230.083,0.000246 +add,64,1818,0.538894,2114.840041,250.049,0.0001375137513751375 +add,64,2424,0.412474,2298.100044,453.413,0.00017574257425742574 +add,64,2788,0.358537,2637.180048,758.693,0.00023565279770444765 +add,64,2848,0.351091,2625.420049,698.362,0.00023525280898876406 +add,64,2909,0.343753,2800.840049,852.781,0.0002536954279821244 +add,64,2970,0.337807,3412.360059,1.37,0.00032895622895622896 +add,64,3030,0.331556,3202.640054,1.28,0.0003099009900990099 +add,64,3091,0.349251,3284.960053,1.35,0.00031802005823358134 +add,64,3152,0.328164,3804.360061,1.89,0.00038229695431472085 +add,64,3212,0.336436,3593.660062,1.72,0.00035523038605230384 +add,64,3273,0.311119,3816.120062,1.96,0.0003923006416131989 +add,64,3636,0.330032,3266.340054,1.22,0.00033938393839383937 +add,64,4000,0.323267,3758.300065,1.75,0.00038074999999999996 +add,64,4242,0.328234,3507.420063,1.57,0.00033757661480433756 +add,64,5000,0.334061,3798.480071,2.18,0.0003834 +add,64,6000,0.328457,3749.480066,1.77,0.000391 +add,8,10,0.940062,103.879999,24.765,2.41e-05 +add,8,5000,0.199689,197.960003,83.576,2.26e-05 +comparator,128,10,0.842074,1997.240039,243.506,8.7e-05 +comparator,128,5000,0.260142,5215.56005,6.0,0.0007416 +comparator,16,10000,0.146177,1065.260009,1.61,0.00012470000000000002 +comparator,16,10,0.576329,252.840005,31.402,1.4400000000000001e-05 +comparator,16,5000,0.199026,313.600006,78.893,1.718e-05 +comparator,16,6000,0.166568,422.380007,301.506,4.25e-05 +comparator,32,10000,0.194087,1451.380013,1.85,0.00024430000000000003 +comparator,32,10,0.765874,495.88001,66.41,2.26e-05 +comparator,32,4000,0.24995,608.580012,130.613,4.2000000000000004e-05 +comparator,32,5000,0.205372,919.240014,840.47,8.6e-05 +comparator,32,6000,0.2012,1248.520016,1.48,0.00015466666666666667 +comparator,64,10,0.561562,1008.42002,127.626,4.49e-05 +comparator,64,4000,0.249905,1437.660027,558.66,0.0001155 
+comparator,64,5000,0.219296,2738.120023,2.95,0.0003978 +comparator,64,6000,0.221138,2341.220025,2.59,0.00022383333333333332 +comparator,8,10000,0.1136,496.86,810.074,6.46e-05 +comparator,8,10909,0.11361,387.1,565.114,5.885049042075351e-05 +comparator,8,10,0.29577,118.580002,16.053,6.830000000000001e-06 +comparator,8,12727,0.113615,488.039998,768.445,6.364422094759174e-05 +comparator,8,5000,0.195502,129.360003,21.443,7.16e-06 +comparator,8,5455,0.182936,130.340003,22.567,7.259395050412466e-06 +comparator,8,7273,0.13643,147.980003,61.898,1.4711948301938677e-05 +comparator,8,8364,0.119528,210.700003,172.337,2.654232424677188e-05 +comparator,8,8545,0.116724,205.800003,165.947,2.7969572849619658e-05 +comparator,8,8727,0.124671,264.600002,278.768,3.55219433940644e-05 +comparator,8,8909,0.11208,261.660004,251.629,3.5694241777977326e-05 +comparator,8,9091,0.10991,297.920001,343.785,3.882961170388296e-05 +comparator,8,9273,0.107742,309.680003,356.05,4.162622667960746e-05 +comparator,8,9455,0.106411,345.94,438.668,4.569011105235325e-05 +comparator,8,9636,0.111488,397.88,589.556,5.645496056454961e-05 +comparator,8,9818,0.11361,381.219999,573.131,5.265838256264005e-05 +mult,128,10,9.334627,180734.540854,1.8,0.0428 +mult,128,337,2.963253,201889.800086,2.67,0.045112759643916915 +mult,128,449,2.227145,212055.340673,3.27,0.04989086859688196 +mult,128,5000,1.78322,314617.244472,1.63,0.199468 +mult,128,517,1.934229,243417.302347,5.67,0.08774468085106382 +mult,128,528,1.893939,255011.682875,6.65,0.10337878787878789 +mult,128,539,1.855281,259737.242949,7.18,0.10912615955473098 +mult,128,551,1.814879,274624.423573,8.73,0.12750816696914702 +mult,128,562,1.779353,284850.723775,1.03,0.1501779359430605 +mult,128,573,1.745187,296812.604204,1.08,0.14241186736474695 +mult,128,584,1.712328,298800.044147,1.15,0.14923630136986302 +mult,128,596,1.71139,312992.404301,1.44,0.16681040268456376 +mult,128,607,1.707473,305974.624156,1.38,0.1625996705107084 
+mult,128,674,1.727276,311582.184447,1.52,0.18965133531157272 +mult,128,787,1.735561,317542.544465,1.66,0.19689453621346886 +mult,16,10,4.730546,3869.040009,641.517,0.00107 +mult,16,1122,0.891172,6478.780105,3.54,0.002767379679144385 +mult,16,1146,0.87258,7193.200125,4.57,0.003224258289703316 +mult,16,1171,0.853963,7258.860127,4.57,0.0031195559350982068 +mult,16,1195,0.836814,7685.16012,5.33,0.0032225941422594144 +mult,16,1220,0.81966,8829.800131,6.95,0.0035008196721311477 +mult,16,1244,0.822616,8780.800145,7.15,0.0033842443729903537 +mult,16,1268,0.802449,9789.220166,8.8,0.0038998422712933755 +mult,16,1293,0.813903,9702.000166,8.74,0.0036071152358855374 +mult,16,1317,0.805748,10366.440177,1.01,0.003979498861047836 +mult,16,1463,0.83466,8521.100128,6.71,0.0035974025974025974 +mult,16,1707,0.829615,8563.24013,6.78,0.003674282366725249 +mult,16,4000,0.821111,9132.620147,8.03,0.00360175 +mult,16,5000,0.820059,9583.420143,8.5,0.004035 +mult,16,6000,0.831308,8594.600132,7.15,0.0035176666666666668 +mult,16,732,1.36399,4043.480026,624.48,0.0006612021857923497 +mult,16,976,1.024406,4960.760064,1.32,0.0011854508196721312 +mult,32,1000,1.099618,29507.800463,2.24,0.015257 +mult,32,10,7.575772,12412.680067,1.18,0.00229 +mult,32,1111,1.092041,31649.100517,2.53,0.01587128712871287 +mult,32,1296,1.097292,30544.640517,2.37,0.015766203703703702 +mult,32,4000,1.091389,31262.980534,2.49,0.01636775 +mult,32,5000,1.092153,31497.200524,2.58,0.0159108 +mult,32,556,1.796075,14371.700056,2.21,0.002714028776978417 +mult,32,6000,1.084816,33519.920555,2.91,0.017299666666666668 +mult,32,741,1.349466,17389.120212,4.65,0.005995951417004048 +mult,32,852,1.173643,23514.120391,1.27,0.012269953051643193 +mult,32,870,1.149401,25198.740416,1.5,0.013455172413793104 +mult,32,889,1.124838,26822.600434,1.8,0.01463217097862767 +mult,32,907,1.102529,29124.620481,2.08,0.014771775082690187 +mult,32,926,1.101021,31000.340484,2.46,0.014745140388768898 +mult,32,944,1.085045,32407.620517,2.68,0.01608262711864407 
+mult,32,963,1.089271,32490.92054,2.7,0.016202492211838004 +mult,32,981,1.091413,33127.920535,2.84,0.017559633027522933 +mult,64,1000,1.350119,103523.281624,7.3,0.05962 +mult,64,10,4.7933,46798.920227,5.46,0.0103 +mult,64,4000,1.411752,93087.261425,6.05,0.056969 +mult,64,429,2.326205,53642.260108,7.4,0.011111888111888112 +mult,64,5000,1.404875,94040.801492,6.16,0.05973339999999999 +mult,64,571,1.751186,58587.340388,1.1,0.01569352014010508 +mult,64,6000,1.415466,89931.661403,5.63,0.056217 +mult,64,657,1.52205,69763.260863,2.39,0.03356773211567732 +mult,64,671,1.490298,74604.461058,2.89,0.039280178837555885 +mult,64,686,1.457722,78293.181181,3.18,0.04122594752186589 +mult,64,700,1.428547,82949.161302,3.92,0.04733428571428572 +mult,64,714,1.400528,87215.101373,4.39,0.04964425770308123 +mult,64,729,1.371734,93726.221523,5.35,0.05337037037037037 +mult,64,743,1.345895,95943.961579,5.62,0.05491924629878869 +mult,64,757,1.341232,106627.921626,7.73,0.058137384412153235 +mult,64,771,1.341474,98844.761554,6.33,0.05606225680933852 +mult,64,857,1.336163,107976.401664,7.95,0.059478413068844806 +mult,8,1091,0.915221,1167.180013,211.892,0.00017048579285059578 +mult,8,10,2.076433,1009.399998,211.637,0.00020600000000000002 +mult,8,1455,0.687251,1615.04003,680.207,0.0004233676975945017 +mult,8,1673,0.611485,2094.260033,1.39,0.000639569635385535 +mult,8,1709,0.599356,2453.920037,2.01,0.0008338209479227619 +mult,8,1745,0.589521,2771.440043,2.58,0.0008406876790830946 +mult,8,1782,0.582418,2549.960043,2.14,0.0008759820426487093 +mult,8,1818,0.581954,2672.460046,2.2,0.0008663366336633663 +mult,8,1855,0.605444,2332.40004,1.74,0.0007547169811320754 +mult,8,1891,0.605341,2405.90004,1.93,0.0007599153886832364 +mult,8,1927,0.574177,3273.200051,3.43,0.0009600415153087702 +mult,8,1964,0.585681,2746.940044,2.48,0.0008778004073319755 +mult,8,2182,0.550085,4360.02008,5.2,0.0011608615948670944 +mult,8,2545,0.564127,4034.66007,4.58,0.0011772102161100196 
+mult,8,5000,0.552339,4261.040075,5.05,0.0011086 +mux2,1,10,0.060639,6.86,1.19,5.149999999999999e-07 +mux2,1,10,0.060639,6.86,1.19,5.149999999999999e-07 +shifter,128,10,2.758726,9722.580189,720.698,0.000789 +shifter,128,5000,0.401118,19106.080347,1.23,0.0013880000000000001 +shifter,16,10,1.237745,681.100013,52.029,4.41e-05 +shifter,16,5000,0.209586,2120.720031,2.15,0.000205 +shifter,32,10,1.906335,1656.200032,118.773,0.000115 +shifter,32,4000,0.260606,3490.760054,2.57,0.0003205 +shifter,32,4000,0.260606,3490.760054,2.57,0.0003205 +shifter,32,4000,0.260606,3490.760054,2.57,0.0003205 +shifter,32,5000,0.238962,4985.260077,4.9,0.0004978 +shifter,32,6000,0.241742,4312.000069,3.71,0.00040183333333333336 +shifter,32,6000,0.241742,4312.000069,3.71,0.00040183333333333336 +shifter,32,6000,0.241742,4312.000069,3.71,0.00040183333333333336 +shifter,64,10,2.919486,4346.300085,210.734,0.000297 +shifter,64,5000,0.358993,9471.700156,6.94,0.0009036 +shifter,8,10,0.622998,244.020005,26.943,1.9e-05 +shifter,8,5000,0.198885,495.88001,300.128,5.6999999999999996e-05 diff --git a/synthDC/ppaFitting.csv b/synthDC/ppaFitting.csv index 882977245..6b88ead61 100644 --- a/synthDC/ppaFitting.csv +++ b/synthDC/ppaFitting.csv @@ -1,13 +1,13 @@ -Module,Metric,Freq,1,N,log(N),Nlog(N),R^2 -add,delay,5000,0.23935453005464438,0.015973094945355207,-0.058207695467226296,-0.002593789781151714,0.9902532112478974 -add,area,5000,-1032.1274349672115,64.4386855922132,374.6678949053879,-3.2579193244904823,0.9999180068922152 -add,area,10,-13.720004131149423,14.699999256147343,3.6067390521177815e-06,9.312480709428003e-08,1.0 -mult,delay,5000,-0.21755360109289562,-0.00033127390710363004,0.36865114245083547,0.0004100845872014472,0.9999815499619515 -mult,area,5000,-29928.193338752997,-11370.538120558254,39122.3984379376,2592.313970431163,0.9998454828501703 -mult,area,10,-24112.991162714883,-8735.874000034026,30452.017533199683,1892.3032427172166,0.9999575675635335 
-comparator,delay,5000,0.18302939890710385,-0.001793523907103751,0.00950014684425352,0.0004195522734073458,0.9999387049502957 -comparator,area,5000,1831.2076391201958,303.59984869227907,-1617.4342555852443,-44.475154143873425,0.9990603962758624 -comparator,area,10,-0.23027509289593326,18.299023530396347,-8.48304611908023,-0.4881808064440773,0.9999674500675539 -shifter,delay,5000,0.4107033934426204,0.03923479405737683,-0.19848886911558317,-0.006549393512462493,0.989283342171845 -shifter,area,5000,-3612.7138133224103,-65.6549821150965,1929.186263038338,35.02443853718661,0.9998392000511572 -shifter,area,10,806.0687632950834,120.52125970491868,-682.1783666753405,-5.1440062238735225,0.9998176364985187 +Module,Metric,Freq,1,N,N^2,log2(N),Nlog2(N),R^2 +add,delay,5000,-0.038978555556527635,-0.08911531250030817,-0.00012953428819478948,0.2083593333340971,0.013950093750045424,1.0 +add,area,5000,-1913.1778463362505,-268.21377075092175,-0.4100347526051751,1046.9667200022955,47.59125331263557,1.0 +add,area,10,-13.720001333167332,14.700000312552621,1.3021426840869221e-09,-1.3062278840780171e-10,-9.375775472819561e-08,1.0 +mult,delay,5000,-0.2915958888891911,-0.02828693750009581,-3.445876736121953e-05,0.32169033333357117,0.0044735312500140964,1.0 +mult,area,5000,27780.605184113756,10418.196477973508,26.857274703166343,-24448.387256089416,-1468.2850310678027,1.0 +mult,area,10,-6472.791005245042,-2075.5787013197305,8.20962684330778,5345.246556351299,313.5693677823146,1.0 +comparator,delay,5000,0.1903951111111219,0.000987500000002994,3.427951388890516e-06,3.333333324460974e-06,-0.00012593750000039925,1.0 +comparator,area,5000,-508.51109056188875,-579.7924890645068,-1.0888888741341944,969.5466443383111,101.5524983752957,1.0 +comparator,area,10,-155.6022268893253,-40.3637507501383,-0.07230902908001494,132.9533363336765,8.452500156270371,1.0 +shifter,delay,5000,0.06953233333235516,-0.08957893750031035,-0.00015877864583368578,0.16727300000076853,0.014763625000045773,1.0 
+shifter,area,5000,-237.48663487568587,1208.7075255666841,1.5708073263938906,-1678.7400476770383,-166.69187856311666,1.0 +shifter,area,10,-1079.4155736731122,-591.3687615645423,-0.877491337241916,1211.9333560050677,103.11437703155087,1.0 diff --git a/synthDC/ppaSynth.py b/synthDC/ppaSynth.py index cf7e430b5..691b796c8 100755 --- a/synthDC/ppaSynth.py +++ b/synthDC/ppaSynth.py @@ -14,9 +14,13 @@ def deleteRedundant(LoT): bashCommand = synthStr.format(*synth) outputCPL = subprocess.check_output(['bash','-c', bashCommand]) -widths = ['1'] -modules = ['mux2'] -freqs = ['10'] +d = 0.26 +f = 1/d * 1000 +arr = [-40, -20, -8, -6, -4, -2, 0, 2, 4, 6, 8, 20, 40] + +widths = ['128'] +modules = ['comparator'] +freqs = [str(round(f+f*x/100)) for x in arr] tech = 'sky90'