diff --git a/benchmarks/embench/Makefile b/benchmarks/embench/Makefile index 0cb54e1a1..4a4875dfa 100644 --- a/benchmarks/embench/Makefile +++ b/benchmarks/embench/Makefile @@ -6,14 +6,23 @@ all: build sim allClean: clean all -build: - ../../addins/embench-iot/build_all.py -v --builddir=bd_speed --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostartfiles ../../../config/riscv32/boards/rv32wallyverilog/startup/crt0.S" --cflags="-nostartfiles" +build: buildspeed buildsize + +buildspeed: + ../../addins/embench-iot/build_all.py --builddir=bd_speed --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostartfiles ../../../config/riscv32/boards/rv32wallyverilog/startup/crt0.S" --cflags="-nostartfiles" find ../../addins/embench-iot/bd_speed/ -type f ! -name "*.*" | while read f; do cp "$$f" "$$f.elf"; done - ../../addins/embench-iot/build_all.py -v --builddir=bd_size --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostdlib -nostartfiles ../../../config/riscv32/boards/rv32wallyverilog/startup/dummy.S" --cflags="-msave-restore" --dummy-libs="libgcc libm libc crt0" -sim: modelSimBuild size speed +buildsize: + ../../addins/embench-iot/build_all.py --builddir=bd_size --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostdlib -nostartfiles ../../../config/riscv32/boards/rv32wallyverilog/startup/dummy.S" --cflags="-msave-restore" --dummy-libs="libgcc libm libc crt0" -modelSimBuild: objdump +sim: modelSimBuild speed + +# vsim: +# cd ../../pipelined/regression/ +# vsim -c -do "do wally-pipelined-batch.do rv32gc embench" +# cd ../../benchmarks/embench/ + +modelSimBuild: buildspeed objdump find ../../addins/embench-iot/bd_speed/ -type f -name "*.elf" | while read f; do riscv64-unknown-elf-elf2hex --bit-width 32 --input "$$f" --output "$$f.memfile"; done find ../../addins/embench-iot/bd_speed/ -type f -name "*.elf.objdump" | while read f; do extractFunctionRadix.sh $$f; done @@ -23,13 +32,16 @@ size: speed: ../../addins/embench-iot/benchmark_speed.py --builddir=bd_speed --target-module run_wally --cpu-mhz=1 -objdump: +objdump: buildspeed find ../../addins/embench-iot/bd_speed/ -type f -name "*.elf" | while read f; do riscv64-unknown-elf-objdump -S -D "$$f" > "$$f.objdump"; done clean: rm -rf ../../addins/embench-iot/bd_speed/ rm -rf ../../addins/embench-iot/bd_size/ +allclean: clean + rm -rf ../../addins/embench-iot/logs/ + # std: # ../../addins/embench-iot/build_all.py --builddir=bd_std --arch riscv32 --chip generic --board rv32wallyverilog --cc riscv64-unknown-elf-gcc --cflags="-v -c -O2 -ffunction-sections -march=rv32imac -mabi=ilp32" --ldflags="-Wl,-gc-sections -v -march=rv32imac -mabi=ilp32 ../../../../../benchmarks/embench/tohost.S -T../../../config/riscv32/boards/rv32wallyverilog/link.ld" --user-libs="-lm" # riscv64-unknown-elf-objdump -D ../../addins/embench-iot/bd_std/src/aha-mont64/aha-mont64 > ../../addins/embench-iot/bd_std/src/aha-mont64/aha-mont64.objdump diff --git a/pipelined/config/rv64fp/wally-config.vh b/pipelined/config/rv64fp/wally-config.vh index 36cda4d91..e88b012aa 100644 --- a/pipelined/config/rv64fp/wally-config.vh +++ b/pipelined/config/rv64fp/wally-config.vh @@ -39,12 +39,12 @@ // MISA RISC-V configuration per specification //16 - quad 3 - double 5 - single -`define MISA (32'h00000104 | 1 << 5 | 1 << 3 | 1 << 16 | 1 << 18 | 1 << 20 | 1 << 12 | 1 << 0 ) +`define MISA (32'h00000104 | 1 << 5 | 1 << 3 | 0 << 16 | 1 << 18 | 1 << 20 | 1 << 12 | 1 << 0 ) `define ZICSR_SUPPORTED 1 `define ZIFENCEI_SUPPORTED 1 `define COUNTERS 32 `define ZICOUNTERS_SUPPORTED 1 -`define ZFH_SUPPORTED 1 +`define ZFH_SUPPORTED 0 /// Microarchitectural Features `define UARCH_PIPELINED 1 diff --git a/pipelined/src/fpu/fclassify.sv b/pipelined/src/fpu/fclassify.sv index 05a91d212..a1a934ffe 100644 --- a/pipelined/src/fpu/fclassify.sv +++ b/pipelined/src/fpu/fclassify.sv @@ -8,7 +8,7 @@ module fclassify ( input logic XDenormE, // is denormal input logic XZeroE, // is zero input logic XInfE, // is infinity - output logic [63:0] ClassResE // classify result + output logic [`XLEN-1:0] ClassResE // classify result ); logic PInf, PZero, PNorm, PDenorm; @@ -37,6 +37,6 @@ module fclassify ( // bit 7 - +Inf // bit 8 - signaling NaN // bit 9 - quiet NaN - assign ClassResE = {{54{1'b0}}, XNaNE&~XSNaNE, XSNaNE, PInf, PNorm, PDenorm, PZero, NZero, NDenorm, NNorm, NInf}; + assign ClassResE = {{`XLEN-10{1'b0}}, XNaNE&~XSNaNE, XSNaNE, PInf, PNorm, PDenorm, PZero, NZero, NDenorm, NNorm, NInf}; endmodule diff --git a/pipelined/src/fpu/fctrl.sv b/pipelined/src/fpu/fctrl.sv index 0640a544e..690f9f940 100755 --- a/pipelined/src/fpu/fctrl.sv +++ b/pipelined/src/fpu/fctrl.sv @@ -1,3 +1,4 @@ +`include "wally-config.vh" module fctrl ( input logic [6:0] Funct7D, // bits 31:25 of instruction - may contain percision @@ -13,7 +14,7 @@ module fctrl ( output logic [2:0] FOpCtrlD, // chooses which opperation to do - specifics shown at bottom of module and in each unit output logic [1:0] FResSelD, // select one of the results done in the memory stage output logic [1:0] FIntResSelD, // select the result that will be written to the integer register - output logic FmtD, // precision - single-0 double-1 + output logic [`FPSIZES/3:0] FmtD, // precision - single-0 double-1 output logic [2:0] FrmD, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude output logic FWriteIntD // is the result written to the integer register ); @@ -119,8 +120,23 @@ module fctrl ( // Precision // 0-single // 1-double - assign FmtD = FResultSelD == 2'b00 ? Funct3D[0] : ((Funct7D[6:3] == 4'b0100)&OpD[4]) | OpD[6:1] == 6'b010000 ? ~Funct7D[0] : Funct7D[0]; + + if (`FPSIZES == 1)begin + logic [1:0] FmtTmp; + assign FmtTmp = (FResultSelD == 2'b00) ? {~Funct3D[1], ~(Funct3D[1]^Funct3D[0])} : ((Funct7D[6:3] == 4'b0100)&OpD[4]) ? Rs2D[1:0] : Funct7D[1:0]; + assign FmtD = `FMT == FmtTmp; +end + //assign FmtD = 0; *** change back after full paramerterization + else if (`FPSIZES == 2)begin + logic [1:0] FmtTmp; + assign FmtTmp = (FResultSelD == 2'b00) ? {~Funct3D[1], ~(Funct3D[1]^Funct3D[0])} : ((Funct7D[6:3] == 4'b0100)&OpD[4]) ? Rs2D[1:0] : Funct7D[1:0]; + assign FmtD = `FMT == FmtTmp; + end + else if (`FPSIZES == 3|`FPSIZES == 4) + assign FmtD = (FResultSelD == 2'b00) ? {~Funct3D[1], ~(Funct3D[1]^Funct3D[0])} : ((Funct7D[6:3] == 4'b0100)&OpD[4]) ? Rs2D[1:0] : Funct7D[1:0]; + + // assign FmtD = FResultSelD == 2'b00 ? Funct3D[0] : ((Funct7D[6:3] == 4'b0100)&OpD[4]) | OpD[6:1] == 6'b010000 ? ~Funct7D[0] : Funct7D[0]; // FResultSel: // 000 - ReadRes - load // 001 - FMARes - FMA and multiply diff --git a/pipelined/src/fpu/fma.sv b/pipelined/src/fpu/fma.sv index 5d16ccc51..f160d3ea6 100644 --- a/pipelined/src/fpu/fma.sv +++ b/pipelined/src/fpu/fma.sv @@ -472,7 +472,7 @@ module fma2( // Select the result /////////////////////////////////////////////////////////////////////////////// - resultselect resultselect(.XSgnM, .YSgnM, .ZExpM, .XManM, .YManM, .ZManM, .ZDenormM, + resultselect resultselect(.XSgnM, .YSgnM, .ZExpM, .XManM, .YManM, .ZManM, .ZDenormM, .ZZeroM, .FrmM, .FmtM, .AddendStickyM, .KillProdM, .XInfM, .YInfM, .ZInfM, .XNaNM, .YNaNM, .ZNaNM, .RoundAdd, .ZSgnEffM, .PSgnM, .ResultSgn, .CalcPlus1, .Invalid, .Overflow, .Underflow, .ResultDenorm, .ResultExp, .ResultFrac, .FMAResM); @@ -1002,6 +1002,7 @@ module resultselect( input logic XInfM, YInfM, ZInfM, // inputs are infinity input logic XNaNM, YNaNM, ZNaNM, // inputs are NaN input logic ZDenormM, // is the original precision denormalized + input logic ZZeroM, input logic ZSgnEffM, // the modified Z sign - depends on instruction input logic PSgnM, // the product's sign input logic ResultSgn, // the result's sign @@ -1027,7 +1028,7 @@ module resultselect( end assign OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}}; - assign KillProdResult = {ResultSgn, {ZExpM[`NE-1:1], ZExpM[0]&~ZDenormM, ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})}; + assign KillProdResult = {ResultSgn, {ZExpM[`NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})}; assign UnderflowResult = {ResultSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),(CalcPlus1&(AddendStickyM|FrmM[1]))}; assign InfResult = {InfSgn, {`NE{1'b1}}, (`NF)'(0)}; assign NormResult = {ResultSgn, ResultExp, ResultFrac}; @@ -1046,7 +1047,7 @@ module resultselect( {ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}} : ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`LEN1{1'b1}}, ResultSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} : {{`FLEN-`LEN1{1'b1}}, ResultSgn, {`NE1{1'b1}}, (`NF1)'(0)}; - assign KillProdResult = FmtM ? {ResultSgn, {ZExpM[`NE-1:1], ZExpM[0]&~ZDenormM, ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})} : {{`FLEN-`LEN1{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE1-2:1], ZExpM[0]&~ZDenormM, ZManM[`NF-1:`NF-`NF1]} + (RoundAdd[`NF-`NF1+`LEN1-2:`NF-`NF1]&{`LEN1-1{AddendStickyM}})}; + assign KillProdResult = FmtM ? {ResultSgn, {ZExpM[`NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})} : {{`FLEN-`LEN1{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE1-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`NF1]} + (RoundAdd[`NF-`NF1+`LEN1-2:`NF-`NF1]&{`LEN1-1{AddendStickyM}})}; assign UnderflowResult = FmtM ? {ResultSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),(CalcPlus1&(AddendStickyM|FrmM[1]))} : {{`FLEN-`LEN1{1'b1}}, {ResultSgn, (`LEN1-1)'(0)} + {(`LEN1-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}}; assign InfResult = FmtM ? {InfSgn, {`NE{1'b1}}, (`NF)'(0)} : {{`FLEN-`LEN1{1'b1}}, InfSgn, {`NE1{1'b1}}, (`NF1)'(0)}; assign NormResult = FmtM ? {ResultSgn, ResultExp, ResultFrac} : {{`FLEN-`LEN1{1'b1}}, ResultSgn, ResultExp[`NE1-1:0], ResultFrac[`NF-1:`NF-`NF1]}; @@ -1066,7 +1067,7 @@ module resultselect( OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}}; - KillProdResult = {ResultSgn, {ZExpM[`NE-1:1], ZExpM[0]&~ZDenormM, ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})}; + KillProdResult = {ResultSgn, {ZExpM[`NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})}; UnderflowResult = {ResultSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),(CalcPlus1&(AddendStickyM|FrmM[1]))}; InfResult = {InfSgn, {`NE{1'b1}}, (`NF)'(0)}; NormResult = {ResultSgn, ResultExp, ResultFrac}; @@ -1082,7 +1083,7 @@ module resultselect( end OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`LEN1{1'b1}}, ResultSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} : {{`FLEN-`LEN1{1'b1}}, ResultSgn, {`NE1{1'b1}}, (`NF1)'(0)}; - KillProdResult = {{`FLEN-`LEN1{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE1-2:1], ZExpM[0]&~ZDenormM, ZManM[`NF-1:`NF-`NF1]} + (RoundAdd[`NF-`NF1+`LEN1-2:`NF-`NF1]&{`LEN1-1{AddendStickyM}})}; + KillProdResult = {{`FLEN-`LEN1{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE1-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`NF1]} + (RoundAdd[`NF-`NF1+`LEN1-2:`NF-`NF1]&{`LEN1-1{AddendStickyM}})}; UnderflowResult = {{`FLEN-`LEN1{1'b1}}, {ResultSgn, (`LEN1-1)'(0)} + {(`LEN1-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}}; InfResult = {{`FLEN-`LEN1{1'b1}}, InfSgn, {`NE1{1'b1}}, (`NF1)'(0)}; NormResult = {{`FLEN-`LEN1{1'b1}}, ResultSgn, ResultExp[`NE1-1:0], ResultFrac[`NF-1:`NF-`NF1]}; @@ -1099,7 +1100,7 @@ module resultselect( OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`LEN2{1'b1}}, ResultSgn, {`NE2-1{1'b1}}, 1'b0, {`NF2{1'b1}}} : {{`FLEN-`LEN2{1'b1}}, ResultSgn, {`NE2{1'b1}}, (`NF2)'(0)}; - KillProdResult = {{`FLEN-`LEN2{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE2-2:1], ZExpM[0]&~ZDenormM, ZManM[`NF-1:`NF-`NF2]} + (RoundAdd[`NF-`NF2+`LEN2-2:`NF-`NF2]&{`LEN2-1{AddendStickyM}})}; + KillProdResult = {{`FLEN-`LEN2{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE2-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`NF2]} + (RoundAdd[`NF-`NF2+`LEN2-2:`NF-`NF2]&{`LEN2-1{AddendStickyM}})}; UnderflowResult = {{`FLEN-`LEN2{1'b1}}, {ResultSgn, (`LEN2-1)'(0)} + {(`LEN2-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}}; InfResult = {{`FLEN-`LEN2{1'b1}}, InfSgn, {`NE2{1'b1}}, (`NF2)'(0)}; NormResult = {{`FLEN-`LEN2{1'b1}}, ResultSgn, ResultExp[`NE2-1:0], ResultFrac[`NF-1:`NF-`NF2]}; @@ -1137,7 +1138,7 @@ module resultselect( OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}}; - KillProdResult = {ResultSgn, {ZExpM[`Q_NE-1:1], ZExpM[0]&~ZDenormM, ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})}; + KillProdResult = {ResultSgn, {ZExpM[`Q_NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})}; UnderflowResult = {ResultSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),(CalcPlus1&(AddendStickyM|FrmM[1]))}; InfResult = {InfSgn, {`NE{1'b1}}, (`NF)'(0)}; NormResult = {ResultSgn, ResultExp, ResultFrac}; @@ -1153,7 +1154,7 @@ module resultselect( end OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`D_LEN{1'b1}}, ResultSgn, {`D_NE-1{1'b1}}, 1'b0, {`D_NF{1'b1}}} : {{`FLEN-`D_LEN{1'b1}}, ResultSgn, {`D_NE{1'b1}}, (`D_NF)'(0)}; - KillProdResult = {{`FLEN-`D_LEN{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`D_NE-2:1], ZExpM[0]&~ZDenormM, ZManM[`NF-1:`NF-`D_NF]} + (RoundAdd[`NF-`D_NF+`D_LEN-2:`NF-`D_NF]&{`D_LEN-1{AddendStickyM}})}; + KillProdResult = {{`FLEN-`D_LEN{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`D_NE-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`D_NF]} + (RoundAdd[`NF-`D_NF+`D_LEN-2:`NF-`D_NF]&{`D_LEN-1{AddendStickyM}})}; UnderflowResult = {{`FLEN-`D_LEN{1'b1}}, {ResultSgn, (`D_LEN-1)'(0)} + {(`D_LEN-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}}; InfResult = {{`FLEN-`D_LEN{1'b1}}, InfSgn, {`D_NE{1'b1}}, (`D_NF)'(0)}; NormResult = {{`FLEN-`D_LEN{1'b1}}, ResultSgn, ResultExp[`D_NE-1:0], ResultFrac[`NF-1:`NF-`D_NF]}; @@ -1170,7 +1171,7 @@ module resultselect( OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`S_LEN{1'b1}}, ResultSgn, {`S_NE-1{1'b1}}, 1'b0, {`S_NF{1'b1}}} : {{`FLEN-`S_LEN{1'b1}}, ResultSgn, {`S_NE{1'b1}}, (`S_NF)'(0)}; - KillProdResult = {{`FLEN-`S_LEN{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`S_NE-2:1], ZExpM[0]&~ZDenormM, ZManM[`NF-1:`NF-`S_NF]} + (RoundAdd[`NF-`S_NF+`S_LEN-2:`NF-`S_NF]&{`S_LEN-1{AddendStickyM}})}; + KillProdResult = {{`FLEN-`S_LEN{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`S_NE-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`S_NF]} + (RoundAdd[`NF-`S_NF+`S_LEN-2:`NF-`S_NF]&{`S_LEN-1{AddendStickyM}})}; UnderflowResult = {{`FLEN-`S_LEN{1'b1}}, {ResultSgn, (`S_LEN-1)'(0)} + {(`S_LEN-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}}; InfResult = {{`FLEN-`S_LEN{1'b1}}, InfSgn, {`S_NE{1'b1}}, (`S_NF)'(0)}; NormResult = {{`FLEN-`S_LEN{1'b1}}, ResultSgn, ResultExp[`S_NE-1:0], ResultFrac[`NF-1:`NF-`S_NF]}; @@ -1188,7 +1189,7 @@ module resultselect( OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`H_LEN{1'b1}}, ResultSgn, {`H_NE-1{1'b1}}, 1'b0, {`H_NF{1'b1}}} : {{`FLEN-`H_LEN{1'b1}}, ResultSgn, {`H_NE{1'b1}}, (`H_NF)'(0)}; - KillProdResult = {{`FLEN-`H_LEN{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`H_NE-2:1],ZExpM[0]&~ZDenormM, ZManM[`NF-1:`NF-`H_NF]} + (RoundAdd[`NF-`H_NF+`H_LEN-2:`NF-`H_NF]&{`H_LEN-1{AddendStickyM}})}; + KillProdResult = {{`FLEN-`H_LEN{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`H_NE-2:1],ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`H_NF]} + (RoundAdd[`NF-`H_NF+`H_LEN-2:`NF-`H_NF]&{`H_LEN-1{AddendStickyM}})}; UnderflowResult = {{`FLEN-`H_LEN{1'b1}}, {ResultSgn, (`H_LEN-1)'(0)} + {(`H_LEN-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}}; InfResult = {{`FLEN-`H_LEN{1'b1}}, InfSgn, {`H_NE{1'b1}}, (`H_NF)'(0)}; NormResult = {{`FLEN-`H_LEN{1'b1}}, ResultSgn, ResultExp[`H_NE-1:0], ResultFrac[`NF-1:`NF-`H_NF]}; diff --git a/pipelined/src/fpu/fpu.sv b/pipelined/src/fpu/fpu.sv index 757810921..0fa125b80 100755 --- a/pipelined/src/fpu/fpu.sv +++ b/pipelined/src/fpu/fpu.sv @@ -95,7 +95,7 @@ module fpu ( logic XNaNQ, YNaNQ; // is the input a NaN - divide logic XSNaNE, YSNaNE, ZSNaNE; // is the input a signaling NaN - execute stage logic XSNaNM, YSNaNM, ZSNaNM; // is the input a signaling NaN - memory stage - logic XDenormE, YDenormE, ZDenormE; // is the input denormalized + logic XDenormE, ZDenormE; // is the input denormalized logic XZeroE, YZeroE, ZZeroE; // is the input zero - execute stage logic XZeroM, YZeroM, ZZeroM; // is the input zero - memory stage logic XZeroQ, YZeroQ; // is the input zero - divide @@ -115,7 +115,7 @@ module fpu ( logic [63:0] CvtResE; // FP <-> int convert result logic [`XLEN-1:0] CvtIntResE; // FP <-> int convert result logic [4:0] CvtFlgE; // FP <-> int convert flags //*** trim this - logic [63:0] ClassResE; // classify result + logic [`XLEN-1:0] ClassResE; // classify result logic [63:0] CmpResE; // compare result logic CmpNVE; // compare invalid flag (Not Valid) logic [63:0] SgnResE; // sign injection result @@ -176,7 +176,7 @@ module fpu ( // - does some classifications (SNaN, NaN, Denorm, Norm, Zero, Infifnity) unpack unpack (.X(FSrcXE), .Y(FSrcYE), .Z(FSrcZE), .FmtE, .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE, - .XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE, .XDenormE, .YDenormE, .ZDenormE, + .XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE, .XDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE, .XExpMaxE); // FMA @@ -231,7 +231,7 @@ module fpu ( mux4 #(5) FFlgMux(5'b0, 5'b0, {CmpNVE, 4'b0}, CvtFlgE, FResSelE, FFlgE); // select the result that may be written to the integer register - to IEU - mux4 #(`XLEN) IntResMux(CmpResE[`XLEN-1:0], FSrcXE[`XLEN-1:0], ClassResE[`XLEN-1:0], + mux4 #(`XLEN) IntResMux(CmpResE[`XLEN-1:0], FSrcXE[`XLEN-1:0], ClassResE, CvtIntResE, FIntResSelE, FIntResE); // *** DH 5/25/22: CvtRes will move to mem stage. Premux in execute to save area, then make sure stalls are ok // *** make sure the fpu matches the chapter diagram diff --git a/pipelined/src/fpu/fregfile.sv b/pipelined/src/fpu/fregfile.sv index 2d54038de..00c89ff56 100644 --- a/pipelined/src/fpu/fregfile.sv +++ b/pipelined/src/fpu/fregfile.sv @@ -33,10 +33,10 @@ module fregfile ( input logic clk, reset, input logic we4, input logic [4:0] a1, a2, a3, a4, - input logic [63:0] wd4, - output logic [63:0] rd1, rd2, rd3); + input logic [`FLEN-1:0] wd4, + output logic [`FLEN-1:0] rd1, rd2, rd3); - logic [63:0] rf[31:0]; + logic [`FLEN-1:0] rf[31:0]; integer i; // three ported register file diff --git a/pipelined/src/fpu/fsgninj.sv b/pipelined/src/fpu/fsgninj.sv index 8474fdff6..00c1372c2 100755 --- a/pipelined/src/fpu/fsgninj.sv +++ b/pipelined/src/fpu/fsgninj.sv @@ -26,13 +26,14 @@ // TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE // OR OTHER DEALINGS IN THE SOFTWARE. //////////////////////////////////////////////////////////////////////////////////////////////// +`include "wally-config.vh" module fsgninj ( input logic XSgnE, YSgnE, // X and Y sign bits - input logic [63:0] FSrcXE, // X - input logic FmtE, // precision 1 = double 0 = single + input logic [`FLEN-1:0] FSrcXE, // X + input logic [`FPSIZES/3:0] FmtE, // precision 1 = double 0 = single input logic [1:0] SgnOpCodeE, // operation control - output logic [63:0] SgnResE // result + output logic [`FLEN-1:0] SgnResE // result ); logic ResSgn; @@ -50,7 +51,30 @@ module fsgninj ( // format final result based on precision // - uses NaN-blocking format // - if there are any unsused bits the most significant bits are filled with 1s - assign SgnResE = FmtE ? {ResSgn, FSrcXE[62:0]} : {FSrcXE[63:32], ResSgn, FSrcXE[30:0]}; + + if (`FPSIZES == 1) + assign SgnResE = {ResSgn, FSrcXE[`FLEN-2:0]}; + + else if (`FPSIZES == 2) + assign SgnResE = FmtE ? {ResSgn, FSrcXE[`FLEN-2:0]} : {{`FLEN-`LEN1{1'b1}}, ResSgn, FSrcXE[`LEN1-2:0]}; + + else if (`FPSIZES == 3) + always_comb + case (FmtE) + `FMT: SgnResE = {ResSgn, FSrcXE[`FLEN-2:0]}; + `FMT1: SgnResE = {{`FLEN-`LEN1{1'b1}}, ResSgn, FSrcXE[`LEN1-2:0]}; + `FMT2: SgnResE = {{`FLEN-`LEN2{1'b1}}, ResSgn, FSrcXE[`LEN2-2:0]}; + default: SgnResE = 0; + endcase + + else if (`FPSIZES == 4) + always_comb + case (FmtE) + 2'h3: SgnResE = {ResSgn, FSrcXE[`Q_LEN-2:0]}; + 2'h1: SgnResE = {{`Q_LEN-`D_LEN{1'b1}}, ResSgn, FSrcXE[`D_LEN-2:0]}; + 2'h0: SgnResE = {{`Q_LEN-`S_LEN{1'b1}}, ResSgn, FSrcXE[`S_LEN-2:0]}; + 2'h2: SgnResE = {{`Q_LEN-`H_LEN{1'b1}}, ResSgn, FSrcXE[`H_LEN-2:0]}; + endcase endmodule diff --git a/pipelined/src/fpu/unpack.sv b/pipelined/src/fpu/unpack.sv index e9f005e7a..e28b2efe1 100644 --- a/pipelined/src/fpu/unpack.sv +++ b/pipelined/src/fpu/unpack.sv @@ -8,26 +8,29 @@ module unpack ( output logic [`NF:0] XManE, YManE, ZManE, // mantissas of XYZ (converted to largest supported precision) output logic XNaNE, YNaNE, ZNaNE, // is XYZ a NaN output logic XSNaNE, YSNaNE, ZSNaNE, // is XYZ a signaling NaN - output logic XDenormE, YDenormE, ZDenormE, // is XYZ denormalized + output logic XDenormE, ZDenormE, // is XYZ denormalized output logic XZeroE, YZeroE, ZZeroE, // is XYZ zero output logic XInfE, YInfE, ZInfE, // is XYZ infinity output logic XExpMaxE // does X have the maximum exponent (NaN or Inf) ); logic [`NF-1:0] XFracE, YFracE, ZFracE; //Fraction of XYZ - logic XExpNonzero, YExpNonzero, ZExpNonzero; // is the exponent of XYZ non-zero + logic XExpNonZero, YExpNonZero, ZExpNonZero; // is the exponent of XYZ non-zero logic XFracZero, YFracZero, ZFracZero; // is the fraction zero logic YExpMaxE, ZExpMaxE; // is the exponent all 1s unpackinput unpackinputX (.In(X), .FmtE, .Sgn(XSgnE), .Exp(XExpE), .Man(XManE), - .NaN(XNaNE), .SNaN(XSNaNE), .Denorm(XDenormE), - .Zero(XZeroE), .Inf(XInfE), .ExpMax(XExpMaxE)); + .NaN(XNaNE), .SNaN(XSNaNE), .ExpNonZero(XExpNonZero), + .Zero(XZeroE), .Inf(XInfE), .ExpMax(XExpMaxE), .FracZero(XFracZero)); unpackinput unpackinputY (.In(Y), .FmtE, .Sgn(YSgnE), .Exp(YExpE), .Man(YManE), - .NaN(YNaNE), .SNaN(YSNaNE), .Denorm(YDenormE), - .Zero(YZeroE), .Inf(YInfE), .ExpMax(YExpMaxE)); + .NaN(YNaNE), .SNaN(YSNaNE), .ExpNonZero(YExpNonZero), + .Zero(YZeroE), .Inf(YInfE), .ExpMax(YExpMaxE), .FracZero(YFracZero)); unpackinput unpackinputZ (.In(Z), .FmtE, .Sgn(ZSgnE), .Exp(ZExpE), .Man(ZManE), - .NaN(ZNaNE), .SNaN(ZSNaNE), .Denorm(ZDenormE), - .Zero(ZZeroE), .Inf(ZInfE), .ExpMax(ZExpMaxE)); + .NaN(ZNaNE), .SNaN(ZSNaNE), .ExpNonZero(ZExpNonZero), + .Zero(ZZeroE), .Inf(ZInfE), .ExpMax(ZExpMaxE), .FracZero(ZFracZero)); + // is the input denormalized + assign XDenormE = ~XExpNonZero & ~XFracZero; + assign ZDenormE = ~ZExpNonZero & ~ZFracZero; endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/unpackinput.sv b/pipelined/src/fpu/unpackinput.sv index 9c58a444d..434a5bf00 100644 --- a/pipelined/src/fpu/unpackinput.sv +++ b/pipelined/src/fpu/unpackinput.sv @@ -8,25 +8,24 @@ module unpackinput ( output logic [`NF:0] Man, // mantissas of XYZ (converted to largest supported precision) output logic NaN, // is XYZ a NaN output logic SNaN, // is XYZ a signaling NaN - output logic Denorm, // is XYZ denormalized output logic Zero, // is XYZ zero output logic Inf, // is XYZ infinity + output logic ExpNonZero, // is the exponent not zero + output logic FracZero, // is the fraction zero output logic ExpMax // does In have the maximum exponent (NaN or Inf) ); logic [`NF-1:0] Frac; //Fraction of XYZ - logic ExpNonZero; // is the exponent of XYZ non-zero - logic FracZero; // is the fraction zero logic ExpZero; + logic BadNaNBox; if (`FPSIZES == 1) begin // if there is only one floating point format supported + assign BadNaNBox = 0; assign Sgn = In[`FLEN-1]; // sign bit assign Frac = In[`NF-1:0]; // fraction (no assumed 1) - assign FracZero = ~|Frac; // is the fraction zero? - assign ExpNonZero = |Exp; // is the exponent non-zero - assign Denorm = ~ExpNonZero & ~FracZero; // is the input (in its original format) denormalized - assign Exp = {In[`FLEN-2:`NF+1], In[`NF]|Denorm}; // exponent. Denormalized numbers have effective biased exponent of 1 - assign ExpMax = &Exp; // is the exponent all 1's + assign ExpNonZero = |In[`FLEN-2:`NF]; // is the exponent non-zero + assign Exp = {In[`FLEN-2:`NF+1], In[`NF]|~ExpNonZero}; // exponent. Denormalized numbers have effective biased exponent of 1 + assign ExpMax = &In[`FLEN-2:`NF]; // is the exponent all 1's end else if (`FPSIZES == 2) begin // if there are 2 floating point formats supported //***need better names for these constants // largest format | smaller format @@ -47,25 +46,16 @@ module unpackinput ( // quad and half // double and half - logic [`LEN1-1:0] Len1; // Remove NaN boxing or NaN, if not properly NaN boxed - - // Check NaN boxing, If the value is not properly NaN boxed, set the value to a quiet NaN - assign Len1 = &In[`FLEN-1:`LEN1] ? In[`LEN1-1:0] : {1'b0, {`NE1+1{1'b1}}, (`NF1-1)'(0)}; + assign BadNaNBox = ~(FmtE|(&In[`FLEN-1:`LEN1])); // Check NaN boxing // choose sign bit depending on format - 1=larger precsion 0=smaller precision - assign Sgn = FmtE ? In[`FLEN-1] : Len1[`LEN1-1]; + assign Sgn = FmtE ? In[`FLEN-1] : In[`LEN1-1]; // extract the fraction, add trailing zeroes to the mantissa if nessisary - assign Frac = FmtE ? In[`NF-1:0] : {Len1[`NF1-1:0], (`NF-`NF1)'(0)}; + assign Frac = FmtE ? In[`NF-1:0] : {In[`NF1-1:0], (`NF-`NF1)'(0)}; - // is the fraction zero - assign FracZero = ~|Frac; - // is the exponent non-zero - assign ExpNonZero = FmtE ? |In[`FLEN-2:`NF] : |Len1[`LEN1-2:`NF1]; - - // is the input (in it's original format) denormalized - assign Denorm = ~ExpNonZero & ~FracZero; + assign ExpNonZero = FmtE ? |In[`FLEN-2:`NF] : |In[`LEN1-2:`NF1]; // example double to single conversion: // 1023 = 0011 1111 1111 @@ -77,12 +67,10 @@ module unpackinput ( // extract the exponent, converting the smaller exponent into the larger precision if nessisary // - if the original precision had a denormal number convert the exponent value 1 - assign Exp = FmtE ? {In[`FLEN-2:`NF+1], In[`NF]|Denorm} : {Len1[`LEN1-2], {`NE-`NE1{~Len1[`LEN1-2]}}, Len1[`LEN1-3:`NF1+1], Len1[`NF1]|Denorm}; + assign Exp = FmtE ? {In[`FLEN-2:`NF+1], In[`NF]|~ExpNonZero} : {In[`LEN1-2], {`NE-`NE1{~In[`LEN1-2]}}, In[`LEN1-3:`NF1+1], In[`NF1]|~ExpNonZero}; - - // is the exponent all 1's - assign ExpMax = FmtE ? &In[`FLEN-2:`NF] : &Len1[`LEN1-2:`NF1]; + assign ExpMax = FmtE ? &In[`FLEN-2:`NF] : &In[`LEN1-2:`NF1]; end else if (`FPSIZES == 3) begin // three floating point precsions supported @@ -104,22 +92,21 @@ module unpackinput ( // quad and double and half // quad and single and half - logic [`LEN1-1:0] Len1; // Remove NaN boxing or NaN, if not properly NaN boxed for larger percision - logic [`LEN2-1:0] Len2; // Remove NaN boxing or NaN, if not properly NaN boxed for smallest precision - - // Check NaN boxing, If the value is not properly NaN boxed, set the value to a quiet NaN - for larger precision - assign Len1 = &In[`FLEN-1:`LEN1] ? In[`LEN1-1:0] : {1'b0, {`NE1+1{1'b1}}, (`NF1-1)'(0)}; - - // Check NaN boxing, If the value is not properly NaN boxed, set the value to a quiet NaN - for smaller precision - assign Len2 = &In[`FLEN-1:`LEN2] ? In[`LEN2-1:0] : {1'b0, {`NE2+1{1'b1}}, (`NF2-1)'(0)}; - + // Check NaN boxing + always_comb + case (FmtE) + `FMT: BadNaNBox = 0; + `FMT1: BadNaNBox = ~&In[`FLEN-1:`LEN1]; + `FMT2: BadNaNBox = ~&In[`FLEN-1:`LEN2]; + default: BadNaNBox = 0; + endcase // extract the sign bit always_comb case (FmtE) `FMT: Sgn = In[`FLEN-1]; - `FMT1: Sgn = Len1[`LEN1-1]; - `FMT2: Sgn = Len2[`LEN2-1]; + `FMT1: Sgn = In[`LEN1-1]; + `FMT2: Sgn = In[`LEN2-1]; default: Sgn = 0; endcase @@ -127,27 +114,20 @@ module unpackinput ( always_comb case (FmtE) `FMT: Frac = In[`NF-1:0]; - `FMT1: Frac = {Len1[`NF1-1:0], (`NF-`NF1)'(0)}; - `FMT2: Frac = {Len2[`NF2-1:0], (`NF-`NF2)'(0)}; + `FMT1: Frac = {In[`NF1-1:0], (`NF-`NF1)'(0)}; + `FMT2: Frac = {In[`NF2-1:0], (`NF-`NF2)'(0)}; default: Frac = 0; endcase - // is the fraction zero - assign FracZero = ~|Frac; - - // is the exponent non-zero always_comb case (FmtE) `FMT: ExpNonZero = |In[`FLEN-2:`NF]; // if input is largest precision (`FLEN - ie quad or double) - `FMT1: ExpNonZero = |Len1[`LEN1-2:`NF1]; // if input is larger precsion (`LEN1 - double or single) - `FMT2: ExpNonZero = |Len2[`LEN2-2:`NF2]; // if input is smallest precsion (`LEN2 - single or half) + `FMT1: ExpNonZero = |In[`LEN1-2:`NF1]; // if input is larger precsion (`LEN1 - double or single) + `FMT2: ExpNonZero = |In[`LEN2-2:`NF2]; // if input is smallest precsion (`LEN2 - single or half) default: ExpNonZero = 0; endcase - // is the input (in it's original format) denormalized - assign Denorm = ~ExpNonZero & ~FracZero; - // example double to single conversion: // 1023 = 0011 1111 1111 // 127 = 0000 0111 1111 (subtract this) @@ -159,9 +139,9 @@ module unpackinput ( // convert the larger precision's exponent to use the largest precision's bias always_comb case (FmtE) - `FMT: Exp = {In[`FLEN-2:`NF+1], In[`NF]|Denorm}; - `FMT1: Exp = {Len1[`LEN1-2], {`NE-`NE1{~Len1[`LEN1-2]}}, Len1[`LEN1-3:`NF1+1], Len1[`NF1]|Denorm}; - `FMT2: Exp = {Len2[`LEN2-2], {`NE-`NE2{~Len2[`LEN2-2]}}, Len2[`LEN2-3:`NF2+1], Len2[`NF2]|Denorm}; + `FMT: Exp = {In[`FLEN-2:`NF+1], In[`NF]|~ExpNonZero}; + `FMT1: Exp = {In[`LEN1-2], {`NE-`NE1{~In[`LEN1-2]}}, In[`LEN1-3:`NF1+1], In[`NF1]|~ExpNonZero}; + `FMT2: Exp = {In[`LEN2-2], {`NE-`NE2{~In[`LEN2-2]}}, In[`LEN2-3:`NF2+1], In[`NF2]|~ExpNonZero}; default: Exp = 0; endcase @@ -169,8 +149,8 @@ module unpackinput ( always_comb case (FmtE) `FMT: ExpMax = &In[`FLEN-2:`NF]; - `FMT1: ExpMax = &Len1[`LEN1-2:`NF1]; - `FMT2: ExpMax = &Len2[`LEN2-2:`NF2]; + `FMT1: ExpMax = &In[`LEN1-2:`NF1]; + `FMT2: ExpMax = &In[`LEN2-2:`NF2]; default: ExpMax = 0; endcase @@ -184,27 +164,22 @@ module unpackinput ( // `Q_BIAS | `D_BIAS | `S_BIAS | `H_BIAS exponent's bias value // `Q_FMT | `D_FMT | `S_FMT | `H_FMT precision's format value - Q=11 D=01 S=00 H=10 - - logic [`D_LEN-1:0] Len1; // Remove NaN boxing or NaN, if not properly NaN boxed for double percision - logic [`S_LEN-1:0] Len2; // Remove NaN boxing or NaN, if not properly NaN boxed for single percision - logic [`H_LEN-1:0] Len3; // Remove NaN boxing or NaN, if not properly NaN boxed for half percision - - // Check NaN boxing, If the value is not properly NaN boxed, set the value to a quiet NaN - for double precision - assign Len1 = &In[`Q_LEN-1:`D_LEN] ? In[`D_LEN-1:0] : {1'b0, {`D_NE+1{1'b1}}, (`D_NF-1)'(0)}; - - // Check NaN boxing, If the value is not properly NaN boxed, set the value to a quiet NaN - for single precision - assign Len2 = &In[`Q_LEN-1:`S_LEN] ? In[`S_LEN-1:0] : {1'b0, {`S_NE+1{1'b1}}, (`S_NF-1)'(0)}; - - // Check NaN boxing, If the value is not properly NaN boxed, set the value to a quiet NaN - for half precision - assign Len3 = &In[`Q_LEN-1:`H_LEN] ? In[`H_LEN-1:0] : {1'b0, {`H_NE+1{1'b1}}, (`H_NF-1)'(0)}; + // Check NaN boxing + always_comb + case (FmtE) + 2'b11: BadNaNBox = 0; + 2'b01: BadNaNBox = ~&In[`Q_LEN-1:`D_LEN]; + 2'b00: BadNaNBox = ~&In[`Q_LEN-1:`S_LEN]; + 2'b10: BadNaNBox = ~&In[`Q_LEN-1:`H_LEN]; + endcase // extract sign bit always_comb case (FmtE) 2'b11: Sgn = In[`Q_LEN-1]; - 2'b01: Sgn = Len1[`D_LEN-1]; - 2'b00: Sgn = Len2[`S_LEN-1]; - 2'b10: Sgn = Len3[`H_LEN-1]; + 2'b01: Sgn = In[`D_LEN-1]; + 2'b00: Sgn = In[`S_LEN-1]; + 2'b10: Sgn = In[`H_LEN-1]; endcase @@ -212,26 +187,20 @@ module unpackinput ( always_comb case (FmtE) 2'b11: Frac = In[`Q_NF-1:0]; - 2'b01: Frac = {Len1[`D_NF-1:0], (`Q_NF-`D_NF)'(0)}; - 2'b00: Frac = {Len2[`S_NF-1:0], (`Q_NF-`S_NF)'(0)}; - 2'b10: Frac = {Len3[`H_NF-1:0], (`Q_NF-`H_NF)'(0)}; + 2'b01: Frac = {In[`D_NF-1:0], (`Q_NF-`D_NF)'(0)}; + 2'b00: Frac = {In[`S_NF-1:0], (`Q_NF-`S_NF)'(0)}; + 2'b10: Frac = {In[`H_NF-1:0], (`Q_NF-`H_NF)'(0)}; endcase - // is the fraction zero - assign FracZero = ~|Frac; - // is the exponent non-zero always_comb case (FmtE) 2'b11: ExpNonZero = |In[`Q_LEN-2:`Q_NF]; - 2'b01: ExpNonZero = |Len1[`D_LEN-2:`D_NF]; - 2'b00: ExpNonZero = |Len2[`S_LEN-2:`S_NF]; - 2'b10: ExpNonZero = |Len3[`H_LEN-2:`H_NF]; + 2'b01: ExpNonZero = |In[`D_LEN-2:`D_NF]; + 2'b00: ExpNonZero = |In[`S_LEN-2:`S_NF]; + 2'b10: ExpNonZero = |In[`H_LEN-2:`H_NF]; endcase - // is the input (in it's original format) denormalized - assign Denorm = ~ExpNonZero & ~FracZero; - // example double to single conversion: // 1023 = 0011 1111 1111 @@ -244,10 +213,10 @@ module unpackinput ( // convert the double precsion exponent into quad precsion always_comb case (FmtE) - 2'b11: Exp = {In[`Q_LEN-2:`Q_NF+1], In[`Q_NF]|Denorm}; - 2'b01: Exp = {Len1[`D_LEN-2], {`Q_NE-`D_NE{~Len1[`D_LEN-2]}}, Len1[`D_LEN-3:`D_NF+1], Len1[`D_NF]|Denorm}; - 2'b00: Exp = {Len2[`S_LEN-2], {`Q_NE-`S_NE{~Len2[`S_LEN-2]}}, Len2[`S_LEN-3:`S_NF+1], Len2[`S_NF]|Denorm}; - 2'b10: Exp = {Len3[`H_LEN-2], {`Q_NE-`H_NE{~Len3[`H_LEN-2]}}, Len3[`H_LEN-3:`H_NF+1], Len3[`H_NF]|Denorm}; + 2'b11: Exp = {In[`Q_LEN-2:`Q_NF+1], In[`Q_NF]|~ExpNonZero}; + 2'b01: Exp = {In[`D_LEN-2], {`Q_NE-`D_NE{~In[`D_LEN-2]}}, In[`D_LEN-3:`D_NF+1], In[`D_NF]|~ExpNonZero}; + 2'b00: Exp = {In[`S_LEN-2], {`Q_NE-`S_NE{~In[`S_LEN-2]}}, In[`S_LEN-3:`S_NF+1], In[`S_NF]|~ExpNonZero}; + 2'b10: Exp = {In[`H_LEN-2], {`Q_NE-`H_NE{~In[`H_LEN-2]}}, In[`H_LEN-3:`H_NF+1], In[`H_NF]|~ExpNonZero}; endcase @@ -255,19 +224,18 @@ module unpackinput ( always_comb case (FmtE) 2'b11: ExpMax = &In[`Q_LEN-2:`Q_NF]; - 2'b01: ExpMax = &Len1[`D_LEN-2:`D_NF]; - 2'b00: ExpMax = &Len2[`S_LEN-2:`S_NF]; - 2'b10: ExpMax = &Len3[`H_LEN-2:`H_NF]; + 2'b01: ExpMax = &In[`D_LEN-2:`D_NF]; + 2'b00: ExpMax = &In[`S_LEN-2:`S_NF]; + 2'b10: ExpMax = &In[`H_LEN-2:`H_NF]; endcase end // Output logic - assign ExpZero = ~ExpNonZero; // is the exponent all 0's? + assign FracZero = ~|Frac; // is the fraction zero? assign Man = {ExpNonZero, Frac}; // add the assumed one (or zero if denormal or zero) to create the significand - // *** - force to be a NaN if it isn't properly Nan Boxed - assign NaN = ExpMax & ~FracZero; // is the input a NaN? - assign SNaN = NaN&~Frac[`NF-1]; // is the input a singnaling NaN? + assign NaN = (ExpMax & ~FracZero)|BadNaNBox; // is the input a NaN? + assign SNaN = NaN&~Frac[`NF-1]&~BadNaNBox; // is the input a singnaling NaN? assign Inf = ExpMax & FracZero; // is the input infinity? - assign Zero = ExpZero & FracZero; // is the input zero? + assign Zero = ~ExpNonZero & FracZero; // is the input zero? endmodule \ No newline at end of file diff --git a/pipelined/src/generic/lzc.sv b/pipelined/src/generic/lzc.sv index 1ce082475..123edcb6e 100644 --- a/pipelined/src/generic/lzc.sv +++ b/pipelined/src/generic/lzc.sv @@ -1,5 +1,5 @@ //leading zero counter i.e. priority encoder -module lzc #(parameter WIDTH=1) ( +module lzc #(parameter WIDTH = 1) ( input logic [WIDTH-1:0] num, output logic [$clog2(WIDTH+1)-1:0] ZeroCnt ); diff --git a/pipelined/srt/srt.sv b/pipelined/srt/srt.sv index 835b1d1b6..2275b93ed 100644 --- a/pipelined/srt/srt.sv +++ b/pipelined/srt/srt.sv @@ -44,7 +44,7 @@ module srt #(parameter Nf=52) ( input logic [1:0] Fmt, // Floats: 00 = 16 bit, 01 = 32 bit, 10 = 64 bit, 11 = 128 bit input logic W64, // 32-bit ints on XLEN=64 input logic Signed, // Interpret integers as signed 2's complement - input logic Int, // Choose integer inputss + input logic Int, // Choose integer inputs input logic Sqrt, // perform square root, not divide output logic rsign, output logic [Nf-1:0] Quot, Rem, QuotOTFC, // *** later handle integers @@ -52,7 +52,7 @@ module srt #(parameter Nf=52) ( output logic [3:0] Flags ); - logic qp, qz, qm; // quotient is +1, 0, or -1 + logic qp, qz, qm; // quotient is +1, 0, or -1 logic [`NE-1:0] calcExp; logic calcSign; logic [Nf-1:0] X, Dpreproc; @@ -223,17 +223,17 @@ module otfc2 #(parameter N=52) ( output logic [N-1:0] r ); - // The on-the-fly converter transfers the quotient + // The on-the-fly converter transfers the quotient // bits to the quotient as they come. // - // This code follows the psuedocode presented in the + // This code follows the psuedocode presented in the // floating point chapter of the book. Right now, // it is written for Radix-2 division. // - // QM is Q-1. It allows us to write negative bits + // QM is Q-1. It allows us to write negative bits // without using a costly CPA. logic [N+2:0] Q, QM, QNext, QMNext; - // QR and QMR are the shifted versions of Q and QM. + // QR and QMR are the shifted versions of Q and QM. // They are treated as [N-1:r] size signals, and // discard the r most significant bits of Q and QM. logic [N+1:0] QR, QMR; diff --git a/pipelined/testbench/testbench-fp.sv b/pipelined/testbench/testbench-fp.sv index 892e76373..b46061a03 100644 --- a/pipelined/testbench/testbench-fp.sv +++ b/pipelined/testbench/testbench-fp.sv @@ -79,7 +79,6 @@ module testbenchfp; logic [`NF:0] FmaRuXMan, FmaRuYMan, FmaRuZMan; logic [`NF:0] FmaRdXMan, FmaRdYMan, FmaRdZMan; logic [`NF:0] FmaRnmXMan, FmaRnmYMan, FmaRnmZMan; - logic XNorm; // is X normal logic XNaN, YNaN, ZNaN; // is the input NaN logic FmaRneXNaN, FmaRneYNaN, FmaRneZNaN; logic FmaRzXNaN, FmaRzYNaN, FmaRzZNaN; @@ -92,12 +91,12 @@ module testbenchfp; logic FmaRuXSNaN, FmaRuYSNaN, FmaRuZSNaN; logic FmaRdXSNaN, FmaRdYSNaN, FmaRdZSNaN; logic FmaRnmXSNaN, FmaRnmYSNaN, FmaRnmZSNaN; - logic XDenorm, YDenorm, ZDenorm; // is the input denormalized - logic FmaRneXDenorm, FmaRneYDenorm, FmaRneZDenorm; - logic FmaRzXDenorm, FmaRzYDenorm, FmaRzZDenorm; - logic FmaRuXDenorm, FmaRuYDenorm, FmaRuZDenorm; - logic FmaRdXDenorm, FmaRdYDenorm, FmaRdZDenorm; - logic FmaRnmXDenorm, FmaRnmYDenorm, FmaRnmZDenorm; + logic XDenorm, ZDenorm; // is the input denormalized + logic FmaRneXDenorm, FmaRneZDenorm; + logic FmaRzXDenorm, FmaRzZDenorm; + logic FmaRuXDenorm, FmaRuZDenorm; + logic FmaRdXDenorm, FmaRdZDenorm; + logic FmaRnmXDenorm, FmaRnmZDenorm; logic XInf, YInf, ZInf; // is the input infinity logic FmaRneXInf, FmaRneYInf, FmaRneZInf; logic FmaRzXInf, FmaRzYInf, FmaRzZInf; @@ -683,7 +682,7 @@ module testbenchfp; .XManE(FmaRneXMan), .YManE(FmaRneYMan), .ZManE(FmaRneZMan), .XNaNE(FmaRneXNaN), .YNaNE(FmaRneYNaN), .ZNaNE(FmaRneZNaN), .XSNaNE(FmaRneXSNaN), .YSNaNE(FmaRneYSNaN), .ZSNaNE(FmaRneZSNaN), - .XDenormE(FmaRneXDenorm), .YDenormE(FmaRneYDenorm), .ZDenormE(FmaRneZDenorm), + .XDenormE(FmaRneXDenorm), .ZDenormE(FmaRneZDenorm), .XZeroE(FmaRneXZero), .YZeroE(FmaRneYZero), .ZZeroE(FmaRneZZero), .XInfE(FmaRneXInf), .YInfE(FmaRneYInf), .ZInfE(FmaRneZInf), .FmaModFmt, .FmaFmt(FmaFmtVal), .X(FmaRneX), .Y(FmaRneY), .Z(FmaRneZ)); @@ -693,7 +692,7 @@ module testbenchfp; .XManE(FmaRzXMan), .YManE(FmaRzYMan), .ZManE(FmaRzZMan), .XNaNE(FmaRzXNaN), .YNaNE(FmaRzYNaN), .ZNaNE(FmaRzZNaN), .XSNaNE(FmaRzXSNaN), .YSNaNE(FmaRzYSNaN), .ZSNaNE(FmaRzZSNaN), - .XDenormE(FmaRzXDenorm), .YDenormE(FmaRzYDenorm), .ZDenormE(FmaRzZDenorm), + .XDenormE(FmaRzXDenorm), .ZDenormE(FmaRzZDenorm), .XZeroE(FmaRzXZero), .YZeroE(FmaRzYZero), .ZZeroE(FmaRzZZero), .XInfE(FmaRzXInf), .YInfE(FmaRzYInf), .ZInfE(FmaRzZInf), .FmaFmt(FmaFmtVal), .X(FmaRzX), .Y(FmaRzY), .Z(FmaRzZ)); @@ -703,7 +702,7 @@ module testbenchfp; .XManE(FmaRuXMan), .YManE(FmaRuYMan), .ZManE(FmaRuZMan), .XNaNE(FmaRuXNaN), .YNaNE(FmaRuYNaN), .ZNaNE(FmaRuZNaN), .XSNaNE(FmaRuXSNaN), .YSNaNE(FmaRuYSNaN), .ZSNaNE(FmaRuZSNaN), - .XDenormE(FmaRuXDenorm), .YDenormE(FmaRuYDenorm), .ZDenormE(FmaRuZDenorm), + .XDenormE(FmaRuXDenorm), .ZDenormE(FmaRuZDenorm), .XZeroE(FmaRuXZero), .YZeroE(FmaRuYZero), .ZZeroE(FmaRuZZero), .XInfE(FmaRuXInf), .YInfE(FmaRuYInf), .ZInfE(FmaRuZInf), .FmaFmt(FmaFmtVal), .X(FmaRuX), .Y(FmaRuY), .Z(FmaRuZ)); @@ -713,7 +712,7 @@ module testbenchfp; .XManE(FmaRdXMan), .YManE(FmaRdYMan), .ZManE(FmaRdZMan), .XNaNE(FmaRdXNaN), .YNaNE(FmaRdYNaN), .ZNaNE(FmaRdZNaN), .XSNaNE(FmaRdXSNaN), .YSNaNE(FmaRdYSNaN), .ZSNaNE(FmaRdZSNaN), - .XDenormE(FmaRdXDenorm), .YDenormE(FmaRdYDenorm), .ZDenormE(FmaRdZDenorm), + .XDenormE(FmaRdXDenorm), .ZDenormE(FmaRdZDenorm), .XZeroE(FmaRdXZero), .YZeroE(FmaRdYZero), .ZZeroE(FmaRdZZero), .XInfE(FmaRdXInf), .YInfE(FmaRdYInf), .ZInfE(FmaRdZInf), .FmaFmt(FmaFmtVal), .X(FmaRdX), .Y(FmaRdY), .Z(FmaRdZ)); @@ -723,7 +722,7 @@ module testbenchfp; .XManE(FmaRnmXMan), .YManE(FmaRnmYMan), .ZManE(FmaRnmZMan), .XNaNE(FmaRnmXNaN), .YNaNE(FmaRnmYNaN), .ZNaNE(FmaRnmZNaN), .XSNaNE(FmaRnmXSNaN), .YSNaNE(FmaRnmYSNaN), .ZSNaNE(FmaRnmZSNaN), - .XDenormE(FmaRnmXDenorm), .YDenormE(FmaRnmYDenorm), .ZDenormE(FmaRnmZDenorm), + .XDenormE(FmaRnmXDenorm), .ZDenormE(FmaRnmZDenorm), .XZeroE(FmaRnmXZero), .YZeroE(FmaRnmYZero), .ZZeroE(FmaRnmZZero), .XInfE(FmaRnmXInf), .YInfE(FmaRnmYInf), .ZInfE(FmaRnmZInf), .FmaFmt(FmaFmtVal), .X(FmaRnmX), .Y(FmaRnmY), .Z(FmaRnmZ)); @@ -733,9 +732,9 @@ module testbenchfp; .XManE(XMan), .YManE(YMan), .ZManE(ZMan), .XNaNE(XNaN), .YNaNE(YNaN), .ZNaNE(ZNaN), .XSNaNE(XSNaN), .YSNaNE(YSNaN), .ZSNaNE(ZSNaN), - .XDenormE(XDenorm), .YDenormE(YDenorm), .ZDenormE(ZDenorm), + .XDenormE(XDenorm), .ZDenormE(ZDenorm), .XZeroE(XZero), .YZeroE(YZero), .ZZeroE(ZZero), - .XInfE(XInf), .YInfE(YInf), .ZInfE(ZInf),.XNormE(XNorm), .XExpMaxE(XExpMax), + .XInfE(XInf), .YInfE(YInf), .ZInfE(ZInf), .XExpMaxE(XExpMax), .X, .Y, .Z); @@ -1294,13 +1293,13 @@ module readfmavectors ( output logic [`NF:0] XManE, YManE, ZManE, // mantissas of XYZ (converted to largest supported precision) output logic XNaNE, YNaNE, ZNaNE, // is XYZ a NaN output logic XSNaNE, YSNaNE, ZSNaNE, // is XYZ a signaling NaN - output logic XDenormE, YDenormE, ZDenormE, // is XYZ denormalized + output logic XDenormE, ZDenormE, // is XYZ denormalized output logic XZeroE, YZeroE, ZZeroE, // is XYZ zero output logic XInfE, YInfE, ZInfE, // is XYZ infinity output logic [`FLEN-1:0] X, Y, Z // inputs ); - logic XNormE, XExpMaxE; // signals the unpacker outputs but isn't used in FMA + logic XExpMaxE; // signals the unpacker outputs but isn't used in FMA // apply test vectors on rising edge of clk // Format of vectors Inputs(1/2/3)_AnsFlg always @(posedge clk) begin @@ -1335,7 +1334,7 @@ module readfmavectors ( end unpack unpack(.X, .Y, .Z, .FmtE(FmaModFmt), .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XDenormE, - .XManE, .YManE, .ZManE, .XNormE, .XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE, + .XManE, .YManE, .ZManE, .XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE, .XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE, .XExpMaxE, .ZDenormE); endmodule @@ -1373,10 +1372,10 @@ module readvectors ( output logic [`NF:0] XManE, YManE, ZManE, // mantissas of XYZ (converted to largest supported precision) output logic XNaNE, YNaNE, ZNaNE, // is XYZ a NaN output logic XSNaNE, YSNaNE, ZSNaNE, // is XYZ a signaling NaN - output logic XDenormE, YDenormE, ZDenormE, // is XYZ denormalized + output logic XDenormE, ZDenormE, // is XYZ denormalized output logic XZeroE, YZeroE, ZZeroE, // is XYZ zero output logic XInfE, YInfE, ZInfE, // is XYZ infinity - output logic XNormE, XExpMaxE, + output logic XExpMaxE, output logic [`FLEN-1:0] X, Y, Z ); @@ -1660,7 +1659,7 @@ module readvectors ( end unpack unpack(.X, .Y, .Z, .FmtE(ModFmt), .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, - .XManE, .YManE, .ZManE, .XNormE, .XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE, - .XDenormE, .YDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE, + .XManE, .YManE, .ZManE, .XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE, + .XDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE, .XExpMaxE); endmodule \ No newline at end of file diff --git a/pipelined/testbench/testbench.sv b/pipelined/testbench/testbench.sv index ba3122926..0ebab1cf9 100644 --- a/pipelined/testbench/testbench.sv +++ b/pipelined/testbench/testbench.sv @@ -128,7 +128,8 @@ logic [3:0] dummy; end end - string signame, memfilename, pathname, objdumpfilename, adrstr; + string signame, memfilename, pathname, objdumpfilename, adrstr, outputfile; + integer outputFilePointer; logic [31:0] GPIOPinsIn, GPIOPinsOut, GPIOPinsEn; logic UARTSin, UARTSout; @@ -213,70 +214,88 @@ logic [3:0] dummy; $display("Benchmark: coremark is done."); $stop; end + // Termination condition (i.e. we finished running current test) if (DCacheFlushDone) begin - - #600; // give time for instructions in pipeline to finish - // clear signature to prevent contamination from previous tests - for(i=0; i