diff --git a/addins/riscv-arch-test b/addins/riscv-arch-test index 307c77b2..ad04e119 160000 --- a/addins/riscv-arch-test +++ b/addins/riscv-arch-test @@ -1 +1 @@ -Subproject commit 307c77b26e070ae85ffea665ad9b642b40e33c86 +Subproject commit ad04e119a5d846a1c11159786ad3382cf5ad3649 diff --git a/benchmarks/embench/Makefile b/benchmarks/embench/Makefile index 3e40a68a..28b012cd 100644 --- a/benchmarks/embench/Makefile +++ b/benchmarks/embench/Makefile @@ -7,9 +7,14 @@ allClean: clean all build: ../../addins/embench-iot/build_all.py --builddir=bd_speed --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostartfiles ../../../config/riscv32/boards/rv32wallyverilog/startup/crt0.S" --cflags="-nostartfiles" - ../../addins/embench-iot/build_all.py --builddir=bd_size --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostdlib" --cflags="-nostdlib" --dummy-libs="libgcc libm libc crt0" + find ../../addins/embench-iot/bd_speed/ -type f ! -name "*.*" | while read f; do cp "$$f" "$$f.elf"; done + ../../addins/embench-iot/build_all.py --builddir=bd_size --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostdlib -nostartfiles" --cflags="-msave-restore" --dummy-libs="libgcc libm libc crt0" -sim: size speed +sim: modelSimBuild size speed + +modelSimBuild: objdump + find ../../addins/embench-iot/bd_speed/ -type f -name "*.elf" | while read f; do riscv64-unknown-elf-elf2hex --bit-width 32 --input "$$f" --output "$$f.memfile"; done + find ../../addins/embench-iot/bd_speed/ -type f -name "*.elf.objdump" | while read f; do extractFunctionRadix.sh $$f; done size: ../../addins/embench-iot/benchmark_size.py --builddir=bd_size @@ -18,10 +23,7 @@ speed: ../../addins/embench-iot/benchmark_speed.py --builddir=bd_speed --target-module run_wally --cpu-mhz=50 objdump: - riscv64-unknown-elf-objdump -S ../../addins/embench-iot/bd_speed/src/aha-mont64/aha-mont64 > ../../addins/embench-iot/bd_speed/src/aha-mont64/aha-mont64.objdump - riscv64-unknown-elf-objdump -S ../../addins/embench-iot/bd_speed/src/cubic/cubic > ../../addins/embench-iot/bd_speed/src/cubic/cubic.objdump - riscv64-unknown-elf-objdump -S ../../addins/embench-iot/bd_speed/src/md5sum/md5sum > ../../addins/embench-iot/bd_speed/src/md5sum/md5sum.objdump - riscv64-unknown-elf-objdump -S ../../addins/embench-iot/bd_speed/src/statemate/statemate > ../../addins/embench-iot/bd_speed/src/statemate/statemate.objdump + find ../../addins/embench-iot/bd_speed/ -type f -name "*.elf" | while read f; do riscv64-unknown-elf-objdump -S "$$f" > "$$f.objdump"; done clean: rm -rf ../../addins/embench-iot/bd_speed/ diff --git a/pipelined/regression/fp.do b/pipelined/regression/fp.do index 208118fc..68c240c8 100644 --- a/pipelined/regression/fp.do +++ b/pipelined/regression/fp.do @@ -32,7 +32,7 @@ vlib work # start and run simulation # remove +acc flag for faster sim during regressions if there is no need to access internal signals # $num = the added words after the call -vlog +incdir+../config/$1 +incdir+../config/shared ../testbench/testbench-fp.sv ../src/fpu/*.sv -suppress 2583,7063,8607,2697 +vlog +incdir+../config/$1 +incdir+../config/shared ../testbench/testbench-fp.sv ../src/fpu/*.sv ../src/generic/*.sv -suppress 2583,7063,8607,2697 vsim -voptargs=+acc work.testbenchfp -G TEST=$2 diff --git a/pipelined/src/fpu/fcvt.sv b/pipelined/src/fpu/fcvt.sv index 55e6706c..74a2829a 100644 --- a/pipelined/src/fpu/fcvt.sv +++ b/pipelined/src/fpu/fcvt.sv @@ -17,9 +17,9 @@ module fcvt ( input logic XSNaNE, // is the input a signaling NaN input logic [2:0] FrmE, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude input logic [`FPSIZES/3:0] FmtE, // the input's precision (11=quad 01=double 00=single 10=half) - output logic [`FLEN-1:0] CvtResE, // the fp to fp conversion's result - output logic [`XLEN-1:0] CvtIntResE, // the fp to fp conversion's result - output logic [4:0] CvtFlgE // the fp to fp conversion's flags + output logic [`FLEN-1:0] CvtResE, // the fp conversion result + output logic [`XLEN-1:0] CvtIntResE, // the int conversion result + output logic [4:0] CvtFlgE // the conversion's flags ); // OpCtrls: @@ -39,9 +39,10 @@ module fcvt ( logic [`FPSIZES/3:0] OutFmt; // format of the output logic [`XLEN-1:0] PosInt; // the positive integer input + logic [`XLEN-1:0] TrimInt; // integer trimmed to the correct size logic [`LGLEN-1:0] LzcIn; // input to the Leading Zero Counter (priority encoder) logic [`NE:0] CalcExp; // the calculated expoent - logic [$clog2(`LGLEN):0] ShiftAmt; // how much to shift by + logic [$clog2(`LGLEN)-1:0] ShiftAmt; // how much to shift by logic [`LGLEN+`NF:0] ShiftIn; // number to be shifted logic ResDenormUf;// does the result underflow or is denormalized logic ResUf; // does the result underflow @@ -71,6 +72,7 @@ module fcvt ( logic Int64; // is the integer 64 bits? logic IntToFp; // is the opperation an int->fp conversion? logic ToInt; // is the opperation an fp->int conversion? + logic [$clog2(`LGLEN)-1:0] ZeroCnt; // output from the LZC // seperate OpCtrl for code readability @@ -91,18 +93,11 @@ module fcvt ( /////////////////////////////////////////////////////////////////////////// // negation /////////////////////////////////////////////////////////////////////////// - // negate the input if the input is a negitive singed integer - // - remove leading ones if the input is a unsigned 32-bit integer - // - // Negitive input - // 64-bit input : negate the input - // 32-bit input : trim to 32-bits and negate the input - // Positive input - // 64-bit input : do nothing - // 32-bit input : trim to 32-bits + // 1) negate the input if the input is a negitive singed integer + // 2) trim the input to the proper size (kill the 32 most significant zeroes if needed) - assign PosInt = ResSgn ? Int64 ? -ForwardedSrcAE : {{`XLEN-32{1'b0}}, -ForwardedSrcAE[31:0]} : - Int64 ? ForwardedSrcAE : {{`XLEN-32{1'b0}}, ForwardedSrcAE[31:0]}; + assign PosInt = ResSgn ? -ForwardedSrcAE : ForwardedSrcAE; + assign TrimInt = {{`XLEN-32{Int64}}, {32{1'b1}}} & PosInt; /////////////////////////////////////////////////////////////////////////// // lzc @@ -111,16 +106,10 @@ module fcvt ( // choose the input to the leading zero counter i.e. priority encoder // int -> fp : | positive integer | 00000... (if needed) | // fp -> fp : | fraction | 00000... (if needed) | - assign LzcIn = IntToFp ? {PosInt, {`LGLEN-`XLEN{1'b0}}} : // I->F - {XManE[`NF-1:0], {`LGLEN-`NF{1'b0}}}; // F->F + assign LzcIn = IntToFp ? {TrimInt, {`LGLEN-`XLEN{1'b0}}} : + {XManE[`NF-1:0], {`LGLEN-`NF{1'b0}}}; - // lglen is the largest possible value of ZeroCnt (NF or XLEN) hence normcnt must be log2(lglen) bits - logic [$clog2(`LGLEN):0] i, ZeroCnt; - always_comb begin - i = 0; - while (~LzcIn[`LGLEN-1-i] & i <= `LGLEN-1) i = i+1; // search for leading one - ZeroCnt = i; - end + lzc #(`LGLEN) lzc (.num(LzcIn), .ZeroCnt); /////////////////////////////////////////////////////////////////////////// @@ -154,9 +143,9 @@ module fcvt ( // - only shift fp -> fp if the intital value is denormalized // - this is a problem because the input to the lzc was the fraction rather than the mantissa // - rather have a few and-gates than an extra bit in the priority encoder??? *** is this true? - assign ShiftAmt = ToInt ? CalcExp[$clog2(`LGLEN):0]&{$clog2(`LGLEN)+1{~CalcExp[`NE]}} : - ResDenormUf&~IntToFp ? ($clog2(`LGLEN)+1)'(`NF-1)+CalcExp[$clog2(`LGLEN):0] : - (ZeroCnt+1)&{$clog2(`LGLEN)+1{XOrigDenormE|IntToFp}}; + assign ShiftAmt = ToInt ? CalcExp[$clog2(`LGLEN)-1:0]&{$clog2(`LGLEN){~CalcExp[`NE]}} : + ResDenormUf&~IntToFp ? ($clog2(`LGLEN))'(`NF-1)+CalcExp[$clog2(`LGLEN)-1:0] : + (ZeroCnt+1)&{$clog2(`LGLEN){XOrigDenormE|IntToFp}}; // shift // fp -> int: | `XLEN zeros | Mantissa | 0's if nessisary | << CalcExp @@ -272,7 +261,7 @@ module fcvt ( // - shift left to normilize (-1-ZeroCnt) // - newBias to make the biased exponent // - assign CalcExp = {1'b0, OldExp} - (`NE+1)'(`BIAS) + {2'b0, NewBias} - {{`NE{1'b0}}, XOrigDenormE|IntToFp} - {{`NE-$clog2(`LGLEN){1'b0}}, (ZeroCnt&{$clog2(`LGLEN)+1{XOrigDenormE|IntToFp}})}; + assign CalcExp = {1'b0, OldExp} - (`NE+1)'(`BIAS) + {2'b0, NewBias} - {{`NE{1'b0}}, XOrigDenormE|IntToFp} - {{`NE-$clog2(`LGLEN)+1{1'b0}}, (ZeroCnt&{$clog2(`LGLEN){XOrigDenormE|IntToFp}})}; // find if the result is dnormal or underflows // - if Calculated expoenent is 0 or negitive (and the input/result is not exactaly 0) // - can't underflow an integer to Fp conversion @@ -568,7 +557,7 @@ module fcvt ( // - do so if the result underflows, is zero (the exp doesnt calculate correctly). or the integer input is 0 // - dont set to zero if fp input is zero but not using the fp input // - dont set to zero if int input is zero but not using the int input - assign KillRes = (ResUf|(XZeroE&~IntToFp)|(~|PosInt&IntToFp)); + assign KillRes = (ResUf|(XZeroE&~IntToFp)|(~|TrimInt&IntToFp)); if (`FPSIZES == 1) begin // IEEE sends a payload while Riscv says to send a canonical quiet NaN @@ -755,7 +744,7 @@ module fcvt ( NaNRes = {{`Q_LEN-`H_LEN{1'b1}}, 1'b0, {`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}}; end // determine the infinity result - // - if the input was infinity or rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign + // - if the input overflows in rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign // - otherwise: output infinity with the correct sign // - kill the infinity singal if the input isn't fp InfRes = (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {{`Q_LEN-`H_LEN{1'b1}}, ResSgn, {`H_NE-1{1'b1}}, 1'b0, {`H_NF{1'b1}}} : {{`Q_LEN-`H_LEN{1'b1}}, ResSgn, {`H_NE{1'b1}}, (`H_NF)'(0)}; diff --git a/pipelined/src/fpu/fma.sv b/pipelined/src/fpu/fma.sv index 97735c5f..fca4930c 100644 --- a/pipelined/src/fpu/fma.sv +++ b/pipelined/src/fpu/fma.sv @@ -409,22 +409,10 @@ module loa( //https://ieeexplore.ieee.org/abstract/document/930098 - lzc lzc(.f, .NormCntE); + lzc #(3*`NF+7) lzc (.num(f), .ZeroCnt(NormCntE)); endmodule -module lzc( - input logic [3*`NF+6:0] f, - output logic [$clog2(3*`NF+7)-1:0] NormCntE // normalization shift -); - - logic [$clog2(3*`NF+7)-1:0] i; - always_comb begin - i = 0; - while (~f[3*`NF+6-i] & $unsigned(i) <= $unsigned($clog2(3*`NF+7)'(3)*($clog2(3*`NF+7))'(`NF)+($clog2(3*`NF+7))'(6))) i = i+1; // search for leading one - NormCntE = i; - end -endmodule @@ -465,7 +453,7 @@ module fma2( logic ResultSgn, ResultSgnTmp; // Result sign logic [`NE+1:0] SumExp; // exponent of the normalized sum logic [`NE+1:0] FullResultExp; // ResultExp with bits to determine sign and overflow - logic [`NF+2:0] NormSum; // normalized sum + logic [`NF+1:0] NormSum; // normalized sum logic NormSumSticky; // sticky bit calulated from the normalized sum logic SumZero; // is the sum zero logic ResultDenorm; // is the result denormalized @@ -582,7 +570,7 @@ module normalize( input logic KillProdM, // is the product set to zero input logic ZOrigDenormM, input logic AddendStickyM, // the sticky bit caclulated from the aligned addend - output logic [`NF+2:0] NormSum, // normalized sum + output logic [`NF+1:0] NormSum, // normalized sum output logic SumZero, // is the sum zero output logic NormSumSticky, UfSticky, // sticky bits output logic [`NE+1:0] SumExp, // exponent of the normalized sum @@ -599,7 +587,7 @@ module normalize( /////////////////////////////////////////////////////////////////////////////// // Normalization /////////////////////////////////////////////////////////////////////////////// - //*** insert bias-bias simplification in fcvt.sv/phone pictures/ whiteboard... if still there + //*** insert bias-bias simplification in fcvt.sv/phone pictures // Determine if the sum is zero assign SumZero = ~(|SumM); @@ -707,27 +695,27 @@ module normalize( assign LZAPlus2 = SumShifted[3*`NF+8]; // the only possible mantissa for a plus two is all zeroes - a one has to propigate all the way through a sum. so we can leave the bottom statement alone assign CorrSumShifted = LZAPlus1 ? SumShifted[3*`NF+6:1] : SumShifted[3*`NF+5:0]; - assign NormSum = CorrSumShifted[3*`NF+5:2*`NF+3]; + assign NormSum = CorrSumShifted[3*`NF+5:2*`NF+4]; // Calculate the sticky bit if (`FPSIZES == 1) begin - assign NormSumSticky = |CorrSumShifted[2*`NF+2:0]; + assign NormSumSticky = |CorrSumShifted[2*`NF+3:0]; end else if (`FPSIZES == 2) begin // 3*NF+5 - NF1 - 3 - assign NormSumSticky = (|CorrSumShifted[2*`NF+2:0]) | - (|CorrSumShifted[3*`NF+2-`NF1:2*`NF+3]&~FmtM); + assign NormSumSticky = (|CorrSumShifted[2*`NF+3:0]) | + (|CorrSumShifted[3*`NF+3-`NF1:2*`NF+4]&~FmtM); end else if (`FPSIZES == 3) begin - assign NormSumSticky = (|CorrSumShifted[2*`NF+2:0]) | - (|CorrSumShifted[3*`NF+2-`NF1:2*`NF+3]&((FmtM==`FMT1)|(FmtM==`FMT2))) | - (|CorrSumShifted[3*`NF+2-`NF2:3*`NF+3-`NF1]&(FmtM==`FMT2)); + assign NormSumSticky = (|CorrSumShifted[2*`NF+3:0]) | + (|CorrSumShifted[3*`NF+3-`NF1:2*`NF+4]&((FmtM==`FMT1)|(FmtM==`FMT2))) | + (|CorrSumShifted[3*`NF+3-`NF2:3*`NF+4-`NF1]&(FmtM==`FMT2)); end else if (`FPSIZES == 4) begin - assign NormSumSticky = (|CorrSumShifted[2*`NF+2:0]) | - (|CorrSumShifted[3*`NF+2-`D_NF:2*`NF+3]&((FmtM==1)|(FmtM==0)|(FmtM==2))) | - (|CorrSumShifted[3*`NF+2-`S_NF:3*`NF+3-`D_NF]&((FmtM==0)|(FmtM==2))) | - (|CorrSumShifted[3*`NF+2-`H_NF:3*`NF+3-`S_NF]&(FmtM==2)); + assign NormSumSticky = (|CorrSumShifted[2*`NF+3:0]) | + (|CorrSumShifted[3*`NF+3-`D_NF:2*`NF+4]&((FmtM==1)|(FmtM==0)|(FmtM==2))) | + (|CorrSumShifted[3*`NF+3-`S_NF:3*`NF+4-`D_NF]&((FmtM==0)|(FmtM==2))) | + (|CorrSumShifted[3*`NF+3-`H_NF:3*`NF+4-`S_NF]&(FmtM==2)); end @@ -745,7 +733,7 @@ module fmaround( input logic [`FPSIZES/3:0] FmtM, // precision 1 = double 0 = single input logic [2:0] FrmM, // rounding mode input logic UfSticky, // sticky bit for underlow calculation - input logic [`NF+2:0] NormSum, // normalized sum + input logic [`NF+1:0] NormSum, // normalized sum input logic AddendStickyM, // addend's sticky bit input logic NormSumSticky, // normalized sum's sticky bit input logic ZZeroM, // is Z zero @@ -799,83 +787,53 @@ module fmaround( if (`FPSIZES == 1) begin // determine guard, round, and least significant bit of the result - assign Guard = NormSum[2]; assign Round = NormSum[1]; - assign LSBNormSum = NormSum[3]; + assign LSBNormSum = NormSum[2]; // used to determine underflow flag - assign UfGuard = NormSum[1]; assign UfRound = NormSum[0]; - assign UfLSBNormSum = NormSum[2]; - - // determine sticky - assign Sticky = UfSticky | NormSum[0]; end else if (`FPSIZES == 2) begin // \/-------------NF---------------, - // | NF1 | 3 | | + // | NF1 | 2 | | // '-------NF1------^ // determine guard, round, and least significant bit of the result - assign Guard = FmtM ? NormSum[2] : NormSum[`NF-`NF1+2]; assign Round = FmtM ? NormSum[1] : NormSum[`NF-`NF1+1]; - assign LSBNormSum = FmtM ? NormSum[3] : NormSum[`NF-`NF1+3]; + assign LSBNormSum = FmtM ? NormSum[2] : NormSum[`NF-`NF1+2]; // used to determine underflow flag - assign UfGuard = FmtM ? NormSum[1] : NormSum[`NF-`NF1+1]; assign UfRound = FmtM ? NormSum[0] : NormSum[`NF-`NF1]; - assign UfLSBNormSum = FmtM ? NormSum[2] : NormSum[`NF-`NF1+2]; - // determine sticky - assign Sticky = UfSticky | (FmtM ? NormSum[0] : NormSum[`NF-`NF1]); end else if (`FPSIZES == 3) begin always_comb begin case (FmtM) `FMT: begin // determine guard, round, and least significant bit of the result - Guard = NormSum[2]; Round = NormSum[1]; - LSBNormSum = NormSum[3]; + LSBNormSum = NormSum[2]; // used to determine underflow flag - UfGuard = NormSum[1]; UfRound = NormSum[0]; - UfLSBNormSum = NormSum[2]; - // determine sticky - Sticky = UfSticky | NormSum[0]; end `FMT1: begin // determine guard, round, and least significant bit of the result - Guard = NormSum[`NF-`NF1+2]; Round = NormSum[`NF-`NF1+1]; - LSBNormSum = NormSum[`NF-`NF1+3]; + LSBNormSum = NormSum[`NF-`NF1+2]; // used to determine underflow flag - UfGuard = NormSum[`NF-`NF1+1]; UfRound = NormSum[`NF-`NF1]; - UfLSBNormSum = NormSum[`NF-`NF1+2]; - // determine sticky - Sticky = UfSticky | NormSum[`NF-`NF1]; end `FMT2: begin // determine guard, round, and least significant bit of the result - Guard = NormSum[`NF-`NF2+2]; Round = NormSum[`NF-`NF2+1]; - LSBNormSum = NormSum[`NF-`NF2+3]; + LSBNormSum = NormSum[`NF-`NF2+2]; // used to determine underflow flag - UfGuard = NormSum[`NF-`NF2+1]; UfRound = NormSum[`NF-`NF2]; - UfLSBNormSum = NormSum[`NF-`NF2+2]; - // determine sticky - Sticky = UfSticky | NormSum[`NF-`NF2]; end default: begin - Guard = 1'bx; Round = 1'bx; LSBNormSum = 1'bx; - UfGuard = 1'bx; UfRound = 1'bx; - UfLSBNormSum = 1'bx; - Sticky = 1'bx; end endcase end @@ -885,56 +843,40 @@ module fmaround( case (FmtM) 2'h3: begin // determine guard, round, and least significant bit of the result - Guard = NormSum[2]; Round = NormSum[1]; - LSBNormSum = NormSum[3]; + LSBNormSum = NormSum[2]; // used to determine underflow flag - UfGuard = NormSum[1]; UfRound = NormSum[0]; - UfLSBNormSum = NormSum[2]; - // determine sticky - Sticky = UfSticky | NormSum[0]; end 2'h1: begin // determine guard, round, and least significant bit of the result - Guard = NormSum[`NF-`D_NF+2]; Round = NormSum[`NF-`D_NF+1]; - LSBNormSum = NormSum[`NF-`D_NF+3]; + LSBNormSum = NormSum[`NF-`D_NF+2]; // used to determine underflow flag - UfGuard = NormSum[`NF-`D_NF+1]; UfRound = NormSum[`NF-`D_NF]; - UfLSBNormSum = NormSum[`NF-`D_NF+2]; - // determine sticky - Sticky = UfSticky | NormSum[`NF-`D_NF]; end 2'h0: begin // determine guard, round, and least significant bit of the result - Guard = NormSum[`NF-`S_NF+2]; Round = NormSum[`NF-`S_NF+1]; - LSBNormSum = NormSum[`NF-`S_NF+3]; + LSBNormSum = NormSum[`NF-`S_NF+2]; // used to determine underflow flag - UfGuard = NormSum[`NF-`S_NF+1]; UfRound = NormSum[`NF-`S_NF]; - UfLSBNormSum = NormSum[`NF-`S_NF+2]; - // determine sticky - Sticky = UfSticky | NormSum[`NF-`S_NF]; end 2'h2: begin // determine guard, round, and least significant bit of the result - Guard = NormSum[`NF-`H_NF+2]; Round = NormSum[`NF-`H_NF+1]; - LSBNormSum = NormSum[`NF-`H_NF+3]; + LSBNormSum = NormSum[`NF-`H_NF+2]; // used to determine underflow flag - UfGuard = NormSum[`NF-`H_NF+1]; UfRound = NormSum[`NF-`H_NF]; - UfLSBNormSum = NormSum[`NF-`H_NF+2]; - // determine sticky - Sticky = UfSticky | NormSum[`NF-`H_NF]; end endcase end end + // used to determine underflow flag + assign UfLSBNormSum = Round; + // determine sticky + assign Sticky = UfSticky | UfRound; // Deterimine if a small number was supposed to be subtrated @@ -944,28 +886,28 @@ module fmaround( always_comb begin // Determine if you add 1 case (FrmM) - 3'b000: CalcPlus1 = Guard & (Round | ((Sticky)&~(~Round&SubBySmallNum)) | (~Round&~(Sticky)&LSBNormSum&~SubBySmallNum));//round to nearest even + 3'b000: CalcPlus1 = Round & ((Sticky| LSBNormSum)&~SubBySmallNum);//round to nearest even 3'b001: CalcPlus1 = 0;//round to zero - 3'b010: CalcPlus1 = ResultSgnTmp & ~(SubBySmallNum & ~Guard & ~Round);//round down - 3'b011: CalcPlus1 = ~ResultSgnTmp & ~(SubBySmallNum & ~Guard & ~Round);//round up - 3'b100: CalcPlus1 = (Guard & (Round | ((Sticky)&~(~Round&SubBySmallNum)) | (~Round&~(Sticky)&~SubBySmallNum)));//round to nearest max magnitude + 3'b010: CalcPlus1 = ResultSgnTmp & ~(SubBySmallNum & ~Round);//round down + 3'b011: CalcPlus1 = ~ResultSgnTmp & ~(SubBySmallNum & ~Round);//round up + 3'b100: CalcPlus1 = Round & ~SubBySmallNum;//round to nearest max magnitude default: CalcPlus1 = 1'bx; endcase // Determine if you add 1 (for underflow flag) case (FrmM) - 3'b000: UfCalcPlus1 = UfGuard & (UfRound | (UfSticky&UfRound|~UfSubBySmallNum) | (~Sticky&UfLSBNormSum&~UfSubBySmallNum));//round to nearest even + 3'b000: UfCalcPlus1 = UfRound & ((UfSticky| UfLSBNormSum)&~UfSubBySmallNum);//round to nearest even 3'b001: UfCalcPlus1 = 0;//round to zero - 3'b010: UfCalcPlus1 = ResultSgnTmp & ~(UfSubBySmallNum & ~UfGuard & ~UfRound);//round down - 3'b011: UfCalcPlus1 = ~ResultSgnTmp & ~(UfSubBySmallNum & ~UfGuard & ~UfRound);//round up - 3'b100: UfCalcPlus1 = (UfGuard & (UfRound | (UfSticky&~(~UfRound&UfSubBySmallNum)) | (~Sticky&~UfSubBySmallNum)));//round to nearest max magnitude + 3'b010: UfCalcPlus1 = ResultSgnTmp & ~(UfSubBySmallNum & ~UfRound);//round down + 3'b011: UfCalcPlus1 = ~ResultSgnTmp & ~(UfSubBySmallNum & ~UfRound);//round up + 3'b100: UfCalcPlus1 = UfRound & ~UfSubBySmallNum;//round to nearest max magnitude default: UfCalcPlus1 = 1'bx; endcase // Determine if you subtract 1 case (FrmM) 3'b000: CalcMinus1 = 0;//round to nearest even - 3'b001: CalcMinus1 = SubBySmallNum & ~Guard & ~Round;//round to zero - 3'b010: CalcMinus1 = ~ResultSgnTmp & ~Guard & ~Round & SubBySmallNum;//round down - 3'b011: CalcMinus1 = ResultSgnTmp & ~Guard & ~Round & SubBySmallNum;//round up + 3'b001: CalcMinus1 = SubBySmallNum & ~Round;//round to zero + 3'b010: CalcMinus1 = ~ResultSgnTmp & ~Round & SubBySmallNum;//round down + 3'b011: CalcMinus1 = ResultSgnTmp & ~Round & SubBySmallNum;//round up 3'b100: CalcMinus1 = 0;//round to nearest max magnitude default: CalcMinus1 = 1'bx; endcase @@ -973,9 +915,9 @@ module fmaround( end // If an answer is exact don't round - assign Plus1 = CalcPlus1 & (Sticky | Guard | Round); - assign UfPlus1 = UfCalcPlus1 & (Sticky | UfGuard);//UfRound is part of sticky - assign Minus1 = CalcMinus1 & (Sticky | Guard | Round); + assign Plus1 = CalcPlus1 & (Sticky | Round); + assign UfPlus1 = UfCalcPlus1 & (Sticky | UfRound);//UfRound is part of sticky + assign Minus1 = CalcMinus1 & (Sticky | Round); // Compute rounded result if (`FPSIZES == 1) begin @@ -1011,7 +953,7 @@ module fmaround( end - assign NormSumTruncated = NormSum[`NF+2:3]; + assign NormSumTruncated = NormSum[`NF+1:2]; assign {FullResultExp, ResultFrac} = {SumExp, NormSumTruncated} + RoundAdd; assign ResultExp = FullResultExp[`NE-1:0]; @@ -1083,12 +1025,12 @@ module fmaflags( // Set Underflow flag if the number is too small to be represented in normal numbers // - Don't set the underflow flag if the result is exact - assign Underflow = (SumExp[`NE+1] | ((SumExp == 0) & (Round|Guard|Sticky)))&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM); + assign Underflow = (SumExp[`NE+1] | ((SumExp == 0) & (Round|Sticky)))&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM); // exp is negitive result is denorm exp was denorm but rounded to norm and if given an unbounded exponent it would stay denormal - assign UnderflowFlag = (FullResultExp[`NE+1] | ((FullResultExp == 0) | ((FullResultExp == 1) & (SumExp == 0) & ~(UfPlus1&UfLSBNormSum)))&(Round|Guard|Sticky))&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM); + assign UnderflowFlag = (FullResultExp[`NE+1] | ((FullResultExp == 0) | ((FullResultExp == 1) & (SumExp == 0) & ~(UfPlus1&UfLSBNormSum)))&(Round|Sticky))&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM); // Set Inexact flag if the result is diffrent from what would be outputed given infinite precision // - Don't set the underflow flag if an underflowed result isn't outputed - assign Inexact = (Sticky|Overflow|Guard|Round|Underflow)&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM); + assign Inexact = (Sticky|Overflow|Round|Underflow)&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM); // Combine flags // - FMA can't set the Divide by zero flag diff --git a/pipelined/src/fpu/unpack.sv b/pipelined/src/fpu/unpack.sv index 44ffc283..06ceff56 100644 --- a/pipelined/src/fpu/unpack.sv +++ b/pipelined/src/fpu/unpack.sv @@ -96,9 +96,9 @@ module unpack ( // extract the exponent, converting the smaller exponent into the larger precision if nessisary // - if the original precision had a denormal number convert the exponent value 1 - assign XExpE = FmtE ? X[`FLEN-2:`NF] : XOrigDenormE ? {1'b0, {`NE-`NE1{1'b1}}, (`NE1-1)'(1)} : {XLen1[`LEN1-2], {`NE-`NE1{~XLen1[`LEN1-2]&~XExpZero|XExpMaxE}}, XLen1[`LEN1-3:`NF1]}; - assign YExpE = FmtE ? Y[`FLEN-2:`NF] : YOrigDenormE ? {1'b0, {`NE-`NE1{1'b1}}, (`NE1-1)'(1)} : {YLen1[`LEN1-2], {`NE-`NE1{~YLen1[`LEN1-2]&~YExpZero|YExpMaxE}}, YLen1[`LEN1-3:`NF1]}; - assign ZExpE = FmtE ? Z[`FLEN-2:`NF] : ZOrigDenormE ? {1'b0, {`NE-`NE1{1'b1}}, (`NE1-1)'(1)} : {ZLen1[`LEN1-2], {`NE-`NE1{~ZLen1[`LEN1-2]&~ZExpZero|ZExpMaxE}}, ZLen1[`LEN1-3:`NF1]}; + assign XExpE = FmtE ? X[`FLEN-2:`NF] : XOrigDenormE ? {1'b0, {`NE-`NE1{1'b1}}, (`NE1-1)'(1)} : {XLen1[`LEN1-2], {`NE-`NE1{~XLen1[`LEN1-2]}}, XLen1[`LEN1-3:`NF1]}; + assign YExpE = FmtE ? Y[`FLEN-2:`NF] : YOrigDenormE ? {1'b0, {`NE-`NE1{1'b1}}, (`NE1-1)'(1)} : {YLen1[`LEN1-2], {`NE-`NE1{~YLen1[`LEN1-2]}}, YLen1[`LEN1-3:`NF1]}; + assign ZExpE = FmtE ? Z[`FLEN-2:`NF] : ZOrigDenormE ? {1'b0, {`NE-`NE1{1'b1}}, (`NE1-1)'(1)} : {ZLen1[`LEN1-2], {`NE-`NE1{~ZLen1[`LEN1-2]}}, ZLen1[`LEN1-3:`NF1]}; // is the input (in it's original format) denormalized assign XOrigDenormE = FmtE ? 0 : ~|XLen1[`LEN1-2:`NF1] & ~XFracZero; @@ -257,9 +257,9 @@ module unpack ( // also need to take into account possible zero/denorm/inf/NaN values // convert the larger precision's exponent to use the largest precision's bias - XExpE = XOrigDenormE ? {1'b0, {`NE-`NE1{1'b1}}, (`NE1-1)'(1)} : {XLen1[`LEN1-2], {`NE-`NE1{~XLen1[`LEN1-2]&~XExpZero|XExpMaxE}}, XLen1[`LEN1-3:`NF1]}; - YExpE = YOrigDenormE ? {1'b0, {`NE-`NE1{1'b1}}, (`NE1-1)'(1)} : {YLen1[`LEN1-2], {`NE-`NE1{~YLen1[`LEN1-2]&~YExpZero|YExpMaxE}}, YLen1[`LEN1-3:`NF1]}; - ZExpE = ZOrigDenormE ? {1'b0, {`NE-`NE1{1'b1}}, (`NE1-1)'(1)} : {ZLen1[`LEN1-2], {`NE-`NE1{~ZLen1[`LEN1-2]&~ZExpZero|ZExpMaxE}}, ZLen1[`LEN1-3:`NF1]}; + XExpE = XOrigDenormE ? {1'b0, {`NE-`NE1{1'b1}}, (`NE1-1)'(1)} : {XLen1[`LEN1-2], {`NE-`NE1{~XLen1[`LEN1-2]}}, XLen1[`LEN1-3:`NF1]}; + YExpE = YOrigDenormE ? {1'b0, {`NE-`NE1{1'b1}}, (`NE1-1)'(1)} : {YLen1[`LEN1-2], {`NE-`NE1{~YLen1[`LEN1-2]}}, YLen1[`LEN1-3:`NF1]}; + ZExpE = ZOrigDenormE ? {1'b0, {`NE-`NE1{1'b1}}, (`NE1-1)'(1)} : {ZLen1[`LEN1-2], {`NE-`NE1{~ZLen1[`LEN1-2]}}, ZLen1[`LEN1-3:`NF1]}; // extract the fraction and add the nessesary trailing zeros XFracE = {XLen1[`NF1-1:0], (`NF-`NF1)'(0)}; @@ -282,9 +282,9 @@ module unpack ( // also need to take into account possible zero/denorm/inf/NaN values // convert the smallest precision's exponent to use the largest precision's bias - XExpE = XOrigDenormE ? {1'b0, {`NE-`NE2{1'b1}}, (`NE2-1)'(1)} : {XLen2[`LEN2-2], {`NE-`NE2{~XLen2[`LEN2-2]&~XExpZero|XExpMaxE}}, XLen2[`LEN2-3:`NF2]}; - YExpE = YOrigDenormE ? {1'b0, {`NE-`NE2{1'b1}}, (`NE2-1)'(1)} : {YLen2[`LEN2-2], {`NE-`NE2{~YLen2[`LEN2-2]&~YExpZero|YExpMaxE}}, YLen2[`LEN2-3:`NF2]}; - ZExpE = ZOrigDenormE ? {1'b0, {`NE-`NE2{1'b1}}, (`NE2-1)'(1)} : {ZLen2[`LEN2-2], {`NE-`NE2{~ZLen2[`LEN2-2]&~ZExpZero|ZExpMaxE}}, ZLen2[`LEN2-3:`NF2]}; + XExpE = XOrigDenormE ? {1'b0, {`NE-`NE2{1'b1}}, (`NE2-1)'(1)} : {XLen2[`LEN2-2], {`NE-`NE2{~XLen2[`LEN2-2]}}, XLen2[`LEN2-3:`NF2]}; + YExpE = YOrigDenormE ? {1'b0, {`NE-`NE2{1'b1}}, (`NE2-1)'(1)} : {YLen2[`LEN2-2], {`NE-`NE2{~YLen2[`LEN2-2]}}, YLen2[`LEN2-3:`NF2]}; + ZExpE = ZOrigDenormE ? {1'b0, {`NE-`NE2{1'b1}}, (`NE2-1)'(1)} : {ZLen2[`LEN2-2], {`NE-`NE2{~ZLen2[`LEN2-2]}}, ZLen2[`LEN2-3:`NF2]}; // extract the fraction and add the nessesary trailing zeros XFracE = {XLen2[`NF2-1:0], (`NF-`NF2)'(0)}; @@ -447,9 +447,9 @@ module unpack ( // convert the double precsion exponent into quad precsion - XExpE = XOrigDenormE ? {1'b0, {`Q_NE-`D_NE{1'b1}}, (`D_NE-1)'(1)} : {XLen1[`D_LEN-2], {`Q_NE-`D_NE{~XLen1[`D_LEN-2]&~XExpZero|XExpMaxE}}, XLen1[`D_LEN-3:`D_NF]}; - YExpE = YOrigDenormE ? {1'b0, {`Q_NE-`D_NE{1'b1}}, (`D_NE-1)'(1)} : {YLen1[`D_LEN-2], {`Q_NE-`D_NE{~YLen1[`D_LEN-2]&~YExpZero|YExpMaxE}}, YLen1[`D_LEN-3:`D_NF]}; - ZExpE = ZOrigDenormE ? {1'b0, {`Q_NE-`D_NE{1'b1}}, (`D_NE-1)'(1)} : {ZLen1[`D_LEN-2], {`Q_NE-`D_NE{~ZLen1[`D_LEN-2]&~ZExpZero|ZExpMaxE}}, ZLen1[`D_LEN-3:`D_NF]}; + XExpE = XOrigDenormE ? {1'b0, {`Q_NE-`D_NE{1'b1}}, (`D_NE-1)'(1)} : {XLen1[`D_LEN-2], {`Q_NE-`D_NE{~XLen1[`D_LEN-2]}}, XLen1[`D_LEN-3:`D_NF]}; + YExpE = YOrigDenormE ? {1'b0, {`Q_NE-`D_NE{1'b1}}, (`D_NE-1)'(1)} : {YLen1[`D_LEN-2], {`Q_NE-`D_NE{~YLen1[`D_LEN-2]}}, YLen1[`D_LEN-3:`D_NF]}; + ZExpE = ZOrigDenormE ? {1'b0, {`Q_NE-`D_NE{1'b1}}, (`D_NE-1)'(1)} : {ZLen1[`D_LEN-2], {`Q_NE-`D_NE{~ZLen1[`D_LEN-2]}}, ZLen1[`D_LEN-3:`D_NF]}; // extract the fraction and add the nessesary trailing zeros XFracE = {XLen1[`D_NF-1:0], (`Q_NF-`D_NF)'(0)}; @@ -471,9 +471,9 @@ module unpack ( // also need to take into account possible zero/denorm/inf/NaN values // convert the single precsion exponent into quad precsion - XExpE = XOrigDenormE ? {1'b0, {`Q_NE-`S_NE{1'b1}}, (`S_NE-1)'(1)} : {XLen2[`S_LEN-2], {`Q_NE-`S_NE{~XLen2[`S_LEN-2]&~XExpZero|XExpMaxE}}, XLen2[`S_LEN-3:`S_NF]}; - YExpE = YOrigDenormE ? {1'b0, {`Q_NE-`S_NE{1'b1}}, (`S_NE-1)'(1)} : {YLen2[`S_LEN-2], {`Q_NE-`S_NE{~YLen2[`S_LEN-2]&~YExpZero|YExpMaxE}}, YLen2[`S_LEN-3:`S_NF]}; - ZExpE = ZOrigDenormE ? {1'b0, {`Q_NE-`S_NE{1'b1}}, (`S_NE-1)'(1)} : {ZLen2[`S_LEN-2], {`Q_NE-`S_NE{~ZLen2[`S_LEN-2]&~ZExpZero|ZExpMaxE}}, ZLen2[`S_LEN-3:`S_NF]}; + XExpE = XOrigDenormE ? {1'b0, {`Q_NE-`S_NE{1'b1}}, (`S_NE-1)'(1)} : {XLen2[`S_LEN-2], {`Q_NE-`S_NE{~XLen2[`S_LEN-2]}}, XLen2[`S_LEN-3:`S_NF]}; + YExpE = YOrigDenormE ? {1'b0, {`Q_NE-`S_NE{1'b1}}, (`S_NE-1)'(1)} : {YLen2[`S_LEN-2], {`Q_NE-`S_NE{~YLen2[`S_LEN-2]}}, YLen2[`S_LEN-3:`S_NF]}; + ZExpE = ZOrigDenormE ? {1'b0, {`Q_NE-`S_NE{1'b1}}, (`S_NE-1)'(1)} : {ZLen2[`S_LEN-2], {`Q_NE-`S_NE{~ZLen2[`S_LEN-2]}}, ZLen2[`S_LEN-3:`S_NF]}; // extract the fraction and add the nessesary trailing zeros XFracE = {XLen2[`S_NF-1:0], (`Q_NF-`S_NF)'(0)}; @@ -495,9 +495,9 @@ module unpack ( // also need to take into account possible zero/denorm/inf/NaN values // convert the half precsion exponent into quad precsion - XExpE = XOrigDenormE ? {1'b0, {`Q_NE-`H_NE{1'b1}}, (`H_NE-1)'(1)} : {XLen3[`H_LEN-2], {`Q_NE-`H_NE{~XLen3[`H_LEN-2]&~XExpZero|XExpMaxE}}, XLen3[`H_LEN-3:`H_NF]}; - YExpE = YOrigDenormE ? {1'b0, {`Q_NE-`H_NE{1'b1}}, (`H_NE-1)'(1)} : {YLen3[`H_LEN-2], {`Q_NE-`H_NE{~YLen3[`H_LEN-2]&~YExpZero|YExpMaxE}}, YLen3[`H_LEN-3:`H_NF]}; - ZExpE = ZOrigDenormE ? {1'b0, {`Q_NE-`H_NE{1'b1}}, (`H_NE-1)'(1)} : {ZLen3[`H_LEN-2], {`Q_NE-`H_NE{~ZLen3[`H_LEN-2]&~ZExpZero|ZExpMaxE}}, ZLen3[`H_LEN-3:`H_NF]}; + XExpE = XOrigDenormE ? {1'b0, {`Q_NE-`H_NE{1'b1}}, (`H_NE-1)'(1)} : {XLen3[`H_LEN-2], {`Q_NE-`H_NE{~XLen3[`H_LEN-2]}}, XLen3[`H_LEN-3:`H_NF]}; + YExpE = YOrigDenormE ? {1'b0, {`Q_NE-`H_NE{1'b1}}, (`H_NE-1)'(1)} : {YLen3[`H_LEN-2], {`Q_NE-`H_NE{~YLen3[`H_LEN-2]}}, YLen3[`H_LEN-3:`H_NF]}; + ZExpE = ZOrigDenormE ? {1'b0, {`Q_NE-`H_NE{1'b1}}, (`H_NE-1)'(1)} : {ZLen3[`H_LEN-2], {`Q_NE-`H_NE{~ZLen3[`H_LEN-2]}}, ZLen3[`H_LEN-3:`H_NF]}; // extract the fraction and add the nessesary trailing zeros XFracE = {XLen3[`H_NF-1:0], (`Q_NF-`H_NF)'(0)}; diff --git a/pipelined/src/generic/lzc.sv b/pipelined/src/generic/lzc.sv new file mode 100644 index 00000000..5b1c22f9 --- /dev/null +++ b/pipelined/src/generic/lzc.sv @@ -0,0 +1,15 @@ +//leading zero counter i.e. priority encoder +module lzc #(parameter WIDTH=1) ( + input logic [WIDTH-1:0] num, + output logic [$clog2(WIDTH)-1:0] ZeroCnt +); +/* verilator lint_off CMPCONST */ + + logic [$clog2(WIDTH)-1:0] i; + always_comb begin + i = 0; + while (~num[WIDTH-1-(32)'(i)] & $unsigned(i) <= $unsigned(($clog2(WIDTH))'(WIDTH-1))) i = i+1; // search for leading one + ZeroCnt = i; + end +/* verilator lint_on CMPCONST */ +endmodule diff --git a/pipelined/testbench/testbench-fp.sv b/pipelined/testbench/testbench-fp.sv index 3e90aeaf..cb214ce8 100644 --- a/pipelined/testbench/testbench-fp.sv +++ b/pipelined/testbench/testbench-fp.sv @@ -1174,13 +1174,13 @@ end /////////////////////////////////////////////////////////////////////////////////////////////// // check if the non-fma test is correct - if(~((Res === Ans | NaNGood | NaNGood === 1'bx) & (ResFlg === AnsFlg | AnsFlg === 5'bx))&(UnitVal !== `CVTINTUNIT)) begin + if(~((Res === Ans | NaNGood | NaNGood === 1'bx) & (ResFlg === AnsFlg | AnsFlg === 5'bx))&(UnitVal !== `CVTINTUNIT)&(UnitVal !== `CMPUNIT)) begin errors += 1; $display("There is an error in %s", Tests[TestNum]); $display("inputs: %h %h %h\nSrcA: %h\n Res: %h %h\n Ans: %h %h", X, Y, Z, SrcA, Res, ResFlg, Ans, AnsFlg); $stop; end - + // TestFloat sets the result to all 1's when there is an invalid result, however in // http://www.jhauser.us/arithmetic/TestFloat-3/doc/TestFloat-general.html it says // for an unsigned integer result 0 is also okay @@ -1470,7 +1470,7 @@ module readvectors ( Ans = TestVector[8]; end 2'b10: begin // half - X = {{`FLEN-`H_LEN{1'b1}}, TestVector[12+3*(`H_LEN)-1:12+(`H_LEN)]}; + X = {{`FLEN-`H_LEN{1'b1}}, TestVector[12+2*(`H_LEN)-1:12+(`H_LEN)]}; Y = {{`FLEN-`H_LEN{1'b1}}, TestVector[12+(`H_LEN)-1:12]}; Ans = TestVector[8]; end