diff --git a/benchmarks/coremark/riscv64-baremetal/core_portme.c b/benchmarks/coremark/riscv64-baremetal/core_portme.c index 57b7993a..017bef74 100755 --- a/benchmarks/coremark/riscv64-baremetal/core_portme.c +++ b/benchmarks/coremark/riscv64-baremetal/core_portme.c @@ -114,7 +114,12 @@ void portable_free(void *p) { #define read_csr(reg) ({ unsigned long __tmp; \ asm volatile ("csrr %0, " #reg : "=r"(__tmp)); \ __tmp; }) - #define GETMYTIME(_t) (_t = *(volatile unsigned long long*)0x0200BFF8) + // #if (XLEN==64) + // typedef unsigned long long ee_ptr_int; + // #else + // typedef unsigned long ee_ptr_int; + // #endif + #define GETMYTIME(_t) (_t = *(volatile ee_ptr_int*)0x0200BFF8) #define MYTIMEDIFF(fin,ini) ((fin)-(ini)) // Changing TIMER_RES_DIVIDER to 1000000 sets EE_TICKS_PER_SEC to 1000 (now counting ticks per ms) #define TIMER_RES_DIVIDER 10000 @@ -196,8 +201,8 @@ void stop_time(void) { CORE_TICKS get_time(void) { CORE_TICKS elapsed=(CORE_TICKS)(MYTIMEDIFF(stop_time_val, start_time_val)); unsigned long instructions = minstretDiff(); - long long cm100 = 1000000000 / elapsed; // coremark score * 100 - long long cpi100 = elapsed*100/instructions; // CPI * 100 + ee_ptr_int cm100 = 1000000000 / elapsed; // coremark score * 100 + ee_ptr_int cpi100 = elapsed*100/instructions; // CPI * 100 ee_printf(" WALLY CoreMark Results (from get_time)\n"); ee_printf(" Elapsed MTIME: %u\n", elapsed); ee_printf(" Elapsed MINSTRET: %lu\n", instructions); diff --git a/benchmarks/coremark/riscv64-baremetal/core_portme.h b/benchmarks/coremark/riscv64-baremetal/core_portme.h index 33768b0f..3146d3ec 100755 --- a/benchmarks/coremark/riscv64-baremetal/core_portme.h +++ b/benchmarks/coremark/riscv64-baremetal/core_portme.h @@ -69,14 +69,16 @@ typedef clock_t CORE_TICKS; // #elif (XLEN==32) // #include // typedef ee_u32 CORE_TICKS; -#else /* Configuration: size_t and clock_t Note these need to match the size of the clock output and the xLen the processor supports */ +#elif (XLEN==64) typedef unsigned long int size_t; typedef unsigned long int clock_t; -typedef clock_t CORE_TICKS; +#else +#include #endif +typedef clock_t CORE_TICKS; /* Definitions: COMPILER_VERSION, COMPILER_FLAGS, MEM_LOCATION Initialize these strings per platform diff --git a/pipelined/regression/sim-wally b/pipelined/regression/sim-wally index 51c8b3ed..6163ab8b 100755 --- a/pipelined/regression/sim-wally +++ b/pipelined/regression/sim-wally @@ -1,2 +1,2 @@ -vsim -do "do wally-pipelined.do rv64gc arch64d" +vsim -do "do wally-pipelined.do rv32gc wally32periph" diff --git a/pipelined/regression/sim-wally-batch b/pipelined/regression/sim-wally-batch index 7e821e58..8b5b5d62 100755 --- a/pipelined/regression/sim-wally-batch +++ b/pipelined/regression/sim-wally-batch @@ -1 +1 @@ -vsim -c -do "do wally-pipelined-batch.do rv32gc wally32d" +vsim -c -do "do wally-pipelined-batch.do rv32gc wally32periph" diff --git a/pipelined/src/fpu/divshiftcalc.sv b/pipelined/src/fpu/divshiftcalc.sv index af321b25..3fbc9419 100644 --- a/pipelined/src/fpu/divshiftcalc.sv +++ b/pipelined/src/fpu/divshiftcalc.sv @@ -1,10 +1,10 @@ `include "wally-config.vh" module divshiftcalc( - input logic [`QLEN-1-(`RADIX/4):0] Quot, + input logic [`QLEN-1-(`RADIX/4):0] DivQm, input logic [`FMTBITS-1:0] Fmt, input logic [`DURLEN-1:0] DivEarlyTermShift, - input logic [`NE+1:0] DivCalcExp, + input logic [`NE+1:0] DivQe, output logic [$clog2(`NORMSHIFTSZ)-1:0] DivShiftAmt, output logic [`NORMSHIFTSZ-1:0] DivShiftIn, output logic DivResDenorm, @@ -14,21 +14,21 @@ module divshiftcalc( // is the result denromalized // if the exponent is 1 then the result needs to be normalized then the result is denormalizes - assign DivResDenorm = DivCalcExp[`NE+1]|(~|DivCalcExp[`NE+1:0]); + assign DivResDenorm = DivQe[`NE+1]|(~|DivQe[`NE+1:0]); // if the result is denormalized - // 00000000x.xxxxxx... Exp = DivCalcExp - // .00000000xxxxxxx... >> NF+1 Exp = DivCalcExp+NF+1 - // .00xxxxxxxxxxxxx... << DivCalcExp+NF+1 Exp = +1 + // 00000000x.xxxxxx... Exp = DivQe + // .00000000xxxxxxx... >> NF+1 Exp = DivQe+NF+1 + // .00xxxxxxxxxxxxx... << DivQe+NF+1 Exp = +1 // .0000xxxxxxxxxxx... >> 1 Exp = 1 - // Left shift amount = DivCalcExp+NF+1-1 - assign DivDenormShift = (`NE+2)'(`NF)+DivCalcExp; + // Left shift amount = DivQe+NF+1-1 + assign DivDenormShift = (`NE+2)'(`NF)+DivQe; // if the result is normalized - // 00000000x.xxxxxx... Exp = DivCalcExp - // .00000000xxxxxxx... >> NF+1 Exp = DivCalcExp+NF+1 - // 00000000.xxxxxxx... << NF Exp = DivCalcExp+1 - // 00000000x.xxxxxx... << NF Exp = DivCalcExp (extra shift done afterwards) - // 00000000xx.xxxxx... << 1? Exp = DivCalcExp-1 (determined after) + // 00000000x.xxxxxx... Exp = DivQe + // .00000000xxxxxxx... >> NF+1 Exp = DivQe+NF+1 + // 00000000.xxxxxxx... << NF Exp = DivQe+1 + // 00000000x.xxxxxx... << NF Exp = DivQe (extra shift done afterwards) + // 00000000xx.xxxxx... << 1? Exp = DivQe-1 (determined after) // inital Left shift amount = NF // shift one more if the it's a minimally redundent radix 4 - one entire cycle needed for integer bit assign NormShift = (`NE+2)'(`NF); @@ -36,6 +36,6 @@ module divshiftcalc( // need to multiply the early termination shift by LOGR*DIVCOPIES = left shift of log2(LOGR*DIVCOPIES) assign DivShiftAmt = (DivResDenorm ? DivDenormShift[$clog2(`NORMSHIFTSZ)-1:0]&{$clog2(`NORMSHIFTSZ){~DivDenormShift[`NE+1]}} : NormShift[$clog2(`NORMSHIFTSZ)-1:0])+{{$clog2(`NORMSHIFTSZ)-`DURLEN-$clog2(`LOGR*`DIVCOPIES){1'b0}}, DivEarlyTermShift&{`DURLEN{~DivDenormShift[`NE+1]}}, {$clog2(`LOGR*`DIVCOPIES){1'b0}}}; - assign DivShiftIn = {{`NF{1'b0}}, Quot, {`NORMSHIFTSZ-`QLEN+(`RADIX/4)-`NF{1'b0}}}; + assign DivShiftIn = {{`NF{1'b0}}, DivQm, {`NORMSHIFTSZ-`QLEN+(`RADIX/4)-`NF{1'b0}}}; endmodule diff --git a/pipelined/src/fpu/fcvt.sv b/pipelined/src/fpu/fcvt.sv index 4820cf28..b9932523 100644 --- a/pipelined/src/fpu/fcvt.sv +++ b/pipelined/src/fpu/fcvt.sv @@ -127,7 +127,7 @@ module fcvt ( // - rather have a few and-gates than an extra bit in the priority encoder??? *** is this true? assign ShiftAmt = ToInt ? Ce[`LOGCVTLEN-1:0]&{`LOGCVTLEN{~Ce[`NE]}} : ResDenormUf&~IntToFp ? (`LOGCVTLEN)'(`NF-1)+Ce[`LOGCVTLEN-1:0] : - (LeadingZeros)&{`LOGCVTLEN{XDenorm|IntToFp}}; + (LeadingZeros); /////////////////////////////////////////////////////////////////////////// // exp calculations diff --git a/pipelined/src/fpu/flags.sv b/pipelined/src/fpu/flags.sv index 4e16bc96..6b1bc638 100644 --- a/pipelined/src/fpu/flags.sv +++ b/pipelined/src/fpu/flags.sv @@ -48,10 +48,10 @@ module flags( input logic DivOp, // conversion opperation? input logic FmaOp, // Fma opperation? input logic [`NE+1:0] FullRe, // Re with bits to determine sign and overflow - input logic [`NE+1:0] Nexp, // exponent of the normalized sum + input logic [`NE+1:0] Me, // exponent of the normalized sum input logic [1:0] CvtNegResMsbs, // the negitive integer result's most significant bits input logic FmaAs, FmaPs, // the product and modified Z signs - input logic R, UfLSBRes, S, UfPlus1, // bits used to determine rounding + input logic R, UfL, S, UfPlus1, // bits used to determine rounding output logic DivByZero, output logic IntInvalid, Invalid, Overflow, // flags used to select the res output logic [4:0] PostProcFlg // flags @@ -127,11 +127,11 @@ module flags( // | | | | and if the result is not exact // | | | | | and if the input isnt infinity or NaN // | | | | | | - assign Underflow = ((FullRe[`NE+1] | (FullRe == 0) | ((FullRe == 1) & (Nexp == 0) & ~(UfPlus1&UfLSBRes)))&(R|S))&~(InfIn|NaNIn|DivByZero); + assign Underflow = ((FullRe[`NE+1] | (FullRe == 0) | ((FullRe == 1) & (Me == 0) & ~(UfPlus1&UfL)))&(R|S))&~(InfIn|NaNIn|DivByZero); // Set Inexact flag if the res is diffrent from what would be outputed given infinite precision // - Don't set the underflow flag if an underflowed res isn't outputed - assign FpInexact = (S|Overflow|R|Underflow)&~(InfIn|NaNIn|DivByZero); + assign FpInexact = (S|Overflow|R)&~(InfIn|NaNIn|DivByZero); // if the res is too small to be represented and not 0 // | and if the res is not invalid (outside the integer bounds) diff --git a/pipelined/src/fpu/fma.sv b/pipelined/src/fpu/fma.sv index 44cd3616..3cd12830 100644 --- a/pipelined/src/fpu/fma.sv +++ b/pipelined/src/fpu/fma.sv @@ -44,6 +44,7 @@ module fma( output logic InvA, // Was A inverted for effective subtraction (P-A or -P+A) output logic As, // the aligned addend's sign (modified Z sign for other opperations) output logic Ps, // the product's sign + output logic Ss, // the sum's sign output logic [$clog2(3*`NF+7)-1:0] NCnt // normalization shift count ); @@ -81,7 +82,7 @@ module fma( // // Addition/LZA // /////////////////////////////////////////////////////////////////////////////// - add add(.Am, .Pm, .Ps, .As, .KillProd, .ZmSticky, .AmInv, .PmKilled, .NegSum, .InvA, .Sm); + add add(.Am, .Pm, .Ps, .As, .KillProd, .ZmSticky, .AmInv, .PmKilled, .NegSum, .InvA, .Sm, .Ss); loa loa(.A(AmInv+{(3*`NF+6)'(0),InvA&~((ZmSticky&~KillProd))}), .P({PmKilled, 1'b0, InvA&Ps&ZmSticky&KillProd}), .NCnt); endmodule @@ -226,6 +227,7 @@ module add( output logic [2*`NF+1:0] PmKilled, // the product's mantissa possibly killed output logic NegSum, // was the sum negitive output logic InvA, // do you invert the aligned addend + output logic Ss, output logic [3*`NF+5:0] Sm // the positive sum ); logic [3*`NF+6:0] PreSum, NegPreSum; // possibly negitive sum @@ -257,6 +259,11 @@ module add( // Choose the positive sum and accompanying LZA result. assign Sm = NegSum ? NegPreSum[3*`NF+5:0] : PreSum[3*`NF+5:0]; + // is the result negitive + // if p - z is the Sum negitive + // if -p + z is the Sum positive + // if -p - z then the Sum is negitive + assign Ss = NegSum^Ps; //*** move to execute stage endmodule diff --git a/pipelined/src/fpu/fmashiftcalc.sv b/pipelined/src/fpu/fmashiftcalc.sv index ae974eb0..a6c1a1c6 100644 --- a/pipelined/src/fpu/fmashiftcalc.sv +++ b/pipelined/src/fpu/fmashiftcalc.sv @@ -35,7 +35,7 @@ module fmashiftcalc( input logic [$clog2(3*`NF+7)-1:0] FmaNCnt, // normalization shift count input logic [`FMTBITS-1:0] Fmt, // precision 1 = double 0 = single input logic FmaKillProd, // is the product set to zero - output logic [`NE+1:0] FmaConvNormSumExp, // exponent of the normalized sum not taking into account denormal or zero results + output logic [`NE+1:0] FmaNe, // exponent of the normalized sum not taking into account denormal or zero results output logic FmaSZero, // is the result denormalized - calculated before LZA corection output logic FmaPreResultDenorm, // is the result denormalized - calculated before LZA corection output logic [$clog2(3*`NF+7)-1:0] FmaShiftAmt, // normalization shift count @@ -57,28 +57,28 @@ module fmashiftcalc( //convert the sum's exponent into the proper percision if (`FPSIZES == 1) begin - assign FmaConvNormSumExp = NormSumExp; + assign FmaNe = NormSumExp; end else if (`FPSIZES == 2) begin - assign FmaConvNormSumExp = Fmt ? NormSumExp : (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS1))&{`NE+2{|NormSumExp}}; + assign FmaNe = Fmt ? NormSumExp : (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS1))&{`NE+2{|NormSumExp}}; end else if (`FPSIZES == 3) begin always_comb begin case (Fmt) - `FMT: FmaConvNormSumExp = NormSumExp; - `FMT1: FmaConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS1))&{`NE+2{|NormSumExp}}; - `FMT2: FmaConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS2))&{`NE+2{|NormSumExp}}; - default: FmaConvNormSumExp = {`NE+2{1'bx}}; + `FMT: FmaNe = NormSumExp; + `FMT1: FmaNe = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS1))&{`NE+2{|NormSumExp}}; + `FMT2: FmaNe = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS2))&{`NE+2{|NormSumExp}}; + default: FmaNe = {`NE+2{1'bx}}; endcase end end else if (`FPSIZES == 4) begin always_comb begin case (Fmt) - 2'h3: FmaConvNormSumExp = NormSumExp; - 2'h1: FmaConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`D_BIAS))&{`NE+2{|NormSumExp}}; - 2'h0: FmaConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`S_BIAS))&{`NE+2{|NormSumExp}}; - 2'h2: FmaConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`H_BIAS))&{`NE+2{|NormSumExp}}; + 2'h3: FmaNe = NormSumExp; + 2'h1: FmaNe = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`D_BIAS))&{`NE+2{|NormSumExp}}; + 2'h0: FmaNe = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`S_BIAS))&{`NE+2{|NormSumExp}}; + 2'h2: FmaNe = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`H_BIAS))&{`NE+2{|NormSumExp}}; endcase end @@ -144,11 +144,11 @@ module fmashiftcalc( // - if kill prod dont add to exp // Determine if the result is denormal - // assign FmaPreResultDenorm = $signed(FmaConvNormSumExp)<=0 & ($signed(FmaConvNormSumExp)>=$signed(-FracLen)) & ~FmaSZero; + // assign FmaPreResultDenorm = $signed(FmaNe)<=0 & ($signed(FmaNe)>=$signed(-FracLen)) & ~FmaSZero; // Determine the shift needed for denormal results // - if not denorm add 1 to shift out the leading 1 - assign DenormShift = FmaPreResultDenorm ? FmaConvNormSumExp[$clog2(3*`NF+7)-1:0] : 1; + assign DenormShift = FmaPreResultDenorm ? FmaNe[$clog2(3*`NF+7)-1:0] : 1; // set and calculate the shift input and amount // - shift once if killing a product and the result is denormalized assign FmaShiftIn = {3'b0, FmaSm}; diff --git a/pipelined/src/fpu/fpu.sv b/pipelined/src/fpu/fpu.sv index 1bbd0aea..e7888551 100755 --- a/pipelined/src/fpu/fpu.sv +++ b/pipelined/src/fpu/fpu.sv @@ -114,6 +114,7 @@ module fpu ( logic NegSumE, NegSumM; logic ZSgnEffE, ZSgnEffM; logic PSgnE, PSgnM; + logic SsE, SsM; logic [$clog2(3*`NF+7)-1:0] FmaNormCntE, FmaNormCntM; // Cvt Signals @@ -255,36 +256,11 @@ module fpu ( .Xm(XManE), .Ym(YManE), .Zm(ZManE), .XZero(XZeroE), .YZero(YZeroE), .ZZero(ZZeroE), .FOpCtrl(FOpCtrlE), .Fmt(FmtE), - .As(ZSgnEffE), .Ps(PSgnE), + .As(ZSgnEffE), .Ps(PSgnE), .Ss(SsE), .Sm(SumE), .Pe(ProdExpE), .NegSum(NegSumE), .InvA(InvAE), .NCnt(FmaNormCntE), .ZmSticky(AddendStickyE), .KillProd(KillProdE)); - // // fpdivsqrt using Goldschmidt's iteration - // if(`FLEN == 64) begin - // flopenrc #(64) reg_input1 (.d({FSrcXE[63:0]}), .q(DivInput1E), - // .clear(FDivSqrtDoneE), .en(load_preload), - // .reset(reset), .clk(clk)); - // flopenrc #(64) reg_input2 (.d({FSrcYE[63:0]}), .q(DivInput2E), - // .clear(FDivSqrtDoneE), .en(load_preload), - // .reset(reset), .clk(clk)); - // end - // else if (`FLEN == 32) begin - // flopenrc #(64) reg_input1 (.d({32'b0, FSrcXE[31:0]}), .q(DivInput1E), - // .clear(FDivSqrtDoneE), .en(load_preload), - // .reset(reset), .clk(clk)); - // flopenrc #(64) reg_input2 (.d({32'b0, FSrcYE[31:0]}), .q(DivInput2E), - // .clear(FDivSqrtDoneE), .en(load_preload), - // .reset(reset), .clk(clk)); - // end - // flopenrc #(8) reg_input3 (.d({XNaNE, YNaNE, XInfE, YInfE, XZeroE, YZeroE, FmtE[0], FOpCtrlE[0]}), - // .q({XNaNQ, YNaNQ, XInfQ, YInfQ, XZeroQ, YZeroQ, FmtQ, FOpCtrlQ}), - // .clear(FDivSqrtDoneE), .en(load_preload), - // .reset(reset), .clk(clk)); - // fpdiv_pipe fdivsqrt (.op1(DivInput1E[63:0]), .op2(DivInput2E[63:0]), .rm(FrmE[1:0]), .op_type(FOpCtrlQ), - // .reset, .clk(clk), .start(FDivStartE), .P(~FmtQ), .OvEn(1'b1), .UnEn(1'b1), - // .XNaNQ, .YNaNQ, .XInfQ, .YInfQ, .XZeroQ, .YZeroQ, .load_preload, - // .FDivBusyE, .done(FDivSqrtDoneE), .AS_Result(FDivResM), .Flags(FDivFlgM)); divsqrt divsqrt(.clk, .reset, .FmtE, .XManE, .YManE, .XExpE, .YExpE, .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .DivStartE(FDivStartE), .StallE, .StallM, .DivStickyM, .DivBusy(FDivBusyE), .DivCalcExpM, //***change divbusyE to M signal @@ -359,9 +335,9 @@ module fpu ( {FRegWriteM, FResSelM, PostProcSelM, FrmM, FmtM, FOpCtrlM, FWriteIntM}); flopenrc #(3*`NF+6) EMRegFma2(clk, reset, FlushM, ~StallM, SumE, SumM); flopenrc #(`NE+2) EMRegFma3(clk, reset, FlushM, ~StallM, ProdExpE, ProdExpM); - flopenrc #($clog2(3*`NF+7)+6) EMRegFma4(clk, reset, FlushM, ~StallM, - {AddendStickyE, KillProdE, InvAE, FmaNormCntE, NegSumE, ZSgnEffE, PSgnE}, - {AddendStickyM, KillProdM, InvAM, FmaNormCntM, NegSumM, ZSgnEffM, PSgnM}); + flopenrc #($clog2(3*`NF+7)+7) EMRegFma4(clk, reset, FlushM, ~StallM, + {AddendStickyE, KillProdE, InvAE, FmaNormCntE, NegSumE, ZSgnEffE, PSgnE, SsE}, + {AddendStickyM, KillProdM, InvAM, FmaNormCntM, NegSumM, ZSgnEffM, PSgnM, SsM}); flopenrc #(`NE+`LOGCVTLEN+`CVTLEN+4) EMRegCvt(clk, reset, FlushM, ~StallM, {CvtCalcExpE, CvtShiftAmtE, CvtResDenormUfE, CvtResSgnE, IntZeroE, CvtLzcInE}, {CvtCalcExpM, CvtShiftAmtM, CvtResDenormUfM, CvtResSgnM, IntZeroM, CvtLzcInM}); @@ -381,10 +357,10 @@ module fpu ( assign FpLoadStoreM = FResSelM[1]; postprocess postprocess(.Xs(XSgnM), .Ys(YSgnM), .Ze(ZExpM), .Xm(XManM), .Ym(YManM), .Zm(ZManM), .Frm(FrmM), .Fmt(FmtM), .FmaPe(ProdExpM), .DivEarlyTermShift(EarlyTermShiftM), - .FmaZmSticky(AddendStickyM), .FmaKillProd(KillProdM), .XZero(XZeroM), .YZero(YZeroM), .ZZero(ZZeroM), .XInf(XInfM), .YInf(YInfM), .Quot(QuotM), - .ZInf(ZInfM), .XNaN(XNaNM), .YNaN(YNaNM), .ZNaN(ZNaNM), .XSNaN(XSNaNM), .YSNaN(YSNaNM), .ZSNaN(ZSNaNM), .FmaSm(SumM), .DivCalcExp(DivCalcExpM), .DivDone(DivDoneM), + .FmaZmS(AddendStickyM), .FmaKillProd(KillProdM), .XZero(XZeroM), .YZero(YZeroM), .ZZero(ZZeroM), .XInf(XInfM), .YInf(YInfM), .DivQm(QuotM), .FmaSs(SsM), + .ZInf(ZInfM), .XNaN(XNaNM), .YNaN(YNaNM), .ZNaN(ZNaNM), .XSNaN(XSNaNM), .YSNaN(YSNaNM), .ZSNaN(ZSNaNM), .FmaSm(SumM), .DivQe(DivCalcExpM), .DivDone(DivDoneM), .FmaNegSum(NegSumM), .FmaInvA(InvAM), .ZDenorm(ZDenormM), .FmaAs(ZSgnEffM), .FmaPs(PSgnM), .FOpCtrl(FOpCtrlM), .FmaNCnt(FmaNormCntM), - .CvtCe(CvtCalcExpM), .CvtResDenormUf(CvtResDenormUfM),.CvtShiftAmt(CvtShiftAmtM), .CvtCs(CvtResSgnM), .ToInt(FWriteIntM), .DivSticky(DivStickyM), + .CvtCe(CvtCalcExpM), .CvtResDenormUf(CvtResDenormUfM),.CvtShiftAmt(CvtShiftAmtM), .CvtCs(CvtResSgnM), .ToInt(FWriteIntM), .DivS(DivStickyM), .CvtLzcIn(CvtLzcInM), .IntZero(IntZeroM), .PostProcSel(PostProcSelM), .PostProcRes(PostProcResM), .PostProcFlg(PostProcFlgM), .FCvtIntRes(FCvtIntResM)); // FPU flag selection - to privileged diff --git a/pipelined/src/fpu/postprocess.sv b/pipelined/src/fpu/postprocess.sv index e0eb50ac..7e741abb 100644 --- a/pipelined/src/fpu/postprocess.sv +++ b/pipelined/src/fpu/postprocess.sv @@ -48,17 +48,18 @@ module postprocess ( input logic FmaPs, // the product's sign input logic [`NE+1:0] FmaPe, // Product exponent input logic [3*`NF+5:0] FmaSm, // the positive sum - input logic FmaZmSticky, // sticky bit that is calculated during alignment + input logic FmaZmS, // sticky bit that is calculated during alignment input logic FmaKillProd, // set the product to zero before addition if the product is too small to matter input logic FmaNegSum, // was the sum negitive input logic FmaInvA, // do you invert Z + input logic FmaSs, input logic [$clog2(3*`NF+7)-1:0] FmaNCnt, // the normalization shift count //divide signals input logic [`DURLEN-1:0] DivEarlyTermShift, - input logic DivSticky, + input logic DivS, input logic DivDone, - input logic [`NE+1:0] DivCalcExp, - input logic [`QLEN-1-(`RADIX/4):0] Quot, + input logic [`NE+1:0] DivQe, + input logic [`QLEN-1-(`RADIX/4):0] DivQm, // conversion signals input logic CvtCs, // the result's sign input logic [`NE:0] CvtCe, // the calculated expoent @@ -77,9 +78,9 @@ module postprocess ( logic Ws; logic [`NF-1:0] Rf; // Result fraction logic [`NE-1:0] Re; // Result exponent - logic Nsgn; - logic [`NE+1:0] Nexp; - logic [`CORRSHIFTSZ-1:0] Nfrac; // corectly shifted fraction + logic Ms; + logic [`NE+1:0] Me; + logic [`CORRSHIFTSZ-1:0] Mf; // corectly shifted fraction logic [`NE+1:0] FullRe; // Re with bits to determine sign and overflow logic S; // S bit logic UfPlus1; // do you add one (for determining underflow flag) @@ -89,19 +90,19 @@ module postprocess ( logic [`NORMSHIFTSZ-1:0] Shifted; // the shifted result logic Plus1; // add one to the final result? logic IntInvalid, Overflow, Invalid; // flags - logic UfLSBRes; + logic UfL; logic [`FMTBITS-1:0] OutFmt; // fma signals logic [`NE+1:0] FmaSe; // exponent of the normalized sum logic FmaSZero; // is the sum zero logic [3*`NF+8:0] FmaShiftIn; // shift input - logic [`NE+1:0] FmaConvNormSumExp; // exponent of the normalized sum not taking into account denormal or zero results + logic [`NE+1:0] FmaNe; // exponent of the normalized sum not taking into account denormal or zero results logic FmaPreResultDenorm; // is the result denormalized - calculated before LZA corection logic [$clog2(3*`NF+7)-1:0] FmaShiftAmt; // normalization shift count // division singals logic [$clog2(`NORMSHIFTSZ)-1:0] DivShiftAmt; logic [`NORMSHIFTSZ-1:0] DivShiftIn; - logic [`NE+1:0] DivCorrExp; + logic [`NE+1:0] Qe; logic DivByZero; logic DivResDenorm; logic [`NE+1:0] DivDenormShift; @@ -150,9 +151,9 @@ module postprocess ( cvtshiftcalc cvtshiftcalc(.ToInt, .CvtCe, .CvtResDenormUf, .Xm, .CvtLzcIn, .XZero, .IntToFp, .OutFmt, .CvtResUf, .CvtShiftIn); - fmashiftcalc fmashiftcalc(.FmaSm, .Ze, .FmaPe, .FmaNCnt, .Fmt, .FmaKillProd, .FmaConvNormSumExp, + fmashiftcalc fmashiftcalc(.FmaSm, .Ze, .FmaPe, .FmaNCnt, .Fmt, .FmaKillProd, .FmaNe, .FmaSZero, .FmaPreResultDenorm, .FmaShiftAmt, .FmaShiftIn); - divshiftcalc divshiftcalc(.Fmt, .DivCalcExp, .Quot, .DivEarlyTermShift, .DivResDenorm, .DivDenormShift, .DivShiftAmt, .DivShiftIn); + divshiftcalc divshiftcalc(.Fmt, .DivQe, .DivQm, .DivEarlyTermShift, .DivResDenorm, .DivDenormShift, .DivShiftAmt, .DivShiftIn); always_comb case(PostProcSel) @@ -181,9 +182,9 @@ module postprocess ( normshift normshift (.ShiftIn, .ShiftAmt, .Shifted); - shiftcorrection shiftcorrection(.FmaOp, .FmaPreResultDenorm, .FmaConvNormSumExp, - .DivResDenorm, .DivDenormShift, .DivOp, .DivCalcExp, - .DivCorrExp, .FmaSZero, .Shifted, .FmaSe, .Nfrac); + shiftcorrection shiftcorrection(.FmaOp, .FmaPreResultDenorm, .FmaNe, + .DivResDenorm, .DivDenormShift, .DivOp, .DivQe, + .Qe, .FmaSZero, .Shifted, .FmaSe, .Mf); /////////////////////////////////////////////////////////////////////////////// // Rounding @@ -197,19 +198,19 @@ module postprocess ( roundsign roundsign(.FmaPs, .FmaAs, .FmaInvA, .FmaOp, .DivOp, .CvtOp, .FmaNegSum, - .Xs, .Ys, .CvtCs, .Nsgn); + .FmaSs, .Xs, .Ys, .CvtCs, .Ms); - round round(.OutFmt, .Frm, .S, .FmaZmSticky, .Plus1, .PostProcSel, .CvtCe, .DivCorrExp, - .Nsgn, .FmaSe, .FmaOp, .CvtOp, .CvtResDenormUf, .Nfrac, .ToInt, .CvtResUf, - .DivSticky, .DivDone, - .DivOp, .UfPlus1, .FullRe, .Rf, .Re, .R, .UfLSBRes, .Nexp); + round round(.OutFmt, .Frm, .S, .FmaZmS, .Plus1, .PostProcSel, .CvtCe, .Qe, + .Ms, .FmaSe, .FmaOp, .CvtOp, .CvtResDenormUf, .Mf, .ToInt, .CvtResUf, + .DivS, .DivDone, + .DivOp, .UfPlus1, .FullRe, .Rf, .Re, .R, .UfL, .Me); /////////////////////////////////////////////////////////////////////////////// // Sign calculation /////////////////////////////////////////////////////////////////////////////// resultsign resultsign(.Frm, .FmaPs, .FmaAs, .FmaSe, .R, .S, - .FmaOp, .ZInf, .InfIn, .FmaSZero, .Mult, .Nsgn, .Ws); + .FmaOp, .ZInf, .InfIn, .FmaSZero, .Mult, .Ms, .Ws); /////////////////////////////////////////////////////////////////////////////// // Flags @@ -218,8 +219,8 @@ module postprocess ( flags flags(.XSNaN, .YSNaN, .ZSNaN, .XInf, .YInf, .ZInf, .InfIn, .XZero, .YZero, .Xs, .Sqrt, .ToInt, .IntToFp, .Int64, .Signed, .OutFmt, .CvtCe, .XNaN, .YNaN, .NaNIn, .FmaAs, .FmaPs, .R, .IntInvalid, .DivByZero, - .UfLSBRes, .S, .UfPlus1, .CvtOp, .DivOp, .FmaOp, .FullRe, .Plus1, - .Nexp, .CvtNegResMsbs, .Invalid, .Overflow, .PostProcFlg); + .UfL, .S, .UfPlus1, .CvtOp, .DivOp, .FmaOp, .FullRe, .Plus1, + .Me, .CvtNegResMsbs, .Invalid, .Overflow, .PostProcFlg); /////////////////////////////////////////////////////////////////////////////// // Select the result diff --git a/pipelined/src/fpu/resultsign.sv b/pipelined/src/fpu/resultsign.sv index e6de0c18..e1ea5e41 100644 --- a/pipelined/src/fpu/resultsign.sv +++ b/pipelined/src/fpu/resultsign.sv @@ -39,28 +39,25 @@ module resultsign( input logic Mult, input logic R, input logic S, - input logic Nsgn, + input logic Ms, output logic Ws ); - logic ZeroSgn; - logic InfSgn; - logic Underflow; - // logic ResultSgnTmp; + logic Zeros; + logic Infs; // Determine the sign if the sum is zero // if cancelation then 0 unless round to -infinity // if multiply then Psgn // otherwise psign - assign Underflow = FmaSe[`NE+1] | ((FmaSe == 0) & (R|S)); - assign ZeroSgn = (FmaPs^FmaAs)&~Underflow&~Mult ? Frm[1:0] == 2'b10 : FmaPs; + assign Zeros = (FmaPs^FmaAs)&~(FmaSe[`NE+1] | ((FmaSe == 0) & (R|S)))&~Mult ? Frm[1:0] == 2'b10 : FmaPs; // is the result negitive // if p - z is the Sum negitive // if -p + z is the Sum positive // if -p - z then the Sum is negitive - assign InfSgn = ZInf ? FmaAs : FmaPs; - assign Ws = InfIn&FmaOp ? InfSgn : FmaSZero&FmaOp ? ZeroSgn : Nsgn; + assign Infs = ZInf ? FmaAs : FmaPs; + assign Ws = InfIn&FmaOp ? Infs : FmaSZero&FmaOp ? Zeros : Ms; endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/round.sv b/pipelined/src/fpu/round.sv index 38bacce0..6132dba4 100644 --- a/pipelined/src/fpu/round.sv +++ b/pipelined/src/fpu/round.sv @@ -46,29 +46,29 @@ module round( input logic [1:0] PostProcSel, input logic CvtResDenormUf, input logic CvtResUf, - input logic [`CORRSHIFTSZ-1:0] Nfrac, - input logic FmaZmSticky, // addend's sticky bit + input logic [`CORRSHIFTSZ-1:0] Mf, + input logic FmaZmS, // addend's sticky bit input logic [`NE+1:0] FmaSe, // exponent of the normalized sum - input logic Nsgn, // the result's sign + input logic Ms, // the result's sign input logic [`NE:0] CvtCe, // the calculated expoent - input logic [`NE+1:0] DivCorrExp, // the calculated expoent - input logic DivSticky, // sticky bit + input logic [`NE+1:0] Qe, // the calculated expoent + input logic DivS, // sticky bit output logic UfPlus1, // do you add or subtract on from the result output logic [`NE+1:0] FullRe, // Re with bits to determine sign and overflow output logic [`NF-1:0] Rf, // Result fraction output logic [`NE-1:0] Re, // Result exponent output logic S, // sticky bit - output logic [`NE+1:0] Nexp, + output logic [`NE+1:0] Me, output logic Plus1, - output logic R, UfLSBRes // bits needed to calculate rounding + output logic R, UfL // bits needed to calculate rounding ); - logic LSBRes; // bit used for rounding - least significant bit of the normalized sum + logic L; // bit used for rounding - least significant bit of the normalized sum logic UfCalcPlus1; - logic NormSumSticky; // normalized sum's sticky bit - logic UfSticky; // sticky bit for underlow calculation + logic NormS; // normalized sum's sticky bit + logic UfS; // sticky bit for underlow calculation logic [`NF-1:0] RoundFrac; logic FpRes, IntRes; - logic UfRound; + logic UfR; logic FpRound, FpLSBRes, FpUfRound; logic CalcPlus1, FpPlus1; logic [`FLEN:0] RoundAdd; // how much to add to the result @@ -114,61 +114,61 @@ module round( // | NF |1|1| // ^ ^ if floating point result // ^ if not an FMA result - if (`XLENPOS == 1)assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) | - (|Nfrac[`CORRSHIFTSZ-`XLEN-2:0]); + if (`XLENPOS == 1)assign NormS = (|Mf[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) | + (|Mf[`CORRSHIFTSZ-`XLEN-2:0]); // 2: NF > XLEN - if (`XLENPOS == 2)assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&IntRes) | - (|Nfrac[`CORRSHIFTSZ-`NF-2:0]); + if (`XLENPOS == 2)assign NormS = (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&IntRes) | + (|Mf[`CORRSHIFTSZ-`NF-2:0]); end else if (`FPSIZES == 2) begin // XLEN is either 64 or 32 // so half and single are always smaller then XLEN // 1: XLEN > NF > NF1 - if (`XLENPOS == 1) assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&FpRes&~OutFmt) | - (|Nfrac[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) | - (|Nfrac[`CORRSHIFTSZ-`XLEN-2:0]); + if (`XLENPOS == 1) assign NormS = (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&FpRes&~OutFmt) | + (|Mf[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) | + (|Mf[`CORRSHIFTSZ-`XLEN-2:0]); // 2: NF > XLEN > NF1 - if (`XLENPOS == 2) assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~OutFmt) | - (|Nfrac[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&(IntRes|~OutFmt)) | - (|Nfrac[`CORRSHIFTSZ-`NF-2:0]); + if (`XLENPOS == 2) assign NormS = (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~OutFmt) | + (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&(IntRes|~OutFmt)) | + (|Mf[`CORRSHIFTSZ-`NF-2:0]); // 3: NF > NF1 > XLEN - if (`XLENPOS == 3) assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF1-1]&IntRes) | - (|Nfrac[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&(~OutFmt|IntRes)) | - (|Nfrac[`CORRSHIFTSZ-`NF-2:0]); + if (`XLENPOS == 3) assign NormS = (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF1-1]&IntRes) | + (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&(~OutFmt|IntRes)) | + (|Mf[`CORRSHIFTSZ-`NF-2:0]); end else if (`FPSIZES == 3) begin // 1: XLEN > NF > NF1 - if (`XLENPOS == 1) assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`NF1-1]&FpRes&(OutFmt==`FMT1)) | - (|Nfrac[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&FpRes&~(OutFmt==`FMT)) | - (|Nfrac[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) | - (|Nfrac[`CORRSHIFTSZ-`XLEN-2:0]); + if (`XLENPOS == 1) assign NormS = (|Mf[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`NF1-1]&FpRes&(OutFmt==`FMT1)) | + (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&FpRes&~(OutFmt==`FMT)) | + (|Mf[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) | + (|Mf[`CORRSHIFTSZ-`XLEN-2:0]); // 2: NF > XLEN > NF1 - if (`XLENPOS == 2) assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`NF1-1]&FpRes&(OutFmt==`FMT1)) | - (|Nfrac[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~(OutFmt==`FMT)) | - (|Nfrac[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&(IntRes|~(OutFmt==`FMT))) | - (|Nfrac[`CORRSHIFTSZ-`NF-2:0]); + if (`XLENPOS == 2) assign NormS = (|Mf[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`NF1-1]&FpRes&(OutFmt==`FMT1)) | + (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~(OutFmt==`FMT)) | + (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&(IntRes|~(OutFmt==`FMT))) | + (|Mf[`CORRSHIFTSZ-`NF-2:0]); // 3: NF > NF1 > XLEN - if (`XLENPOS == 3) assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&(OutFmt==`FMT1)) | - (|Nfrac[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF1-1]&((OutFmt==`FMT1)|IntRes)) | - (|Nfrac[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&(~(OutFmt==`FMT)|IntRes)) | - (|Nfrac[`CORRSHIFTSZ-`NF-2:0]); + if (`XLENPOS == 3) assign NormS = (|Mf[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&(OutFmt==`FMT1)) | + (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF1-1]&((OutFmt==`FMT1)|IntRes)) | + (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&(~(OutFmt==`FMT)|IntRes)) | + (|Mf[`CORRSHIFTSZ-`NF-2:0]); end else if (`FPSIZES == 4) begin // Quad precision will always be greater than XLEN // 2: NF > XLEN > NF1 - if (`XLENPOS == 2) assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`H_NF-2:`CORRSHIFTSZ-`S_NF-1]&FpRes&(OutFmt==`H_FMT)) | - (|Nfrac[`CORRSHIFTSZ-`S_NF-2:`CORRSHIFTSZ-`D_NF-1]&FpRes&((OutFmt==`S_FMT)|(OutFmt==`H_FMT))) | - (|Nfrac[`CORRSHIFTSZ-`D_NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~(OutFmt==`Q_FMT)) | - (|Nfrac[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`Q_NF-1]&(~(OutFmt==`Q_FMT)|IntRes)) | - (|Nfrac[`CORRSHIFTSZ-`Q_NF-2:0]); + if (`XLENPOS == 2) assign NormS = (|Mf[`CORRSHIFTSZ-`H_NF-2:`CORRSHIFTSZ-`S_NF-1]&FpRes&(OutFmt==`H_FMT)) | + (|Mf[`CORRSHIFTSZ-`S_NF-2:`CORRSHIFTSZ-`D_NF-1]&FpRes&((OutFmt==`S_FMT)|(OutFmt==`H_FMT))) | + (|Mf[`CORRSHIFTSZ-`D_NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~(OutFmt==`Q_FMT)) | + (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`Q_NF-1]&(~(OutFmt==`Q_FMT)|IntRes)) | + (|Mf[`CORRSHIFTSZ-`Q_NF-2:0]); // 3: NF > NF1 > XLEN // The extra XLEN bit will be ored later when caculating the final sticky bit - the ufplus1 not needed for integer - if (`XLENPOS == 3) assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`H_NF-2:`CORRSHIFTSZ-`S_NF-1]&FpRes&(OutFmt==`H_FMT)) | - (|Nfrac[`CORRSHIFTSZ-`S_NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&((OutFmt==`S_FMT)|(OutFmt==`H_FMT))) | - (|Nfrac[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`D_NF-1]&((OutFmt==`S_FMT)|(OutFmt==`H_FMT)|IntRes)) | - (|Nfrac[`CORRSHIFTSZ-`D_NF-2:`CORRSHIFTSZ-`Q_NF-1]&(~(OutFmt==`Q_FMT)|IntRes)) | - (|Nfrac[`CORRSHIFTSZ-`Q_NF-2:0]); + if (`XLENPOS == 3) assign NormS = (|Mf[`CORRSHIFTSZ-`H_NF-2:`CORRSHIFTSZ-`S_NF-1]&FpRes&(OutFmt==`H_FMT)) | + (|Mf[`CORRSHIFTSZ-`S_NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&((OutFmt==`S_FMT)|(OutFmt==`H_FMT))) | + (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`D_NF-1]&((OutFmt==`S_FMT)|(OutFmt==`H_FMT)|IntRes)) | + (|Mf[`CORRSHIFTSZ-`D_NF-2:`CORRSHIFTSZ-`Q_NF-1]&(~(OutFmt==`Q_FMT)|IntRes)) | + (|Mf[`CORRSHIFTSZ-`Q_NF-2:0]); end @@ -176,37 +176,37 @@ module round( // only add the Addend sticky if doing an FMA opperation // - the shifter shifts too far left when there's an underflow (shifting out all possible sticky bits) - assign UfSticky = FmaZmSticky&FmaOp | NormSumSticky | CvtResUf&CvtOp | FmaSe[`NE+1]&FmaOp | DivSticky&DivOp; + assign UfS = FmaZmS&FmaOp | NormS | CvtResUf&CvtOp | FmaSe[`NE+1]&FmaOp | DivS&DivOp; // determine round and LSB of the rounded value // - underflow round bit is used to determint the underflow flag if (`FPSIZES == 1) begin - assign FpRound = Nfrac[`CORRSHIFTSZ-`NF-1]; - assign FpLSBRes = Nfrac[`CORRSHIFTSZ-`NF]; - assign FpUfRound = Nfrac[`CORRSHIFTSZ-`NF-2]; + assign FpRound = Mf[`CORRSHIFTSZ-`NF-1]; + assign FpLSBRes = Mf[`CORRSHIFTSZ-`NF]; + assign FpUfRound = Mf[`CORRSHIFTSZ-`NF-2]; end else if (`FPSIZES == 2) begin - assign FpRound = OutFmt ? Nfrac[`CORRSHIFTSZ-`NF-1] : Nfrac[`CORRSHIFTSZ-`NF1-1]; - assign FpLSBRes = OutFmt ? Nfrac[`CORRSHIFTSZ-`NF] : Nfrac[`CORRSHIFTSZ-`NF1]; - assign FpUfRound = OutFmt ? Nfrac[`CORRSHIFTSZ-`NF-2] : Nfrac[`CORRSHIFTSZ-`NF1-2]; + assign FpRound = OutFmt ? Mf[`CORRSHIFTSZ-`NF-1] : Mf[`CORRSHIFTSZ-`NF1-1]; + assign FpLSBRes = OutFmt ? Mf[`CORRSHIFTSZ-`NF] : Mf[`CORRSHIFTSZ-`NF1]; + assign FpUfRound = OutFmt ? Mf[`CORRSHIFTSZ-`NF-2] : Mf[`CORRSHIFTSZ-`NF1-2]; end else if (`FPSIZES == 3) begin always_comb case (OutFmt) `FMT: begin - FpRound = Nfrac[`CORRSHIFTSZ-`NF-1]; - FpLSBRes = Nfrac[`CORRSHIFTSZ-`NF]; - FpUfRound = Nfrac[`CORRSHIFTSZ-`NF-2]; + FpRound = Mf[`CORRSHIFTSZ-`NF-1]; + FpLSBRes = Mf[`CORRSHIFTSZ-`NF]; + FpUfRound = Mf[`CORRSHIFTSZ-`NF-2]; end `FMT1: begin - FpRound = Nfrac[`CORRSHIFTSZ-`NF1-1]; - FpLSBRes = Nfrac[`CORRSHIFTSZ-`NF1]; - FpUfRound = Nfrac[`CORRSHIFTSZ-`NF1-2]; + FpRound = Mf[`CORRSHIFTSZ-`NF1-1]; + FpLSBRes = Mf[`CORRSHIFTSZ-`NF1]; + FpUfRound = Mf[`CORRSHIFTSZ-`NF1-2]; end `FMT2: begin - FpRound = Nfrac[`CORRSHIFTSZ-`NF2-1]; - FpLSBRes = Nfrac[`CORRSHIFTSZ-`NF2]; - FpUfRound = Nfrac[`CORRSHIFTSZ-`NF2-2]; + FpRound = Mf[`CORRSHIFTSZ-`NF2-1]; + FpLSBRes = Mf[`CORRSHIFTSZ-`NF2]; + FpUfRound = Mf[`CORRSHIFTSZ-`NF2-2]; end default: begin FpRound = 1'bx; @@ -218,55 +218,55 @@ module round( always_comb case (OutFmt) 2'h3: begin - FpRound = Nfrac[`CORRSHIFTSZ-`Q_NF-1]; - FpLSBRes = Nfrac[`CORRSHIFTSZ-`Q_NF]; - FpUfRound = Nfrac[`CORRSHIFTSZ-`Q_NF-2]; + FpRound = Mf[`CORRSHIFTSZ-`Q_NF-1]; + FpLSBRes = Mf[`CORRSHIFTSZ-`Q_NF]; + FpUfRound = Mf[`CORRSHIFTSZ-`Q_NF-2]; end 2'h1: begin - FpRound = Nfrac[`CORRSHIFTSZ-`D_NF-1]; - FpLSBRes = Nfrac[`CORRSHIFTSZ-`D_NF]; - FpUfRound = Nfrac[`CORRSHIFTSZ-`D_NF-2]; + FpRound = Mf[`CORRSHIFTSZ-`D_NF-1]; + FpLSBRes = Mf[`CORRSHIFTSZ-`D_NF]; + FpUfRound = Mf[`CORRSHIFTSZ-`D_NF-2]; end 2'h0: begin - FpRound = Nfrac[`CORRSHIFTSZ-`S_NF-1]; - FpLSBRes = Nfrac[`CORRSHIFTSZ-`S_NF]; - FpUfRound = Nfrac[`CORRSHIFTSZ-`S_NF-2]; + FpRound = Mf[`CORRSHIFTSZ-`S_NF-1]; + FpLSBRes = Mf[`CORRSHIFTSZ-`S_NF]; + FpUfRound = Mf[`CORRSHIFTSZ-`S_NF-2]; end 2'h2: begin - FpRound = Nfrac[`CORRSHIFTSZ-`H_NF-1]; - FpLSBRes = Nfrac[`CORRSHIFTSZ-`H_NF]; - FpUfRound = Nfrac[`CORRSHIFTSZ-`H_NF-2]; + FpRound = Mf[`CORRSHIFTSZ-`H_NF-1]; + FpLSBRes = Mf[`CORRSHIFTSZ-`H_NF]; + FpUfRound = Mf[`CORRSHIFTSZ-`H_NF-2]; end endcase end - assign R = ToInt&CvtOp ? Nfrac[`CORRSHIFTSZ-`XLEN-1] : FpRound; - assign LSBRes = ToInt&CvtOp ? Nfrac[`CORRSHIFTSZ-`XLEN] : FpLSBRes; - assign UfRound = ToInt&CvtOp ? Nfrac[`CORRSHIFTSZ-`XLEN-2] : FpUfRound; + assign R = ToInt&CvtOp ? Mf[`CORRSHIFTSZ-`XLEN-1] : FpRound; + assign L = ToInt&CvtOp ? Mf[`CORRSHIFTSZ-`XLEN] : FpLSBRes; + assign UfR = ToInt&CvtOp ? Mf[`CORRSHIFTSZ-`XLEN-2] : FpUfRound; // used to determine underflow flag - assign UfLSBRes = FpRound; + assign UfL = FpRound; // determine sticky - assign S = UfSticky | UfRound; + assign S = UfS | UfR; always_comb begin // Determine if you add 1 case (Frm) - 3'b000: CalcPlus1 = R & (S| LSBRes);//round to nearest even + 3'b000: CalcPlus1 = R & (S| L);//round to nearest even 3'b001: CalcPlus1 = 0;//round to zero - 3'b010: CalcPlus1 = Nsgn;//round down - 3'b011: CalcPlus1 = ~Nsgn;//round up + 3'b010: CalcPlus1 = Ms;//round down + 3'b011: CalcPlus1 = ~Ms;//round up 3'b100: CalcPlus1 = R;//round to nearest max magnitude default: CalcPlus1 = 1'bx; endcase // Determine if you add 1 (for underflow flag) case (Frm) - 3'b000: UfCalcPlus1 = UfRound & (UfSticky| UfLSBRes);//round to nearest even + 3'b000: UfCalcPlus1 = UfR & (UfS| UfL);//round to nearest even 3'b001: UfCalcPlus1 = 0;//round to zero - 3'b010: UfCalcPlus1 = Nsgn;//round down - 3'b011: UfCalcPlus1 = ~Nsgn;//round up - 3'b100: UfCalcPlus1 = UfRound;//round to nearest max magnitude + 3'b010: UfCalcPlus1 = Ms;//round down + 3'b011: UfCalcPlus1 = ~Ms;//round up + 3'b100: UfCalcPlus1 = UfR;//round to nearest max magnitude default: UfCalcPlus1 = 1'bx; endcase @@ -275,7 +275,7 @@ module round( // If an answer is exact don't round assign Plus1 = CalcPlus1 & (S | R); assign FpPlus1 = Plus1&~(ToInt&CvtOp); - assign UfPlus1 = UfCalcPlus1 & S; // UfRound is part of sticky + assign UfPlus1 = UfCalcPlus1 & S; // UfR is part of sticky // Compute rounded result if (`FPSIZES == 1) begin @@ -295,19 +295,19 @@ module round( assign RoundAdd = {(`Q_NE+1+`H_NF)'(0), FpPlus1&(OutFmt==`H_FMT), (`S_NF-`H_NF-1)'(0), FpPlus1&(OutFmt==`S_FMT), (`D_NF-`S_NF-1)'(0), FpPlus1&(OutFmt==`D_FMT), (`Q_NF-`D_NF-1)'(0), FpPlus1&(OutFmt==`Q_FMT)}; // determine the result to be roundned - assign RoundFrac = Nfrac[`CORRSHIFTSZ-1:`CORRSHIFTSZ-`NF]; + assign RoundFrac = Mf[`CORRSHIFTSZ-1:`CORRSHIFTSZ-`NF]; always_comb case(PostProcSel) - 2'b10: Nexp = FmaSe; // fma - 2'b00: Nexp = {CvtCe[`NE], CvtCe}&{`NE+2{~CvtResDenormUf|CvtResUf}}; // cvt - 2'b01: Nexp = DivDone ? DivCorrExp : '0; // divide - default: Nexp = '0; + 2'b10: Me = FmaSe; // fma + 2'b00: Me = {CvtCe[`NE], CvtCe}&{`NE+2{~CvtResDenormUf|CvtResUf}}; // cvt + 2'b01: Me = DivDone ? Qe : '0; // divide + default: Me = '0; endcase // round the result // - if the fraction overflows one should be added to the exponent - assign {FullRe, Rf} = {Nexp, RoundFrac} + RoundAdd; + assign {FullRe, Rf} = {Me, RoundFrac} + RoundAdd; assign Re = FullRe[`NE-1:0]; diff --git a/pipelined/src/fpu/roundsign.sv b/pipelined/src/fpu/roundsign.sv index 22686b24..acecb594 100644 --- a/pipelined/src/fpu/roundsign.sv +++ b/pipelined/src/fpu/roundsign.sv @@ -38,23 +38,15 @@ module roundsign( input logic DivOp, input logic CvtOp, input logic CvtCs, - output logic Nsgn + input logic FmaSs, + output logic Ms ); - logic FmaResSgnTmp; - logic DivSgn; + logic Qs; - // is the result negitive - // if p - z is the Sum negitive - // if -p + z is the Sum positive - // if -p - z then the Sum is negitive - assign FmaResSgnTmp = FmaNegSum^FmaPs; //*** move to execute stage - - // assign FmaResSgnTmp = FmaInvA&(FmaAs)&FmaNegSum | FmaInvA&FmaPs&~FmaNegSum | (FmaAs&FmaPs); - - assign DivSgn = Xs^Ys; + assign Qs = Xs^Ys; // Sign for rounding calulation - assign Nsgn = (FmaResSgnTmp&FmaOp) | (CvtCs&CvtOp) | (DivSgn&DivOp); + assign Ms = (FmaSs&FmaOp) | (CvtCs&CvtOp) | (Qs&DivOp); endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/shiftcorrection.sv b/pipelined/src/fpu/shiftcorrection.sv index ecfd9ba0..71a2393a 100644 --- a/pipelined/src/fpu/shiftcorrection.sv +++ b/pipelined/src/fpu/shiftcorrection.sv @@ -33,13 +33,13 @@ module shiftcorrection( input logic FmaOp, input logic DivOp, input logic DivResDenorm, - input logic [`NE+1:0] DivCalcExp, + input logic [`NE+1:0] DivQe, input logic [`NE+1:0] DivDenormShift, - input logic [`NE+1:0] FmaConvNormSumExp, // exponent of the normalized sum not taking into account denormal or zero results + input logic [`NE+1:0] FmaNe, // exponent of the normalized sum not taking into account denormal or zero results input logic FmaPreResultDenorm, // is the result denormalized - calculated before LZA corection input logic FmaSZero, - output logic [`CORRSHIFTSZ-1:0] Nfrac, // the shifted sum before LZA correction - output logic [`NE+1:0] DivCorrExp, + output logic [`CORRSHIFTSZ-1:0] Mf, // the shifted sum before LZA correction + output logic [`NE+1:0] Qe, output logic [`NE+1:0] FmaSe // exponent of the normalized sum ); logic [3*`NF+5:0] CorrSumShifted; // the shifted sum after LZA correction @@ -53,16 +53,16 @@ module shiftcorrection( // the only possible mantissa for a plus two is all zeroes - a one has to propigate all the way through a sum. so we can leave the bottom statement alone assign CorrSumShifted = LZAPlus1 ? Shifted[`NORMSHIFTSZ-3:1] : Shifted[`NORMSHIFTSZ-4:0]; // if the msb is 1 or the exponent was one, but the shifted quotent was < 1 (Denorm) - assign CorrQuotShifted = (LZAPlus2|(DivCalcExp==1&~LZAPlus2)) ? Shifted[`NORMSHIFTSZ-2:`NORMSHIFTSZ-`CORRSHIFTSZ-1] : Shifted[`NORMSHIFTSZ-3:`NORMSHIFTSZ-`CORRSHIFTSZ-2]; + assign CorrQuotShifted = (LZAPlus2|(DivQe==1&~LZAPlus2)) ? Shifted[`NORMSHIFTSZ-2:`NORMSHIFTSZ-`CORRSHIFTSZ-1] : Shifted[`NORMSHIFTSZ-3:`NORMSHIFTSZ-`CORRSHIFTSZ-2]; // if the result of the divider was calculated to be denormalized, then the result was correctly normalized, so select the top shifted bits - assign Nfrac = FmaOp ? {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+6){1'b0}}} : DivOp&~DivResDenorm ? CorrQuotShifted : Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ]; + assign Mf = FmaOp ? {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+6){1'b0}}} : DivOp&~DivResDenorm ? CorrQuotShifted : Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ]; // Determine sum's exponent // if plus1 If plus2 if said denorm but norm plus 1 if said denorm but norm plus 2 - assign FmaSe = (FmaConvNormSumExp+{{`NE+1{1'b0}}, LZAPlus1}+{{`NE{1'b0}}, LZAPlus2, 1'b0}+{{`NE+1{1'b0}}, ~ResDenorm&FmaPreResultDenorm}+{{`NE+1{1'b0}}, &FmaConvNormSumExp&Shifted[3*`NF+6]}) & {`NE+2{~(FmaSZero|ResDenorm)}}; + assign FmaSe = (FmaNe+{{`NE+1{1'b0}}, LZAPlus1}+{{`NE{1'b0}}, LZAPlus2, 1'b0}+{{`NE+1{1'b0}}, ~ResDenorm&FmaPreResultDenorm}+{{`NE+1{1'b0}}, &FmaNe&Shifted[3*`NF+6]}) & {`NE+2{~(FmaSZero|ResDenorm)}}; // recalculate if the result is denormalized assign ResDenorm = FmaPreResultDenorm&~Shifted[`NORMSHIFTSZ-3]&~Shifted[`NORMSHIFTSZ-2]; // the quotent is in the range [.5,2) if there is no early termination // if the quotent < 1 and not denormal then subtract 1 to account for the normalization shift - assign DivCorrExp = ((DivResDenorm)&~DivDenormShift[`NE+1]) ? (`NE+2)'(0) : DivCalcExp - {(`NE+1)'(0), ~LZAPlus2}; + assign Qe = ((DivResDenorm)&~DivDenormShift[`NE+1]) ? (`NE+2)'(0) : DivQe - {(`NE+1)'(0), ~LZAPlus2}; endmodule \ No newline at end of file diff --git a/pipelined/testbench/testbench-fp.sv b/pipelined/testbench/testbench-fp.sv index a95a6624..be5114e9 100644 --- a/pipelined/testbench/testbench-fp.sv +++ b/pipelined/testbench/testbench-fp.sv @@ -94,6 +94,7 @@ module testbenchfp; // in-between FMA signals logic Mult; + logic Ss; logic [`NE+1:0] Pe; logic ZmSticky; logic KillProd; @@ -674,18 +675,18 @@ module testbenchfp; fma fma(.Xs(XSgn), .Ys(YSgn), .Zs(ZSgn), .Xe(XExp), .Ye(YExp), .Ze(ZExp), .Xm(XMan), .Ym(YMan), .Zm(ZMan), - .XZero, .YZero, .ZZero, + .XZero, .YZero, .ZZero, .Ss, .FOpCtrl(OpCtrlVal), .Fmt(ModFmt), .Sm, .NegSum, .InvA, .NCnt, .As, .Ps, .Pe, .ZmSticky, .KillProd); postprocess postprocess(.Xs(XSgn), .Ys(YSgn), .PostProcSel(UnitVal[1:0]), - .Ze(ZExp), .ZDenorm(ZDenorm), .FOpCtrl(OpCtrlVal), .Quot, .DivCalcExp(DivCalcExp), - .Xm(XMan), .Ym(YMan), .Zm(ZMan), .CvtCe(CvtCalcExpE), .DivSticky(DivSticky), + .Ze(ZExp), .ZDenorm(ZDenorm), .FOpCtrl(OpCtrlVal), .DivQm(Quot), .DivQe(DivCalcExp), + .Xm(XMan), .Ym(YMan), .Zm(ZMan), .CvtCe(CvtCalcExpE), .DivS(DivSticky), .FmaSs(Ss), .XNaN(XNaN), .YNaN(YNaN), .ZNaN(ZNaN), .CvtResDenormUf(CvtResDenormUfE), .XZero(XZero), .YZero(YZero), .ZZero(ZZero), .CvtShiftAmt(CvtShiftAmtE), .XInf(XInf), .YInf(YInf), .ZInf(ZInf), .CvtCs(CvtResSgnE), .ToInt(WriteIntVal), .XSNaN(XSNaN), .YSNaN(YSNaN), .ZSNaN(ZSNaN), .CvtLzcIn(CvtLzcInE), .IntZero, - .FmaKillProd(KillProd), .FmaZmSticky(ZmSticky), .FmaPe(Pe), .DivDone, + .FmaKillProd(KillProd), .FmaZmS(ZmSticky), .FmaPe(Pe), .DivDone, .FmaSm(Sm), .FmaNegSum(NegSum), .FmaInvA(InvA), .FmaNCnt(NCnt), .DivEarlyTermShift(EarlyTermShift), .FmaAs(As), .FmaPs(Ps), .Fmt(ModFmt), .Frm(FrmVal), .PostProcFlg(Flg), .PostProcRes(FpRes), .FCvtIntRes(IntRes)); diff --git a/pipelined/testbench/testbench.sv b/pipelined/testbench/testbench.sv index 6d537b14..0fb5f5e6 100644 --- a/pipelined/testbench/testbench.sv +++ b/pipelined/testbench/testbench.sv @@ -114,7 +114,7 @@ logic [3:0] dummy; "arch32f": if (`F_SUPPORTED) tests = arch32f; "imperas32i": tests = imperas32i; "imperas32f": if (`F_SUPPORTED) tests = imperas32f; - "wally32d": if (`D_SUPPORTED) tests = wally32d; + // "wally32d": if (`D_SUPPORTED) tests = wally32d; "imperas32m": if (`M_SUPPORTED) tests = imperas32m; "wally32a": if (`A_SUPPORTED) tests = wally32a; "imperas32c": if (`C_SUPPORTED) tests = imperas32c; diff --git a/tests/riscof/spike/riscof_spike.py b/tests/riscof/spike/riscof_spike.py index 88a6269e..fd429395 100644 --- a/tests/riscof/spike/riscof_spike.py +++ b/tests/riscof/spike/riscof_spike.py @@ -108,7 +108,7 @@ class spike(pluginTemplate): #TODO: The following assumes you are using the riscv-gcc toolchain. If # not please change appropriately - self.compile_cmd = self.compile_cmd+' -mabi='+('lp64 ' if 64 in ispec['supported_xlen'] else 'ilp32 ') + self.compile_cmd = self.compile_cmd+' -mabi='+('lp64 ' if 64 in ispec['supported_xlen'] else ('ilp32e ' if "E" in ispec["ISA"] else 'ilp32 ')) def runTests(self, testList): @@ -158,7 +158,12 @@ class spike(pluginTemplate): # echo statement. if self.target_run: # set up the simulation command. Template is for spike. Please change. - simcmd = self.dut_exe + ' --isa={0} +signature={1} +signature-granularity=4 {2}'.format(self.isa, sig_file, elf) + if ('NO_SAIL=True' in testentry['macros']): + # if the tests can't run on SAIL we copy the reference output to the src directory + reference_output = re.sub("/src/","/references/", re.sub(".S",".reference_output", test)) + simcmd = 'cut -c-{0:g} {1} > {2}'.format(8, reference_output, sig_file) #use cut to remove comments when copying + else: + simcmd = self.dut_exe + ' --isa={0} +signature={1} +signature-granularity=4 {2}'.format(self.isa, sig_file, elf) else: simcmd = 'echo "NO RUN"'