This commit is contained in:
David Harris 2022-07-18 23:11:12 +00:00
commit 59eb11b73a
18 changed files with 211 additions and 225 deletions

View File

@ -114,7 +114,12 @@ void portable_free(void *p) {
#define read_csr(reg) ({ unsigned long __tmp; \
asm volatile ("csrr %0, " #reg : "=r"(__tmp)); \
__tmp; })
#define GETMYTIME(_t) (_t = *(volatile unsigned long long*)0x0200BFF8)
// #if (XLEN==64)
// typedef unsigned long long ee_ptr_int;
// #else
// typedef unsigned long ee_ptr_int;
// #endif
#define GETMYTIME(_t) (_t = *(volatile ee_ptr_int*)0x0200BFF8)
#define MYTIMEDIFF(fin,ini) ((fin)-(ini))
// Changing TIMER_RES_DIVIDER to 1000000 sets EE_TICKS_PER_SEC to 1000 (now counting ticks per ms)
#define TIMER_RES_DIVIDER 10000
@ -196,8 +201,8 @@ void stop_time(void) {
CORE_TICKS get_time(void) {
CORE_TICKS elapsed=(CORE_TICKS)(MYTIMEDIFF(stop_time_val, start_time_val));
unsigned long instructions = minstretDiff();
long long cm100 = 1000000000 / elapsed; // coremark score * 100
long long cpi100 = elapsed*100/instructions; // CPI * 100
ee_ptr_int cm100 = 1000000000 / elapsed; // coremark score * 100
ee_ptr_int cpi100 = elapsed*100/instructions; // CPI * 100
ee_printf(" WALLY CoreMark Results (from get_time)\n");
ee_printf(" Elapsed MTIME: %u\n", elapsed);
ee_printf(" Elapsed MINSTRET: %lu\n", instructions);

View File

@ -69,14 +69,16 @@ typedef clock_t CORE_TICKS;
// #elif (XLEN==32)
// #include <sys/types.h>
// typedef ee_u32 CORE_TICKS;
#else
/* Configuration: size_t and clock_t
Note these need to match the size of the clock output and the xLen the processor supports
*/
#elif (XLEN==64)
typedef unsigned long int size_t;
typedef unsigned long int clock_t;
typedef clock_t CORE_TICKS;
#else
#include <sys/types.h>
#endif
typedef clock_t CORE_TICKS;
/* Definitions: COMPILER_VERSION, COMPILER_FLAGS, MEM_LOCATION
Initialize these strings per platform

View File

@ -1,2 +1,2 @@
vsim -do "do wally-pipelined.do rv64gc arch64d"
vsim -do "do wally-pipelined.do rv32gc wally32periph"

View File

@ -1 +1 @@
vsim -c -do "do wally-pipelined-batch.do rv32gc wally32d"
vsim -c -do "do wally-pipelined-batch.do rv32gc wally32periph"

View File

@ -1,10 +1,10 @@
`include "wally-config.vh"
module divshiftcalc(
input logic [`QLEN-1-(`RADIX/4):0] Quot,
input logic [`QLEN-1-(`RADIX/4):0] DivQm,
input logic [`FMTBITS-1:0] Fmt,
input logic [`DURLEN-1:0] DivEarlyTermShift,
input logic [`NE+1:0] DivCalcExp,
input logic [`NE+1:0] DivQe,
output logic [$clog2(`NORMSHIFTSZ)-1:0] DivShiftAmt,
output logic [`NORMSHIFTSZ-1:0] DivShiftIn,
output logic DivResDenorm,
@ -14,21 +14,21 @@ module divshiftcalc(
// is the result denormalized
// if the exponent is 1 then the result needs to be normalized then the result is denormalized
assign DivResDenorm = DivCalcExp[`NE+1]|(~|DivCalcExp[`NE+1:0]);
assign DivResDenorm = DivQe[`NE+1]|(~|DivQe[`NE+1:0]);
// if the result is denormalized
// 00000000x.xxxxxx... Exp = DivCalcExp
// .00000000xxxxxxx... >> NF+1 Exp = DivCalcExp+NF+1
// .00xxxxxxxxxxxxx... << DivCalcExp+NF+1 Exp = +1
// 00000000x.xxxxxx... Exp = DivQe
// .00000000xxxxxxx... >> NF+1 Exp = DivQe+NF+1
// .00xxxxxxxxxxxxx... << DivQe+NF+1 Exp = +1
// .0000xxxxxxxxxxx... >> 1 Exp = 1
// Left shift amount = DivCalcExp+NF+1-1
assign DivDenormShift = (`NE+2)'(`NF)+DivCalcExp;
// Left shift amount = DivQe+NF+1-1
assign DivDenormShift = (`NE+2)'(`NF)+DivQe;
// if the result is normalized
// 00000000x.xxxxxx... Exp = DivCalcExp
// .00000000xxxxxxx... >> NF+1 Exp = DivCalcExp+NF+1
// 00000000.xxxxxxx... << NF Exp = DivCalcExp+1
// 00000000x.xxxxxx... << NF Exp = DivCalcExp (extra shift done afterwards)
// 00000000xx.xxxxx... << 1? Exp = DivCalcExp-1 (determined after)
// 00000000x.xxxxxx... Exp = DivQe
// .00000000xxxxxxx... >> NF+1 Exp = DivQe+NF+1
// 00000000.xxxxxxx... << NF Exp = DivQe+1
// 00000000x.xxxxxx... << NF Exp = DivQe (extra shift done afterwards)
// 00000000xx.xxxxx... << 1? Exp = DivQe-1 (determined after)
// initial left shift amount = NF
// shift one more if it's a minimally redundant radix 4 - one entire cycle needed for integer bit
assign NormShift = (`NE+2)'(`NF);
@ -36,6 +36,6 @@ module divshiftcalc(
// need to multiply the early termination shift by LOGR*DIVCOPIES = left shift of log2(LOGR*DIVCOPIES)
assign DivShiftAmt = (DivResDenorm ? DivDenormShift[$clog2(`NORMSHIFTSZ)-1:0]&{$clog2(`NORMSHIFTSZ){~DivDenormShift[`NE+1]}} : NormShift[$clog2(`NORMSHIFTSZ)-1:0])+{{$clog2(`NORMSHIFTSZ)-`DURLEN-$clog2(`LOGR*`DIVCOPIES){1'b0}}, DivEarlyTermShift&{`DURLEN{~DivDenormShift[`NE+1]}}, {$clog2(`LOGR*`DIVCOPIES){1'b0}}};
assign DivShiftIn = {{`NF{1'b0}}, Quot, {`NORMSHIFTSZ-`QLEN+(`RADIX/4)-`NF{1'b0}}};
assign DivShiftIn = {{`NF{1'b0}}, DivQm, {`NORMSHIFTSZ-`QLEN+(`RADIX/4)-`NF{1'b0}}};
endmodule

View File

@ -127,7 +127,7 @@ module fcvt (
// - rather have a few and-gates than an extra bit in the priority encoder??? *** is this true?
assign ShiftAmt = ToInt ? Ce[`LOGCVTLEN-1:0]&{`LOGCVTLEN{~Ce[`NE]}} :
ResDenormUf&~IntToFp ? (`LOGCVTLEN)'(`NF-1)+Ce[`LOGCVTLEN-1:0] :
(LeadingZeros)&{`LOGCVTLEN{XDenorm|IntToFp}};
(LeadingZeros);
///////////////////////////////////////////////////////////////////////////
// exp calculations

View File

@ -48,10 +48,10 @@ module flags(
input logic DivOp, // conversion opperation?
input logic FmaOp, // Fma opperation?
input logic [`NE+1:0] FullRe, // Re with bits to determine sign and overflow
input logic [`NE+1:0] Nexp, // exponent of the normalized sum
input logic [`NE+1:0] Me, // exponent of the normalized sum
input logic [1:0] CvtNegResMsbs, // the negitive integer result's most significant bits
input logic FmaAs, FmaPs, // the product and modified Z signs
input logic R, UfLSBRes, S, UfPlus1, // bits used to determine rounding
input logic R, UfL, S, UfPlus1, // bits used to determine rounding
output logic DivByZero,
output logic IntInvalid, Invalid, Overflow, // flags used to select the res
output logic [4:0] PostProcFlg // flags
@ -127,11 +127,11 @@ module flags(
// | | | | and if the result is not exact
// | | | | | and if the input isnt infinity or NaN
// | | | | | |
assign Underflow = ((FullRe[`NE+1] | (FullRe == 0) | ((FullRe == 1) & (Nexp == 0) & ~(UfPlus1&UfLSBRes)))&(R|S))&~(InfIn|NaNIn|DivByZero);
assign Underflow = ((FullRe[`NE+1] | (FullRe == 0) | ((FullRe == 1) & (Me == 0) & ~(UfPlus1&UfL)))&(R|S))&~(InfIn|NaNIn|DivByZero);
// Set Inexact flag if the res is different from what would be output given infinite precision
// - Don't set the underflow flag if an underflowed res isn't output
assign FpInexact = (S|Overflow|R|Underflow)&~(InfIn|NaNIn|DivByZero);
assign FpInexact = (S|Overflow|R)&~(InfIn|NaNIn|DivByZero);
// if the res is too small to be represented and not 0
// | and if the res is not invalid (outside the integer bounds)

View File

@ -44,6 +44,7 @@ module fma(
output logic InvA, // Was A inverted for effective subtraction (P-A or -P+A)
output logic As, // the aligned addend's sign (modified Z sign for other opperations)
output logic Ps, // the product's sign
output logic Ss, // the sum's sign
output logic [$clog2(3*`NF+7)-1:0] NCnt // normalization shift count
);
@ -81,7 +82,7 @@ module fma(
// // Addition/LZA
// ///////////////////////////////////////////////////////////////////////////////
add add(.Am, .Pm, .Ps, .As, .KillProd, .ZmSticky, .AmInv, .PmKilled, .NegSum, .InvA, .Sm);
add add(.Am, .Pm, .Ps, .As, .KillProd, .ZmSticky, .AmInv, .PmKilled, .NegSum, .InvA, .Sm, .Ss);
loa loa(.A(AmInv+{(3*`NF+6)'(0),InvA&~((ZmSticky&~KillProd))}), .P({PmKilled, 1'b0, InvA&Ps&ZmSticky&KillProd}), .NCnt);
endmodule
@ -226,6 +227,7 @@ module add(
output logic [2*`NF+1:0] PmKilled, // the product's mantissa possibly killed
output logic NegSum, // was the sum negitive
output logic InvA, // do you invert the aligned addend
output logic Ss,
output logic [3*`NF+5:0] Sm // the positive sum
);
logic [3*`NF+6:0] PreSum, NegPreSum; // possibly negitive sum
@ -257,6 +259,11 @@ module add(
// Choose the positive sum and accompanying LZA result.
assign Sm = NegSum ? NegPreSum[3*`NF+5:0] : PreSum[3*`NF+5:0];
// is the result negative
// if p - z is the Sum negative
// if -p + z is the Sum positive
// if -p - z then the Sum is negative
assign Ss = NegSum^Ps; //*** move to execute stage
endmodule

View File

@ -35,7 +35,7 @@ module fmashiftcalc(
input logic [$clog2(3*`NF+7)-1:0] FmaNCnt, // normalization shift count
input logic [`FMTBITS-1:0] Fmt, // precision 1 = double 0 = single
input logic FmaKillProd, // is the product set to zero
output logic [`NE+1:0] FmaConvNormSumExp, // exponent of the normalized sum not taking into account denormal or zero results
output logic [`NE+1:0] FmaNe, // exponent of the normalized sum not taking into account denormal or zero results
output logic FmaSZero, // is the result denormalized - calculated before LZA corection
output logic FmaPreResultDenorm, // is the result denormalized - calculated before LZA corection
output logic [$clog2(3*`NF+7)-1:0] FmaShiftAmt, // normalization shift count
@ -57,28 +57,28 @@ module fmashiftcalc(
// convert the sum's exponent into the proper precision
if (`FPSIZES == 1) begin
assign FmaConvNormSumExp = NormSumExp;
assign FmaNe = NormSumExp;
end else if (`FPSIZES == 2) begin
assign FmaConvNormSumExp = Fmt ? NormSumExp : (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS1))&{`NE+2{|NormSumExp}};
assign FmaNe = Fmt ? NormSumExp : (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS1))&{`NE+2{|NormSumExp}};
end else if (`FPSIZES == 3) begin
always_comb begin
case (Fmt)
`FMT: FmaConvNormSumExp = NormSumExp;
`FMT1: FmaConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS1))&{`NE+2{|NormSumExp}};
`FMT2: FmaConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS2))&{`NE+2{|NormSumExp}};
default: FmaConvNormSumExp = {`NE+2{1'bx}};
`FMT: FmaNe = NormSumExp;
`FMT1: FmaNe = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS1))&{`NE+2{|NormSumExp}};
`FMT2: FmaNe = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS2))&{`NE+2{|NormSumExp}};
default: FmaNe = {`NE+2{1'bx}};
endcase
end
end else if (`FPSIZES == 4) begin
always_comb begin
case (Fmt)
2'h3: FmaConvNormSumExp = NormSumExp;
2'h1: FmaConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`D_BIAS))&{`NE+2{|NormSumExp}};
2'h0: FmaConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`S_BIAS))&{`NE+2{|NormSumExp}};
2'h2: FmaConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`H_BIAS))&{`NE+2{|NormSumExp}};
2'h3: FmaNe = NormSumExp;
2'h1: FmaNe = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`D_BIAS))&{`NE+2{|NormSumExp}};
2'h0: FmaNe = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`S_BIAS))&{`NE+2{|NormSumExp}};
2'h2: FmaNe = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`H_BIAS))&{`NE+2{|NormSumExp}};
endcase
end
@ -144,11 +144,11 @@ module fmashiftcalc(
// - if kill prod dont add to exp
// Determine if the result is denormal
// assign FmaPreResultDenorm = $signed(FmaConvNormSumExp)<=0 & ($signed(FmaConvNormSumExp)>=$signed(-FracLen)) & ~FmaSZero;
// assign FmaPreResultDenorm = $signed(FmaNe)<=0 & ($signed(FmaNe)>=$signed(-FracLen)) & ~FmaSZero;
// Determine the shift needed for denormal results
// - if not denorm add 1 to shift out the leading 1
assign DenormShift = FmaPreResultDenorm ? FmaConvNormSumExp[$clog2(3*`NF+7)-1:0] : 1;
assign DenormShift = FmaPreResultDenorm ? FmaNe[$clog2(3*`NF+7)-1:0] : 1;
// set and calculate the shift input and amount
// - shift once if killing a product and the result is denormalized
assign FmaShiftIn = {3'b0, FmaSm};

View File

@ -114,6 +114,7 @@ module fpu (
logic NegSumE, NegSumM;
logic ZSgnEffE, ZSgnEffM;
logic PSgnE, PSgnM;
logic SsE, SsM;
logic [$clog2(3*`NF+7)-1:0] FmaNormCntE, FmaNormCntM;
// Cvt Signals
@ -255,36 +256,11 @@ module fpu (
.Xm(XManE), .Ym(YManE), .Zm(ZManE),
.XZero(XZeroE), .YZero(YZeroE), .ZZero(ZZeroE),
.FOpCtrl(FOpCtrlE), .Fmt(FmtE),
.As(ZSgnEffE), .Ps(PSgnE),
.As(ZSgnEffE), .Ps(PSgnE), .Ss(SsE),
.Sm(SumE), .Pe(ProdExpE),
.NegSum(NegSumE), .InvA(InvAE), .NCnt(FmaNormCntE),
.ZmSticky(AddendStickyE), .KillProd(KillProdE));
// // fpdivsqrt using Goldschmidt's iteration
// if(`FLEN == 64) begin
// flopenrc #(64) reg_input1 (.d({FSrcXE[63:0]}), .q(DivInput1E),
// .clear(FDivSqrtDoneE), .en(load_preload),
// .reset(reset), .clk(clk));
// flopenrc #(64) reg_input2 (.d({FSrcYE[63:0]}), .q(DivInput2E),
// .clear(FDivSqrtDoneE), .en(load_preload),
// .reset(reset), .clk(clk));
// end
// else if (`FLEN == 32) begin
// flopenrc #(64) reg_input1 (.d({32'b0, FSrcXE[31:0]}), .q(DivInput1E),
// .clear(FDivSqrtDoneE), .en(load_preload),
// .reset(reset), .clk(clk));
// flopenrc #(64) reg_input2 (.d({32'b0, FSrcYE[31:0]}), .q(DivInput2E),
// .clear(FDivSqrtDoneE), .en(load_preload),
// .reset(reset), .clk(clk));
// end
// flopenrc #(8) reg_input3 (.d({XNaNE, YNaNE, XInfE, YInfE, XZeroE, YZeroE, FmtE[0], FOpCtrlE[0]}),
// .q({XNaNQ, YNaNQ, XInfQ, YInfQ, XZeroQ, YZeroQ, FmtQ, FOpCtrlQ}),
// .clear(FDivSqrtDoneE), .en(load_preload),
// .reset(reset), .clk(clk));
// fpdiv_pipe fdivsqrt (.op1(DivInput1E[63:0]), .op2(DivInput2E[63:0]), .rm(FrmE[1:0]), .op_type(FOpCtrlQ),
// .reset, .clk(clk), .start(FDivStartE), .P(~FmtQ), .OvEn(1'b1), .UnEn(1'b1),
// .XNaNQ, .YNaNQ, .XInfQ, .YInfQ, .XZeroQ, .YZeroQ, .load_preload,
// .FDivBusyE, .done(FDivSqrtDoneE), .AS_Result(FDivResM), .Flags(FDivFlgM));
divsqrt divsqrt(.clk, .reset, .FmtE, .XManE, .YManE, .XExpE, .YExpE,
.XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .DivStartE(FDivStartE),
.StallE, .StallM, .DivStickyM, .DivBusy(FDivBusyE), .DivCalcExpM, //***change divbusyE to M signal
@ -359,9 +335,9 @@ module fpu (
{FRegWriteM, FResSelM, PostProcSelM, FrmM, FmtM, FOpCtrlM, FWriteIntM});
flopenrc #(3*`NF+6) EMRegFma2(clk, reset, FlushM, ~StallM, SumE, SumM);
flopenrc #(`NE+2) EMRegFma3(clk, reset, FlushM, ~StallM, ProdExpE, ProdExpM);
flopenrc #($clog2(3*`NF+7)+6) EMRegFma4(clk, reset, FlushM, ~StallM,
{AddendStickyE, KillProdE, InvAE, FmaNormCntE, NegSumE, ZSgnEffE, PSgnE},
{AddendStickyM, KillProdM, InvAM, FmaNormCntM, NegSumM, ZSgnEffM, PSgnM});
flopenrc #($clog2(3*`NF+7)+7) EMRegFma4(clk, reset, FlushM, ~StallM,
{AddendStickyE, KillProdE, InvAE, FmaNormCntE, NegSumE, ZSgnEffE, PSgnE, SsE},
{AddendStickyM, KillProdM, InvAM, FmaNormCntM, NegSumM, ZSgnEffM, PSgnM, SsM});
flopenrc #(`NE+`LOGCVTLEN+`CVTLEN+4) EMRegCvt(clk, reset, FlushM, ~StallM,
{CvtCalcExpE, CvtShiftAmtE, CvtResDenormUfE, CvtResSgnE, IntZeroE, CvtLzcInE},
{CvtCalcExpM, CvtShiftAmtM, CvtResDenormUfM, CvtResSgnM, IntZeroM, CvtLzcInM});
@ -381,10 +357,10 @@ module fpu (
assign FpLoadStoreM = FResSelM[1];
postprocess postprocess(.Xs(XSgnM), .Ys(YSgnM), .Ze(ZExpM), .Xm(XManM), .Ym(YManM), .Zm(ZManM), .Frm(FrmM), .Fmt(FmtM), .FmaPe(ProdExpM), .DivEarlyTermShift(EarlyTermShiftM),
.FmaZmSticky(AddendStickyM), .FmaKillProd(KillProdM), .XZero(XZeroM), .YZero(YZeroM), .ZZero(ZZeroM), .XInf(XInfM), .YInf(YInfM), .Quot(QuotM),
.ZInf(ZInfM), .XNaN(XNaNM), .YNaN(YNaNM), .ZNaN(ZNaNM), .XSNaN(XSNaNM), .YSNaN(YSNaNM), .ZSNaN(ZSNaNM), .FmaSm(SumM), .DivCalcExp(DivCalcExpM), .DivDone(DivDoneM),
.FmaZmS(AddendStickyM), .FmaKillProd(KillProdM), .XZero(XZeroM), .YZero(YZeroM), .ZZero(ZZeroM), .XInf(XInfM), .YInf(YInfM), .DivQm(QuotM), .FmaSs(SsM),
.ZInf(ZInfM), .XNaN(XNaNM), .YNaN(YNaNM), .ZNaN(ZNaNM), .XSNaN(XSNaNM), .YSNaN(YSNaNM), .ZSNaN(ZSNaNM), .FmaSm(SumM), .DivQe(DivCalcExpM), .DivDone(DivDoneM),
.FmaNegSum(NegSumM), .FmaInvA(InvAM), .ZDenorm(ZDenormM), .FmaAs(ZSgnEffM), .FmaPs(PSgnM), .FOpCtrl(FOpCtrlM), .FmaNCnt(FmaNormCntM),
.CvtCe(CvtCalcExpM), .CvtResDenormUf(CvtResDenormUfM),.CvtShiftAmt(CvtShiftAmtM), .CvtCs(CvtResSgnM), .ToInt(FWriteIntM), .DivSticky(DivStickyM),
.CvtCe(CvtCalcExpM), .CvtResDenormUf(CvtResDenormUfM),.CvtShiftAmt(CvtShiftAmtM), .CvtCs(CvtResSgnM), .ToInt(FWriteIntM), .DivS(DivStickyM),
.CvtLzcIn(CvtLzcInM), .IntZero(IntZeroM), .PostProcSel(PostProcSelM), .PostProcRes(PostProcResM), .PostProcFlg(PostProcFlgM), .FCvtIntRes(FCvtIntResM));
// FPU flag selection - to privileged

View File

@ -48,17 +48,18 @@ module postprocess (
input logic FmaPs, // the product's sign
input logic [`NE+1:0] FmaPe, // Product exponent
input logic [3*`NF+5:0] FmaSm, // the positive sum
input logic FmaZmSticky, // sticky bit that is calculated during alignment
input logic FmaZmS, // sticky bit that is calculated during alignment
input logic FmaKillProd, // set the product to zero before addition if the product is too small to matter
input logic FmaNegSum, // was the sum negitive
input logic FmaInvA, // do you invert Z
input logic FmaSs,
input logic [$clog2(3*`NF+7)-1:0] FmaNCnt, // the normalization shift count
//divide signals
input logic [`DURLEN-1:0] DivEarlyTermShift,
input logic DivSticky,
input logic DivS,
input logic DivDone,
input logic [`NE+1:0] DivCalcExp,
input logic [`QLEN-1-(`RADIX/4):0] Quot,
input logic [`NE+1:0] DivQe,
input logic [`QLEN-1-(`RADIX/4):0] DivQm,
// conversion signals
input logic CvtCs, // the result's sign
input logic [`NE:0] CvtCe, // the calculated expoent
@ -77,9 +78,9 @@ module postprocess (
logic Ws;
logic [`NF-1:0] Rf; // Result fraction
logic [`NE-1:0] Re; // Result exponent
logic Nsgn;
logic [`NE+1:0] Nexp;
logic [`CORRSHIFTSZ-1:0] Nfrac; // corectly shifted fraction
logic Ms;
logic [`NE+1:0] Me;
logic [`CORRSHIFTSZ-1:0] Mf; // corectly shifted fraction
logic [`NE+1:0] FullRe; // Re with bits to determine sign and overflow
logic S; // S bit
logic UfPlus1; // do you add one (for determining underflow flag)
@ -89,19 +90,19 @@ module postprocess (
logic [`NORMSHIFTSZ-1:0] Shifted; // the shifted result
logic Plus1; // add one to the final result?
logic IntInvalid, Overflow, Invalid; // flags
logic UfLSBRes;
logic UfL;
logic [`FMTBITS-1:0] OutFmt;
// fma signals
logic [`NE+1:0] FmaSe; // exponent of the normalized sum
logic FmaSZero; // is the sum zero
logic [3*`NF+8:0] FmaShiftIn; // shift input
logic [`NE+1:0] FmaConvNormSumExp; // exponent of the normalized sum not taking into account denormal or zero results
logic [`NE+1:0] FmaNe; // exponent of the normalized sum not taking into account denormal or zero results
logic FmaPreResultDenorm; // is the result denormalized - calculated before LZA corection
logic [$clog2(3*`NF+7)-1:0] FmaShiftAmt; // normalization shift count
// division singals
logic [$clog2(`NORMSHIFTSZ)-1:0] DivShiftAmt;
logic [`NORMSHIFTSZ-1:0] DivShiftIn;
logic [`NE+1:0] DivCorrExp;
logic [`NE+1:0] Qe;
logic DivByZero;
logic DivResDenorm;
logic [`NE+1:0] DivDenormShift;
@ -150,9 +151,9 @@ module postprocess (
cvtshiftcalc cvtshiftcalc(.ToInt, .CvtCe, .CvtResDenormUf, .Xm, .CvtLzcIn,
.XZero, .IntToFp, .OutFmt, .CvtResUf, .CvtShiftIn);
fmashiftcalc fmashiftcalc(.FmaSm, .Ze, .FmaPe, .FmaNCnt, .Fmt, .FmaKillProd, .FmaConvNormSumExp,
fmashiftcalc fmashiftcalc(.FmaSm, .Ze, .FmaPe, .FmaNCnt, .Fmt, .FmaKillProd, .FmaNe,
.FmaSZero, .FmaPreResultDenorm, .FmaShiftAmt, .FmaShiftIn);
divshiftcalc divshiftcalc(.Fmt, .DivCalcExp, .Quot, .DivEarlyTermShift, .DivResDenorm, .DivDenormShift, .DivShiftAmt, .DivShiftIn);
divshiftcalc divshiftcalc(.Fmt, .DivQe, .DivQm, .DivEarlyTermShift, .DivResDenorm, .DivDenormShift, .DivShiftAmt, .DivShiftIn);
always_comb
case(PostProcSel)
@ -181,9 +182,9 @@ module postprocess (
normshift normshift (.ShiftIn, .ShiftAmt, .Shifted);
shiftcorrection shiftcorrection(.FmaOp, .FmaPreResultDenorm, .FmaConvNormSumExp,
.DivResDenorm, .DivDenormShift, .DivOp, .DivCalcExp,
.DivCorrExp, .FmaSZero, .Shifted, .FmaSe, .Nfrac);
shiftcorrection shiftcorrection(.FmaOp, .FmaPreResultDenorm, .FmaNe,
.DivResDenorm, .DivDenormShift, .DivOp, .DivQe,
.Qe, .FmaSZero, .Shifted, .FmaSe, .Mf);
///////////////////////////////////////////////////////////////////////////////
// Rounding
@ -197,19 +198,19 @@ module postprocess (
roundsign roundsign(.FmaPs, .FmaAs, .FmaInvA, .FmaOp, .DivOp, .CvtOp, .FmaNegSum,
.Xs, .Ys, .CvtCs, .Nsgn);
.FmaSs, .Xs, .Ys, .CvtCs, .Ms);
round round(.OutFmt, .Frm, .S, .FmaZmSticky, .Plus1, .PostProcSel, .CvtCe, .DivCorrExp,
.Nsgn, .FmaSe, .FmaOp, .CvtOp, .CvtResDenormUf, .Nfrac, .ToInt, .CvtResUf,
.DivSticky, .DivDone,
.DivOp, .UfPlus1, .FullRe, .Rf, .Re, .R, .UfLSBRes, .Nexp);
round round(.OutFmt, .Frm, .S, .FmaZmS, .Plus1, .PostProcSel, .CvtCe, .Qe,
.Ms, .FmaSe, .FmaOp, .CvtOp, .CvtResDenormUf, .Mf, .ToInt, .CvtResUf,
.DivS, .DivDone,
.DivOp, .UfPlus1, .FullRe, .Rf, .Re, .R, .UfL, .Me);
///////////////////////////////////////////////////////////////////////////////
// Sign calculation
///////////////////////////////////////////////////////////////////////////////
resultsign resultsign(.Frm, .FmaPs, .FmaAs, .FmaSe, .R, .S,
.FmaOp, .ZInf, .InfIn, .FmaSZero, .Mult, .Nsgn, .Ws);
.FmaOp, .ZInf, .InfIn, .FmaSZero, .Mult, .Ms, .Ws);
///////////////////////////////////////////////////////////////////////////////
// Flags
@ -218,8 +219,8 @@ module postprocess (
flags flags(.XSNaN, .YSNaN, .ZSNaN, .XInf, .YInf, .ZInf, .InfIn, .XZero, .YZero,
.Xs, .Sqrt, .ToInt, .IntToFp, .Int64, .Signed, .OutFmt, .CvtCe,
.XNaN, .YNaN, .NaNIn, .FmaAs, .FmaPs, .R, .IntInvalid, .DivByZero,
.UfLSBRes, .S, .UfPlus1, .CvtOp, .DivOp, .FmaOp, .FullRe, .Plus1,
.Nexp, .CvtNegResMsbs, .Invalid, .Overflow, .PostProcFlg);
.UfL, .S, .UfPlus1, .CvtOp, .DivOp, .FmaOp, .FullRe, .Plus1,
.Me, .CvtNegResMsbs, .Invalid, .Overflow, .PostProcFlg);
///////////////////////////////////////////////////////////////////////////////
// Select the result

View File

@ -39,28 +39,25 @@ module resultsign(
input logic Mult,
input logic R,
input logic S,
input logic Nsgn,
input logic Ms,
output logic Ws
);
logic ZeroSgn;
logic InfSgn;
logic Underflow;
// logic ResultSgnTmp;
logic Zeros;
logic Infs;
// Determine the sign if the sum is zero
// if cancelation then 0 unless round to -infinity
// if multiply then Psgn
// otherwise psign
assign Underflow = FmaSe[`NE+1] | ((FmaSe == 0) & (R|S));
assign ZeroSgn = (FmaPs^FmaAs)&~Underflow&~Mult ? Frm[1:0] == 2'b10 : FmaPs;
assign Zeros = (FmaPs^FmaAs)&~(FmaSe[`NE+1] | ((FmaSe == 0) & (R|S)))&~Mult ? Frm[1:0] == 2'b10 : FmaPs;
// is the result negative
// if p - z is the Sum negative
// if -p + z is the Sum positive
// if -p - z then the Sum is negative
assign InfSgn = ZInf ? FmaAs : FmaPs;
assign Ws = InfIn&FmaOp ? InfSgn : FmaSZero&FmaOp ? ZeroSgn : Nsgn;
assign Infs = ZInf ? FmaAs : FmaPs;
assign Ws = InfIn&FmaOp ? Infs : FmaSZero&FmaOp ? Zeros : Ms;
endmodule

View File

@ -46,29 +46,29 @@ module round(
input logic [1:0] PostProcSel,
input logic CvtResDenormUf,
input logic CvtResUf,
input logic [`CORRSHIFTSZ-1:0] Nfrac,
input logic FmaZmSticky, // addend's sticky bit
input logic [`CORRSHIFTSZ-1:0] Mf,
input logic FmaZmS, // addend's sticky bit
input logic [`NE+1:0] FmaSe, // exponent of the normalized sum
input logic Nsgn, // the result's sign
input logic Ms, // the result's sign
input logic [`NE:0] CvtCe, // the calculated expoent
input logic [`NE+1:0] DivCorrExp, // the calculated expoent
input logic DivSticky, // sticky bit
input logic [`NE+1:0] Qe, // the calculated expoent
input logic DivS, // sticky bit
output logic UfPlus1, // do you add or subtract on from the result
output logic [`NE+1:0] FullRe, // Re with bits to determine sign and overflow
output logic [`NF-1:0] Rf, // Result fraction
output logic [`NE-1:0] Re, // Result exponent
output logic S, // sticky bit
output logic [`NE+1:0] Nexp,
output logic [`NE+1:0] Me,
output logic Plus1,
output logic R, UfLSBRes // bits needed to calculate rounding
output logic R, UfL // bits needed to calculate rounding
);
logic LSBRes; // bit used for rounding - least significant bit of the normalized sum
logic L; // bit used for rounding - least significant bit of the normalized sum
logic UfCalcPlus1;
logic NormSumSticky; // normalized sum's sticky bit
logic UfSticky; // sticky bit for underlow calculation
logic NormS; // normalized sum's sticky bit
logic UfS; // sticky bit for underlow calculation
logic [`NF-1:0] RoundFrac;
logic FpRes, IntRes;
logic UfRound;
logic UfR;
logic FpRound, FpLSBRes, FpUfRound;
logic CalcPlus1, FpPlus1;
logic [`FLEN:0] RoundAdd; // how much to add to the result
@ -114,61 +114,61 @@ module round(
// | NF |1|1|
// ^ ^ if floating point result
// ^ if not an FMA result
if (`XLENPOS == 1)assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) |
(|Nfrac[`CORRSHIFTSZ-`XLEN-2:0]);
if (`XLENPOS == 1)assign NormS = (|Mf[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) |
(|Mf[`CORRSHIFTSZ-`XLEN-2:0]);
// 2: NF > XLEN
if (`XLENPOS == 2)assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&IntRes) |
(|Nfrac[`CORRSHIFTSZ-`NF-2:0]);
if (`XLENPOS == 2)assign NormS = (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&IntRes) |
(|Mf[`CORRSHIFTSZ-`NF-2:0]);
end else if (`FPSIZES == 2) begin
// XLEN is either 64 or 32
// so half and single are always smaller then XLEN
// 1: XLEN > NF > NF1
if (`XLENPOS == 1) assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&FpRes&~OutFmt) |
(|Nfrac[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) |
(|Nfrac[`CORRSHIFTSZ-`XLEN-2:0]);
if (`XLENPOS == 1) assign NormS = (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&FpRes&~OutFmt) |
(|Mf[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) |
(|Mf[`CORRSHIFTSZ-`XLEN-2:0]);
// 2: NF > XLEN > NF1
if (`XLENPOS == 2) assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~OutFmt) |
(|Nfrac[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&(IntRes|~OutFmt)) |
(|Nfrac[`CORRSHIFTSZ-`NF-2:0]);
if (`XLENPOS == 2) assign NormS = (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~OutFmt) |
(|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&(IntRes|~OutFmt)) |
(|Mf[`CORRSHIFTSZ-`NF-2:0]);
// 3: NF > NF1 > XLEN
if (`XLENPOS == 3) assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF1-1]&IntRes) |
(|Nfrac[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&(~OutFmt|IntRes)) |
(|Nfrac[`CORRSHIFTSZ-`NF-2:0]);
if (`XLENPOS == 3) assign NormS = (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF1-1]&IntRes) |
(|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&(~OutFmt|IntRes)) |
(|Mf[`CORRSHIFTSZ-`NF-2:0]);
end else if (`FPSIZES == 3) begin
// 1: XLEN > NF > NF1
if (`XLENPOS == 1) assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`NF1-1]&FpRes&(OutFmt==`FMT1)) |
(|Nfrac[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&FpRes&~(OutFmt==`FMT)) |
(|Nfrac[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) |
(|Nfrac[`CORRSHIFTSZ-`XLEN-2:0]);
if (`XLENPOS == 1) assign NormS = (|Mf[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`NF1-1]&FpRes&(OutFmt==`FMT1)) |
(|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&FpRes&~(OutFmt==`FMT)) |
(|Mf[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) |
(|Mf[`CORRSHIFTSZ-`XLEN-2:0]);
// 2: NF > XLEN > NF1
if (`XLENPOS == 2) assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`NF1-1]&FpRes&(OutFmt==`FMT1)) |
(|Nfrac[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~(OutFmt==`FMT)) |
(|Nfrac[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&(IntRes|~(OutFmt==`FMT))) |
(|Nfrac[`CORRSHIFTSZ-`NF-2:0]);
if (`XLENPOS == 2) assign NormS = (|Mf[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`NF1-1]&FpRes&(OutFmt==`FMT1)) |
(|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~(OutFmt==`FMT)) |
(|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&(IntRes|~(OutFmt==`FMT))) |
(|Mf[`CORRSHIFTSZ-`NF-2:0]);
// 3: NF > NF1 > XLEN
if (`XLENPOS == 3) assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&(OutFmt==`FMT1)) |
(|Nfrac[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF1-1]&((OutFmt==`FMT1)|IntRes)) |
(|Nfrac[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&(~(OutFmt==`FMT)|IntRes)) |
(|Nfrac[`CORRSHIFTSZ-`NF-2:0]);
if (`XLENPOS == 3) assign NormS = (|Mf[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&(OutFmt==`FMT1)) |
(|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF1-1]&((OutFmt==`FMT1)|IntRes)) |
(|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&(~(OutFmt==`FMT)|IntRes)) |
(|Mf[`CORRSHIFTSZ-`NF-2:0]);
end else if (`FPSIZES == 4) begin
// Quad precision will always be greater than XLEN
// 2: NF > XLEN > NF1
if (`XLENPOS == 2) assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`H_NF-2:`CORRSHIFTSZ-`S_NF-1]&FpRes&(OutFmt==`H_FMT)) |
(|Nfrac[`CORRSHIFTSZ-`S_NF-2:`CORRSHIFTSZ-`D_NF-1]&FpRes&((OutFmt==`S_FMT)|(OutFmt==`H_FMT))) |
(|Nfrac[`CORRSHIFTSZ-`D_NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~(OutFmt==`Q_FMT)) |
(|Nfrac[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`Q_NF-1]&(~(OutFmt==`Q_FMT)|IntRes)) |
(|Nfrac[`CORRSHIFTSZ-`Q_NF-2:0]);
if (`XLENPOS == 2) assign NormS = (|Mf[`CORRSHIFTSZ-`H_NF-2:`CORRSHIFTSZ-`S_NF-1]&FpRes&(OutFmt==`H_FMT)) |
(|Mf[`CORRSHIFTSZ-`S_NF-2:`CORRSHIFTSZ-`D_NF-1]&FpRes&((OutFmt==`S_FMT)|(OutFmt==`H_FMT))) |
(|Mf[`CORRSHIFTSZ-`D_NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~(OutFmt==`Q_FMT)) |
(|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`Q_NF-1]&(~(OutFmt==`Q_FMT)|IntRes)) |
(|Mf[`CORRSHIFTSZ-`Q_NF-2:0]);
// 3: NF > NF1 > XLEN
// The extra XLEN bit will be ORed later when calculating the final sticky bit - the ufplus1 not needed for integer
if (`XLENPOS == 3) assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`H_NF-2:`CORRSHIFTSZ-`S_NF-1]&FpRes&(OutFmt==`H_FMT)) |
(|Nfrac[`CORRSHIFTSZ-`S_NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&((OutFmt==`S_FMT)|(OutFmt==`H_FMT))) |
(|Nfrac[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`D_NF-1]&((OutFmt==`S_FMT)|(OutFmt==`H_FMT)|IntRes)) |
(|Nfrac[`CORRSHIFTSZ-`D_NF-2:`CORRSHIFTSZ-`Q_NF-1]&(~(OutFmt==`Q_FMT)|IntRes)) |
(|Nfrac[`CORRSHIFTSZ-`Q_NF-2:0]);
if (`XLENPOS == 3) assign NormS = (|Mf[`CORRSHIFTSZ-`H_NF-2:`CORRSHIFTSZ-`S_NF-1]&FpRes&(OutFmt==`H_FMT)) |
(|Mf[`CORRSHIFTSZ-`S_NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&((OutFmt==`S_FMT)|(OutFmt==`H_FMT))) |
(|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`D_NF-1]&((OutFmt==`S_FMT)|(OutFmt==`H_FMT)|IntRes)) |
(|Mf[`CORRSHIFTSZ-`D_NF-2:`CORRSHIFTSZ-`Q_NF-1]&(~(OutFmt==`Q_FMT)|IntRes)) |
(|Mf[`CORRSHIFTSZ-`Q_NF-2:0]);
end
@ -176,37 +176,37 @@ module round(
// only add the Addend sticky if doing an FMA operation
// - the shifter shifts too far left when there's an underflow (shifting out all possible sticky bits)
assign UfSticky = FmaZmSticky&FmaOp | NormSumSticky | CvtResUf&CvtOp | FmaSe[`NE+1]&FmaOp | DivSticky&DivOp;
assign UfS = FmaZmS&FmaOp | NormS | CvtResUf&CvtOp | FmaSe[`NE+1]&FmaOp | DivS&DivOp;
// determine round and LSB of the rounded value
// - underflow round bit is used to determine the underflow flag
if (`FPSIZES == 1) begin
assign FpRound = Nfrac[`CORRSHIFTSZ-`NF-1];
assign FpLSBRes = Nfrac[`CORRSHIFTSZ-`NF];
assign FpUfRound = Nfrac[`CORRSHIFTSZ-`NF-2];
assign FpRound = Mf[`CORRSHIFTSZ-`NF-1];
assign FpLSBRes = Mf[`CORRSHIFTSZ-`NF];
assign FpUfRound = Mf[`CORRSHIFTSZ-`NF-2];
end else if (`FPSIZES == 2) begin
assign FpRound = OutFmt ? Nfrac[`CORRSHIFTSZ-`NF-1] : Nfrac[`CORRSHIFTSZ-`NF1-1];
assign FpLSBRes = OutFmt ? Nfrac[`CORRSHIFTSZ-`NF] : Nfrac[`CORRSHIFTSZ-`NF1];
assign FpUfRound = OutFmt ? Nfrac[`CORRSHIFTSZ-`NF-2] : Nfrac[`CORRSHIFTSZ-`NF1-2];
assign FpRound = OutFmt ? Mf[`CORRSHIFTSZ-`NF-1] : Mf[`CORRSHIFTSZ-`NF1-1];
assign FpLSBRes = OutFmt ? Mf[`CORRSHIFTSZ-`NF] : Mf[`CORRSHIFTSZ-`NF1];
assign FpUfRound = OutFmt ? Mf[`CORRSHIFTSZ-`NF-2] : Mf[`CORRSHIFTSZ-`NF1-2];
end else if (`FPSIZES == 3) begin
always_comb
case (OutFmt)
`FMT: begin
FpRound = Nfrac[`CORRSHIFTSZ-`NF-1];
FpLSBRes = Nfrac[`CORRSHIFTSZ-`NF];
FpUfRound = Nfrac[`CORRSHIFTSZ-`NF-2];
FpRound = Mf[`CORRSHIFTSZ-`NF-1];
FpLSBRes = Mf[`CORRSHIFTSZ-`NF];
FpUfRound = Mf[`CORRSHIFTSZ-`NF-2];
end
`FMT1: begin
FpRound = Nfrac[`CORRSHIFTSZ-`NF1-1];
FpLSBRes = Nfrac[`CORRSHIFTSZ-`NF1];
FpUfRound = Nfrac[`CORRSHIFTSZ-`NF1-2];
FpRound = Mf[`CORRSHIFTSZ-`NF1-1];
FpLSBRes = Mf[`CORRSHIFTSZ-`NF1];
FpUfRound = Mf[`CORRSHIFTSZ-`NF1-2];
end
`FMT2: begin
FpRound = Nfrac[`CORRSHIFTSZ-`NF2-1];
FpLSBRes = Nfrac[`CORRSHIFTSZ-`NF2];
FpUfRound = Nfrac[`CORRSHIFTSZ-`NF2-2];
FpRound = Mf[`CORRSHIFTSZ-`NF2-1];
FpLSBRes = Mf[`CORRSHIFTSZ-`NF2];
FpUfRound = Mf[`CORRSHIFTSZ-`NF2-2];
end
default: begin
FpRound = 1'bx;
@ -218,55 +218,55 @@ module round(
always_comb
case (OutFmt)
2'h3: begin
FpRound = Nfrac[`CORRSHIFTSZ-`Q_NF-1];
FpLSBRes = Nfrac[`CORRSHIFTSZ-`Q_NF];
FpUfRound = Nfrac[`CORRSHIFTSZ-`Q_NF-2];
FpRound = Mf[`CORRSHIFTSZ-`Q_NF-1];
FpLSBRes = Mf[`CORRSHIFTSZ-`Q_NF];
FpUfRound = Mf[`CORRSHIFTSZ-`Q_NF-2];
end
2'h1: begin
FpRound = Nfrac[`CORRSHIFTSZ-`D_NF-1];
FpLSBRes = Nfrac[`CORRSHIFTSZ-`D_NF];
FpUfRound = Nfrac[`CORRSHIFTSZ-`D_NF-2];
FpRound = Mf[`CORRSHIFTSZ-`D_NF-1];
FpLSBRes = Mf[`CORRSHIFTSZ-`D_NF];
FpUfRound = Mf[`CORRSHIFTSZ-`D_NF-2];
end
2'h0: begin
FpRound = Nfrac[`CORRSHIFTSZ-`S_NF-1];
FpLSBRes = Nfrac[`CORRSHIFTSZ-`S_NF];
FpUfRound = Nfrac[`CORRSHIFTSZ-`S_NF-2];
FpRound = Mf[`CORRSHIFTSZ-`S_NF-1];
FpLSBRes = Mf[`CORRSHIFTSZ-`S_NF];
FpUfRound = Mf[`CORRSHIFTSZ-`S_NF-2];
end
2'h2: begin
FpRound = Nfrac[`CORRSHIFTSZ-`H_NF-1];
FpLSBRes = Nfrac[`CORRSHIFTSZ-`H_NF];
FpUfRound = Nfrac[`CORRSHIFTSZ-`H_NF-2];
FpRound = Mf[`CORRSHIFTSZ-`H_NF-1];
FpLSBRes = Mf[`CORRSHIFTSZ-`H_NF];
FpUfRound = Mf[`CORRSHIFTSZ-`H_NF-2];
end
endcase
end
assign R = ToInt&CvtOp ? Nfrac[`CORRSHIFTSZ-`XLEN-1] : FpRound;
assign LSBRes = ToInt&CvtOp ? Nfrac[`CORRSHIFTSZ-`XLEN] : FpLSBRes;
assign UfRound = ToInt&CvtOp ? Nfrac[`CORRSHIFTSZ-`XLEN-2] : FpUfRound;
assign R = ToInt&CvtOp ? Mf[`CORRSHIFTSZ-`XLEN-1] : FpRound;
assign L = ToInt&CvtOp ? Mf[`CORRSHIFTSZ-`XLEN] : FpLSBRes;
assign UfR = ToInt&CvtOp ? Mf[`CORRSHIFTSZ-`XLEN-2] : FpUfRound;
// used to determine underflow flag
assign UfLSBRes = FpRound;
assign UfL = FpRound;
// determine sticky
assign S = UfSticky | UfRound;
assign S = UfS | UfR;
always_comb begin
// Determine if you add 1
case (Frm)
3'b000: CalcPlus1 = R & (S| LSBRes);//round to nearest even
3'b000: CalcPlus1 = R & (S| L);//round to nearest even
3'b001: CalcPlus1 = 0;//round to zero
3'b010: CalcPlus1 = Nsgn;//round down
3'b011: CalcPlus1 = ~Nsgn;//round up
3'b010: CalcPlus1 = Ms;//round down
3'b011: CalcPlus1 = ~Ms;//round up
3'b100: CalcPlus1 = R;//round to nearest max magnitude
default: CalcPlus1 = 1'bx;
endcase
// Determine if you add 1 (for underflow flag)
case (Frm)
3'b000: UfCalcPlus1 = UfRound & (UfSticky| UfLSBRes);//round to nearest even
3'b000: UfCalcPlus1 = UfR & (UfS| UfL);//round to nearest even
3'b001: UfCalcPlus1 = 0;//round to zero
3'b010: UfCalcPlus1 = Nsgn;//round down
3'b011: UfCalcPlus1 = ~Nsgn;//round up
3'b100: UfCalcPlus1 = UfRound;//round to nearest max magnitude
3'b010: UfCalcPlus1 = Ms;//round down
3'b011: UfCalcPlus1 = ~Ms;//round up
3'b100: UfCalcPlus1 = UfR;//round to nearest max magnitude
default: UfCalcPlus1 = 1'bx;
endcase
@ -275,7 +275,7 @@ module round(
// If an answer is exact don't round
assign Plus1 = CalcPlus1 & (S | R);
assign FpPlus1 = Plus1&~(ToInt&CvtOp);
assign UfPlus1 = UfCalcPlus1 & S; // UfRound is part of sticky
assign UfPlus1 = UfCalcPlus1 & S; // UfR is part of sticky
// Compute rounded result
if (`FPSIZES == 1) begin
@ -295,19 +295,19 @@ module round(
assign RoundAdd = {(`Q_NE+1+`H_NF)'(0), FpPlus1&(OutFmt==`H_FMT), (`S_NF-`H_NF-1)'(0), FpPlus1&(OutFmt==`S_FMT), (`D_NF-`S_NF-1)'(0), FpPlus1&(OutFmt==`D_FMT), (`Q_NF-`D_NF-1)'(0), FpPlus1&(OutFmt==`Q_FMT)};
// determine the result to be rounded
assign RoundFrac = Nfrac[`CORRSHIFTSZ-1:`CORRSHIFTSZ-`NF];
assign RoundFrac = Mf[`CORRSHIFTSZ-1:`CORRSHIFTSZ-`NF];
always_comb
case(PostProcSel)
2'b10: Nexp = FmaSe; // fma
2'b00: Nexp = {CvtCe[`NE], CvtCe}&{`NE+2{~CvtResDenormUf|CvtResUf}}; // cvt
2'b01: Nexp = DivDone ? DivCorrExp : '0; // divide
default: Nexp = '0;
2'b10: Me = FmaSe; // fma
2'b00: Me = {CvtCe[`NE], CvtCe}&{`NE+2{~CvtResDenormUf|CvtResUf}}; // cvt
2'b01: Me = DivDone ? Qe : '0; // divide
default: Me = '0;
endcase
// round the result
// - if the fraction overflows one should be added to the exponent
assign {FullRe, Rf} = {Nexp, RoundFrac} + RoundAdd;
assign {FullRe, Rf} = {Me, RoundFrac} + RoundAdd;
assign Re = FullRe[`NE-1:0];

View File

@ -38,23 +38,15 @@ module roundsign(
input logic DivOp,
input logic CvtOp,
input logic CvtCs,
output logic Nsgn
input logic FmaSs,
output logic Ms
);
logic FmaResSgnTmp;
logic DivSgn;
logic Qs;
// is the result negative
// if p - z is the Sum negative
// if -p + z is the Sum positive
// if -p - z then the Sum is negative
assign FmaResSgnTmp = FmaNegSum^FmaPs; //*** move to execute stage
// assign FmaResSgnTmp = FmaInvA&(FmaAs)&FmaNegSum | FmaInvA&FmaPs&~FmaNegSum | (FmaAs&FmaPs);
assign DivSgn = Xs^Ys;
assign Qs = Xs^Ys;
// Sign for rounding calculation
assign Nsgn = (FmaResSgnTmp&FmaOp) | (CvtCs&CvtOp) | (DivSgn&DivOp);
assign Ms = (FmaSs&FmaOp) | (CvtCs&CvtOp) | (Qs&DivOp);
endmodule

View File

@ -33,13 +33,13 @@ module shiftcorrection(
input logic FmaOp,
input logic DivOp,
input logic DivResDenorm,
input logic [`NE+1:0] DivCalcExp,
input logic [`NE+1:0] DivQe,
input logic [`NE+1:0] DivDenormShift,
input logic [`NE+1:0] FmaConvNormSumExp, // exponent of the normalized sum not taking into account denormal or zero results
input logic [`NE+1:0] FmaNe, // exponent of the normalized sum not taking into account denormal or zero results
input logic FmaPreResultDenorm, // is the result denormalized - calculated before LZA corection
input logic FmaSZero,
output logic [`CORRSHIFTSZ-1:0] Nfrac, // the shifted sum before LZA correction
output logic [`NE+1:0] DivCorrExp,
output logic [`CORRSHIFTSZ-1:0] Mf, // the shifted sum before LZA correction
output logic [`NE+1:0] Qe,
output logic [`NE+1:0] FmaSe // exponent of the normalized sum
);
logic [3*`NF+5:0] CorrSumShifted; // the shifted sum after LZA correction
@ -53,16 +53,16 @@ module shiftcorrection(
// the only possible mantissa for a plus two is all zeroes - a one has to propagate all the way through a sum, so we can leave the bottom statement alone
assign CorrSumShifted = LZAPlus1 ? Shifted[`NORMSHIFTSZ-3:1] : Shifted[`NORMSHIFTSZ-4:0];
// if the msb is 1 or the exponent was one, but the shifted quotient was < 1 (Denorm)
assign CorrQuotShifted = (LZAPlus2|(DivCalcExp==1&~LZAPlus2)) ? Shifted[`NORMSHIFTSZ-2:`NORMSHIFTSZ-`CORRSHIFTSZ-1] : Shifted[`NORMSHIFTSZ-3:`NORMSHIFTSZ-`CORRSHIFTSZ-2];
assign CorrQuotShifted = (LZAPlus2|(DivQe==1&~LZAPlus2)) ? Shifted[`NORMSHIFTSZ-2:`NORMSHIFTSZ-`CORRSHIFTSZ-1] : Shifted[`NORMSHIFTSZ-3:`NORMSHIFTSZ-`CORRSHIFTSZ-2];
// if the result of the divider was calculated to be denormalized, then the result was correctly normalized, so select the top shifted bits
assign Nfrac = FmaOp ? {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+6){1'b0}}} : DivOp&~DivResDenorm ? CorrQuotShifted : Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ];
assign Mf = FmaOp ? {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+6){1'b0}}} : DivOp&~DivResDenorm ? CorrQuotShifted : Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ];
// Determine sum's exponent
// if plus1 If plus2 if said denorm but norm plus 1 if said denorm but norm plus 2
assign FmaSe = (FmaConvNormSumExp+{{`NE+1{1'b0}}, LZAPlus1}+{{`NE{1'b0}}, LZAPlus2, 1'b0}+{{`NE+1{1'b0}}, ~ResDenorm&FmaPreResultDenorm}+{{`NE+1{1'b0}}, &FmaConvNormSumExp&Shifted[3*`NF+6]}) & {`NE+2{~(FmaSZero|ResDenorm)}};
assign FmaSe = (FmaNe+{{`NE+1{1'b0}}, LZAPlus1}+{{`NE{1'b0}}, LZAPlus2, 1'b0}+{{`NE+1{1'b0}}, ~ResDenorm&FmaPreResultDenorm}+{{`NE+1{1'b0}}, &FmaNe&Shifted[3*`NF+6]}) & {`NE+2{~(FmaSZero|ResDenorm)}};
// recalculate if the result is denormalized
assign ResDenorm = FmaPreResultDenorm&~Shifted[`NORMSHIFTSZ-3]&~Shifted[`NORMSHIFTSZ-2];
// the quotient is in the range [.5,2) if there is no early termination
// if the quotient < 1 and not denormal then subtract 1 to account for the normalization shift
assign DivCorrExp = ((DivResDenorm)&~DivDenormShift[`NE+1]) ? (`NE+2)'(0) : DivCalcExp - {(`NE+1)'(0), ~LZAPlus2};
assign Qe = ((DivResDenorm)&~DivDenormShift[`NE+1]) ? (`NE+2)'(0) : DivQe - {(`NE+1)'(0), ~LZAPlus2};
endmodule

View File

@ -94,6 +94,7 @@ module testbenchfp;
// in-between FMA signals
logic Mult;
logic Ss;
logic [`NE+1:0] Pe;
logic ZmSticky;
logic KillProd;
@ -674,18 +675,18 @@ module testbenchfp;
fma fma(.Xs(XSgn), .Ys(YSgn), .Zs(ZSgn),
.Xe(XExp), .Ye(YExp), .Ze(ZExp),
.Xm(XMan), .Ym(YMan), .Zm(ZMan),
.XZero, .YZero, .ZZero,
.XZero, .YZero, .ZZero, .Ss,
.FOpCtrl(OpCtrlVal), .Fmt(ModFmt), .Sm, .NegSum, .InvA, .NCnt, .As, .Ps,
.Pe, .ZmSticky, .KillProd);
postprocess postprocess(.Xs(XSgn), .Ys(YSgn), .PostProcSel(UnitVal[1:0]),
.Ze(ZExp), .ZDenorm(ZDenorm), .FOpCtrl(OpCtrlVal), .Quot, .DivCalcExp(DivCalcExp),
.Xm(XMan), .Ym(YMan), .Zm(ZMan), .CvtCe(CvtCalcExpE), .DivSticky(DivSticky),
.Ze(ZExp), .ZDenorm(ZDenorm), .FOpCtrl(OpCtrlVal), .DivQm(Quot), .DivQe(DivCalcExp),
.Xm(XMan), .Ym(YMan), .Zm(ZMan), .CvtCe(CvtCalcExpE), .DivS(DivSticky), .FmaSs(Ss),
.XNaN(XNaN), .YNaN(YNaN), .ZNaN(ZNaN), .CvtResDenormUf(CvtResDenormUfE),
.XZero(XZero), .YZero(YZero), .ZZero(ZZero), .CvtShiftAmt(CvtShiftAmtE),
.XInf(XInf), .YInf(YInf), .ZInf(ZInf), .CvtCs(CvtResSgnE), .ToInt(WriteIntVal),
.XSNaN(XSNaN), .YSNaN(YSNaN), .ZSNaN(ZSNaN), .CvtLzcIn(CvtLzcInE), .IntZero,
.FmaKillProd(KillProd), .FmaZmSticky(ZmSticky), .FmaPe(Pe), .DivDone,
.FmaKillProd(KillProd), .FmaZmS(ZmSticky), .FmaPe(Pe), .DivDone,
.FmaSm(Sm), .FmaNegSum(NegSum), .FmaInvA(InvA), .FmaNCnt(NCnt), .DivEarlyTermShift(EarlyTermShift), .FmaAs(As), .FmaPs(Ps), .Fmt(ModFmt), .Frm(FrmVal),
.PostProcFlg(Flg), .PostProcRes(FpRes), .FCvtIntRes(IntRes));

View File

@ -114,7 +114,7 @@ logic [3:0] dummy;
"arch32f": if (`F_SUPPORTED) tests = arch32f;
"imperas32i": tests = imperas32i;
"imperas32f": if (`F_SUPPORTED) tests = imperas32f;
"wally32d": if (`D_SUPPORTED) tests = wally32d;
// "wally32d": if (`D_SUPPORTED) tests = wally32d;
"imperas32m": if (`M_SUPPORTED) tests = imperas32m;
"wally32a": if (`A_SUPPORTED) tests = wally32a;
"imperas32c": if (`C_SUPPORTED) tests = imperas32c;

View File

@ -108,7 +108,7 @@ class spike(pluginTemplate):
#TODO: The following assumes you are using the riscv-gcc toolchain. If
# not please change appropriately
self.compile_cmd = self.compile_cmd+' -mabi='+('lp64 ' if 64 in ispec['supported_xlen'] else 'ilp32 ')
self.compile_cmd = self.compile_cmd+' -mabi='+('lp64 ' if 64 in ispec['supported_xlen'] else ('ilp32e ' if "E" in ispec["ISA"] else 'ilp32 '))
def runTests(self, testList):
@ -158,6 +158,11 @@ class spike(pluginTemplate):
# echo statement.
if self.target_run:
# set up the simulation command. Template is for spike. Please change.
if ('NO_SAIL=True' in testentry['macros']):
# if the tests can't run on SAIL we copy the reference output to the src directory
reference_output = re.sub("/src/","/references/", re.sub(".S",".reference_output", test))
simcmd = 'cut -c-{0:g} {1} > {2}'.format(8, reference_output, sig_file) #use cut to remove comments when copying
else:
simcmd = self.dut_exe + ' --isa={0} +signature={1} +signature-granularity=4 {2}'.format(self.isa, sig_file, elf)
else:
simcmd = 'echo "NO RUN"'