This commit is contained in:
Daniel Torres 2022-07-18 13:30:50 -07:00
commit a190bc4471
56 changed files with 1103 additions and 3474 deletions

View File

@ -39,7 +39,7 @@
// MISA RISC-V configuration per specification
// ZYXWVUTSRQPONMLKJIHGFEDCBA
`define MISA 32'b0000000000101000001000100100101
`define MISA 32'b0000000000101000001000100101101
`define ZICSR_SUPPORTED 1
`define ZIFENCEI_SUPPORTED 1
`define COUNTERS 32

View File

@ -95,11 +95,25 @@
// largest length in IEU/FPU
`define CVTLEN ((`NF<`XLEN) ? (`XLEN) : (`NF))
`define DIVLEN ((`NF < `XLEN) ? (`XLEN) : (`NF))
`define LLEN ((`FLEN<`XLEN) ? (`XLEN) : (`FLEN))
`define LOGCVTLEN $unsigned($clog2(`CVTLEN+1))
`define NORMSHIFTSZ ((`DIVLEN+`NF+3) > (3*`NF+8) ? (`DIVLEN+`NF+3) : (3*`NF+9))
`define CORRSHIFTSZ ((`DIVLEN+`NF+3) > (3*`NF+8) ? (`DIVLEN+`NF+3) : (3*`NF+6))
`define NORMSHIFTSZ ((`QLEN+`NF+3) > (3*`NF+8) ? (`QLEN+`NF+1) : (3*`NF+9))
`define CORRSHIFTSZ ((`DIVRESLEN+`NF) > (3*`NF+8) ? (`DIVRESLEN+`NF) : (3*`NF+6))
// division constants
`define RADIX 32'h2
`define DIVCOPIES 32'h1
`define DIVLEN ((`NF < `XLEN) ? (`XLEN) : (`NF + 3))
`define EXTRAFRACBITS ((`NF<(`XLEN)) ? (`XLEN - `NF) : 3)
`define EXTRAINTBITS ((`NF<(`XLEN)) ? 0 : (`NF - `XLEN + 3))
`define DIVRESLEN ((`NF>`XLEN) ? `NF+4 : `XLEN)
`define LOGR ((`RADIX==2) ? 32'h1 : 32'h2)
// FPDUR = ceil(DIVRESLEN/(LOGR*DIVCOPIES))
// one interation is required for the integer bit for minimally redundent radix-4
`define FPDUR ((`DIVLEN+(`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES)+(`RADIX/4))
`define DURLEN ($clog2(`FPDUR+1))
`define QLEN (`FPDUR*`LOGR*`DIVCOPIES)
`define USE_SRAM 0

View File

@ -6,7 +6,7 @@
# fma - test fma
# sub - test subtraction
# div - test division
# sqrt - test square ro
# sqrt - test square root
# all - test everything
vsim -do "do testfloat.do rv64fp mul"
vsim -do "do testfloat.do rv64fp $1"

View File

@ -1,7 +1,9 @@
# cvtint - test integer conversion unit (fcvtint)
# cvtfp - test floating-point conversion unit (fcvtfp)
# cmp - test comparison unit's LT, LE, EQ opperations (fcmp)
# add - test addition
# fma - test fma
# sub - test subtraction
# div - test division
# sqrt - test square root

View File

@ -1,2 +1,2 @@
vsim -do "do wally-pipelined.do rv32gc arch32i"
vsim -do "do wally-pipelined.do rv32gc wally32periph"

View File

@ -1 +1 @@
vsim -c -do "do wally-pipelined-batch.do rv64gc imperas64f"
vsim -c -do "do wally-pipelined-batch.do rv32gc wally32d"

View File

@ -9,22 +9,31 @@ add wave -noupdate /testbenchfp/Res
add wave -noupdate /testbenchfp/Ans
add wave -noupdate /testbenchfp/DivStart
add wave -noupdate /testbenchfp/DivBusy
add wave -noupdate /testbenchfp/srtfsm/state
add wave -noupdate /testbenchfp/divsqrt/srtfsm/state
add wave -group {PostProc} -noupdate /testbenchfp/postprocess/*
add wave -group {PostProc} -noupdate /testbenchfp/postprocess/resultselect/*
add wave -group {PostProc} -noupdate /testbenchfp/postprocess/specialcase/*
add wave -group {PostProc} -noupdate /testbenchfp/postprocess/flags/*
add wave -group {PostProc} -noupdate /testbenchfp/postprocess/normshift/*
add wave -group {PostProc} -noupdate /testbenchfp/postprocess/lzacorrection/*
add wave -group {PostProc} -noupdate /testbenchfp/postprocess/shiftcorrection/*
add wave -group {PostProc} -noupdate /testbenchfp/postprocess/resultsign/*
add wave -group {PostProc} -noupdate /testbenchfp/postprocess/round/*
add wave -group {PostProc} -noupdate /testbenchfp/postprocess/fmashiftcalc/*
add wave -group {PostProc} -noupdate /testbenchfp/postprocess/divshiftcalc/*
add wave -group {PostProc} -noupdate /testbenchfp/postprocess/cvtshiftcalc/*
add wave -group {Divide} -noupdate /testbenchfp/srtradix4/*
add wave -group {Divide} -noupdate /testbenchfp/srtradix4/qsel4/*
add wave -group {Divide} -noupdate /testbenchfp/srtradix4/otfc4/*
add wave -group {Divide} -noupdate /testbenchfp/srtpreproc/*
add wave -group {Divide} -noupdate /testbenchfp/srtradix4/expcalc/*
add wave -group {Divide} -noupdate /testbenchfp/srtfsm/*
add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/WC
add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/WS
add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/WCA
add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/WSA
add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/Q
add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/QM
add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/QNext
add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/QMNext
add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/*
add wave -group {Divide} -group inter0 -noupdate /testbenchfp/divsqrt/srt/interations[0]/divinteration/*
# add wave -group {Divide} -group inter0 -noupdate /testbenchfp/divsqrt/srt/interations[0]/divinteration/otfc/otfc2/*
# add wave -group {Divide} -group inter0 -noupdate /testbenchfp/divsqrt/srt/interations[0]/divinteration/qsel/qsel2/*
add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srtpreproc/*
add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/expcalc/*
add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srtfsm/*
add wave -group {Testbench} -noupdate /testbenchfp/*
add wave -group {Testbench} -noupdate /testbenchfp/readvectors/*

View File

@ -42,10 +42,8 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGWPL, WORDLEN, MUXINTER
input logic [11:0] NextAdr, // virtual address, but we only use the lower 12 bits.
input logic [`PA_BITS-1:0] PAdr, // physical address
input logic [(`XLEN-1)/8:0] ByteMask,
input logic [`XLEN-1:0] FinalWriteData,
input logic [`FLEN-1:0] FWriteDataM,
input logic FLoad2,
input logic FpLoadStoreM,
input logic [WORDLEN-1:0] FinalWriteData,
input logic FStore2,
output logic CacheCommitted,
output logic CacheStall,
// to performance counters to cpu
@ -72,7 +70,7 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGWPL, WORDLEN, MUXINTER
localparam SETLEN = $clog2(NUMLINES);
localparam SETTOP = SETLEN+OFFSETLEN;
localparam TAGLEN = `PA_BITS - SETTOP;
localparam WORDSPERLINE = LINELEN/`XLEN;
localparam WORDSPERLINE = LINELEN/WORDLEN;
localparam FlushAdrThreshold = NUMLINES - 1;
logic SelAdr;
@ -123,7 +121,7 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGWPL, WORDLEN, MUXINTER
// Array of cache ways, along with victim, hit, dirty, and read merging logic
cacheway #(NUMLINES, LINELEN, TAGLEN, OFFSETLEN, SETLEN)
CacheWays[NUMWAYS-1:0](.clk, .reset, .RAdr, .PAdr, .CacheWriteData, .ByteMask, .FLoad2,
CacheWays[NUMWAYS-1:0](.clk, .reset, .RAdr, .PAdr, .CacheWriteData, .ByteMask, .FStore2,
.SetValidWay, .ClearValidWay, .SetDirtyWay, .ClearDirtyWay, .SelEvict, .VictimWay,
.FlushWay, .SelFlush, .ReadDataLineWay, .HitWay, .VictimDirtyWay, .VictimTagWay,
.Invalidate(InvalidateCacheM));
@ -162,12 +160,8 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGWPL, WORDLEN, MUXINTER
/////////////////////////////////////////////////////////////////////////////////////////////
// Write Path: Write data and address. Muxes between writes from bus and writes from CPU.
/////////////////////////////////////////////////////////////////////////////////////////////
if (`LLEN>`XLEN)
mux3 #(LINELEN) WriteDataMux(.d0({WORDSPERLINE{FinalWriteData}}),
.d1({WORDSPERLINE/2{FWriteDataM}}), .d2(CacheBusWriteData), .s({SetValid,FpLoadStoreM&~SetValid}), .y(CacheWriteData));
else
mux2 #(LINELEN) WriteDataMux(.d0({WORDSPERLINE{FinalWriteData}}),
.d1(CacheBusWriteData), .s(SetValid), .y(CacheWriteData));
mux2 #(LINELEN) WriteDataMux(.d0({WORDSPERLINE{FinalWriteData}}),
.d1(CacheBusWriteData), .s(SetValid), .y(CacheWriteData));
mux3 #(`PA_BITS) CacheBusAdrMux(.d0({PAdr[`PA_BITS-1:OFFSETLEN], {OFFSETLEN{1'b0}}}),
.d1({VictimTag, PAdr[SETTOP-1:OFFSETLEN], {OFFSETLEN{1'b0}}}),
.d2({VictimTag, FlushAdr, {OFFSETLEN{1'b0}}}),

View File

@ -38,7 +38,7 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26,
input logic [$clog2(NUMLINES)-1:0] RAdr,
input logic [`PA_BITS-1:0] PAdr,
input logic [LINELEN-1:0] CacheWriteData,
input logic FLoad2,
input logic FStore2,
input logic SetValidWay,
input logic ClearValidWay,
input logic SetDirtyWay,
@ -79,7 +79,7 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26,
logic [2**LOGWPL-1:0] MemPAdrDecodedtmp;
onehotdecoder #(LOGWPL) adrdec(
.bin(PAdr[LOGWPL+LOGXLENBYTES-1:LOGXLENBYTES]), .decoded(MemPAdrDecodedtmp));
assign MemPAdrDecoded = MemPAdrDecodedtmp|{MemPAdrDecodedtmp[2**LOGWPL-2:0]&{2**LOGWPL-1{FLoad2}}, 1'b0};
assign MemPAdrDecoded = MemPAdrDecodedtmp|{MemPAdrDecodedtmp[2**LOGWPL-2:0]&{2**LOGWPL-1{FStore2}}, 1'b0};
end else
onehotdecoder #(LOGWPL) adrdec(
.bin(PAdr[LOGWPL+LOGXLENBYTES-1:LOGXLENBYTES]), .decoded(MemPAdrDecoded));

View File

@ -1,10 +1,10 @@
`include "wally-config.vh"
module divshiftcalc(
input logic [`DIVLEN+2:0] Quot,
input logic [`QLEN-1-(`RADIX/4):0] DivQm,
input logic [`FMTBITS-1:0] Fmt,
input logic [$clog2(`DIVLEN/2+3)-1:0] DivEarlyTermShiftDiv2,
input logic [`NE+1:0] DivCalcExp,
input logic [`DURLEN-1:0] DivEarlyTermShift,
input logic [`NE+1:0] DivQe,
output logic [$clog2(`NORMSHIFTSZ)-1:0] DivShiftAmt,
output logic [`NORMSHIFTSZ-1:0] DivShiftIn,
output logic DivResDenorm,
@ -14,27 +14,28 @@ module divshiftcalc(
// is the result denromalized
// if the exponent is 1 then the result needs to be normalized then the result is denormalizes
assign DivResDenorm = DivCalcExp[`NE+1]|(~|DivCalcExp[`NE+1:0]);
assign DivResDenorm = DivQe[`NE+1]|(~|DivQe[`NE+1:0]);
// if the result is denormalized
// 00000000x.xxxxxx... Exp = DivCalcExp
// .00000000xxxxxxx... >> NF+1 Exp = DivCalcExp+NF+1
// .00xxxxxxxxxxxxx... << DivCalcExp+NF+1 Exp = +1
// 00000000x.xxxxxx... Exp = DivQe
// .00000000xxxxxxx... >> NF+1 Exp = DivQe+NF+1
// .00xxxxxxxxxxxxx... << DivQe+NF+1 Exp = +1
// .0000xxxxxxxxxxx... >> 1 Exp = 1
// Left shift amount = DivCalcExp+NF+1-1
assign DivDenormShift = (`NE+2)'(`NF)+DivCalcExp;
// Left shift amount = DivQe+NF+1-1
assign DivDenormShift = (`NE+2)'(`NF)+DivQe;
// if the result is normalized
// 00000000x.xxxxxx... Exp = DivCalcExp
// .00000000xxxxxxx... >> NF+1 Exp = DivCalcExp+NF+1
// 00000000.xxxxxxx... << NF Exp = DivCalcExp+1
// 00000000x.xxxxxx... << NF Exp = DivCalcExp (extra shift done afterwards)
// 00000000xx.xxxxx... << 1? Exp = DivCalcExp-1 (determined after)
// 00000000x.xxxxxx... Exp = DivQe
// .00000000xxxxxxx... >> NF+1 Exp = DivQe+NF+1
// 00000000.xxxxxxx... << NF Exp = DivQe+1
// 00000000x.xxxxxx... << NF Exp = DivQe (extra shift done afterwards)
// 00000000xx.xxxxx... << 1? Exp = DivQe-1 (determined after)
// inital Left shift amount = NF
// shift one more if the it's a minimally redundent radix 4 - one entire cycle needed for integer bit
assign NormShift = (`NE+2)'(`NF);
// if the shift amount is negitive then dont shift (keep sticky bit)
assign DivShiftAmt = (DivResDenorm ? DivDenormShift[$clog2(`NORMSHIFTSZ)-1:0]&{$clog2(`NORMSHIFTSZ){~DivDenormShift[`NE+1]}} : NormShift[$clog2(`NORMSHIFTSZ)-1:0])+{{$clog2(`NORMSHIFTSZ)-$clog2(`DIVLEN/2+3)-1{1'b0}}, DivEarlyTermShiftDiv2&{$clog2(`DIVLEN/2+3){~DivDenormShift[`NE+1]}}, 1'b0};
// need to multiply the early termination shift by LOGR*DIVCOPIES = left shift of log2(LOGR*DIVCOPIES)
assign DivShiftAmt = (DivResDenorm ? DivDenormShift[$clog2(`NORMSHIFTSZ)-1:0]&{$clog2(`NORMSHIFTSZ){~DivDenormShift[`NE+1]}} : NormShift[$clog2(`NORMSHIFTSZ)-1:0])+{{$clog2(`NORMSHIFTSZ)-`DURLEN-$clog2(`LOGR*`DIVCOPIES){1'b0}}, DivEarlyTermShift&{`DURLEN{~DivDenormShift[`NE+1]}}, {$clog2(`LOGR*`DIVCOPIES){1'b0}}};
// *** may be able to reduce shifter size
assign DivShiftIn = {{`NF{1'b0}}, Quot[`DIVLEN+2:0], {`NORMSHIFTSZ-`DIVLEN-3-`NF{1'b0}}};
assign DivShiftIn = {{`NF{1'b0}}, DivQm, {`NORMSHIFTSZ-`QLEN+(`RADIX/4)-`NF{1'b0}}};
endmodule

View File

@ -43,26 +43,27 @@ module divsqrt(
input logic StallM,
input logic StallE,
output logic DivStickyM,
output logic DivNegStickyM,
output logic DivBusy,
output logic DivDone,
output logic [`NE+1:0] DivCalcExpM,
output logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2M,
output logic [`DIVLEN+2:0] QuotM
output logic [`DURLEN-1:0] EarlyTermShiftM,
output logic [`QLEN-1-(`RADIX/4):0] QuotM
// output logic [`XLEN-1:0] RemM,
);
logic [`DIVLEN+3:0] WSN, WCN;
logic [`DIVLEN+3:0] NextWSN, NextWCN;
logic [`DIVLEN+3:0] WS, WC;
logic [`DIVLEN+3:0] StickyWSA;
logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt;
logic [`DIVLEN-1:0] X;
logic [`DIVLEN-1:0] Dpreproc;
logic [$clog2(`DIVLEN/2+3)-1:0] Dur;
logic [`DURLEN-1:0] Dur;
logic NegSticky;
srtpreproc srtpreproc(.XManE, .Dur, .YManE,.X,.Dpreproc, .XZeroCnt, .YZeroCnt);
srtpreproc srtpreproc(.Xm(XManE), .Dur, .Ym(YManE), .X,.Dpreproc, .XZeroCnt, .YZeroCnt);
srtfsm srtfsm(.reset, .WSN, .WCN, .WS, .WC, .Dur, .DivBusy, .clk, .DivStart(DivStartE),.StallE, .StallM, .DivDone, .XZeroE, .YZeroE, .DivStickyE(DivStickyM), .XNaNE, .YNaNE,
.XInfE, .YInfE, .DivNegStickyE(DivNegStickyM), .EarlyTermShiftDiv2E(EarlyTermShiftDiv2M));
srtradix4 srtradix4(.clk, .FmtE, .X,.Dpreproc, .XZeroCnt, .YZeroCnt, .WS, .WC, .WSN, .WCN, .DivStart(DivStartE), .XExpE, .YExpE, .XZeroE, .YZeroE,
.DivBusy, .Quot(QuotM), .Rem(), .DivCalcExpM);
srtfsm srtfsm(.reset, .NextWSN, .NextWCN, .WS, .WC, .Dur, .DivBusy, .clk, .DivStart(DivStartE),.StallE, .StallM, .DivDone, .XZeroE, .YZeroE, .DivStickyE(DivStickyM), .XNaNE, .YNaNE,
.StickyWSA, .XInfE, .YInfE, .NegSticky(NegSticky), .EarlyTermShiftE(EarlyTermShiftM));
srt srt(.clk, .FmtE, .X,.Dpreproc, .NegSticky, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN, .DivStart(DivStartE), .Xe(XExpE), .Ye(YExpE), .XZeroE, .YZeroE,
.StickyWSA, .DivBusy, .Quot(QuotM), .Rem(), .DivCalcExpM);
endmodule

View File

@ -68,7 +68,8 @@ module fcvt (
logic Signed; // is the opperation with a signed integer?
logic Int64; // is the integer 64 bits?
logic IntToFp; // is the opperation an int->fp conversion?
logic [`LOGCVTLEN-1:0] LeadingZeros; // output from the LZC
logic [`CVTLEN:0] LzcInFull; // input to the Leading Zero Counter (priority encoder)
logic [`LOGCVTLEN-1:0] LeadingZeros; // output from the LZC
// seperate OpCtrl for code readability
@ -102,10 +103,11 @@ module fcvt (
// choose the input to the leading zero counter i.e. priority encoder
// int -> fp : | positive integer | 00000... (if needed) |
// fp -> fp : | fraction | 00000... (if needed) |
assign LzcIn = IntToFp ? {TrimInt, {`CVTLEN-`XLEN{1'b0}}} :
{Xm[`NF-1:0], {`CVTLEN-`NF{1'b0}}};
assign LzcInFull = IntToFp ? {1'b0, TrimInt, {`CVTLEN-`XLEN{1'b0}}} :
{Xm, {`CVTLEN-`NF{1'b0}}};
assign LzcIn = LzcInFull[`CVTLEN-1:0];
lzc #(`CVTLEN) lzc (.num(LzcIn), .ZeroCnt(LeadingZeros));
lzc #(`CVTLEN+1) lzc (.num(LzcInFull), .ZeroCnt(LeadingZeros));
///////////////////////////////////////////////////////////////////////////
// shifter
@ -119,13 +121,13 @@ module fcvt (
// denormalized/undeflowed result fp -> fp:
// - shift left by NF-1+CalcExp - to shift till the biased expoenent is 0
// ??? -> fp:
// - shift left by LeadingZeros+1 - to shift till the result is normalized
// - shift left by LeadingZeros - to shift till the result is normalized
// - only shift fp -> fp if the intital value is denormalized
// - this is a problem because the input to the lzc was the fraction rather than the mantissa
// - rather have a few and-gates than an extra bit in the priority encoder??? *** is this true?
assign ShiftAmt = ToInt ? Ce[`LOGCVTLEN-1:0]&{`LOGCVTLEN{~Ce[`NE]}} :
ResDenormUf&~IntToFp ? (`LOGCVTLEN)'(`NF-1)+Ce[`LOGCVTLEN-1:0] :
(LeadingZeros+1)&{`LOGCVTLEN{XDenorm|IntToFp}};
(LeadingZeros);
///////////////////////////////////////////////////////////////////////////
// exp calculations
@ -197,14 +199,14 @@ module fcvt (
// | 0's | Mantissa | 0's if nessisary |
// | keep |
//
// - if the input is denormalized then we dont shift... so the "- (LeadingZeros+1)" is just leftovers from other options
// int -> fp : largest bias + XLEN - Largest bias + new bias - 1 - LeadingZeros = XLEN + NewBias - 1 - LeadingZeros
// - if the input is denormalized then we dont shift... so the "- LeadingZeros" is just leftovers from other options
// int -> fp : largest bias + XLEN - Largest bias + new bias - LeadingZeros = XLEN + NewBias - LeadingZeros
// Process:
// - shifted right by XLEN (XLEN)
// - shift left to normilize (-1-LeadingZeros)
// - shift left to normilize (-LeadingZeros)
// - newBias to make the biased exponent
// oldexp - biasold +newbias - (LeadingZeros+1)&(XDenorm|IntToFp)
assign Ce = {1'b0, OldExp} - (`NE+1)'(`BIAS) + {2'b0, NewBias} - {{`NE{1'b0}}, XDenorm|IntToFp} - {{`NE-`LOGCVTLEN+1{1'b0}}, (LeadingZeros&{`LOGCVTLEN{XDenorm|IntToFp}})};
// oldexp - biasold +newbias - LeadingZeros&(XDenorm|IntToFp)
assign Ce = {1'b0, OldExp} - (`NE+1)'(`BIAS) + {2'b0, NewBias} - {{`NE-`LOGCVTLEN+1{1'b0}}, (LeadingZeros&{`LOGCVTLEN{XDenorm|IntToFp}})};
// find if the result is dnormal or underflows
// - if Calculated expoenent is 0 or negitive (and the input/result is not exactaly 0)
// - can't underflow an integer to Fp conversion

View File

@ -34,24 +34,24 @@ module flags(
input logic XInf, YInf, ZInf, // inputs are infinity
input logic Plus1,
input logic InfIn, // is a Inf input being used
input logic NaNIn, // is a NaN input being used
input logic [`FMTBITS-1:0] OutFmt, // output format
input logic XZero, YZero, // inputs are zero
input logic XNaN, YNaN, // inputs are NaN
input logic NaNIn, // is a NaN input being used
input logic Sqrt, // Sqrt?
input logic ToInt, // convert to integer
input logic IntToFp, // convert integer to floating point
input logic Int64, // convert to 64 bit integer
input logic Signed, // convert to a signed integer
input logic [`FMTBITS-1:0] OutFmt, // output format
input logic [`NE:0] CvtCe, // the calculated expoent - Cvt
input logic CvtOp, // conversion opperation?
input logic DivOp, // conversion opperation?
input logic FmaOp, // Fma opperation?
input logic [`NE+1:0] FullResExp, // Re with bits to determine sign and overflow
input logic [`NE+1:0] Nexp, // exponent of the normalized sum
input logic [`NE+1:0] FullRe, // Re with bits to determine sign and overflow
input logic [`NE+1:0] Me, // exponent of the normalized sum
input logic [1:0] CvtNegResMsbs, // the negitive integer result's most significant bits
input logic FmaAs, FmaPs, // the product and modified Z signs
input logic R, UfLSBRes, S, UfPlus1, // bits used to determine rounding
input logic R, UfL, S, UfPlus1, // bits used to determine rounding
output logic DivByZero,
output logic IntInvalid, Invalid, Overflow, // flags used to select the res
output logic [4:0] PostProcFlg // flags
@ -73,30 +73,30 @@ module flags(
if (`FPSIZES == 1) begin
assign ResExpGteMax = &FullResExp[`NE-1:0] | FullResExp[`NE];
assign ShiftGtIntSz = (|FullResExp[`NE:7]|(FullResExp[6]&~Int64)) | ((|FullResExp[4:0]|(FullResExp[5]&Int64))&((FullResExp[5]&~Int64) | FullResExp[6]&Int64));
assign ResExpGteMax = &FullRe[`NE-1:0] | FullRe[`NE];
assign ShiftGtIntSz = (|FullRe[`NE:7]|(FullRe[6]&~Int64)) | ((|FullRe[4:0]|(FullRe[5]&Int64))&((FullRe[5]&~Int64) | FullRe[6]&Int64));
end else if (`FPSIZES == 2) begin
assign ResExpGteMax = OutFmt ? &FullResExp[`NE-1:0] | FullResExp[`NE] : &FullResExp[`NE1-1:0] | (|FullResExp[`NE:`NE1]);
assign ResExpGteMax = OutFmt ? &FullRe[`NE-1:0] | FullRe[`NE] : &FullRe[`NE1-1:0] | (|FullRe[`NE:`NE1]);
assign ShiftGtIntSz = (|FullResExp[`NE:7]|(FullResExp[6]&~Int64)) | ((|FullResExp[4:0]|(FullResExp[5]&Int64))&((FullResExp[5]&~Int64) | FullResExp[6]&Int64));
assign ShiftGtIntSz = (|FullRe[`NE:7]|(FullRe[6]&~Int64)) | ((|FullRe[4:0]|(FullRe[5]&Int64))&((FullRe[5]&~Int64) | FullRe[6]&Int64));
end else if (`FPSIZES == 3) begin
always_comb
case (OutFmt)
`FMT: ResExpGteMax = &FullResExp[`NE-1:0] | FullResExp[`NE];
`FMT1: ResExpGteMax = &FullResExp[`NE1-1:0] | (|FullResExp[`NE:`NE1]);
`FMT2: ResExpGteMax = &FullResExp[`NE2-1:0] | (|FullResExp[`NE:`NE2]);
`FMT: ResExpGteMax = &FullRe[`NE-1:0] | FullRe[`NE];
`FMT1: ResExpGteMax = &FullRe[`NE1-1:0] | (|FullRe[`NE:`NE1]);
`FMT2: ResExpGteMax = &FullRe[`NE2-1:0] | (|FullRe[`NE:`NE2]);
default: ResExpGteMax = 1'bx;
endcase
assign ShiftGtIntSz = (|FullResExp[`NE:7]|(FullResExp[6]&~Int64)) | ((|FullResExp[4:0]|(FullResExp[5]&Int64))&((FullResExp[5]&~Int64) | FullResExp[6]&Int64));
assign ShiftGtIntSz = (|FullRe[`NE:7]|(FullRe[6]&~Int64)) | ((|FullRe[4:0]|(FullRe[5]&Int64))&((FullRe[5]&~Int64) | FullRe[6]&Int64));
end else if (`FPSIZES == 4) begin
always_comb
case (OutFmt)
`Q_FMT: ResExpGteMax = &FullResExp[`Q_NE-1:0] | FullResExp[`Q_NE];
`D_FMT: ResExpGteMax = &FullResExp[`D_NE-1:0] | (|FullResExp[`Q_NE:`D_NE]);
`S_FMT: ResExpGteMax = &FullResExp[`S_NE-1:0] | (|FullResExp[`Q_NE:`S_NE]);
`H_FMT: ResExpGteMax = &FullResExp[`H_NE-1:0] | (|FullResExp[`Q_NE:`H_NE]);
`Q_FMT: ResExpGteMax = &FullRe[`Q_NE-1:0] | FullRe[`Q_NE];
`D_FMT: ResExpGteMax = &FullRe[`D_NE-1:0] | (|FullRe[`Q_NE:`D_NE]);
`S_FMT: ResExpGteMax = &FullRe[`S_NE-1:0] | (|FullRe[`Q_NE:`S_NE]);
`H_FMT: ResExpGteMax = &FullRe[`H_NE-1:0] | (|FullRe[`Q_NE:`H_NE]);
endcase
// a left shift of intlen+1 is still in range but any more than that is an overflow
// inital: | 64 0's | XLEN |
@ -110,14 +110,14 @@ module flags(
// - any of the bits after the most significan 1 is one
// - the most signifcant in 65 or 33 is still a one in the number and
// one of the later bits is one
assign ShiftGtIntSz = (|FullResExp[`Q_NE:7]|(FullResExp[6]&~Int64)) | ((|FullResExp[4:0]|(FullResExp[5]&Int64))&((FullResExp[5]&~Int64) | FullResExp[6]&Int64));
assign ShiftGtIntSz = (|FullRe[`Q_NE:7]|(FullRe[6]&~Int64)) | ((|FullRe[4:0]|(FullRe[5]&Int64))&((FullRe[5]&~Int64) | FullRe[6]&Int64));
end
// if the result is greater than or equal to the max exponent(not taking into account sign)
// | and the exponent isn't negitive
// | | if the input isnt infinity or NaN
// | | |
assign Overflow = ResExpGteMax & ~FullResExp[`NE+1]&~(InfIn|NaNIn|DivByZero);
assign Overflow = ResExpGteMax & ~FullRe[`NE+1]&~(InfIn|NaNIn|DivByZero);
// detecting tininess after rounding
// the exponent is negitive
@ -127,11 +127,11 @@ module flags(
// | | | | and if the result is not exact
// | | | | | and if the input isnt infinity or NaN
// | | | | | |
assign Underflow = ((FullResExp[`NE+1] | (FullResExp == 0) | ((FullResExp == 1) & (Nexp == 0) & ~(UfPlus1&UfLSBRes)))&(R|S))&~(InfIn|NaNIn|DivByZero);
assign Underflow = ((FullRe[`NE+1] | (FullRe == 0) | ((FullRe == 1) & (Me == 0) & ~(UfPlus1&UfL)))&(R|S))&~(InfIn|NaNIn|DivByZero);
// Set Inexact flag if the res is diffrent from what would be outputed given infinite precision
// - Don't set the underflow flag if an underflowed res isn't outputed
assign FpInexact = (S|Overflow|R|Underflow)&~(InfIn|NaNIn|DivByZero);
assign FpInexact = (S|Overflow|R)&~(InfIn|NaNIn|DivByZero);
// if the res is too small to be represented and not 0
// | and if the res is not invalid (outside the integer bounds)
@ -153,7 +153,7 @@ module flags(
// | | | | or the res rounds up out of bounds
// | | | | and the res didn't underflow
// | | | | |
assign IntInvalid = XNaN|XInf|(ShiftGtIntSz&~FullResExp[`NE+1])|((Xs&~Signed)&(~((CvtCe[`NE]|(~|CvtCe))&~Plus1)))|(CvtNegResMsbs[1]^CvtNegResMsbs[0]);
assign IntInvalid = XNaN|XInf|(ShiftGtIntSz&~FullRe[`NE+1])|((Xs&~Signed)&(~((CvtCe[`NE]|(~|CvtCe))&~Plus1)))|(CvtNegResMsbs[1]^CvtNegResMsbs[0]);
// |
// or when the positive res rounds up out of range
assign SigNaN = (XSNaN&~(IntToFp&CvtOp)) | (YSNaN&~CvtOp) | (ZSNaN&FmaOp);

View File

@ -51,7 +51,6 @@ module fma(
logic [3*`NF+5:0] Am; // addend aligned's mantissa for addition in U(NF+5.2NF+1)
logic [3*`NF+6:0] AmInv; // aligned addend's mantissa possibly inverted
logic [2*`NF+1:0] PmKilled; // the product's mantissa possibly killed
logic [3*`NF+6:0] PreSum, NegPreSum; // positive and negitve versions of the sum
///////////////////////////////////////////////////////////////////////////////
// Calculate the product
// - When multipliying two fp numbers, add the exponents
@ -70,20 +69,21 @@ module fma(
///////////////////////////////////////////////////////////////////////////////
// Alignment shifter
///////////////////////////////////////////////////////////////////////////////
align align(.Ze, .Zm, .XZero, .YZero, .ZZero, .Xe, .Ye,
.Am, .ZmSticky, .KillProd);
// calculate the signs and take the opperation into account
sign sign(.FOpCtrl, .Xs, .Ys, .Zs, .Ps, .As);
align align(.Ze, .Zm, .XZero, .YZero, .ZZero, .Xe, .Ye,
.Am, .ZmSticky, .KillProd);
// ///////////////////////////////////////////////////////////////////////////////
// // Addition/LZA
// ///////////////////////////////////////////////////////////////////////////////
add add(.Am, .Pm, .Ps, .As, .KillProd, .AmInv, .PmKilled, .NegSum, .PreSum, .NegPreSum, .InvA, .XZero, .YZero, .Sm);
add add(.Am, .Pm, .Ps, .As, .KillProd, .ZmSticky, .AmInv, .PmKilled, .NegSum, .InvA, .Sm);
loa loa(.A(AmInv+{(3*`NF+6)'(0),InvA}), .P(PmKilled), .NCnt);
loa loa(.A(AmInv+{(3*`NF+6)'(0),InvA&~((ZmSticky&~KillProd))}), .P({PmKilled, 1'b0, InvA&Ps&ZmSticky&KillProd}), .NCnt);
endmodule
@ -172,7 +172,7 @@ module align(
// the 1'b0 before the added is because the product's mantissa has two bits before the binary point (xx.xxxxxxxxxx...)
assign ZmPreshifted = {Zm,(3*`NF+5)'(0)};
assign KillProd = ACnt[`NE+1]|XZero|YZero;
assign KillProd = (ACnt[`NE+1]&~ZZero)|XZero|YZero;
assign KillZ = $signed(ACnt)>$signed((`NE+2)'(3)*(`NE+2)'(`NF)+(`NE+2)'(5));
always_comb
@ -183,7 +183,7 @@ module align(
// | 54'b0 | 106'b(product) | 2'b0 |
// | addnend |
if (KillProd) begin
ZmShifted = ZmPreshifted;
ZmShifted = {(`NF+3)'(0), Zm, (2*`NF+2)'(0)};
ZmSticky = ~(XZero|YZero);
// If the addend is too small to effect the addition
@ -221,14 +221,14 @@ module add(
input logic [2*`NF+1:0] Pm, // the product's mantissa
input logic Ps, As,// the product sign and the alligend addeded's sign (Modified Z sign for other opperations)
input logic KillProd, // should the product be set to 0
input logic XZero, YZero, // is the input zero
input logic ZmSticky,
output logic [3*`NF+6:0] AmInv, // aligned addend possibly inverted
output logic [2*`NF+1:0] PmKilled, // the product's mantissa possibly killed
output logic NegSum, // was the sum negitive
output logic InvA, // do you invert the aligned addend
output logic [3*`NF+5:0] Sm, // the positive sum
output logic [3*`NF+6:0] PreSum, NegPreSum// possibly negitive sum
output logic [3*`NF+5:0] Sm // the positive sum
);
logic [3*`NF+6:0] PreSum, NegPreSum; // possibly negitive sum
///////////////////////////////////////////////////////////////////////////////
// Addition
@ -243,13 +243,14 @@ module add(
assign AmInv = InvA ? {1'b1, ~Am} : {1'b0, Am};
// Kill the product if the product is too small to effect the addition (determined in fma1.sv)
assign PmKilled = Pm&{2*`NF+2{~KillProd}};
// Do the addition
// - calculate a positive and negitive sum in parallel
assign PreSum = {{`NF+3{1'b0}}, PmKilled, 2'b0} + AmInv + {{3*`NF+6{1'b0}}, InvA};
assign NegPreSum = {1'b0, Am} + {{`NF+3{1'b1}}, ~PmKilled, 2'b0} + {(3*`NF+7)'(4)};
// Zsticky Psticky
// PreSum -1 = don't add 1 +1 = add 2
// NegPreSum +1 = add 2 -1 = don't add 1
// for NegPreSum the product is set to -1 whenever the product is killed, therefore add 1, 2 or 0
assign PreSum = {{`NF+3{1'b0}}, PmKilled, 1'b0, InvA&ZmSticky&KillProd} + AmInv + {{3*`NF+6{1'b0}}, InvA&~((ZmSticky&~KillProd))};
assign NegPreSum = {1'b0, Am} + {{`NF+3{1'b1}}, ~PmKilled, 2'b11} + {(3*`NF+5)'(0), ZmSticky&~KillProd, ~(ZmSticky)};
// Is the sum negitive
assign NegSum = PreSum[3*`NF+6];
@ -261,7 +262,7 @@ endmodule
module loa( // [Schmookler & Nowka, Leading zero anticipation and detection, IEEE Sym. Computer Arithmetic, 2001]
input logic [3*`NF+6:0] A, // addend
input logic [2*`NF+1:0] P, // product
input logic [2*`NF+3:0] P, // product
output logic [$clog2(3*`NF+7)-1:0] NCnt // normalization shift count for the positive result
);
@ -273,12 +274,9 @@ module loa( // [Schmookler & Nowka, Leading zero anticipation and detection, IEE
assign T[3*`NF+6:2*`NF+4] = A[3*`NF+6:2*`NF+4];
assign G[3*`NF+6:2*`NF+4] = 0;
assign Z[3*`NF+6:2*`NF+4] = ~A[3*`NF+6:2*`NF+4];
assign T[2*`NF+3:2] = A[2*`NF+3:2]^P;
assign G[2*`NF+3:2] = A[2*`NF+3:2]&P;
assign Z[2*`NF+3:2] = ~A[2*`NF+3:2]&~P;
assign T[1:0] = A[1:0];
assign G[1:0] = 0;
assign Z[1:0] = ~A[1:0];
assign T[2*`NF+3:0] = A[2*`NF+3:0]^P;
assign G[2*`NF+3:0] = A[2*`NF+3:0]&P;
assign Z[2*`NF+3:0] = ~A[2*`NF+3:0]&~P;
// Apply function to determine Leading pattern

View File

@ -35,9 +35,8 @@ module fmashiftcalc(
input logic [$clog2(3*`NF+7)-1:0] FmaNCnt, // normalization shift count
input logic [`FMTBITS-1:0] Fmt, // precision 1 = double 0 = single
input logic FmaKillProd, // is the product set to zero
input logic ZDenorm,
output logic [`NE+1:0] FmaConvNormSumExp, // exponent of the normalized sum not taking into account denormal or zero results
output logic FmaSmZero, // is the result denormalized - calculated before LZA corection
output logic [`NE+1:0] FmaNe, // exponent of the normalized sum not taking into account denormal or zero results
output logic FmaSZero, // is the result denormalized - calculated before LZA corection
output logic FmaPreResultDenorm, // is the result denormalized - calculated before LZA corection
output logic [$clog2(3*`NF+7)-1:0] FmaShiftAmt, // normalization shift count
output logic [3*`NF+8:0] FmaShiftIn // is the sum zero
@ -50,35 +49,36 @@ module fmashiftcalc(
///////////////////////////////////////////////////////////////////////////////
//*** insert bias-bias simplification in fcvt.sv/phone pictures
// Determine if the sum is zero
assign FmaSmZero = ~(|FmaSm);
assign FmaSZero = ~(|FmaSm);
// calculate the sum's exponent
assign NormSumExp = FmaKillProd ? {2'b0, Ze[`NE-1:1], Ze[0]&~ZDenorm} : FmaPe + -{{`NE+2-$unsigned($clog2(3*`NF+7)){1'b0}}, FmaNCnt} - 1 + (`NE+2)'(`NF+4);
// ProdExp - NormCnt - 1 + NF+4 = ProdExp + ~NormCnt + 1 - 1 + NF+4 = ProdExp + ~NormCnt + NF+4
assign NormSumExp = (FmaKillProd ? {2'b0, Ze} : FmaPe) + {{`NE+2-$unsigned($clog2(3*`NF+7)){1'b1}}, ~FmaNCnt} + (`NE+2)'(`NF+4);
//convert the sum's exponent into the proper percision
if (`FPSIZES == 1) begin
assign FmaConvNormSumExp = NormSumExp;
assign FmaNe = NormSumExp;
end else if (`FPSIZES == 2) begin
assign FmaConvNormSumExp = Fmt ? NormSumExp : (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS1))&{`NE+2{|NormSumExp}};
assign FmaNe = Fmt ? NormSumExp : (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS1))&{`NE+2{|NormSumExp}};
end else if (`FPSIZES == 3) begin
always_comb begin
case (Fmt)
`FMT: FmaConvNormSumExp = NormSumExp;
`FMT1: FmaConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS1))&{`NE+2{|NormSumExp}};
`FMT2: FmaConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS2))&{`NE+2{|NormSumExp}};
default: FmaConvNormSumExp = {`NE+2{1'bx}};
`FMT: FmaNe = NormSumExp;
`FMT1: FmaNe = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS1))&{`NE+2{|NormSumExp}};
`FMT2: FmaNe = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS2))&{`NE+2{|NormSumExp}};
default: FmaNe = {`NE+2{1'bx}};
endcase
end
end else if (`FPSIZES == 4) begin
always_comb begin
case (Fmt)
2'h3: FmaConvNormSumExp = NormSumExp;
2'h1: FmaConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`D_BIAS))&{`NE+2{|NormSumExp}};
2'h0: FmaConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`S_BIAS))&{`NE+2{|NormSumExp}};
2'h2: FmaConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`H_BIAS))&{`NE+2{|NormSumExp}};
2'h3: FmaNe = NormSumExp;
2'h1: FmaNe = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`D_BIAS))&{`NE+2{|NormSumExp}};
2'h0: FmaNe = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`S_BIAS))&{`NE+2{|NormSumExp}};
2'h2: FmaNe = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`H_BIAS))&{`NE+2{|NormSumExp}};
endcase
end
@ -90,7 +90,7 @@ module fmashiftcalc(
logic Sum0LEZ, Sum0GEFL;
assign Sum0LEZ = NormSumExp[`NE+1] | ~|NormSumExp;
assign Sum0GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF)-(`NE+2)'(2));
assign FmaPreResultDenorm = Sum0LEZ & Sum0GEFL & ~FmaSmZero;
assign FmaPreResultDenorm = Sum0LEZ & Sum0GEFL & ~FmaSZero;
end else if (`FPSIZES == 2) begin
logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL;
@ -98,7 +98,7 @@ module fmashiftcalc(
assign Sum0GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF)-(`NE+2)'(2));
assign Sum1LEZ = $signed(NormSumExp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1));
assign Sum1GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF1+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1)) | ~|NormSumExp;
assign FmaPreResultDenorm = (Fmt ? Sum0LEZ : Sum1LEZ) & (Fmt ? Sum0GEFL : Sum1GEFL) & ~FmaSmZero;
assign FmaPreResultDenorm = (Fmt ? Sum0LEZ : Sum1LEZ) & (Fmt ? Sum0GEFL : Sum1GEFL) & ~FmaSZero;
end else if (`FPSIZES == 3) begin
logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL, Sum2LEZ, Sum2GEFL;
@ -110,9 +110,9 @@ module fmashiftcalc(
assign Sum2GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF2+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`BIAS2)) | ~|NormSumExp;
always_comb begin
case (Fmt)
`FMT: FmaPreResultDenorm = Sum0LEZ & Sum0GEFL & ~FmaSmZero;
`FMT1: FmaPreResultDenorm = Sum1LEZ & Sum1GEFL & ~FmaSmZero;
`FMT2: FmaPreResultDenorm = Sum2LEZ & Sum2GEFL & ~FmaSmZero;
`FMT: FmaPreResultDenorm = Sum0LEZ & Sum0GEFL & ~FmaSZero;
`FMT1: FmaPreResultDenorm = Sum1LEZ & Sum1GEFL & ~FmaSZero;
`FMT2: FmaPreResultDenorm = Sum2LEZ & Sum2GEFL & ~FmaSZero;
default: FmaPreResultDenorm = 1'bx;
endcase
end
@ -129,10 +129,10 @@ module fmashiftcalc(
assign Sum3GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`H_NF+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`H_BIAS)) | ~|NormSumExp;
always_comb begin
case (Fmt)
2'h3: FmaPreResultDenorm = Sum0LEZ & Sum0GEFL & ~FmaSmZero;
2'h1: FmaPreResultDenorm = Sum1LEZ & Sum1GEFL & ~FmaSmZero;
2'h0: FmaPreResultDenorm = Sum2LEZ & Sum2GEFL & ~FmaSmZero;
2'h2: FmaPreResultDenorm = Sum3LEZ & Sum3GEFL & ~FmaSmZero;
2'h3: FmaPreResultDenorm = Sum0LEZ & Sum0GEFL & ~FmaSZero;
2'h1: FmaPreResultDenorm = Sum1LEZ & Sum1GEFL & ~FmaSZero;
2'h0: FmaPreResultDenorm = Sum2LEZ & Sum2GEFL & ~FmaSZero;
2'h2: FmaPreResultDenorm = Sum3LEZ & Sum3GEFL & ~FmaSZero;
endcase // *** remove checking to see if it's underflowed and only check for less than zero for denorm checking
end
@ -144,13 +144,13 @@ module fmashiftcalc(
// - if kill prod dont add to exp
// Determine if the result is denormal
// assign FmaPreResultDenorm = $signed(FmaConvNormSumExp)<=0 & ($signed(FmaConvNormSumExp)>=$signed(-FracLen)) & ~FmaSmZero;
// assign FmaPreResultDenorm = $signed(FmaNe)<=0 & ($signed(FmaNe)>=$signed(-FracLen)) & ~FmaSZero;
// Determine the shift needed for denormal results
// - if not denorm add 1 to shift out the leading 1
assign DenormShift = FmaPreResultDenorm&~FmaKillProd ? FmaConvNormSumExp[$clog2(3*`NF+7)-1:0] : 1;
assign DenormShift = FmaPreResultDenorm ? FmaNe[$clog2(3*`NF+7)-1:0] : 1;
// set and calculate the shift input and amount
// - shift once if killing a product and the result is denormalized
assign FmaShiftIn = {3'b0, FmaSm};
assign FmaShiftAmt = (FmaNCnt&{$clog2(3*`NF+7){~FmaKillProd}})+DenormShift;
assign FmaShiftAmt = FmaNCnt+DenormShift;
endmodule

View File

@ -42,7 +42,7 @@ module fpu (
input logic [1:0] STATUS_FS, // Is floating-point enabled?
output logic FRegWriteM, // FP register write enable
output logic FpLoadStoreM, // Fp load instruction?
output logic FLoad2,
output logic FStore2,
output logic FStallD, // Stall the decode stage
output logic FWriteIntE, // integer register write enables
output logic [`XLEN-1:0] FWriteDataE, // Data to be written to memory
@ -125,12 +125,11 @@ module fpu (
logic [`CVTLEN-1:0] CvtLzcInE, CvtLzcInM; // input to the Leading Zero Counter (priority encoder)
//divide signals
logic [`DIVLEN+2:0] QuotE, QuotM;
logic [`QLEN-1-(`RADIX/4):0] QuotM;
logic [`NE+1:0] DivCalcExpE, DivCalcExpM;
logic DivNegStickyE, DivNegStickyM;
logic DivStickyE, DivStickyM;
logic DivDoneM;
logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2E, EarlyTermShiftDiv2M;
logic [`DURLEN-1:0] EarlyTermShiftM;
// result and flag signals
logic [63:0] FDivResM, FDivResW; // divide/squareroot result
@ -288,8 +287,8 @@ module fpu (
// .FDivBusyE, .done(FDivSqrtDoneE), .AS_Result(FDivResM), .Flags(FDivFlgM));
divsqrt divsqrt(.clk, .reset, .FmtE, .XManE, .YManE, .XExpE, .YExpE,
.XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .DivStartE(FDivStartE),
.StallE, .StallM, .DivStickyM, .DivNegStickyM, .DivBusy(FDivBusyE), .DivCalcExpM, //***change divbusyE to M signal
.EarlyTermShiftDiv2M, .QuotM, .DivDone(DivDoneM));
.StallE, .StallM, .DivStickyM, .DivBusy(FDivBusyE), .DivCalcExpM, //***change divbusyE to M signal
.EarlyTermShiftM, .QuotM, .DivDone(DivDoneM));
// other FP execution units
fcmp fcmp (.FmtE, .FOpCtrlE, .XSgnE, .YSgnE, .XExpE, .YExpE, .XManE, .YManE,
.XZeroE, .YZeroE, .XNaNE, .YNaNE, .XSNaNE, .YSNaNE, .FSrcXE, .FSrcYE, .CmpNVE, .CmpFpResE, .CmpIntResE);
@ -308,8 +307,8 @@ module fpu (
assign FWriteDataE = FSrcYE[`XLEN-1:0];
end else begin
logic [`FLEN-1:0] FWriteDataE;
if(`FMTBITS == 2) assign FLoad2 = FmtM == `FMT;
else assign FLoad2 = FmtM;
if(`FMTBITS == 2) assign FStore2 = FmtM == `FMT;
else assign FStore2 = FmtM;
if (`FPSIZES==1) assign FWriteDataE = FSrcYE;
else if (`FPSIZES==2) assign FWriteDataE = FmtE ? FSrcYE : {2{FSrcYE[`LEN1-1:0]}};
@ -381,12 +380,12 @@ module fpu (
assign FpLoadStoreM = FResSelM[1];
postprocess postprocess(.Xs(XSgnM), .Ys(YSgnM), .Ze(ZExpM), .Xm(XManM), .Ym(YManM), .Zm(ZManM), .Frm(FrmM), .Fmt(FmtM), .FmaPe(ProdExpM), .DivEarlyTermShiftDiv2(EarlyTermShiftDiv2M),
.FmaZmSticky(AddendStickyM), .FmaKillProd(KillProdM), .XZero(XZeroM), .YZero(YZeroM), .ZZero(ZZeroM), .XInf(XInfM), .YInf(YInfM), .Quot(QuotM),
.ZInf(ZInfM), .XNaN(XNaNM), .YNaN(YNaNM), .ZNaN(ZNaNM), .XSNaN(XSNaNM), .YSNaN(YSNaNM), .ZSNaN(ZSNaNM), .FmaSm(SumM), .DivCalcExp(DivCalcExpM), .DivDone(DivDoneM),
.FmaNegSum(NegSumM), .FmaInvA(InvAM), .ZDenorm(ZDenormM), .FmaAs(ZSgnEffM), .FmaPs(PSgnM), .FOpCtrl(FOpCtrlM), .FmaNCnt(FmaNormCntM), .DivNegSticky(DivNegStickyM),
.CvtCe(CvtCalcExpM), .CvtResDenormUf(CvtResDenormUfM),.CvtShiftAmt(CvtShiftAmtM), .CvtCs(CvtResSgnM), .ToInt(FWriteIntM), .DivSticky(DivStickyM),
.CvtLzcIn(CvtLzcInM), .IntZero(IntZeroM), .PostProcSel(PostProcSelM), .W(PostProcResM), .PostProcFlg(PostProcFlgM), .FCvtIntRes(FCvtIntResM));
postprocess postprocess(.Xs(XSgnM), .Ys(YSgnM), .Ze(ZExpM), .Xm(XManM), .Ym(YManM), .Zm(ZManM), .Frm(FrmM), .Fmt(FmtM), .FmaPe(ProdExpM), .DivEarlyTermShift(EarlyTermShiftM),
.FmaZmS(AddendStickyM), .FmaKillProd(KillProdM), .XZero(XZeroM), .YZero(YZeroM), .ZZero(ZZeroM), .XInf(XInfM), .YInf(YInfM), .DivQm(QuotM),
.ZInf(ZInfM), .XNaN(XNaNM), .YNaN(YNaNM), .ZNaN(ZNaNM), .XSNaN(XSNaNM), .YSNaN(YSNaNM), .ZSNaN(ZSNaNM), .FmaSm(SumM), .DivQe(DivCalcExpM), .DivDone(DivDoneM),
.FmaNegSum(NegSumM), .FmaInvA(InvAM), .ZDenorm(ZDenormM), .FmaAs(ZSgnEffM), .FmaPs(PSgnM), .FOpCtrl(FOpCtrlM), .FmaNCnt(FmaNormCntM),
.CvtCe(CvtCalcExpM), .CvtResDenormUf(CvtResDenormUfM),.CvtShiftAmt(CvtShiftAmtM), .CvtCs(CvtResSgnM), .ToInt(FWriteIntM), .DivS(DivStickyM),
.CvtLzcIn(CvtLzcInM), .IntZero(IntZeroM), .PostProcSel(PostProcSelM), .PostProcRes(PostProcResM), .PostProcFlg(PostProcFlgM), .FCvtIntRes(FCvtIntResM));
// FPU flag selection - to privileged
mux2 #(5) FPUFlgMux ({PreNVM&~FResSelM[1], 4'b0}, PostProcFlgM, ~FResSelM[1]&FResSelM[0], SetFflagsM);

112
pipelined/src/fpu/otfc.sv Normal file
View File

@ -0,0 +1,112 @@
///////////////////////////////////////////
// otfc.sv
//
// Written: me@KatherineParry.com, cturek@hmc.edu
// Modified:7/14/2022
//
// Purpose: On the fly conversion
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// MIT LICENSE
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
// OR OTHER DEALINGS IN THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module otfc2 (
input logic qp, qz,
input logic [`QLEN-1:0] Q, QM,
output logic [`QLEN-1:0] QNext, QMNext
);
// The on-the-fly converter transfers the quotient
// bits to the quotient as they come.
// Use this otfc for division only.
logic [`QLEN-2:0] QR, QMR;
assign QR = Q[`QLEN-2:0];
assign QMR = QM[`QLEN-2:0]; // Shifted Q and QM
always_comb begin
if (qp) begin
QNext = {QR, 1'b1};
QMNext = {QR, 1'b0};
end else if (qz) begin
QNext = {QR, 1'b0};
QMNext = {QMR, 1'b1};
end else begin // If qp and qz are not true, then qn is
QNext = {QMR, 1'b1};
QMNext = {QMR, 1'b0};
end
end
endmodule
module otfc4 (
input logic [3:0] q,
input logic [`QLEN-1:0] Q, QM,
output logic [`QLEN-1:0] QNext, QMNext
);
// The on-the-fly converter transfers the quotient
// bits to the quotient as they come.
//
// This code follows the psuedocode presented in the
// floating point chapter of the book. Right now,
// it is written for Radix-4 division.
//
// QM is Q-1. It allows us to write negative bits
// without using a costly CPA.
// QR and QMR are the shifted versions of Q and QM.
// They are treated as [N-1:r] size signals, and
// discard the r most significant bits of Q and QM.
logic [`QLEN-3:0] QR, QMR;
// shift Q (quotent) and QM (quotent-1)
// if q = 2 Q = {Q, 10} QM = {Q, 01}
// else if q = 1 Q = {Q, 01} QM = {Q, 00}
// else if q = 0 Q = {Q, 00} QM = {QM, 11}
// else if q = -1 Q = {QM, 11} QM = {QM, 10}
// else if q = -2 Q = {QM, 10} QM = {QM, 01}
assign QR = Q[`QLEN-3:0];
assign QMR = QM[`QLEN-3:0]; // Shifted Q and QM
always_comb begin
if (q[3]) begin // +2
QNext = {QR, 2'b10};
QMNext = {QR, 2'b01};
end else if (q[2]) begin // +1
QNext = {QR, 2'b01};
QMNext = {QR, 2'b00};
end else if (q[1]) begin // -1
QNext = {QMR, 2'b11};
QMNext = {QMR, 2'b10};
end else if (q[0]) begin // -2
QNext = {QMR, 2'b10};
QMNext = {QMR, 2'b01};
end else begin // 0
QNext = {QR, 2'b00};
QMNext = {QMR, 2'b11};
end
end
// Final Quoteint is in the range [.5, 2)
endmodule

View File

@ -29,7 +29,7 @@
`include "wally-config.vh"
module postprocess(
module postprocess (
// general signals
input logic Xs, Ys, // input signs
input logic [`NE-1:0] Ze, // input exponents
@ -48,18 +48,17 @@ module postprocess(
input logic FmaPs, // the product's sign
input logic [`NE+1:0] FmaPe, // Product exponent
input logic [3*`NF+5:0] FmaSm, // the positive sum
input logic FmaZmSticky, // sticky bit that is calculated during alignment
input logic FmaZmS, // sticky bit that is calculated during alignment
input logic FmaKillProd, // set the product to zero before addition if the product is too small to matter
input logic FmaNegSum, // was the sum negitive
input logic FmaInvA, // do you invert Z
input logic [$clog2(3*`NF+7)-1:0] FmaNCnt, // the normalization shift count
//divide signals
input logic [$clog2(`DIVLEN/2+3)-1:0] DivEarlyTermShiftDiv2,
input logic DivSticky,
input logic DivNegSticky,
input logic [`DURLEN-1:0] DivEarlyTermShift,
input logic DivS,
input logic DivDone,
input logic [`NE+1:0] DivCalcExp,
input logic [`DIVLEN+2:0] Quot,
input logic [`NE+1:0] DivQe,
input logic [`QLEN-1-(`RADIX/4):0] DivQm,
// conversion signals
input logic CvtCs, // the result's sign
input logic [`NE:0] CvtCe, // the calculated expoent
@ -69,7 +68,7 @@ module postprocess(
input logic [`CVTLEN-1:0] CvtLzcIn, // input to the Leading Zero Counter (priority encoder)
input logic IntZero, // is the input zero
// final results
output logic [`FLEN-1:0] W, // FMA final result
output logic [`FLEN-1:0] PostProcRes, // FMA final result
output logic [4:0] PostProcFlg,
output logic [`XLEN-1:0] FCvtIntRes // the int conversion result
);
@ -78,32 +77,31 @@ module postprocess(
logic Ws;
logic [`NF-1:0] Rf; // Result fraction
logic [`NE-1:0] Re; // Result exponent
logic Nsgn;
logic [`NE+1:0] Nexp;
logic [`CORRSHIFTSZ-1:0] Nfrac; // corectly shifted fraction
logic [`NE+1:0] FullResExp; // Re with bits to determine sign and overflow
logic Ms;
logic [`NE+1:0] Me;
logic [`CORRSHIFTSZ-1:0] Mf; // corectly shifted fraction
logic [`NE+1:0] FullRe; // Re with bits to determine sign and overflow
logic S; // S bit
logic UfPlus1; // do you add one (for determining underflow flag)
logic R; // bits needed to determine rounding
logic [`FLEN:0] RoundAdd; // how much to add to the result
logic [$clog2(`NORMSHIFTSZ)-1:0] ShiftAmt; // normalization shift count
logic [`NORMSHIFTSZ-1:0] ShiftIn; // is the sum zero
logic [`NORMSHIFTSZ-1:0] Shifted; // the shifted result
logic Plus1; // add one to the final result?
logic IntInvalid, Overflow, Invalid; // flags
logic UfLSBRes;
logic UfL;
logic [`FMTBITS-1:0] OutFmt;
// fma signals
logic [`NE+1:0] FmaSe; // exponent of the normalized sum
logic FmaSmZero; // is the sum zero
logic FmaSZero; // is the sum zero
logic [3*`NF+8:0] FmaShiftIn; // shift input
logic [`NE+1:0] FmaConvNormSumExp; // exponent of the normalized sum not taking into account denormal or zero results
logic [`NE+1:0] FmaNe; // exponent of the normalized sum not taking into account denormal or zero results
logic FmaPreResultDenorm; // is the result denormalized - calculated before LZA corection
logic [$clog2(3*`NF+7)-1:0] FmaShiftAmt; // normalization shift count
// division singals
logic [$clog2(`NORMSHIFTSZ)-1:0] DivShiftAmt;
logic [`NORMSHIFTSZ-1:0] DivShiftIn;
logic [`NE+1:0] DivCorrExp;
logic [`NE+1:0] Qe;
logic DivByZero;
logic DivResDenorm;
logic [`NE+1:0] DivDenormShift;
@ -152,9 +150,9 @@ module postprocess(
cvtshiftcalc cvtshiftcalc(.ToInt, .CvtCe, .CvtResDenormUf, .Xm, .CvtLzcIn,
.XZero, .IntToFp, .OutFmt, .CvtResUf, .CvtShiftIn);
fmashiftcalc fmashiftcalc(.FmaSm, .Ze, .FmaPe, .FmaNCnt, .Fmt, .FmaKillProd, .FmaConvNormSumExp,
.ZDenorm, .FmaSmZero, .FmaPreResultDenorm, .FmaShiftAmt, .FmaShiftIn);
divshiftcalc divshiftcalc(.Fmt, .DivCalcExp, .Quot, .DivEarlyTermShiftDiv2, .DivResDenorm, .DivDenormShift, .DivShiftAmt, .DivShiftIn);
fmashiftcalc fmashiftcalc(.FmaSm, .Ze, .FmaPe, .FmaNCnt, .Fmt, .FmaKillProd, .FmaNe,
.FmaSZero, .FmaPreResultDenorm, .FmaShiftAmt, .FmaShiftIn);
divshiftcalc divshiftcalc(.Fmt, .DivQe, .DivQm, .DivEarlyTermShift, .DivResDenorm, .DivDenormShift, .DivShiftAmt, .DivShiftIn);
always_comb
case(PostProcSel)
@ -183,9 +181,9 @@ module postprocess(
normshift normshift (.ShiftIn, .ShiftAmt, .Shifted);
lzacorrection lzacorrection(.FmaOp, .FmaKillProd, .FmaPreResultDenorm, .FmaConvNormSumExp,
.DivResDenorm, .DivDenormShift, .DivOp, .DivCalcExp,
.DivCorrExp, .FmaSmZero, .Shifted, .FmaSe, .Nfrac);
shiftcorrection shiftcorrection(.FmaOp, .FmaPreResultDenorm, .FmaNe,
.DivResDenorm, .DivDenormShift, .DivOp, .DivQe,
.Qe, .FmaSZero, .Shifted, .FmaSe, .Mf);
///////////////////////////////////////////////////////////////////////////////
// Rounding
@ -199,19 +197,19 @@ module postprocess(
roundsign roundsign(.FmaPs, .FmaAs, .FmaInvA, .FmaOp, .DivOp, .CvtOp, .FmaNegSum,
.Xs, .Ys, .CvtCs, .Nsgn);
.Xs, .Ys, .CvtCs, .Ms);
round round(.OutFmt, .Frm, .S, .FmaZmSticky, .ZZero, .Plus1, .PostProcSel, .CvtCe, .DivCorrExp,
.FmaInvA, .Nsgn, .FmaSe, .FmaOp, .CvtOp, .CvtResDenormUf, .Nfrac, .ToInt, .CvtResUf,
.DivSticky, .DivNegSticky, .DivDone,
.DivOp, .UfPlus1, .FullResExp, .Rf, .Re, .R, .RoundAdd, .UfLSBRes, .Nexp);
round round(.OutFmt, .Frm, .S, .FmaZmS, .Plus1, .PostProcSel, .CvtCe, .Qe,
.Ms, .FmaSe, .FmaOp, .CvtOp, .CvtResDenormUf, .Mf, .ToInt, .CvtResUf,
.DivS, .DivDone,
.DivOp, .UfPlus1, .FullRe, .Rf, .Re, .R, .UfL, .Me);
///////////////////////////////////////////////////////////////////////////////
// Sign calculation
///////////////////////////////////////////////////////////////////////////////
resultsign resultsign(.Frm, .FmaPs, .FmaAs, .FmaSe, .R, .S,
.FmaOp, .ZInf, .InfIn, .FmaSmZero, .Mult, .Nsgn, .Ws);
.FmaOp, .ZInf, .InfIn, .FmaSZero, .Mult, .Ms, .Ws);
///////////////////////////////////////////////////////////////////////////////
// Flags
@ -220,18 +218,18 @@ module postprocess(
flags flags(.XSNaN, .YSNaN, .ZSNaN, .XInf, .YInf, .ZInf, .InfIn, .XZero, .YZero,
.Xs, .Sqrt, .ToInt, .IntToFp, .Int64, .Signed, .OutFmt, .CvtCe,
.XNaN, .YNaN, .NaNIn, .FmaAs, .FmaPs, .R, .IntInvalid, .DivByZero,
.UfLSBRes, .S, .UfPlus1, .CvtOp, .DivOp, .FmaOp, .FullResExp, .Plus1,
.Nexp, .CvtNegResMsbs, .Invalid, .Overflow, .PostProcFlg);
.UfL, .S, .UfPlus1, .CvtOp, .DivOp, .FmaOp, .FullRe, .Plus1,
.Me, .CvtNegResMsbs, .Invalid, .Overflow, .PostProcFlg);
///////////////////////////////////////////////////////////////////////////////
// Select the result
///////////////////////////////////////////////////////////////////////////////
negateintres negateintres(.Xs, .Shifted, .Signed, .Int64, .Plus1, .CvtNegResMsbs, .CvtNegRes);
resultselect resultselect(.Xs, .Xm, .Ym, .Zm, .XZero, .IntInvalid,
specialcase specialcase(.Xs, .Xm, .Ym, .Zm, .XZero, .IntInvalid,
.IntZero, .Frm, .OutFmt, .XNaN, .YNaN, .ZNaN, .CvtResUf,
.NaNIn, .IntToFp, .Int64, .Signed, .CvtOp, .FmaOp, .Plus1, .Invalid, .Overflow, .InfIn, .CvtNegRes,
.XInf, .YInf, .DivOp,
.DivByZero, .FullResExp, .CvtCe, .Ws, .Re, .Rf, .W, .FCvtIntRes);
.DivByZero, .FullRe, .CvtCe, .Ws, .Re, .Rf, .PostProcRes, .FCvtIntRes);
endmodule

135
pipelined/src/fpu/qsel.sv Normal file
View File

@ -0,0 +1,135 @@
///////////////////////////////////////////
// srt.sv
//
// Written: David_Harris@hmc.edu, me@KatherineParry.com, cturek@hmc.edu
// Modified:13 January 2022
//
// Purpose: Combined Divide and Square Root Floating Point and Integer Unit
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// MIT LICENSE
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
// OR OTHER DEALINGS IN THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module qsel2 ( // *** eventually just change to 4 bits
input logic [`DIVLEN+3:`DIVLEN] ps, pc,
output logic qp, qz//, qn
);
logic [`DIVLEN+3:`DIVLEN] p, g;
logic magnitude, sign, cout;
// The quotient selection logic is presented for simplicity, not
// for efficiency. You can probably optimize your logic to
// select the proper divisor with less delay.
// Quotient equations from EE371 lecture notes 13-20
assign p = ps ^ pc;
assign g = ps & pc;
assign magnitude = ~(&p[`DIVLEN+2:`DIVLEN]);
assign cout = g[`DIVLEN+2] | (p[`DIVLEN+2] & (g[`DIVLEN+1] | p[`DIVLEN+1] & g[`DIVLEN]));
assign sign = p[`DIVLEN+3] ^ cout;
/* assign #1 magnitude = ~((ps[54]^pc[54]) & (ps[53]^pc[53]) &
(ps[52]^pc[52]));
assign #1 sign = (ps[55]^pc[55])^
(ps[54] & pc[54] | ((ps[54]^pc[54]) &
(ps[53]&pc[53] | ((ps[53]^pc[53]) &
(ps[52]&pc[52]))))); */
// Produce quotient = +1, 0, or -1
assign qp = magnitude & ~sign;
assign qz = ~magnitude;
// assign #1 qn = magnitude & sign;
endmodule
module qsel4 (
input logic [`DIVLEN+3:0] D,
input logic [`DIVLEN+3:0] WS, WC,
output logic [3:0] q
);
logic [6:0] Wmsbs;
logic [7:0] PreWmsbs;
logic [2:0] Dmsbs;
assign PreWmsbs = WC[`DIVLEN+3:`DIVLEN-4] + WS[`DIVLEN+3:`DIVLEN-4];
assign Wmsbs = PreWmsbs[7:1];
assign Dmsbs = D[`DIVLEN-1:`DIVLEN-3];
// D = 0001.xxx...
// Dmsbs = | |
// W = xxxx.xxx...
// Wmsbs = | |
logic [3:0] QSel4[1023:0];
always_comb begin
integer d, w, i, w2;
for(d=0; d<8; d++)
for(w=0; w<128; w++)begin
i = d*128+w;
w2 = w-128*(w>=64); // convert to two's complement
case(d)
0: if($signed(w2)>=$signed(12)) QSel4[i] = 4'b1000;
else if(w2>=4) QSel4[i] = 4'b0100;
else if(w2>=-4) QSel4[i] = 4'b0000;
else if(w2>=-13) QSel4[i] = 4'b0010;
else QSel4[i] = 4'b0001;
1: if(w2>=14) QSel4[i] = 4'b1000;
else if(w2>=4) QSel4[i] = 4'b0100;
else if(w2>=-6) QSel4[i] = 4'b0000;
else if(w2>=-15) QSel4[i] = 4'b0010;
else QSel4[i] = 4'b0001;
2: if(w2>=15) QSel4[i] = 4'b1000;
else if(w2>=4) QSel4[i] = 4'b0100;
else if(w2>=-6) QSel4[i] = 4'b0000;
else if(w2>=-16) QSel4[i] = 4'b0010;
else QSel4[i] = 4'b0001;
3: if(w2>=16) QSel4[i] = 4'b1000;
else if(w2>=4) QSel4[i] = 4'b0100;
else if(w2>=-6) QSel4[i] = 4'b0000;
else if(w2>=-18) QSel4[i] = 4'b0010;
else QSel4[i] = 4'b0001;
4: if(w2>=18) QSel4[i] = 4'b1000;
else if(w2>=6) QSel4[i] = 4'b0100;
else if(w2>=-8) QSel4[i] = 4'b0000;
else if(w2>=-20) QSel4[i] = 4'b0010;
else QSel4[i] = 4'b0001;
5: if(w2>=20) QSel4[i] = 4'b1000;
else if(w2>=6) QSel4[i] = 4'b0100;
else if(w2>=-8) QSel4[i] = 4'b0000;
else if(w2>=-20) QSel4[i] = 4'b0010;
else QSel4[i] = 4'b0001;
6: if(w2>=20) QSel4[i] = 4'b1000;
else if(w2>=8) QSel4[i] = 4'b0100;
else if(w2>=-8) QSel4[i] = 4'b0000;
else if(w2>=-22) QSel4[i] = 4'b0010;
else QSel4[i] = 4'b0001;
7: if(w2>=24) QSel4[i] = 4'b1000;
else if(w2>=8) QSel4[i] = 4'b0100;
else if(w2>=-8) QSel4[i] = 4'b0000;
else if(w2>=-24) QSel4[i] = 4'b0010;
else QSel4[i] = 4'b0001;
endcase
end
end
assign q = QSel4[{Dmsbs,Wmsbs}];
endmodule

View File

@ -35,32 +35,29 @@ module resultsign(
input logic InfIn,
input logic FmaOp,
input logic [`NE+1:0] FmaSe,
input logic FmaSmZero,
input logic FmaSZero,
input logic Mult,
input logic R,
input logic S,
input logic Nsgn,
input logic Ms,
output logic Ws
);
logic ZeroSgn;
logic InfSgn;
logic Underflow;
// logic ResultSgnTmp;
logic Zeros;
logic Infs;
// Determine the sign if the sum is zero
// if cancelation then 0 unless round to -infinity
// if multiply then Psgn
// otherwise psign
assign Underflow = FmaSe[`NE+1] | ((FmaSe == 0) & (R|S));
assign ZeroSgn = (FmaPs^FmaAs)&~Underflow&~Mult ? Frm[1:0] == 2'b10 : FmaPs;
assign Zeros = (FmaPs^FmaAs)&~(FmaSe[`NE+1] | ((FmaSe == 0) & (R|S)))&~Mult ? Frm[1:0] == 2'b10 : FmaPs;
// is the result negitive
// if p - z is the Sum negitive
// if -p + z is the Sum positive
// if -p - z then the Sum is negitive
assign InfSgn = ZInf ? FmaAs : FmaPs;
assign Ws = InfIn&FmaOp ? InfSgn : FmaSmZero&FmaOp ? ZeroSgn : Nsgn;
assign Infs = ZInf ? FmaAs : FmaPs;
assign Ws = InfIn&FmaOp ? Infs : FmaSZero&FmaOp ? Zeros : Ms;
endmodule

View File

@ -46,36 +46,32 @@ module round(
input logic [1:0] PostProcSel,
input logic CvtResDenormUf,
input logic CvtResUf,
input logic [`CORRSHIFTSZ-1:0] Nfrac,
input logic FmaZmSticky, // addend's sticky bit
input logic ZZero, // is Z zero
input logic FmaInvA, // invert Z
input logic [`CORRSHIFTSZ-1:0] Mf,
input logic FmaZmS, // addend's sticky bit
input logic [`NE+1:0] FmaSe, // exponent of the normalized sum
input logic Nsgn, // the result's sign
input logic Ms, // the result's sign
input logic [`NE:0] CvtCe, // the calculated expoent
input logic [`NE+1:0] DivCorrExp, // the calculated expoent
input logic DivSticky, // sticky bit
input logic DivNegSticky,
input logic [`NE+1:0] Qe, // the calculated expoent
input logic DivS, // sticky bit
output logic UfPlus1, // do you add or subtract on from the result
output logic [`NE+1:0] FullResExp, // Re with bits to determine sign and overflow
output logic [`NE+1:0] FullRe, // Re with bits to determine sign and overflow
output logic [`NF-1:0] Rf, // Result fraction
output logic [`NE-1:0] Re, // Result exponent
output logic S, // sticky bit
output logic [`NE+1:0] Nexp,
output logic [`NE+1:0] Me,
output logic Plus1,
output logic [`FLEN:0] RoundAdd, // how much to add to the result
output logic R, UfLSBRes // bits needed to calculate rounding
output logic R, UfL // bits needed to calculate rounding
);
logic LSBRes; // bit used for rounding - least significant bit of the normalized sum
logic SubBySmallNum, UfSubBySmallNum; // was there supposed to be a subtraction by a small number
logic UfCalcPlus1, CalcMinus1, Minus1; // do you add or subtract on from the result
logic NormSumSticky; // normalized sum's sticky bit
logic UfSticky; // sticky bit for underlow calculation
logic L; // bit used for rounding - least significant bit of the normalized sum
logic UfCalcPlus1;
logic NormS; // normalized sum's sticky bit
logic UfS; // sticky bit for underlow calculation
logic [`NF-1:0] RoundFrac;
logic FpRes, IntRes;
logic UfRound;
logic UfR;
logic FpRound, FpLSBRes, FpUfRound;
logic CalcPlus1, FpPlus1;
logic [`FLEN:0] RoundAdd; // how much to add to the result
///////////////////////////////////////////////////////////////////////////////
// Rounding
@ -118,61 +114,61 @@ module round(
// | NF |1|1|
// ^ ^ if floating point result
// ^ if not an FMA result
if (`XLENPOS == 1)assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) |
(|Nfrac[`CORRSHIFTSZ-`XLEN-2:0]);
if (`XLENPOS == 1)assign NormS = (|Mf[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) |
(|Mf[`CORRSHIFTSZ-`XLEN-2:0]);
// 2: NF > XLEN
if (`XLENPOS == 2)assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&IntRes) |
(|Nfrac[`CORRSHIFTSZ-`NF-2:0]);
if (`XLENPOS == 2)assign NormS = (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&IntRes) |
(|Mf[`CORRSHIFTSZ-`NF-2:0]);
end else if (`FPSIZES == 2) begin
// XLEN is either 64 or 32
// so half and single are always smaller then XLEN
// 1: XLEN > NF > NF1
if (`XLENPOS == 1) assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&FpRes&~OutFmt) |
(|Nfrac[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) |
(|Nfrac[`CORRSHIFTSZ-`XLEN-2:0]);
if (`XLENPOS == 1) assign NormS = (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&FpRes&~OutFmt) |
(|Mf[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) |
(|Mf[`CORRSHIFTSZ-`XLEN-2:0]);
// 2: NF > XLEN > NF1
if (`XLENPOS == 2) assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~OutFmt) |
(|Nfrac[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&(IntRes|~OutFmt)) |
(|Nfrac[`CORRSHIFTSZ-`NF-2:0]);
if (`XLENPOS == 2) assign NormS = (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~OutFmt) |
(|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&(IntRes|~OutFmt)) |
(|Mf[`CORRSHIFTSZ-`NF-2:0]);
// 3: NF > NF1 > XLEN
if (`XLENPOS == 3) assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF1-1]&IntRes) |
(|Nfrac[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&(~OutFmt|IntRes)) |
(|Nfrac[`CORRSHIFTSZ-`NF-2:0]);
if (`XLENPOS == 3) assign NormS = (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF1-1]&IntRes) |
(|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&(~OutFmt|IntRes)) |
(|Mf[`CORRSHIFTSZ-`NF-2:0]);
end else if (`FPSIZES == 3) begin
// 1: XLEN > NF > NF1
if (`XLENPOS == 1) assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`NF1-1]&FpRes&(OutFmt==`FMT1)) |
(|Nfrac[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&FpRes&~(OutFmt==`FMT)) |
(|Nfrac[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) |
(|Nfrac[`CORRSHIFTSZ-`XLEN-2:0]);
if (`XLENPOS == 1) assign NormS = (|Mf[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`NF1-1]&FpRes&(OutFmt==`FMT1)) |
(|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&FpRes&~(OutFmt==`FMT)) |
(|Mf[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) |
(|Mf[`CORRSHIFTSZ-`XLEN-2:0]);
// 2: NF > XLEN > NF1
if (`XLENPOS == 2) assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`NF1-1]&FpRes&(OutFmt==`FMT1)) |
(|Nfrac[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~(OutFmt==`FMT)) |
(|Nfrac[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&(IntRes|~(OutFmt==`FMT))) |
(|Nfrac[`CORRSHIFTSZ-`NF-2:0]);
if (`XLENPOS == 2) assign NormS = (|Mf[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`NF1-1]&FpRes&(OutFmt==`FMT1)) |
(|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~(OutFmt==`FMT)) |
(|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&(IntRes|~(OutFmt==`FMT))) |
(|Mf[`CORRSHIFTSZ-`NF-2:0]);
// 3: NF > NF1 > XLEN
if (`XLENPOS == 3) assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&(OutFmt==`FMT1)) |
(|Nfrac[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF1-1]&((OutFmt==`FMT1)|IntRes)) |
(|Nfrac[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&(~(OutFmt==`FMT)|IntRes)) |
(|Nfrac[`CORRSHIFTSZ-`NF-2:0]);
if (`XLENPOS == 3) assign NormS = (|Mf[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&(OutFmt==`FMT1)) |
(|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF1-1]&((OutFmt==`FMT1)|IntRes)) |
(|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&(~(OutFmt==`FMT)|IntRes)) |
(|Mf[`CORRSHIFTSZ-`NF-2:0]);
end else if (`FPSIZES == 4) begin
// Quad precision will always be greater than XLEN
// 2: NF > XLEN > NF1
if (`XLENPOS == 2) assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`H_NF-2:`CORRSHIFTSZ-`S_NF-1]&FpRes&(OutFmt==`H_FMT)) |
(|Nfrac[`CORRSHIFTSZ-`S_NF-2:`CORRSHIFTSZ-`D_NF-1]&FpRes&((OutFmt==`S_FMT)|(OutFmt==`H_FMT))) |
(|Nfrac[`CORRSHIFTSZ-`D_NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~(OutFmt==`Q_FMT)) |
(|Nfrac[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`Q_NF-1]&(~(OutFmt==`Q_FMT)|IntRes)) |
(|Nfrac[`CORRSHIFTSZ-`Q_NF-2:0]);
if (`XLENPOS == 2) assign NormS = (|Mf[`CORRSHIFTSZ-`H_NF-2:`CORRSHIFTSZ-`S_NF-1]&FpRes&(OutFmt==`H_FMT)) |
(|Mf[`CORRSHIFTSZ-`S_NF-2:`CORRSHIFTSZ-`D_NF-1]&FpRes&((OutFmt==`S_FMT)|(OutFmt==`H_FMT))) |
(|Mf[`CORRSHIFTSZ-`D_NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~(OutFmt==`Q_FMT)) |
(|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`Q_NF-1]&(~(OutFmt==`Q_FMT)|IntRes)) |
(|Mf[`CORRSHIFTSZ-`Q_NF-2:0]);
// 3: NF > NF1 > XLEN
// The extra XLEN bit will be ored later when caculating the final sticky bit - the ufplus1 not needed for integer
if (`XLENPOS == 3) assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`H_NF-2:`CORRSHIFTSZ-`S_NF-1]&FpRes&(OutFmt==`H_FMT)) |
(|Nfrac[`CORRSHIFTSZ-`S_NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&((OutFmt==`S_FMT)|(OutFmt==`H_FMT))) |
(|Nfrac[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`D_NF-1]&((OutFmt==`S_FMT)|(OutFmt==`H_FMT)|IntRes)) |
(|Nfrac[`CORRSHIFTSZ-`D_NF-2:`CORRSHIFTSZ-`Q_NF-1]&(~(OutFmt==`Q_FMT)|IntRes)) |
(|Nfrac[`CORRSHIFTSZ-`Q_NF-2:0]);
if (`XLENPOS == 3) assign NormS = (|Mf[`CORRSHIFTSZ-`H_NF-2:`CORRSHIFTSZ-`S_NF-1]&FpRes&(OutFmt==`H_FMT)) |
(|Mf[`CORRSHIFTSZ-`S_NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&((OutFmt==`S_FMT)|(OutFmt==`H_FMT))) |
(|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`D_NF-1]&((OutFmt==`S_FMT)|(OutFmt==`H_FMT)|IntRes)) |
(|Mf[`CORRSHIFTSZ-`D_NF-2:`CORRSHIFTSZ-`Q_NF-1]&(~(OutFmt==`Q_FMT)|IntRes)) |
(|Mf[`CORRSHIFTSZ-`Q_NF-2:0]);
end
@ -180,37 +176,37 @@ module round(
// only add the Addend sticky if doing an FMA opperation
// - the shifter shifts too far left when there's an underflow (shifting out all possible sticky bits)
assign UfSticky = FmaZmSticky&FmaOp | NormSumSticky | CvtResUf&CvtOp | FmaSe[`NE+1]&FmaOp | DivSticky&DivOp;
assign UfS = FmaZmS&FmaOp | NormS | CvtResUf&CvtOp | FmaSe[`NE+1]&FmaOp | DivS&DivOp;
// determine round and LSB of the rounded value
// - underflow round bit is used to determint the underflow flag
if (`FPSIZES == 1) begin
assign FpRound = Nfrac[`CORRSHIFTSZ-`NF-1];
assign FpLSBRes = Nfrac[`CORRSHIFTSZ-`NF];
assign FpUfRound = Nfrac[`CORRSHIFTSZ-`NF-2];
assign FpRound = Mf[`CORRSHIFTSZ-`NF-1];
assign FpLSBRes = Mf[`CORRSHIFTSZ-`NF];
assign FpUfRound = Mf[`CORRSHIFTSZ-`NF-2];
end else if (`FPSIZES == 2) begin
assign FpRound = OutFmt ? Nfrac[`CORRSHIFTSZ-`NF-1] : Nfrac[`CORRSHIFTSZ-`NF1-1];
assign FpLSBRes = OutFmt ? Nfrac[`CORRSHIFTSZ-`NF] : Nfrac[`CORRSHIFTSZ-`NF1];
assign FpUfRound = OutFmt ? Nfrac[`CORRSHIFTSZ-`NF-2] : Nfrac[`CORRSHIFTSZ-`NF1-2];
assign FpRound = OutFmt ? Mf[`CORRSHIFTSZ-`NF-1] : Mf[`CORRSHIFTSZ-`NF1-1];
assign FpLSBRes = OutFmt ? Mf[`CORRSHIFTSZ-`NF] : Mf[`CORRSHIFTSZ-`NF1];
assign FpUfRound = OutFmt ? Mf[`CORRSHIFTSZ-`NF-2] : Mf[`CORRSHIFTSZ-`NF1-2];
end else if (`FPSIZES == 3) begin
always_comb
case (OutFmt)
`FMT: begin
FpRound = Nfrac[`CORRSHIFTSZ-`NF-1];
FpLSBRes = Nfrac[`CORRSHIFTSZ-`NF];
FpUfRound = Nfrac[`CORRSHIFTSZ-`NF-2];
FpRound = Mf[`CORRSHIFTSZ-`NF-1];
FpLSBRes = Mf[`CORRSHIFTSZ-`NF];
FpUfRound = Mf[`CORRSHIFTSZ-`NF-2];
end
`FMT1: begin
FpRound = Nfrac[`CORRSHIFTSZ-`NF1-1];
FpLSBRes = Nfrac[`CORRSHIFTSZ-`NF1];
FpUfRound = Nfrac[`CORRSHIFTSZ-`NF1-2];
FpRound = Mf[`CORRSHIFTSZ-`NF1-1];
FpLSBRes = Mf[`CORRSHIFTSZ-`NF1];
FpUfRound = Mf[`CORRSHIFTSZ-`NF1-2];
end
`FMT2: begin
FpRound = Nfrac[`CORRSHIFTSZ-`NF2-1];
FpLSBRes = Nfrac[`CORRSHIFTSZ-`NF2];
FpUfRound = Nfrac[`CORRSHIFTSZ-`NF2-2];
FpRound = Mf[`CORRSHIFTSZ-`NF2-1];
FpLSBRes = Mf[`CORRSHIFTSZ-`NF2];
FpUfRound = Mf[`CORRSHIFTSZ-`NF2-2];
end
default: begin
FpRound = 1'bx;
@ -222,130 +218,97 @@ module round(
always_comb
case (OutFmt)
2'h3: begin
FpRound = Nfrac[`CORRSHIFTSZ-`Q_NF-1];
FpLSBRes = Nfrac[`CORRSHIFTSZ-`Q_NF];
FpUfRound = Nfrac[`CORRSHIFTSZ-`Q_NF-2];
FpRound = Mf[`CORRSHIFTSZ-`Q_NF-1];
FpLSBRes = Mf[`CORRSHIFTSZ-`Q_NF];
FpUfRound = Mf[`CORRSHIFTSZ-`Q_NF-2];
end
2'h1: begin
FpRound = Nfrac[`CORRSHIFTSZ-`D_NF-1];
FpLSBRes = Nfrac[`CORRSHIFTSZ-`D_NF];
FpUfRound = Nfrac[`CORRSHIFTSZ-`D_NF-2];
FpRound = Mf[`CORRSHIFTSZ-`D_NF-1];
FpLSBRes = Mf[`CORRSHIFTSZ-`D_NF];
FpUfRound = Mf[`CORRSHIFTSZ-`D_NF-2];
end
2'h0: begin
FpRound = Nfrac[`CORRSHIFTSZ-`S_NF-1];
FpLSBRes = Nfrac[`CORRSHIFTSZ-`S_NF];
FpUfRound = Nfrac[`CORRSHIFTSZ-`S_NF-2];
FpRound = Mf[`CORRSHIFTSZ-`S_NF-1];
FpLSBRes = Mf[`CORRSHIFTSZ-`S_NF];
FpUfRound = Mf[`CORRSHIFTSZ-`S_NF-2];
end
2'h2: begin
FpRound = Nfrac[`CORRSHIFTSZ-`H_NF-1];
FpLSBRes = Nfrac[`CORRSHIFTSZ-`H_NF];
FpUfRound = Nfrac[`CORRSHIFTSZ-`H_NF-2];
FpRound = Mf[`CORRSHIFTSZ-`H_NF-1];
FpLSBRes = Mf[`CORRSHIFTSZ-`H_NF];
FpUfRound = Mf[`CORRSHIFTSZ-`H_NF-2];
end
endcase
end
assign R = ToInt&CvtOp ? Nfrac[`CORRSHIFTSZ-`XLEN-1] : FpRound;
assign LSBRes = ToInt&CvtOp ? Nfrac[`CORRSHIFTSZ-`XLEN] : FpLSBRes;
assign UfRound = ToInt&CvtOp ? Nfrac[`CORRSHIFTSZ-`XLEN-2] : FpUfRound;
assign R = ToInt&CvtOp ? Mf[`CORRSHIFTSZ-`XLEN-1] : FpRound;
assign L = ToInt&CvtOp ? Mf[`CORRSHIFTSZ-`XLEN] : FpLSBRes;
assign UfR = ToInt&CvtOp ? Mf[`CORRSHIFTSZ-`XLEN-2] : FpUfRound;
// used to determine underflow flag
assign UfLSBRes = FpRound;
assign UfL = FpRound;
// determine sticky
assign S = UfSticky | UfRound;
// Deterimine if a small number was supposed to be subtrated
// - for FMA or if division has a negitive sticky bit
assign SubBySmallNum = ((FmaZmSticky&FmaOp&~ZZero&FmaInvA) | (DivNegSticky&DivOp)) & ~(NormSumSticky|UfRound);
assign UfSubBySmallNum = ((FmaZmSticky&FmaOp&~ZZero&FmaInvA) | (DivNegSticky&DivOp)) & ~NormSumSticky;
assign S = UfS | UfR;
always_comb begin
// Determine if you add 1
case (Frm)
3'b000: CalcPlus1 = R & ((S| LSBRes)&~SubBySmallNum);//round to nearest even
3'b000: CalcPlus1 = R & (S| L);//round to nearest even
3'b001: CalcPlus1 = 0;//round to zero
3'b010: CalcPlus1 = Nsgn & ~(SubBySmallNum & ~R);//round down
3'b011: CalcPlus1 = ~Nsgn & ~(SubBySmallNum & ~R);//round up
3'b100: CalcPlus1 = R & ~SubBySmallNum;//round to nearest max magnitude
3'b010: CalcPlus1 = Ms;//round down
3'b011: CalcPlus1 = ~Ms;//round up
3'b100: CalcPlus1 = R;//round to nearest max magnitude
default: CalcPlus1 = 1'bx;
endcase
// Determine if you add 1 (for underflow flag)
case (Frm)
3'b000: UfCalcPlus1 = UfRound & ((UfSticky| UfLSBRes)&~UfSubBySmallNum);//round to nearest even
3'b000: UfCalcPlus1 = UfR & (UfS| UfL);//round to nearest even
3'b001: UfCalcPlus1 = 0;//round to zero
3'b010: UfCalcPlus1 = Nsgn & ~(UfSubBySmallNum & ~UfRound);//round down
3'b011: UfCalcPlus1 = ~Nsgn & ~(UfSubBySmallNum & ~UfRound);//round up
3'b100: UfCalcPlus1 = UfRound & ~UfSubBySmallNum;//round to nearest max magnitude
3'b010: UfCalcPlus1 = Ms;//round down
3'b011: UfCalcPlus1 = ~Ms;//round up
3'b100: UfCalcPlus1 = UfR;//round to nearest max magnitude
default: UfCalcPlus1 = 1'bx;
endcase
// Determine if you subtract 1
case (Frm)
3'b000: CalcMinus1 = 0;//round to nearest even
3'b001: CalcMinus1 = SubBySmallNum & ~R;//round to zero
3'b010: CalcMinus1 = ~Nsgn & ~R & SubBySmallNum;//round down
3'b011: CalcMinus1 = Nsgn & ~R & SubBySmallNum;//round up
3'b100: CalcMinus1 = 0;//round to nearest max magnitude
default: CalcMinus1 = 1'bx;
endcase
end
// If an answer is exact don't round
assign Plus1 = CalcPlus1 & (S | R);
assign FpPlus1 = Plus1&~(ToInt&CvtOp);
assign UfPlus1 = UfCalcPlus1 & S; // UfRound is part of sticky
assign Minus1 = CalcMinus1 & (S | R);
assign UfPlus1 = UfCalcPlus1 & S; // UfR is part of sticky
// Compute rounded result
if (`FPSIZES == 1) begin
assign RoundAdd = Minus1 ? {`FLEN+1{1'b1}} : {{`FLEN{1'b0}}, FpPlus1};
assign RoundAdd = {{`FLEN{1'b0}}, FpPlus1};
end else if (`FPSIZES == 2) begin
// \/FLEN+1
// | NE+2 | NF |
// '-NE+2-^----NF1----^
// `FLEN+1-`NE-2-`NF1 = FLEN-1-NE-NF1
assign RoundAdd = OutFmt ? Minus1 ? {`FLEN+1{1'b1}} : {{{`FLEN{1'b0}}}, FpPlus1} :
Minus1 ? {{`NE+2+`NF1{1'b1}}, (`FLEN-1-`NE-`NF1)'(0)} : {(`NE+1+`NF1)'(0), FpPlus1, (`FLEN-1-`NE-`NF1)'(0)};
assign RoundAdd = {(`NE+1+`NF1)'(0), FpPlus1&~OutFmt, (`NF-`NF1-1)'(0), FpPlus1&OutFmt};
end else if (`FPSIZES == 3) begin
always_comb begin
case (OutFmt)
`FMT: RoundAdd = Minus1 ? {`FLEN+1{1'b1}} : {{{`FLEN{1'b0}}}, FpPlus1};
`FMT1: RoundAdd = Minus1 ? {{`NE+2+`NF1{1'b1}}, (`FLEN-1-`NE-`NF1)'(0)} : {(`NE+1+`NF1)'(0), FpPlus1, (`FLEN-1-`NE-`NF1)'(0)};
`FMT2: RoundAdd = Minus1 ? {{`NE+2+`NF2{1'b1}}, (`FLEN-1-`NE-`NF2)'(0)} : {(`NE+1+`NF2)'(0), FpPlus1, (`FLEN-1-`NE-`NF2)'(0)};
default: RoundAdd = (`FLEN+1)'(0);
endcase
end
assign RoundAdd = {(`NE+1+`NF2)'(0), FpPlus1&(OutFmt==`FMT2), (`NF1-`NF2-1)'(0), FpPlus1&(OutFmt==`FMT1), (`NF-`NF1-1)'(0), FpPlus1&(OutFmt==`FMT)};
end else if (`FPSIZES == 4) begin
always_comb begin
case (OutFmt)
2'h3: RoundAdd = Minus1 ? {`FLEN+1{1'b1}} : {{{`FLEN{1'b0}}}, FpPlus1};
2'h1: RoundAdd = Minus1 ? {{`NE+2+`D_NF{1'b1}}, (`FLEN-1-`NE-`D_NF)'(0)} : {(`NE+1+`D_NF)'(0), FpPlus1, (`FLEN-1-`NE-`D_NF)'(0)};
2'h0: RoundAdd = Minus1 ? {{`NE+2+`S_NF{1'b1}}, (`FLEN-1-`NE-`S_NF)'(0)} : {(`NE+1+`S_NF)'(0), FpPlus1, (`FLEN-1-`NE-`S_NF)'(0)};
2'h2: RoundAdd = Minus1 ? {{`NE+2+`H_NF{1'b1}}, (`FLEN-1-`NE-`H_NF)'(0)} : {(`NE+1+`H_NF)'(0), FpPlus1, (`FLEN-1-`NE-`H_NF)'(0)};
endcase
end
end
end else if (`FPSIZES == 4)
assign RoundAdd = {(`Q_NE+1+`H_NF)'(0), FpPlus1&(OutFmt==`H_FMT), (`S_NF-`H_NF-1)'(0), FpPlus1&(OutFmt==`S_FMT), (`D_NF-`S_NF-1)'(0), FpPlus1&(OutFmt==`D_FMT), (`Q_NF-`D_NF-1)'(0), FpPlus1&(OutFmt==`Q_FMT)};
// determine the result to be roundned
assign RoundFrac = Nfrac[`CORRSHIFTSZ-1:`CORRSHIFTSZ-`NF];
assign RoundFrac = Mf[`CORRSHIFTSZ-1:`CORRSHIFTSZ-`NF];
always_comb
case(PostProcSel)
2'b10: Nexp = FmaSe; // fma
2'b00: Nexp = {CvtCe[`NE], CvtCe}&{`NE+2{~CvtResDenormUf|CvtResUf}}; // cvt
2'b01: Nexp = DivDone ? DivCorrExp : '0; // divide
default: Nexp = '0;
2'b10: Me = FmaSe; // fma
2'b00: Me = {CvtCe[`NE], CvtCe}&{`NE+2{~CvtResDenormUf|CvtResUf}}; // cvt
2'b01: Me = DivDone ? Qe : '0; // divide
default: Me = '0;
endcase
// round the result
// - if the fraction overflows one should be added to the exponent
assign {FullResExp, Rf} = {Nexp, RoundFrac} + RoundAdd;
assign Re = FullResExp[`NE-1:0];
assign {FullRe, Rf} = {Me, RoundFrac} + RoundAdd;
assign Re = FullRe[`NE-1:0];
endmodule

View File

@ -38,11 +38,11 @@ module roundsign(
input logic DivOp,
input logic CvtOp,
input logic CvtCs,
output logic Nsgn
output logic Ms
);
logic FmaResSgnTmp;
logic DivSgn;
logic Qs;
// is the result negitive
// if p - z is the Sum negitive
@ -52,9 +52,9 @@ module roundsign(
// assign FmaResSgnTmp = FmaInvA&(FmaAs)&FmaNegSum | FmaInvA&FmaPs&~FmaNegSum | (FmaAs&FmaPs);
assign DivSgn = Xs^Ys;
assign Qs = Xs^Ys;
// Sign for rounding calulation
assign Nsgn = (FmaResSgnTmp&FmaOp) | (CvtCs&CvtOp) | (DivSgn&DivOp);
assign Ms = (FmaResSgnTmp&FmaOp) | (CvtCs&CvtOp) | (Qs&DivOp);
endmodule

View File

@ -28,23 +28,22 @@
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module lzacorrection(
module shiftcorrection(
input logic [`NORMSHIFTSZ-1:0] Shifted, // the shifted sum before LZA correction
input logic FmaOp,
input logic DivOp,
input logic DivResDenorm,
input logic [`NE+1:0] DivCalcExp,
input logic [`NE+1:0] DivQe,
input logic [`NE+1:0] DivDenormShift,
input logic [`NE+1:0] FmaConvNormSumExp, // exponent of the normalized sum not taking into account denormal or zero results
input logic [`NE+1:0] FmaNe, // exponent of the normalized sum not taking into account denormal or zero results
input logic FmaPreResultDenorm, // is the result denormalized - calculated before LZA corection
input logic FmaKillProd, // is the product set to zero
input logic FmaSmZero,
output logic [`CORRSHIFTSZ-1:0] Nfrac, // the shifted sum before LZA correction
output logic [`NE+1:0] DivCorrExp,
input logic FmaSZero,
output logic [`CORRSHIFTSZ-1:0] Mf, // the shifted sum before LZA correction
output logic [`NE+1:0] Qe,
output logic [`NE+1:0] FmaSe // exponent of the normalized sum
);
logic [3*`NF+5:0] CorrSumShifted; // the shifted sum after LZA correction
logic [`CORRSHIFTSZ:0] CorrQuotShifted;
logic [`CORRSHIFTSZ-1:0] CorrQuotShifted;
logic ResDenorm; // is the result denormalized
logic LZAPlus1, LZAPlus2; // add one or two to the sum's exponent due to LZA correction
@ -54,16 +53,16 @@ module lzacorrection(
// the only possible mantissa for a plus two is all zeroes - a one has to propigate all the way through a sum. so we can leave the bottom statement alone
assign CorrSumShifted = LZAPlus1 ? Shifted[`NORMSHIFTSZ-3:1] : Shifted[`NORMSHIFTSZ-4:0];
// if the msb is 1 or the exponent was one, but the shifted quotent was < 1 (Denorm)
assign CorrQuotShifted = {LZAPlus2|(DivCalcExp==1&~LZAPlus2) ? Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ] : {Shifted[`NORMSHIFTSZ-2:`NORMSHIFTSZ-`CORRSHIFTSZ], 1'b0}, 1'b0};
assign CorrQuotShifted = (LZAPlus2|(DivQe==1&~LZAPlus2)) ? Shifted[`NORMSHIFTSZ-2:`NORMSHIFTSZ-`CORRSHIFTSZ-1] : Shifted[`NORMSHIFTSZ-3:`NORMSHIFTSZ-`CORRSHIFTSZ-2];
// if the result of the divider was calculated to be denormalized, then the result was correctly normalized, so select the top shifted bits
assign Nfrac = FmaOp ? {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+6){1'b0}}} : DivOp&~DivResDenorm ? CorrQuotShifted[`CORRSHIFTSZ-1:0] : Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ];
assign Mf = FmaOp ? {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+6){1'b0}}} : DivOp&~DivResDenorm ? CorrQuotShifted : Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ];
// Determine sum's exponent
// if plus1 If plus2 if said denorm but norm plus 1 if said denorm but norm plus 2
assign FmaSe = (FmaConvNormSumExp+{{`NE+1{1'b0}}, LZAPlus1&~FmaKillProd}+{{`NE{1'b0}}, LZAPlus2&~FmaKillProd, 1'b0}+{{`NE+1{1'b0}}, ~ResDenorm&FmaPreResultDenorm&~FmaKillProd}+{{`NE+1{1'b0}}, &FmaConvNormSumExp&Shifted[3*`NF+6]&~FmaKillProd}) & {`NE+2{~(FmaSmZero|ResDenorm)}};
assign FmaSe = (FmaNe+{{`NE+1{1'b0}}, LZAPlus1}+{{`NE{1'b0}}, LZAPlus2, 1'b0}+{{`NE+1{1'b0}}, ~ResDenorm&FmaPreResultDenorm}+{{`NE+1{1'b0}}, &FmaNe&Shifted[3*`NF+6]}) & {`NE+2{~(FmaSZero|ResDenorm)}};
// recalculate if the result is denormalized
assign ResDenorm = FmaPreResultDenorm&~Shifted[`NORMSHIFTSZ-3]&~Shifted[`NORMSHIFTSZ-2];
// the quotent is in the range [.5,2) if there is no early termination
// if the quotent < 1 and not denormal then subtract 1 to account for the normalization shift
assign DivCorrExp = ((DivResDenorm)&~DivDenormShift[`NE+1]) ? (`NE+2)'(0) : DivCalcExp - {(`NE+1)'(0), ~LZAPlus2};
assign Qe = ((DivResDenorm)&~DivDenormShift[`NE+1]) ? (`NE+2)'(0) : DivQe - {(`NE+1)'(0), ~LZAPlus2};
endmodule

View File

@ -29,17 +29,17 @@
`include "wally-config.vh"
module resultselect(
module specialcase(
input logic Xs, // input signs
input logic [`NF:0] Xm, Ym, Zm, // input mantissas
input logic XNaN, YNaN, ZNaN, // inputs are NaN
input logic [2:0] Frm, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
input logic [`FMTBITS-1:0] OutFmt, // output format
input logic InfIn,
input logic NaNIn,
input logic XInf, YInf,
input logic XZero,
input logic IntZero,
input logic NaNIn,
input logic IntToFp,
input logic Int64,
input logic Signed,
@ -53,10 +53,10 @@ module resultselect(
input logic IntInvalid, Invalid, Overflow, // flags
input logic CvtResUf,
input logic [`NE-1:0] Re, // Res exponent
input logic [`NE+1:0] FullResExp, // Res exponent
input logic [`NE+1:0] FullRe, // Res exponent
input logic [`NF-1:0] Rf, // Res fraction
input logic [`XLEN+1:0] CvtNegRes, // the negation of the result
output logic [`FLEN-1:0] W, // final res
output logic [`FLEN-1:0] PostProcRes, // final res
output logic [`XLEN-1:0] FCvtIntRes // final res
);
logic [`FLEN-1:0] XNaNRes, YNaNRes, ZNaNRes, InvalidRes, OfRes, UfRes, NormRes; // possible results
@ -231,11 +231,11 @@ module resultselect(
// - do so if the res underflows, is zero (the exp doesnt calculate correctly). or the integer input is 0
// - dont set to zero if fp input is zero but not using the fp input
// - dont set to zero if int input is zero but not using the int input
assign KillRes = CvtOp ? (CvtResUf|(XZero&~IntToFp)|(IntZero&IntToFp)) : FullResExp[`NE+1] | (((YInf&~XInf)|XZero)&DivOp);//Underflow & ~ResDenorm & (Re!=1);
assign KillRes = CvtOp ? (CvtResUf|(XZero&~IntToFp)|(IntZero&IntToFp)) : FullRe[`NE+1] | (((YInf&~XInf)|XZero)&DivOp);//Underflow & ~ResDenorm & (Re!=1);
assign SelOfRes = Overflow|DivByZero|(InfIn&~(YInf&DivOp));
// output infinity with result sign if divide by zero
if(`IEEE754) begin
assign W = XNaN&~(IntToFp&CvtOp) ? XNaNRes :
assign PostProcRes = XNaN&~(IntToFp&CvtOp) ? XNaNRes :
YNaN&~CvtOp ? YNaNRes :
ZNaN&FmaOp ? ZNaNRes :
Invalid ? InvalidRes :
@ -243,7 +243,7 @@ module resultselect(
KillRes ? UfRes :
NormRes;
end else begin
assign W = NaNIn|Invalid ? InvalidRes :
assign PostProcRes = NaNIn|Invalid ? InvalidRes :
SelOfRes ? OfRes :
KillRes ? UfRes :
NormRes;

View File

@ -1,312 +0,0 @@
///////////////////////////////////////////
// srt.sv
//
// Written: David_Harris@hmc.edu, me@KatherineParry.com, Cedar Turek
// Modified:13 January 2022
//
// Purpose: Combined Divide and Square Root Floating Point and Integer Unit
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// MIT LICENSE
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
// OR OTHER DEALINGS IN THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module srtradix4 (
input logic clk,
input logic DivStart,
input logic DivBusy,
input logic [`FMTBITS-1:0] FmtE,
input logic [`NE-1:0] XExpE, YExpE,
input logic XZeroE, YZeroE,
input logic [`DIVLEN-1:0] X,
input logic [`DIVLEN-1:0] Dpreproc,
input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt,
output logic [`DIVLEN+2:0] Quot,
output logic [`DIVLEN+3:0] WSN, WCN,
output logic [`DIVLEN+3:0] WS, WC,
output logic [`NE+1:0] DivCalcExpM,
output logic [`XLEN-1:0] Rem
);
logic [3:0] q;
logic [`DIVLEN+3:0] WSA;
logic [`DIVLEN+3:0] WCA;
logic [`DIVLEN+3:0] D, DBar, D2, DBar2, Dsel;
logic [`NE+1:0] DivCalcExp;
logic [$clog2(`XLEN+1)-1:0] intExp;
logic intSign;
// Top Muxes and Registers
// When start is asserted, the inputs are loaded into the divider.
// Otherwise, the divisor is retained and the partial remainder
// is fed back for the next iteration.
// - when the start signal is asserted X and 0 are loaded into WS and WC
// - otherwise load WSA into the flipflop
// - the assumed one is added to D since it's always normalized (and X/0 is a special case handeled by result selection)
// - XZeroE is used as the assumed one to avoid creating a sticky bit - all other numbers are normalized
mux2 #(`DIVLEN+4) wsmux({WSA[`DIVLEN+1:0], 2'b0}, {3'b000, ~XZeroE, X}, DivStart, WSN);
flop #(`DIVLEN+4) wsflop(clk, WSN, WS);
mux2 #(`DIVLEN+4) wcmux({WCA[`DIVLEN+1:0], 2'b0}, {`DIVLEN+4{1'b0}}, DivStart, WCN);
flop #(`DIVLEN+4) wcflop(clk, WCN, WC);
flopen #(`DIVLEN+4) dflop(clk, DivStart, {4'b0001, Dpreproc}, D);
flopen #(`NE+2) expflop(clk, DivStart, DivCalcExp, DivCalcExpM);
// Quotient Selection logic
// Given partial remainder, select quotient of +1, 0, or -1 (qp, qz, pm)
// *** change this for radix 4 - generate w/ stine code
// q encoding:
// 1000 = +2
// 0100 = +1
// 0000 = 0
// 0010 = -1
// 0001 = -2
qsel4 qsel4(.D, .WS, .WC, .q);
// Divisor Selection logic
// *** radix 4 change to choose -2 to 2
// - choose the negitive version of what's being selected
assign DBar = ~D;
assign DBar2 = {~D[`DIVLEN+2:0], 1'b1};
assign D2 = {D[`DIVLEN+2:0], 1'b0};
always_comb
case (q)
4'b1000: Dsel = DBar2;
4'b0100: Dsel = DBar;
4'b0000: Dsel = {(`DIVLEN+4){1'b0}};
4'b0010: Dsel = D;
4'b0001: Dsel = D2;
default: Dsel = {`DIVLEN+4{1'bx}};
endcase
// Partial Product Generation
// WSA, WCA = WS + WC - qD
csa #(`DIVLEN+4) csa(WS, WC, Dsel, |q[3:2], WSA, WCA);
//*** change for radix 4
otfc4 otfc4(.clk, .DivStart, .DivBusy, .q, .Quot);
expcalc expcalc(.FmtE, .XExpE, .YExpE, .XZeroE, .XZeroCnt, .YZeroCnt, .DivCalcExp);
endmodule
////////////////
// Submodules //
////////////////
module qsel4 (
input logic [`DIVLEN+3:0] D,
input logic [`DIVLEN+3:0] WS, WC,
output logic [3:0] q
);
logic [6:0] Wmsbs;
logic [7:0] PreWmsbs;
logic [2:0] Dmsbs;
assign PreWmsbs = WC[`DIVLEN+3:`DIVLEN-4] + WS[`DIVLEN+3:`DIVLEN-4];
assign Wmsbs = PreWmsbs[7:1];
assign Dmsbs = D[`DIVLEN-1:`DIVLEN-3];
// D = 0001.xxx...
// Dmsbs = | |
// W = xxxx.xxx...
// Wmsbs = | |
logic [3:0] QSel4[1023:0];
initial begin
integer d, w, i, w2;
for(d=0; d<8; d++)
for(w=0; w<128; w++)begin
i = d*128+w;
w2 = w-128*(w>=64); // convert to two's complement
case(d)
0: if($signed(w2)>=$signed(12)) QSel4[i] = 4'b1000;
else if(w2>=4) QSel4[i] = 4'b0100;
else if(w2>=-4) QSel4[i] = 4'b0000;
else if(w2>=-13) QSel4[i] = 4'b0010;
else QSel4[i] = 4'b0001;
1: if(w2>=14) QSel4[i] = 4'b1000;
else if(w2>=4) QSel4[i] = 4'b0100;
else if(w2>=-6) QSel4[i] = 4'b0000;
else if(w2>=-15) QSel4[i] = 4'b0010;
else QSel4[i] = 4'b0001;
2: if(w2>=15) QSel4[i] = 4'b1000;
else if(w2>=4) QSel4[i] = 4'b0100;
else if(w2>=-6) QSel4[i] = 4'b0000;
else if(w2>=-16) QSel4[i] = 4'b0010;
else QSel4[i] = 4'b0001;
3: if(w2>=16) QSel4[i] = 4'b1000;
else if(w2>=4) QSel4[i] = 4'b0100;
else if(w2>=-6) QSel4[i] = 4'b0000;
else if(w2>=-18) QSel4[i] = 4'b0010;
else QSel4[i] = 4'b0001;
4: if(w2>=18) QSel4[i] = 4'b1000;
else if(w2>=6) QSel4[i] = 4'b0100;
else if(w2>=-8) QSel4[i] = 4'b0000;
else if(w2>=-20) QSel4[i] = 4'b0010;
else QSel4[i] = 4'b0001;
5: if(w2>=20) QSel4[i] = 4'b1000;
else if(w2>=6) QSel4[i] = 4'b0100;
else if(w2>=-8) QSel4[i] = 4'b0000;
else if(w2>=-20) QSel4[i] = 4'b0010;
else QSel4[i] = 4'b0001;
6: if(w2>=20) QSel4[i] = 4'b1000;
else if(w2>=8) QSel4[i] = 4'b0100;
else if(w2>=-8) QSel4[i] = 4'b0000;
else if(w2>=-22) QSel4[i] = 4'b0010;
else QSel4[i] = 4'b0001;
7: if(w2>=24) QSel4[i] = 4'b1000;
else if(w2>=8) QSel4[i] = 4'b0100;
else if(w2>=-8) QSel4[i] = 4'b0000;
else if(w2>=-24) QSel4[i] = 4'b0010;
else QSel4[i] = 4'b0001;
endcase
end
end
assign q = QSel4[{Dmsbs,Wmsbs}];
endmodule
///////////////////////////////////
// On-The-Fly Converter, Radix 2 //
///////////////////////////////////
module otfc4 (
input logic clk,
input logic DivStart,
input logic DivBusy,
input logic [3:0] q,
output logic [`DIVLEN+2:0] Quot
);
// The on-the-fly converter transfers the quotient
// bits to the quotient as they come.
//
// This code follows the psuedocode presented in the
// floating point chapter of the book. Right now,
// it is written for Radix-4 division.
//
// QM is Q-1. It allows us to write negative bits
// without using a costly CPA.
logic [`DIVLEN+2:0] QM, QNext, QMNext, QMux, QMMux;
// QR and QMR are the shifted versions of Q and QM.
// They are treated as [N-1:r] size signals, and
// discard the r most significant bits of Q and QM.
logic [`DIVLEN:0] QR, QMR;
// if starting a new divison set Q to 0 and QM to -1
mux2 #(`DIVLEN+3) Qmux(QNext, {`DIVLEN+3{1'b0}}, DivStart, QMux);
mux2 #(`DIVLEN+3) QMmux(QMNext, {`DIVLEN+3{1'b1}}, DivStart, QMMux);
flopen #(`DIVLEN+3) Qreg(clk, DivBusy|DivStart, QMux, Quot); // *** have to connect Quot directly to M stage
flop #(`DIVLEN+3) QMreg(clk, QMMux, QM);
// shift Q (quotent) and QM (quotent-1)
// if q = 2 Q = {Q, 10} QM = {Q, 01}
// else if q = 1 Q = {Q, 01} QM = {Q, 00}
// else if q = 0 Q = {Q, 00} QM = {QM, 11}
// else if q = -1 Q = {QM, 11} QM = {QM, 10}
// else if q = -2 Q = {QM, 10} QM = {QM, 01}
// *** how does the 0 concatination numbers work?
always_comb begin
QR = Quot[`DIVLEN:0];
QMR = QM[`DIVLEN:0]; // Shift Q and QM
if (q[3]) begin // +2
QNext = {QR, 2'b10};
QMNext = {QR, 2'b01};
end else if (q[2]) begin // +1
QNext = {QR, 2'b01};
QMNext = {QR, 2'b00};
end else if (q[1]) begin // -1
QNext = {QMR, 2'b11};
QMNext = {QMR, 2'b10};
end else if (q[0]) begin // -2
QNext = {QMR, 2'b10};
QMNext = {QMR, 2'b01};
end else begin // 0
QNext = {QR, 2'b00};
QMNext = {QMR, 2'b11};
end
end
// Final Quoteint is in the range [.5, 2)
endmodule
/////////
// csa //
/////////
module csa #(parameter N=69) (
input logic [N-1:0] in1, in2, in3,
input logic cin,
output logic [N-1:0] out1, out2
);
// This block adds in1, in2, in3, and cin to produce
// a result out1 / out2 in carry-save redundant form.
// cin is just added to the least significant bit and
// is Startuired to handle adding a negative divisor.
// Fortunately, the carry (out2) is shifted left by one
// bit, leaving room in the least significant bit to
// insert cin.
assign out1 = in1 ^ in2 ^ in3;
assign out2 = {in1[N-2:0] & (in2[N-2:0] | in3[N-2:0]) |
(in2[N-2:0] & in3[N-2:0]), cin};
endmodule
module expcalc(
input logic [`FMTBITS-1:0] FmtE,
input logic [`NE-1:0] XExpE, YExpE,
input logic XZeroE,
input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt,
output logic [`NE+1:0] DivCalcExp
);
logic [`NE-2:0] Bias;
if (`FPSIZES == 1) begin
assign Bias = (`NE-1)'(`BIAS);
end else if (`FPSIZES == 2) begin
assign Bias = FmtE ? (`NE-1)'(`BIAS) : (`NE-1)'(`BIAS1);
end else if (`FPSIZES == 3) begin
always_comb
case (FmtE)
`FMT: Bias = (`NE-1)'(`BIAS);
`FMT1: Bias = (`NE-1)'(`BIAS1);
`FMT2: Bias = (`NE-1)'(`BIAS2);
default: Bias = 'x;
endcase
end else if (`FPSIZES == 4) begin
always_comb
case (FmtE)
2'h3: Bias = (`NE-1)'(`Q_BIAS);
2'h1: Bias = (`NE-1)'(`D_BIAS);
2'h0: Bias = (`NE-1)'(`S_BIAS);
2'h2: Bias = (`NE-1)'(`H_BIAS);
endcase
end
// correct exponent for denormalized input's normalization shifts
assign DivCalcExp = ({2'b0, XExpE} - {{`NE+1-$clog2(`NF+2){1'b0}}, XZeroCnt} - {2'b0, YExpE} + {{`NE+1-$clog2(`NF+2){1'b0}}, YZeroCnt} + {3'b0, Bias})&{`NE+2{~XZeroE}};
endmodule

259
pipelined/src/fpu/srt.sv Normal file
View File

@ -0,0 +1,259 @@
///////////////////////////////////////////
// srt.sv
//
// Written: David_Harris@hmc.edu, me@KatherineParry.com, cturek@hmc.edu
// Modified:13 January 2022
//
// Purpose: Combined Divide and Square Root Floating Point and Integer Unit
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// MIT LICENSE
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
// OR OTHER DEALINGS IN THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module srt(
input logic clk,
input logic DivStart,
input logic DivBusy,
input logic [`FMTBITS-1:0] FmtE,
input logic [`NE-1:0] Xe, Ye,
input logic XZeroE, YZeroE,
input logic [`DIVLEN-1:0] X,
input logic [`DIVLEN-1:0] Dpreproc,
input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt,
input logic NegSticky,
output logic [`QLEN-1-(`RADIX/4):0] Quot,
output logic [`DIVLEN+3:0] NextWSN, NextWCN,
output logic [`DIVLEN+3:0] StickyWSA,
output logic [`DIVLEN+3:0] FirstWS, FirstWC,
output logic [`NE+1:0] DivCalcExpM,
output logic [`XLEN-1:0] Rem
);
/* verilator lint_off UNOPTFLAT */
logic [`DIVLEN+3:0] WSA[`DIVCOPIES-1:0];
logic [`DIVLEN+3:0] WCA[`DIVCOPIES-1:0];
logic [`DIVLEN+3:0] WS[`DIVCOPIES-1:0];
logic [`DIVLEN+3:0] WC[`DIVCOPIES-1:0];
logic [`QLEN-1:0] Q[`DIVCOPIES-1:0];
logic [`QLEN-1:0] QM[`DIVCOPIES-1:0];
logic [`QLEN-1:0] QNext[`DIVCOPIES-1:0];
logic [`QLEN-1:0] QMNext[`DIVCOPIES-1:0];
/* verilator lint_on UNOPTFLAT */
logic [`DIVLEN+3:0] WSN, WCN;
logic [`DIVLEN+3:0] D, DBar, D2, DBar2;
logic [`NE+1:0] DivCalcExp;
logic [$clog2(`XLEN+1)-1:0] intExp;
logic intSign;
logic [`QLEN-1:0] QMMux;
// Top Muxes and Registers
// When start is asserted, the inputs are loaded into the divider.
// Otherwise, the divisor is retained and the partial remainder
// is fed back for the next iteration.
// - when the start signal is asserted X and 0 are loaded into WS and WC
// - otherwise load WSA into the flipflop
// - the assumed one is added to D since it's always normalized (and X/0 is a special case handeled by result selection)
// - XZeroE is used as the assumed one to avoid creating a sticky bit - all other numbers are normalized
if (`RADIX == 2) begin : nextw
assign NextWSN = {WSA[`DIVCOPIES-1][`DIVLEN+2:0], 1'b0};
assign NextWCN = {WCA[`DIVCOPIES-1][`DIVLEN+2:0], 1'b0};
end else begin
assign NextWSN = {WSA[`DIVCOPIES-1][`DIVLEN+1:0], 2'b0};
assign NextWCN = {WCA[`DIVCOPIES-1][`DIVLEN+1:0], 2'b0};
end
mux2 #(`DIVLEN+4) wsmux(NextWSN, {3'b000, ~XZeroE, X}, DivStart, WSN);
flopen #(`DIVLEN+4) wsflop(clk, DivStart|DivBusy, WSN, WS[0]);
mux2 #(`DIVLEN+4) wcmux(NextWCN, {`DIVLEN+4{1'b0}}, DivStart, WCN);
flopen #(`DIVLEN+4) wcflop(clk, DivStart|DivBusy, WCN, WC[0]);
flopen #(`DIVLEN+4) dflop(clk, DivStart, {4'b0001, Dpreproc}, D);
flopen #(`NE+2) expflop(clk, DivStart, DivCalcExp, DivCalcExpM);
// Divisor Selections
// - choose the negitive version of what's being selected
assign DBar = ~D;
if(`RADIX == 4) begin : d2
assign DBar2 = {~D[`DIVLEN+2:0], 1'b1};
assign D2 = {D[`DIVLEN+2:0], 1'b0};
end
genvar i;
generate
for(i=0; $unsigned(i)<`DIVCOPIES; i++) begin : interations
divinteration divinteration(.D, .DBar, .D2, .DBar2,
.WS(WS[i]), .WC(WC[i]), .WSA(WSA[i]), .WCA(WCA[i]), .Q(Q[i]), .QM(QM[i]), .QNext(QNext[i]), .QMNext(QMNext[i]));
if(i<(`DIVCOPIES-1)) begin
if (`RADIX==2)begin
assign WS[i+1] = {WSA[i][`DIVLEN+1:0], 1'b0};
assign WC[i+1] = {WCA[i][`DIVLEN+1:0], 1'b0};
end else begin
assign WS[i+1] = {WSA[i][`DIVLEN+1:0], 2'b0};
assign WC[i+1] = {WCA[i][`DIVLEN+1:0], 2'b0};
end
assign Q[i+1] = QNext[i];
assign QM[i+1] = QMNext[i];
end
end
endgenerate
// if starting a new divison set Q to 0 and QM to -1
mux2 #(`QLEN) QMmux(QMNext[`DIVCOPIES-1], {`QLEN{1'b1}}, DivStart, QMMux);
flopenr #(`QLEN) Qreg(clk, DivStart, DivBusy, QNext[`DIVCOPIES-1], Q[0]);
flopen #(`QLEN) QMreg(clk, DivBusy, QMMux, QM[0]);
assign Quot = NegSticky ? QM[0][`QLEN-1-(`RADIX/4):0] : Q[0][`QLEN-1-(`RADIX/4):0];
assign FirstWS = WS[0];
assign FirstWC = WC[0];
if(`RADIX==2)
if (`DIVCOPIES == 1)
assign StickyWSA = {WSA[0][`DIVLEN+2:0], 1'b0};
else
assign StickyWSA = {WSA[1][`DIVLEN+2:0], 1'b0};
expcalc expcalc(.FmtE, .Xe, .Ye, .XZeroE, .XZeroCnt, .YZeroCnt, .DivCalcExp);
endmodule
////////////////
// Submodules //
////////////////
/* verilator lint_off UNOPTFLAT */
module divinteration (
input logic [`DIVLEN+3:0] D,
input logic [`DIVLEN+3:0] DBar, D2, DBar2,
input logic [`QLEN-1:0] Q, QM,
input logic [`DIVLEN+3:0] WS, WC,
output logic [`QLEN-1:0] QNext, QMNext,
output logic [`DIVLEN+3:0] WSA, WCA
);
/* verilator lint_on UNOPTFLAT */
logic [`DIVLEN+3:0] Dsel;
logic [3:0] q;
logic qp, qz;//, qn;
// Quotient Selection logic
// Given partial remainder, select quotient of +1, 0, or -1 (qp, qz, pm)
// q encoding:
// 1000 = +2
// 0100 = +1
// 0000 = 0
// 0010 = -1
// 0001 = -2
if(`RADIX == 2) begin : qsel
qsel2 qsel2(WS[`DIVLEN+3:`DIVLEN], WC[`DIVLEN+3:`DIVLEN], qp, qz);//, qn);
end else begin
qsel4 qsel4(.D, .WS, .WC, .q);
end
if(`RADIX == 2) begin : dsel
assign Dsel = {`DIVLEN+4{~qz}}&(qp ? DBar : D);
end else begin
always_comb
case (q)
4'b1000: Dsel = DBar2;
4'b0100: Dsel = DBar;
4'b0000: Dsel = '0;
4'b0010: Dsel = D;
4'b0001: Dsel = D2;
default: Dsel = 'x;
endcase
end
// Partial Product Generation
// WSA, WCA = WS + WC - qD
if (`RADIX == 2) begin : csa
csa #(`DIVLEN+4) csa(WS, WC, Dsel, qp, WSA, WCA);
end else begin
csa #(`DIVLEN+4) csa(WS, WC, Dsel, |q[3:2], WSA, WCA);
end
if (`RADIX == 2) begin : otfc
otfc2 otfc2(.qp, .qz, .Q, .QM, .QNext, .QMNext);
end else begin
otfc4 otfc4(.q, .Q, .QM, .QNext, .QMNext);
end
endmodule
/////////
// csa //
/////////
module csa #(parameter N=69) (
input logic [N-1:0] in1, in2, in3,
input logic cin,
output logic [N-1:0] out1, out2
);
// This block adds in1, in2, in3, and cin to produce
// a result out1 / out2 in carry-save redundant form.
// cin is just added to the least significant bit and
// is Startuired to handle adding a negative divisor.
// Fortunately, the carry (out2) is shifted left by one
// bit, leaving room in the least significant bit to
// insert cin.
assign out1 = in1 ^ in2 ^ in3;
assign out2 = {in1[N-2:0] & (in2[N-2:0] | in3[N-2:0]) |
(in2[N-2:0] & in3[N-2:0]), cin};
endmodule
module expcalc(
input logic [`FMTBITS-1:0] FmtE,
input logic [`NE-1:0] Xe, Ye,
input logic XZeroE,
input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt,
output logic [`NE+1:0] DivCalcExp
);
logic [`NE-2:0] Bias;
if (`FPSIZES == 1) begin
assign Bias = (`NE-1)'(`BIAS);
end else if (`FPSIZES == 2) begin
assign Bias = FmtE ? (`NE-1)'(`BIAS) : (`NE-1)'(`BIAS1);
end else if (`FPSIZES == 3) begin
always_comb
case (FmtE)
`FMT: Bias = (`NE-1)'(`BIAS);
`FMT1: Bias = (`NE-1)'(`BIAS1);
`FMT2: Bias = (`NE-1)'(`BIAS2);
default: Bias = 'x;
endcase
end else if (`FPSIZES == 4) begin
always_comb
case (FmtE)
2'h3: Bias = (`NE-1)'(`Q_BIAS);
2'h1: Bias = (`NE-1)'(`D_BIAS);
2'h0: Bias = (`NE-1)'(`S_BIAS);
2'h2: Bias = (`NE-1)'(`H_BIAS);
endcase
end
// correct exponent for denormalized input's normalization shifts
assign DivCalcExp = ({2'b0, Xe} - {{`NE+1-$unsigned($clog2(`NF+2)){1'b0}}, XZeroCnt} - {2'b0, Ye} + {{`NE+1-$unsigned($clog2(`NF+2)){1'b0}}, YZeroCnt} + {3'b0, Bias})&{`NE+2{~XZeroE}};
endmodule

View File

@ -33,37 +33,45 @@
module srtfsm(
input logic clk,
input logic reset,
input logic [`DIVLEN+3:0] WSN, WCN, WS, WC,
input logic [`DIVLEN+3:0] NextWSN, NextWCN, WS, WC,
input logic XInfE, YInfE,
input logic XZeroE, YZeroE,
input logic XNaNE, YNaNE,
input logic DivStart,
input logic StallE,
input logic StallM,
input logic [$clog2(`DIVLEN/2+3)-1:0] Dur,
output logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2E,
input logic StallE,
input logic StallM,
input logic [`DIVLEN+3:0] StickyWSA,
input logic [`DURLEN-1:0] Dur,
output logic [`DURLEN-1:0] EarlyTermShiftE,
output logic DivStickyE,
output logic DivDone,
output logic DivNegStickyE,
output logic NegSticky,
output logic DivBusy
);
typedef enum logic [1:0] {IDLE, BUSY, DONE} statetype;
statetype state;
logic [$clog2(`DIVLEN/2+3)-1:0] step;
logic [`DURLEN-1:0] step;
logic WZero;
//logic [$clog2(`DIVLEN/2+3)-1:0] Dur;
logic [`DIVLEN+3:0] W;
//flopen #($clog2(`DIVLEN/2+3)) durflop(clk, DivStart, CalcDur, Dur);
assign DivBusy = (state == BUSY);
assign WZero = ((WSN^WCN)=={WSN[`DIVLEN+2:0]|WCN[`DIVLEN+2:0], 1'b0});
assign DivStickyE = ~WZero;
assign WZero = ((NextWSN^NextWCN)=={NextWSN[`DIVLEN+2:0]|NextWCN[`DIVLEN+2:0], 1'b0});
// calculate sticky bit
// - there is a chance that a value is subtracted infinitly, resulting in an exact QM result
// this is only a problem on radix 2 (and pssibly maximally redundant 4) since minimally redundant
// radix-4 division can't create a QM that continually adds 0's
if (`RADIX == 2)
assign DivStickyE = |W&~(StickyWSA == WS);
else
assign DivStickyE = |W;
assign DivDone = (state == DONE);
assign W = WC+WS;
assign DivNegStickyE = W[`DIVLEN+3]; //*** is there a better way to do this???
assign EarlyTermShiftDiv2E = step;
assign NegSticky = W[`DIVLEN+3]; //*** is there a better way to do this???
assign EarlyTermShiftE = step;
always_ff @(posedge clk) begin
if (reset) begin
@ -73,7 +81,7 @@ module srtfsm(
if (XZeroE|YZeroE|XInfE|YInfE|XNaNE|YNaNE) state <= #1 DONE;
else state <= #1 BUSY;
end else if (state == BUSY) begin
if ((~|step[$clog2(`DIVLEN/2+3)-1:1]&step[0])|WZero) begin
if ((~|step[`DURLEN-1:1]&step[0])|WZero) begin
state <= #1 DONE;
end
step <= step - 1;

View File

@ -31,11 +31,11 @@
`include "wally-config.vh"
module srtpreproc (
input logic [`NF:0] XManE, YManE,
input logic [`NF:0] Xm, Ym,
output logic [`DIVLEN-1:0] X,
output logic [`DIVLEN-1:0] Dpreproc,
output logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt,
output logic [$clog2(`DIVLEN/2+3)-1:0] Dur
output logic [`DURLEN-1:0] Dur
);
// logic [`XLEN-1:0] PosA, PosB;
// logic [`DIVLEN-1:0] ExtraA, ExtraB, PreprocA, PreprocB, PreprocX, PreprocY;
@ -49,24 +49,33 @@ module srtpreproc (
// ***can probably merge X LZC with conversion
// cout the number of leading zeros
lzc #(`NF+1) lzcA (XManE, XZeroCnt);
lzc #(`NF+1) lzcB (YManE, YZeroCnt);
lzc #(`NF+1) lzcA (Xm, XZeroCnt);
lzc #(`NF+1) lzcB (Ym, YZeroCnt);
// assign ExtraA = {PosA, {`DIVLEN-`XLEN{1'b0}}};
// assign ExtraB = {PosB, {`DIVLEN-`XLEN{1'b0}}};
// assign PreprocA = ExtraA << zeroCntA;
// assign PreprocB = ExtraB << (zeroCntB + 1);
assign PreprocX = {XManE[`NF-1:0]<<XZeroCnt, {`DIVLEN-`NF{1'b0}}};
assign PreprocY = {YManE[`NF-1:0]<<YZeroCnt, {`DIVLEN-`NF{1'b0}}};
assign PreprocX = {Xm[`NF-1:0]<<XZeroCnt, {`DIVLEN-`NF{1'b0}}};
assign PreprocY = {Ym[`NF-1:0]<<YZeroCnt, {`DIVLEN-`NF{1'b0}}};
assign X = PreprocX;
assign Dpreproc = PreprocY;
assign Dur = ($clog2(`DIVLEN/2+3))'(`DIVLEN/2+2);
assign Dur = (`DURLEN)'(`FPDUR);
// assign intExp = zeroCntB - zeroCntA + 1;
// assign intSign = Signed & (SrcA[`XLEN - 1] ^ SrcB[`XLEN - 1]);
// radix 2 radix 4
// 1 copies DIVLEN+2 DIVLEN+2/2
// 2 copies DIVLEN+2/2 DIVLEN+2/2*2
// 4 copies DIVLEN+2/4 DIVLEN+2/2*4
// 8 copies DIVLEN+2/8 DIVLEN+2/2*8
// DIVRESLEN = DIVLEN or DIVLEN+2
// r = 1 or 2
// DIVRESLEN/(r*`DIVCOPIES)
endmodule

View File

@ -34,7 +34,7 @@ module lzc #(parameter WIDTH = 1) (
/* verilator lint_off CMPCONST */
/* verilator lint_off WIDTH */
int i;
logic [31:0] i;
always_comb begin
i = 0;
while (~num[WIDTH-1-i] & (i < WIDTH)) i = i+1; // search for leading one

View File

@ -226,7 +226,7 @@ module ifu (
icache(.clk, .reset, .CPUBusy, .IgnoreRequestTLB(ITLBMissF), .TrapM(TrapM), .IgnoreRequestTrapM('0),
.CacheBusWriteData(ICacheBusWriteData), .CacheBusAck(ICacheBusAck),
.CacheBusAdr(ICacheBusAdr), .CacheStall(ICacheStallF),
.CacheFetchLine(ICacheFetchLine), .FWriteDataM(), .FpLoadStoreM(), .FLoad2(),
.CacheFetchLine(ICacheFetchLine), .FStore2(),
.CacheWriteLine(), .ReadDataWord(FinalInstrRawF),
.Cacheable(CacheableF),
.CacheMiss(ICacheMiss), .CacheAccess(ICacheAccess),

View File

@ -58,7 +58,7 @@ module lsu (
input logic sfencevmaM,
// fpu
input logic [`FLEN-1:0] FWriteDataM,
input logic FLoad2,
input logic FStore2,
input logic FpLoadStoreM,
// faults
output logic LoadPageFaultM, StoreAmoPageFaultM,
@ -192,7 +192,8 @@ module lsu (
// Memory System
// Either Data Cache or Data Tightly Integrated Memory or just bus interface
/////////////////////////////////////////////////////////////////////////////////////////////
logic [`XLEN-1:0] AMOWriteDataM, FinalWriteDataM, LittleEndianWriteDataM;
logic [`XLEN-1:0] AMOWriteDataM, IEUWriteDataM, LittleEndianWriteDataM;
logic [`LLEN-1:0] FinalWriteDataM;
logic [`LLEN-1:0] ReadDataWordM, LittleEndianReadDataWordM;
logic [`LLEN-1:0] ReadDataWordMuxM;
logic IgnoreRequest;
@ -202,7 +203,7 @@ module lsu (
if (`DMEM == `MEM_TIM) begin : dtim
// *** directly instantiate RAM or ROM here. Instantiate SRAM1P1RW.
// Merge SimpleRAM and SRAM1p1rw into one that is good for synthesis and RAM libraries and flops
dtim dtim(.clk, .reset, .CPUBusy, .LSURWM, .IEUAdrM, .IEUAdrE, .TrapM, .FinalWriteDataM,
dtim dtim(.clk, .reset, .CPUBusy, .LSURWM, .IEUAdrM, .IEUAdrE, .TrapM, .FinalWriteDataM(IEUWriteDataM), //*** fix the dtim FinalWriteData
.ReadDataWordM(ReadDataWordM[`XLEN-1:0]), .BusStall, .LSUBusWrite,.LSUBusRead, .BusCommittedM,
.DCacheStallM, .DCacheCommittedM, .ByteMaskM, .Cacheable(CacheableM),
.DCacheMiss, .DCacheAccess);
@ -230,15 +231,19 @@ module lsu (
mux2 #(`LLEN) UnCachedDataMux(.d0(LittleEndianReadDataWordM), .d1({{`LLEN-`XLEN{1'b0}}, DCacheBusWriteData[`XLEN-1:0]}),
.s(SelUncachedAdr), .y(ReadDataWordMuxM));
mux2 #(`XLEN) LsuBushwdataMux(.d0(ReadDataWordM[`XLEN-1:0]), .d1(FinalWriteDataM),
mux2 #(`XLEN) LsuBushwdataMux(.d0(ReadDataWordM[`XLEN-1:0]), .d1(IEUWriteDataM),
.s(SelUncachedAdr), .y(LSUBusHWDATA));
if(CACHE_ENABLED) begin : dcache
if (`LLEN>`XLEN)
mux2 #(`LLEN) datamux({IEUWriteDataM, IEUWriteDataM}, FWriteDataM, FpLoadStoreM, FinalWriteDataM);
else
assign FinalWriteDataM = {{`LLEN-`XLEN{1'b0}}, IEUWriteDataM};
cache #(.LINELEN(`DCACHE_LINELENINBITS), .NUMLINES(`DCACHE_WAYSIZEINBYTES*8/LINELEN),
.NUMWAYS(`DCACHE_NUMWAYS), .LOGWPL(LOGWPL), .WORDLEN(`LLEN), .MUXINTERVAL(`XLEN), .DCACHE(1)) dcache(
.clk, .reset, .CPUBusy, .LSUBusWriteCrit, .RW(LSURWM), .Atomic(LSUAtomicM),
.FlushCache(FlushDCacheM), .NextAdr(LSUAdrE), .PAdr(LSUPAdrM),
.ByteMask(ByteMaskM), .WordCount, .FpLoadStoreM, .FWriteDataM, .FLoad2,
.ByteMask(ByteMaskM), .WordCount, .FStore2,
.FinalWriteData(FinalWriteDataM), .Cacheable(CacheableM),
.CacheStall(DCacheStallM), .CacheMiss(DCacheMiss), .CacheAccess(DCacheAccess),
.IgnoreRequestTLB, .IgnoreRequestTrapM, .TrapM(1'b0), .CacheCommitted(DCacheCommittedM),
@ -286,10 +291,10 @@ module lsu (
// swap the bytes when read from big-endian memory
/////////////////////////////////////////////////////////////////////////////////////////////
if (`BIGENDIAN_SUPPORTED) begin:endian
bigendianswap #(`XLEN) storeswap(.BigEndianM, .a(LittleEndianWriteDataM), .y(FinalWriteDataM));
bigendianswap #(`XLEN) storeswap(.BigEndianM, .a(LittleEndianWriteDataM), .y(IEUWriteDataM));
bigendianswap #(`LLEN) loadswap(.BigEndianM, .a(ReadDataWordM), .y(LittleEndianReadDataWordM));
end else begin
assign FinalWriteDataM = LittleEndianWriteDataM;
assign IEUWriteDataM = LittleEndianWriteDataM;
assign LittleEndianReadDataWordM = ReadDataWordM;
end

View File

@ -172,8 +172,8 @@ module plic_apb (
end
// pending interrupt requests
//assign nextIntPending = (intPending | requests) & ~intInProgress; //
assign nextIntPending = requests; // DH: RT made this change May 2022, but it seems to be a bug to not consider intInProgress; see May 23, 2022 slack discussion
assign nextIntPending = (intPending | requests) & ~intInProgress; // dh changed back 7/9/22 see if Buildroot still boots. Confirmed to boot successfully.
//assign nextIntPending = requests; // DH: RT made this change May 2022, but it seems to be a bug to not consider intInProgress; see May 23, 2022 slack discussion
flopr #(`N) intPendingFlop(PCLK,~PRESETn,nextIntPending,intPending);
// context-dependent signals

View File

@ -93,7 +93,7 @@ module wallypipelinedcore (
logic FStallD;
logic FWriteIntE;
logic [`XLEN-1:0] FWriteDataE;
logic FLoad2;
logic FStore2;
logic [`FLEN-1:0] FWriteDataM;
logic [`XLEN-1:0] FIntResM;
logic [`XLEN-1:0] FCvtIntResW;
@ -259,7 +259,7 @@ module wallypipelinedcore (
.CommittedM, .DCacheMiss, .DCacheAccess,
.SquashSCW,
.FpLoadStoreM,
.FWriteDataM, .FLoad2,
.FWriteDataM, .FStore2,
//.DataMisalignedM(DataMisalignedM),
.IEUAdrE, .IEUAdrM, .WriteDataE,
.ReadDataW, .FlushDCacheM,
@ -400,7 +400,7 @@ module wallypipelinedcore (
.STATUS_FS, // is floating-point enabled?
.FRegWriteM, // FP register write enable
.FpLoadStoreM,
.FLoad2,
.FStore2,
.FStallD, // Stall the decode stage
.FWriteIntE, // integer register write enable
.FWriteDataE, // Data to be written to memory

View File

@ -96,6 +96,10 @@ void output(FILE *fptr, int aSign, int aExp, double aFrac, int bSign, int bExp,
// Print r in standard double format
fprintf(fptr, "%03x", rExp|(rSign<<11));
printhex(fptr, rFrac);
fprintf(fptr, "_");
// Spacing for testbench, value doesn't matter
fprintf(fptr, "%016x", 0);
fprintf(fptr, "\n");
}

View File

@ -1,2 +1 @@
verilator --lint-only --top-module srt srt.sv -I../config/rv64gc -I../config/shared ../src/generic/*.sv ../src/generic/flop/*.sv
verilator --lint-only --top-module srtradix4 srt-radix4.sv qsel4.sv -I../config/rv64gc -I../config/shared ../src/generic/*.sv ../src/generic/flop/*.sv

View File

@ -1,198 +0,0 @@
/*
Program: qslc_r4a2.c
Description: Prints out Quotient Selection Table (assumes CPA is utilized to reduce memory)
User: James E. Stine
*/
#include <stdio.h>
#include <math.h>
#define DIVISOR_SIZE 3
#define CARRY_SIZE 7
#define SUM_SIZE 7
#define TOT_SIZE 7
void disp_binary(double, int, int);
struct bits {
unsigned int divisor : DIVISOR_SIZE;
int tot : TOT_SIZE;
} pla;
/*
Function: disp_binary
Description: This function displays a Double-Precision number into
four 16 bit integers using the global union variable
dp_number
Argument List: double x The value to be converted
int bits_to_left Number of bits left of radix point
int bits_to_right Number of bits right of radix point
Return value: none
*/
void disp_binary(double x, int bits_to_left, int bits_to_right) {
int i;
double diff;
if (fabs(x) < pow(2.0, ((double) -bits_to_right)) ) {
for (i = -bits_to_left + 1; i <= bits_to_right; i++) {
printf("0");
}
if (i == bits_to_right+1)
;
return;
}
if (x < 0.0)
x = pow(2.0, ((double) bits_to_left)) + x;
for (i = -bits_to_left + 1; i <= bits_to_right; i++) {
diff = pow(2.0, ((double) -i) );
if (x < diff)
printf("0");
else {
printf("1");
x -= diff;
}
if (i == 0)
;
}
}
int main() {
int m;
int n;
int o;
pla.divisor = 0;
pla.tot = 0;
printf("\tcase({D[5:3],Wmsbs})\n");
for (o=0; o < pow(2.0, DIVISOR_SIZE); o++) {
for (m=0; m < pow(2.0, TOT_SIZE); m++) {
printf("\t\t10'b");
disp_binary((double) pla.divisor, DIVISOR_SIZE, 0);
printf("_");
disp_binary((double) pla.tot, TOT_SIZE, 0);
printf(": q = 4'b");
/*
4 bits for Radix 4 (a=2)
1000 = +2
0100 = +1
0000 = 0
0010 = -1
0001 = -2
*/
switch (pla.divisor) {
case 0:
if ((pla.tot) >= 12)
printf("1000");
else if ((pla.tot) >= 4)
printf("0100");
else if ((pla.tot) >= -4)
printf("0000");
else if ((pla.tot) >= -13)
printf("0010");
else
printf("0001");
break;
case 1:
if ((pla.tot) >= 14)
printf("1000");
else if ((pla.tot) >= 4)
printf("0100");
else if ((pla.tot) >= -6)
printf("0000");
else if ((pla.tot) >= -15)
printf("0010");
else
printf("0001");
break;
case 2:
if ((pla.tot) >= 15)
printf("1000");
else if ((pla.tot) >= 4)
printf("0100");
else if ((pla.tot) >= -6)
printf("0000");
else if ((pla.tot) >= -16)
printf("0010");
else
printf("0001");
break;
case 3:
if ((pla.tot) >= 16)
printf("1000");
else if ((pla.tot) >= 4)
printf("0100");
else if ((pla.tot) >= -6)
printf("0000");
else if ((pla.tot) >= -18)
printf("0010");
else
printf("0001");
break;
case 4:
if ((pla.tot) >= 18)
printf("1000");
else if ((pla.tot) >= 6)
printf("0100");
else if ((pla.tot) >= -8)
printf("0000");
else if ((pla.tot) >= -20)
printf("0010");
else
printf("0001");
break;
case 5:
if ((pla.tot) >= 20)
printf("1000");
else if ((pla.tot) >= 6)
printf("0100");
else if ((pla.tot) >= -8)
printf("0000");
else if ((pla.tot) >= -20)
printf("0010");
else
printf("0001");
break;
case 6:
if ((pla.tot) >= 20)
printf("1000");
else if ((pla.tot) >= 8)
printf("0100");
else if ((pla.tot) >= -8)
printf("0000");
else if ((pla.tot) >= -22)
printf("0010");
else
printf("0001");
break;
case 7:
if ((pla.tot) >= 24)
printf("1000");
else if ((pla.tot) >= 8)
printf("0100");
else if ((pla.tot) >= -8)
printf("0000");
else if ((pla.tot) >= -24)
printf("0010");
else
printf("0001");
break;
default: printf ("XXX");
}
printf(";\n");
(pla.tot)++;
}
(pla.divisor)++;
}
printf("\tendcase\n");
}

Binary file not shown.

View File

@ -1,190 +0,0 @@
/*
Program: qslc_r4a2.c
Description: Prints out Quotient Selection Table (assumes CPA is utilized to reduce memory)
User: James E. Stine
*/
#include <stdio.h>
#include <math.h>
#define DIVISOR_SIZE 3
#define CARRY_SIZE 7
#define SUM_SIZE 7
#define TOT_SIZE 7
void disp_binary(double, int, int);
struct bits {
unsigned int divisor : DIVISOR_SIZE;
int tot : TOT_SIZE;
} pla;
/*
Function: disp_binary
Description: This function displays a Double-Precision number into
four 16 bit integers using the global union variable
dp_number
Argument List: double x The value to be converted
int bits_to_left Number of bits left of radix point
int bits_to_right Number of bits right of radix point
Return value: none
*/
void disp_binary(double x, int bits_to_left, int bits_to_right) {
int i;
double diff;
if (fabs(x) < pow(2.0, ((double) -bits_to_right)) ) {
for (i = -bits_to_left + 1; i <= bits_to_right; i++) {
printf("0");
}
if (i == bits_to_right+1)
;
return;
}
if (x < 0.0)
x = pow(2.0, ((double) bits_to_left)) + x;
for (i = -bits_to_left + 1; i <= bits_to_right; i++) {
diff = pow(2.0, ((double) -i) );
if (x < diff)
printf("0");
else {
printf("1");
x -= diff;
}
if (i == 0)
;
}
}
int main() {
int m;
int n;
int o;
pla.divisor = 0;
pla.tot = 0;
for (o=0; o < pow(2.0, DIVISOR_SIZE); o++) {
for (m=0; m < pow(2.0, TOT_SIZE); m++) {
/*
4 bits for Radix 4 (a=2)
1000 = +2
0100 = +1
0000 = 0
0010 = -1
0001 = -2
*/
switch (pla.divisor) {
case 0:
if ((pla.tot) >= 12)
printf("8");
else if ((pla.tot) >= 4)
printf("4");
else if ((pla.tot) >= -4)
printf("0");
else if ((pla.tot) >= -13)
printf("2");
else
printf("1");
break;
case 1:
if ((pla.tot) >= 14)
printf("8");
else if ((pla.tot) >= 4)
printf("4");
else if ((pla.tot) >= -6)
printf("0");
else if ((pla.tot) >= -15)
printf("2");
else
printf("1");
break;
case 2:
if ((pla.tot) >= 15)
printf("8");
else if ((pla.tot) >= 4)
printf("4");
else if ((pla.tot) >= -6)
printf("0");
else if ((pla.tot) >= -16)
printf("2");
else
printf("1");
break;
case 3:
if ((pla.tot) >= 16)
printf("8");
else if ((pla.tot) >= 4)
printf("4");
else if ((pla.tot) >= -6)
printf("0");
else if ((pla.tot) >= -18)
printf("2");
else
printf("1");
break;
case 4:
if ((pla.tot) >= 18)
printf("8");
else if ((pla.tot) >= 6)
printf("4");
else if ((pla.tot) >= -8)
printf("0");
else if ((pla.tot) >= -20)
printf("2");
else
printf("1");
break;
case 5:
if ((pla.tot) >= 20)
printf("8");
else if ((pla.tot) >= 6)
printf("4");
else if ((pla.tot) >= -8)
printf("0");
else if ((pla.tot) >= -20)
printf("2");
else
printf("1");
break;
case 6:
if ((pla.tot) >= 20)
printf("8");
else if ((pla.tot) >= 8)
printf("4");
else if ((pla.tot) >= -8)
printf("0");
else if ((pla.tot) >= -22)
printf("2");
else
printf("1");
break;
case 7:
if ((pla.tot) >= 24)
printf("8");
else if ((pla.tot) >= 8)
printf("4");
else if ((pla.tot) >= -8)
printf("0");
else if ((pla.tot) >= -24)
printf("2");
else
printf("1");
break;
default: printf ("X");
}
printf("\n");
(pla.tot)++;
}
(pla.divisor)++;
}
}

File diff suppressed because it is too large Load Diff

Binary file not shown.

View File

@ -1,198 +0,0 @@
/*
Program: qslc_r4a2.c
Description: Prints out Quotient Selection Table (assumes CPA is utilized to reduce memory)
User: James E. Stine
*/
#include <stdio.h>
#include <math.h>
#define DIVISOR_SIZE 3
#define CARRY_SIZE 7
#define SUM_SIZE 7
#define TOT_SIZE 7
void disp_binary(double, int, int);
struct bits {
unsigned int divisor : DIVISOR_SIZE;
int tot : TOT_SIZE;
} pla;
/*
Function: disp_binary
Description: This function displays a Double-Precision number into
four 16 bit integers using the global union variable
dp_number
Argument List: double x The value to be converted
int bits_to_left Number of bits left of radix point
int bits_to_right Number of bits right of radix point
Return value: none
*/
void disp_binary(double x, int bits_to_left, int bits_to_right) {
int i;
double diff;
if (fabs(x) < pow(2.0, ((double) -bits_to_right)) ) {
for (i = -bits_to_left + 1; i <= bits_to_right; i++) {
printf("0");
}
if (i == bits_to_right+1)
;
return;
}
if (x < 0.0)
x = pow(2.0, ((double) bits_to_left)) + x;
for (i = -bits_to_left + 1; i <= bits_to_right; i++) {
diff = pow(2.0, ((double) -i) );
if (x < diff)
printf("0");
else {
printf("1");
x -= diff;
}
if (i == 0)
;
}
}
int main() {
int m;
int n;
int o;
pla.divisor = 0;
pla.tot = 0;
printf("\tcase({D[5:3],Wmsbs})\n");
for (o=0; o < pow(2.0, DIVISOR_SIZE); o++) {
for (m=0; m < pow(2.0, TOT_SIZE); m++) {
printf("\t\t11'b");
disp_binary((double) pla.divisor, DIVISOR_SIZE, 0);
printf("_");
disp_binary((double) pla.tot, TOT_SIZE, 0);
printf(": q = 4'b");
/*
4 bits for Radix 4 (a=2)
1000 = +2
0100 = +1
0000 = 0
0010 = -1
0001 = -2
*/
switch (pla.divisor) {
case 0:
if ((pla.tot) >= 24)
printf("1000");
else if ((pla.tot) >= 8)
printf("0100");
else if ((pla.tot) >= -8)
printf("0000");
else if ((pla.tot) >= -26)
printf("0010");
else
printf("0001");
break;
case 1:
if ((pla.tot) >= 28)
printf("1000");
else if ((pla.tot) >= 8)
printf("0100");
else if ((pla.tot) >= -10)
printf("0000");
else if ((pla.tot) >= -28)
printf("0010");
else
printf("0001");
break;
case 2:
if ((pla.tot) >= 32)
printf("1000");
else if ((pla.tot) >= 8)
printf("0100");
else if ((pla.tot) >= -12)
printf("0000");
else if ((pla.tot) >= -32)
printf("0010");
else
printf("0001");
break;
case 3:
if ((pla.tot) >= 32)
printf("1000");
else if ((pla.tot) >= 8)
printf("0100");
else if ((pla.tot) >= -12)
printf("0000");
else if ((pla.tot) >= -34)
printf("0010");
else
printf("0001");
break;
case 4:
if ((pla.tot) >= 36)
printf("1000");
else if ((pla.tot) >= 12)
printf("0100");
else if ((pla.tot) >= -12)
printf("0000");
else if ((pla.tot) >= -36)
printf("0010");
else
printf("0001");
break;
case 5:
if ((pla.tot) >= 40)
printf("1000");
else if ((pla.tot) >= 12)
printf("0100");
else if ((pla.tot) >= -16)
printf("0000");
else if ((pla.tot) >= -40)
printf("0010");
else
printf("0001");
break;
case 6:
if ((pla.tot) >= 40)
printf("1000");
else if ((pla.tot) >= 16)
printf("0100");
else if ((pla.tot) >= -16)
printf("0000");
else if ((pla.tot) >= -44)
printf("0010");
else
printf("0001");
break;
case 7:
if ((pla.tot) >= 44)
printf("1000");
else if ((pla.tot) >= 16)
printf("0100");
else if ((pla.tot) >= -16)
printf("0000");
else if ((pla.tot) >= -46)
printf("0010");
else
printf("0001");
break;
default: printf ("XXX");
}
printf(";\n");
(pla.tot)++;
}
(pla.divisor)++;
}
printf("\tendcase\n");
}

File diff suppressed because it is too large Load Diff

Binary file not shown.

View File

@ -30,15 +30,11 @@ void main(void)
FILE *fptr;
double aFrac, rFrac;
int aExp, rExp;
double mans[ENTRIES] = {1, 1.5, 1.25, 1.125, 1.0625,
double mans[ENTRIES] = {1, 1849.0/1024, 1.25, 1.125, 1.0625,
1.75, 1.875, 1.99999,
1.1, 1.2, 1.01, 1.001, 1.0001,
<<<<<<< Updated upstream
1/1.1, 1/1.5, 1/1.25, 1/1.125};
=======
1.1, 1.5, 1.01, 1.001, 1.0001,
2/1.1, 2/1.5, 2/1.25, 2/1.125};
>>>>>>> Stashed changes
double exps[ENTRIES] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
double exps[ENTRIES] = {0, 0, 2, 3, 4, 5, 6, 7, 8, 1, 10,
11, 12, 13, 14, 15, 16};
int i;
int bias = 1023;
@ -51,10 +47,19 @@ void main(void)
for (i=0; i<ENTRIES; i++) {
aFrac = mans[i];
aExp = exps[i] + bias;
rFrac = sqrt(aFrac * pow(2, aExp - bias));
rFrac = sqrt(aFrac * pow(2, exps[i]));
rExp = (int) (log(rFrac)/log(2) + bias);
output(fptr, aExp, aFrac, rExp, rFrac);
}
// WS
// Test 1: sqrt(1) = 1 0000 0000 0000 00
// Test 2: sqrt(1849/1024) = 43/32 0000 1100 1110 01
// Test 3: sqrt(5) 0000 0100 0000 00
// Test 4: sqrt(9) = 3 1111 1001 0000 00
// Test 5: sqrt(17) 0000 0001 0000 00
// Test 6: sqrt(56) 1111 1110 0000 00
// Test 7: sqrt(120) 0000 1110 0000 00
// for (i = 0; i< RANDOM_VECS; i++) {
// a = random_input();
@ -69,14 +74,23 @@ void main(void)
void output(FILE *fptr, int aExp, double aFrac, int rExp, double rFrac)
{
// Print a in standard double format
fprintf(fptr, "%03x", aExp);
printhex(fptr, aFrac);
fprintf(fptr, "_");
// Spacing for testbench, value doesn't matter
fprintf(fptr, "%016x", 0);
fprintf(fptr, "_");
// Print r in standard double format
fprintf(fptr, "%03x", rExp);
printhex(fptr, rFrac);
fprintf(fptr, "_");
// Spacing for testbench, value doesn't matter
fprintf(fptr, "%016x", 0);
fprintf(fptr, "\n");
}
void printhex(FILE *fptr, double m)

View File

@ -1,5 +1,5 @@
add wave -noupdate /testbench/*
add wave -noupdate /testbench/srt/*
add wave -noupdate /testbench/srt/otfc2/*
add wave -noupdate /testbench/srt/sotfc2/*
add wave -noupdate /testbench/srt/preproc/*
add wave -noupdate /testbench/srt/divcounter/*

View File

@ -29,8 +29,6 @@
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
`define EXTRAFRACBITS ((`NF<(`XLEN)) ? (`XLEN - `NF) : 0)
`define EXTRAINTBITS ((`NF<(`XLEN)) ? 0 : (`NF - `XLEN))
module srt (
input logic clk,
@ -49,18 +47,19 @@ module srt (
input logic Int, // Choose integer inputs
input logic Sqrt, // perform square root, not divide
output logic rsign, done,
output logic [`DIVLEN-1:0] Rem, Quot, // *** later handle integers
output logic [`DIVLEN-2:0] Rem, Quot, // *** later handle integers
output logic [`NE-1:0] rExp,
output logic [3:0] Flags
);
logic qp, qz, qm; // quotient is +1, 0, or -1
logic [`NE-1:0] calcExp;
logic calcSign;
logic [`DIVLEN+3:0] X, Dpreproc;
logic [`DIVLEN+3:0] WS, WSA, WSN, WC, WCA, WCN, D, Db, Dsel;
logic qp, qz, qn; // quotient is +1, 0, or -1
logic [`NE-1:0] calcExp;
logic calcSign;
logic [`DIVLEN+3:0] X, Dpreproc, C, F, AddIn;
logic [`DIVLEN+3:0] WS, WSA, WSN, WC, WCA, WCN, D, Db, Dsel;
logic [$clog2(`XLEN+1)-1:0] intExp, dur, calcDur;
logic intSign;
logic intSign;
logic cin;
srtpreproc preproc(SrcA, SrcB, SrcXFrac, SrcYFrac, XExp, Fmt, W64, Signed, Int, Sqrt, X, Dpreproc, intExp, calcDur, intSign);
@ -76,23 +75,31 @@ module srt (
// Quotient Selection logic
// Given partial remainder, select quotient of +1, 0, or -1 (qp, qz, pm)
qsel2 qsel2(WS[`DIVLEN+3:`DIVLEN], WC[`DIVLEN+3:`DIVLEN], qp, qz, qm);
qsel2 qsel2(WS[`DIVLEN+3:`DIVLEN-1], WC[`DIVLEN+3:`DIVLEN-1], Sqrt, qp, qz, qn);
flopen #(`NE) expflop(clk, Start, calcExp, rExp);
flopen #(1) signflop(clk, Start, calcSign, rsign);
flopen #(7) durflop(clk, Start, calcDur, dur);
counter divcounter(clk, Start, dur, done);
srtcounter divcounter(clk, Start, dur, done);
// Divisor Selection logic
assign Db = ~D;
mux3onehot #(`DIVLEN) divisorsel(Db, {(`DIVLEN+4){1'b0}}, D, qp, qz, qm, Dsel);
mux3onehot #(`DIVLEN) divisorsel(Db, {(`DIVLEN+4){1'b0}}, D, qp, qz, qn, Dsel);
// If only implementing division, use divide otfc
// otfc2 #(`DIVLEN) otfc2(clk, Start, qp, qz, qn, Quot);
// otherwise use sotfc
creg sotfcC(clk, Start, C);
sotfc2 sotfc2(clk, Start, qp, qn, C, Quot, F);
// Adder input selection
assign AddIn = Sqrt ? F : Dsel;
// Partial Product Generation
csa #(`DIVLEN+4) csa(WS, WC, Dsel, qp, WSA, WCA);
assign cin = ~Sqrt & qp;
csa #(`DIVLEN+4) csa(WS, WC, AddIn, cin, WSA, WCA);
otfc2 #(`DIVLEN) otfc2(clk, Start, qp, qz, qm, Quot);
expcalc expcalc(.XExp, .YExp, .calcExp, .Sqrt);
signcalc signcalc(.XSign, .YSign, .calcSign);
@ -121,42 +128,53 @@ module srtpreproc (
logic [$clog2(`XLEN+1)-1:0] zeroCntA, zeroCntB;
logic [`XLEN-1:0] PosA, PosB;
logic [`DIVLEN-1:0] ExtraA, ExtraB, PreprocA, PreprocB, PreprocX, PreprocY, DivX, SqrtX;
logic [`DIVLEN-1:0] ExtraA, ExtraB, PreprocA, PreprocB, PreprocX, PreprocY, DivX;
logic [`NF+4:0] SqrtX;
// Generate positive integer inputs if they are signed
assign PosA = (Signed & SrcA[`XLEN - 1]) ? -SrcA : SrcA;
assign PosB = (Signed & SrcB[`XLEN - 1]) ? -SrcB : SrcB;
// Calculate leading zeros of integer inputs
lzc #(`XLEN) lzcA (PosA, zeroCntA);
lzc #(`XLEN) lzcB (PosB, zeroCntB);
// Make integers have DIVLEN bits
assign ExtraA = {PosA, {`EXTRAINTBITS{1'b0}}};
assign ExtraB = {PosB, {`EXTRAINTBITS{1'b0}}};
// Shift integers to have leading ones
assign PreprocA = ExtraA << (zeroCntA + 1);
assign PreprocB = ExtraB << (zeroCntB + 1);
// Make mantissas have DIVLEN bits
assign PreprocX = {SrcXFrac, {`EXTRAFRACBITS{1'b0}}};
assign PreprocY = {SrcYFrac, {`EXTRAFRACBITS{1'b0}}};
// Selecting correct divider inputs
assign DivX = Int ? PreprocA : PreprocX;
assign SqrtX = {XExp[0] ? 4'b0000 : 4'b1111, SrcXFrac};
assign X = Sqrt ? SqrtX : {4'b0001, DivX};
assign SqrtX = XExp[0] ? {4'b0000, SrcXFrac, 1'b0} : {5'b11111, SrcXFrac};
assign X = Sqrt ? {SqrtX, {(`EXTRAFRACBITS-1){1'b0}}} : {4'b0001, DivX};
assign D = {4'b0001, Int ? PreprocB : PreprocY};
// Integer exponent and sign calculations
assign intExp = zeroCntB - zeroCntA + 1;
assign intSign = Signed & (SrcA[`XLEN - 1] ^ SrcB[`XLEN - 1]);
assign dur = Int ? (intExp & {7{~intExp[6]}}) : (`DIVLEN + 2);
// Number of cycles of divider
assign dur = Int ? (intExp & {7{~intExp[6]}}) : (7)'(`DIVLEN);
endmodule
/////////////////////////////////
// Quotient Selection, Radix 2 //
/////////////////////////////////
module qsel2 ( // *** eventually just change to 4 bits
input logic [`DIVLEN+3:`DIVLEN] ps, pc,
output logic qp, qz, qm
input logic [`DIVLEN+3:`DIVLEN-1] ps, pc,
input logic Sqrt,
output logic qp, qz, qn
);
logic [`DIVLEN+3:`DIVLEN] p, g;
logic [`DIVLEN+3:`DIVLEN-1] p, g;
logic magnitude, sign, cout;
// The quotient selection logic is presented for simplicity, not
@ -167,8 +185,8 @@ module qsel2 ( // *** eventually just change to 4 bits
assign p = ps ^ pc;
assign g = ps & pc;
assign #1 magnitude = ~(&p[`DIVLEN+2:`DIVLEN]);
assign #1 cout = g[`DIVLEN+2] | (p[`DIVLEN+2] & (g[`DIVLEN+1] | p[`DIVLEN+1] & g[`DIVLEN]));
assign #1 magnitude = ~(&p[`DIVLEN+2:`DIVLEN-1]);
assign #1 cout = g[`DIVLEN+2] | (p[`DIVLEN+2] & (g[`DIVLEN+1] | p[`DIVLEN+1] & (g[`DIVLEN] | (Sqrt & (p[`DIVLEN] & g[`DIVLEN-1])))));
assign #1 sign = p[`DIVLEN+3] ^ cout;
/* assign #1 magnitude = ~((ps[54]^pc[54]) & (ps[53]^pc[53]) &
(ps[52]^pc[52]));
@ -180,7 +198,7 @@ module qsel2 ( // *** eventually just change to 4 bits
// Produce quotient = +1, 0, or -1
assign #1 qp = magnitude & ~sign;
assign #1 qz = ~magnitude;
assign #1 qm = magnitude & sign;
assign #1 qn = magnitude & sign;
endmodule
////////////////////////////////////
@ -191,45 +209,36 @@ module fsel2 (
input logic [`DIVLEN+3:0] C, S, SM,
output logic [`DIVLEN+3:0] F
);
logic [`DIVLEN+3:0] FP, FN;
logic [`DIVLEN+3:0] FP, FN, FZ;
// Generate for both positive and negative bits
assign FP = ~S & C;
assign FN = SM | (C & (~C << 2));
assign FZ = {(`DIVLEN+4){1'b0}};
// Choose which adder input will be used
assign F = sp ? FP : (sn ? FN : (`DIVLEN+4){1'b0});
assign F = sp ? FP : (sn ? FN : FZ);
endmodule
///////////////////////////////////
// On-The-Fly Converter, Radix 2 //
///////////////////////////////////
module otfc2 #(parameter N=64) (
module otfc2 #(parameter N=66) (
input logic clk,
input logic Start,
input logic qp, qz, qm,
output logic [N-1:0] r
input logic qp, qz, qn,
output logic [N-3:0] r
);
// The on-the-fly converter transfers the quotient
// bits to the quotient as they come.
//
// This code follows the psuedocode presented in the
// floating point chapter of the book. Right now,
// it is written for Radix-2 division.
//
// QM is Q-1. It allows us to write negative bits
// without using a costly CPA.
// bits to the quotient as they come.
// Use this otfc for division only.
logic [N+2:0] Q, QM, QNext, QMNext, QMMux;
// QR and QMR are the shifted versions of Q and QM.
// They are treated as [N-1:r] size signals, and
// discard the r most significant bits of Q and QM.
logic [N+1:0] QR, QMR;
flopr #(N+3) Qreg(clk, Start, QNext, Q);
mux2 #(`DIVLEN+3) QMmux(QMNext, {`DIVLEN+3{1'b1}}, Start, QMMux);
mux2 #(`DIVLEN+3) Qmux(QMNext, {(`DIVLEN+3){1'b1}}, Start, QMMux);
flop #(`DIVLEN+3) QMreg(clk, QMMux, QM);
always_comb begin
@ -241,35 +250,76 @@ module otfc2 #(parameter N=64) (
end else if (qz) begin
QNext = {QR, 1'b0};
QMNext = {QMR, 1'b1};
end else begin // If qp and qz are not true, then qm is
end else begin // If qp and qz are not true, then qn is
QNext = {QMR, 1'b1};
QMNext = {QMR, 1'b0};
end
end
assign r = Q[N+2] ? Q[N+1:2] : Q[N:1];
assign r = Q[N] ? Q[N-1:2] : Q[N-2:1];
endmodule
///////////////////////////////
// Square Root OTFC, Radix 2 //
///////////////////////////////
module softc2(
input logic clk,
input logic Start,
input logic sp, sn,
output logic S,
module sotfc2(
input logic clk,
input logic Start,
input logic sp, sn,
input logic [`DIVLEN+3:0] C,
output logic [`DIVLEN-2:0] Sq,
output logic [`DIVLEN+3:0] F
);
// The on-the-fly converter transfers the square root
// bits to the quotient as they come.
// Use this otfc for division and square root.
logic [`DIVLEN+3:0] S, SM, SNext, SMNext, SMux;
flopr #(`DIVLEN+4) SMreg(clk, Start, SMNext, SM);
mux2 #(`DIVLEN+4) Smux(SNext, {4'b0001, {(`DIVLEN){1'b0}}}, Start, SMux);
flop #(`DIVLEN+4) Sreg(clk, SMux, S);
always_comb begin
if (sp) begin
SNext = S | ((C << 1) & ~(C << 2));
SMNext = S;
end else if (sn) begin
SNext = SM | ((C << 1) & ~(C << 2));
SMNext = SM;
end else begin // If sp and sn are not true, then sz is
SNext = S;
SMNext = SM | ((C << 1) & ~(C << 2));
end
end
assign Sq = S[`DIVLEN] ? S[`DIVLEN-1:1] : S[`DIVLEN-2:0];
fsel2 fsel(sp, sn, C, S, SM, F);
endmodule
//////////////////////////
// C Register for SOTFC //
//////////////////////////
module creg(input logic clk,
input logic Start,
output logic [`DIVLEN+3:0] C
);
logic [`DIVLEN+3:0] CMux;
mux2 #(`DIVLEN+4) Cmux({1'b1, C[`DIVLEN+3:1]}, {6'b111111, {(`DIVLEN-2){1'b0}}}, Start, CMux);
flop #(`DIVLEN+4) cflop(clk, CMux, C);
endmodule
/////////////
// counter //
/////////////
module counter(input logic clk,
input logic req,
input logic [$clog2(`XLEN+1)-1:0] dur,
output logic done);
module srtcounter(input logic clk,
input logic req,
input logic [$clog2(`XLEN+1)-1:0] dur,
output logic done
);
logic [$clog2(`XLEN+1)-1:0] count;
logic [$clog2(`XLEN+1)-1:0] count;
// This block of control logic sequences the divider
// through its iterations. You may modify it if you

View File

@ -1,4 +1,4 @@
`define DIVLEN 64
`include "wally-config.vh"
/////////////
// counter //
@ -39,37 +39,27 @@ endmodule
// testbench //
//////////
module testbench;
logic clk;
logic req;
logic done;
logic Int;
logic [63:0] a, b;
logic [51:0] afrac, bfrac;
logic [10:0] aExp, bExp;
logic asign, bsign;
logic [51:0] r;
logic [63:0] rInt;
logic [`DIVLEN-1:0] Quot;
logic clk;
logic req;
logic done;
logic Int;
logic [`XLEN-1:0] a, b;
logic [`NF-1:0] afrac, bfrac;
logic [`NE-1:0] aExp, bExp;
logic asign, bsign;
logic [`NF-1:0] r;
logic [`XLEN-1:0] rInt;
logic [`DIVLEN-2:0] Quot;
// Test parameters
parameter MEM_SIZE = 40000;
parameter MEM_WIDTH = 64+64+64+64;
// INT TEST SIZES
// `define memrem 63:0
// `define memr 127:64
// `define memb 191:128
// `define mema 255:192
// FLOAT TEST SIZES
// `define memr 63:0
// `define memb 127:64
// `define mema 191:128
// SQRT TEST SIZES
`define memr 63:0
`define mema 127:64
// Test sizes
`define memrem 63:0
`define memr 127:64
`define memb 191:128
`define mema 255:192
// Test logicisters
logic [MEM_WIDTH-1:0] Tests [0:MEM_SIZE]; // Space for input file
@ -118,16 +108,16 @@ module testbench;
b = Vec[`memb];
{bsign, bExp, bfrac} = b;
nextr = Vec[`memr];
r = Quot[(`DIVLEN - 1):(`DIVLEN - 52)];
rInt = Quot;
r = Quot[(`DIVLEN - 2):(`DIVLEN - `NF - 1)];
rInt = {1'b1, Quot};
req <= #5 1;
end
// Apply directed test vectors read from file.
always @(posedge clk) begin
r = Quot[(`DIVLEN - 1):(`DIVLEN - 52)];
rInt = Quot;
r = Quot[(`DIVLEN - 2):(`DIVLEN - `NF - 1)];
rInt = {1'b1, Quot};
if (done) begin
if (~Int & ~Sqrt) begin
req <= #5 1;
@ -165,15 +155,14 @@ module testbench;
req <= #5 1;
diffp = correctr[51:0] - r;
diffn = r - correctr[51:0];
if (rExp !== correctr[62:52]) // check if accurate to 1 ulp
if ((rExp !== correctr[62:52]) | ($signed(diffn) > 1) | ($signed(diffp) > 1) | (diffn === 64'bx) | (diffp === 64'bx)) // check if accurate to 1 ulp
begin
errors = errors + 1;
$display("result was %h, should be %h %h %h\n", r, correctr, diffn, diffp);
$display("failed\n");
$stop;
end
if (afrac === 52'hxxxxxxxxxxxxx) begin
$display("%d Tests completed successfully", testnum);
$display("%d Tests completed successfully", testnum-errors);
$stop; end
end
end

View File

@ -80,17 +80,17 @@ module testbenchfp;
logic CvtResSgnE;
logic [`NE:0] CvtCalcExpE; // the calculated expoent
logic [`LOGCVTLEN-1:0] CvtShiftAmtE; // how much to shift by
logic [`DIVLEN+2:0] Quot;
logic [`QLEN-1-(`RADIX/4):0] Quot;
logic CvtResDenormUfE;
logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2;
logic [`DURLEN-1:0] EarlyTermShift;
logic DivStart, DivBusy;
logic reset = 1'b0;
logic [`DIVLEN-1:0] DivX;
logic [`DIVLEN-1:0] Dpreproc;
logic [`DIVLEN+3:0] WSN, WS;
logic [`DIVLEN+3:0] WCN, WC;
logic [`DIVLEN+3:0] NextWSN, WS;
logic [`DIVLEN+3:0] NextWCN, WC;
logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt;
logic [$clog2(`DIVLEN/2+3)-1:0] Dur;
logic [`DURLEN-1:0] Dur;
// in-between FMA signals
logic Mult;
@ -679,15 +679,15 @@ module testbenchfp;
.Pe, .ZmSticky, .KillProd);
postprocess postprocess(.Xs(XSgn), .Ys(YSgn), .PostProcSel(UnitVal[1:0]),
.Ze(ZExp), .ZDenorm(ZDenorm), .FOpCtrl(OpCtrlVal), .Quot, .DivCalcExp(DivCalcExp),
.Xm(XMan), .Ym(YMan), .Zm(ZMan), .CvtCe(CvtCalcExpE), .DivSticky(DivSticky),
.XNaN(XNaN), .YNaN(YNaN), .ZNaN(ZNaN), .CvtResDenormUf(CvtResDenormUfE), .DivNegSticky,
.Ze(ZExp), .ZDenorm(ZDenorm), .FOpCtrl(OpCtrlVal), .DivQm(Quot), .DivQe(DivCalcExp),
.Xm(XMan), .Ym(YMan), .Zm(ZMan), .CvtCe(CvtCalcExpE), .DivS(DivSticky),
.XNaN(XNaN), .YNaN(YNaN), .ZNaN(ZNaN), .CvtResDenormUf(CvtResDenormUfE),
.XZero(XZero), .YZero(YZero), .ZZero(ZZero), .CvtShiftAmt(CvtShiftAmtE),
.XInf(XInf), .YInf(YInf), .ZInf(ZInf), .CvtCs(CvtResSgnE), .ToInt(WriteIntVal),
.XSNaN(XSNaN), .YSNaN(YSNaN), .ZSNaN(ZSNaN), .CvtLzcIn(CvtLzcInE), .IntZero,
.FmaKillProd(KillProd), .FmaZmSticky(ZmSticky), .FmaPe(Pe), .DivDone,
.FmaSm(Sm), .FmaNegSum(NegSum), .FmaInvA(InvA), .FmaNCnt(NCnt), .DivEarlyTermShiftDiv2(EarlyTermShiftDiv2), .FmaAs(As), .FmaPs(Ps), .Fmt(ModFmt), .Frm(FrmVal),
.PostProcFlg(Flg), .W(FpRes), .FCvtIntRes(IntRes));
.FmaKillProd(KillProd), .FmaZmS(ZmSticky), .FmaPe(Pe), .DivDone,
.FmaSm(Sm), .FmaNegSum(NegSum), .FmaInvA(InvA), .FmaNCnt(NCnt), .DivEarlyTermShift(EarlyTermShift), .FmaAs(As), .FmaPs(Ps), .Fmt(ModFmt), .Frm(FrmVal),
.PostProcFlg(Flg), .PostProcRes(FpRes), .FCvtIntRes(IntRes));
fcvt fcvt (.Xs(XSgn), .Xe(XExp), .Xm(XMan), .Int(SrcA), .ToInt(WriteIntVal),
.XZero(XZero), .XDenorm(XDenorm), .FOpCtrl(OpCtrlVal), .IntZero,
@ -695,11 +695,10 @@ module testbenchfp;
fcmp fcmp (.FmtE(ModFmt), .FOpCtrlE(OpCtrlVal), .XSgnE(XSgn), .YSgnE(YSgn), .XExpE(XExp), .YExpE(YExp),
.XManE(XMan), .YManE(YMan), .XZeroE(XZero), .YZeroE(YZero), .CmpIntResE(CmpRes),
.XNaNE(XNaN), .YNaNE(YNaN), .XSNaNE(XSNaN), .YSNaNE(YSNaN), .FSrcXE(X), .FSrcYE(Y), .CmpNVE(CmpFlg[4]), .CmpFpResE(FpCmpRes));
srtpreproc srtpreproc(.XManE(XMan), .Dur, .YManE(YMan),.X(DivX),.Dpreproc, .XZeroCnt, .YZeroCnt);
srtfsm srtfsm(.reset, .WSN, .WCN, .WS, .WC, .Dur, .DivBusy, .DivDone, .clk, .DivStart, .StallM(1'b0), .StallE(1'b0), .XZeroE(XZero), .YZeroE(YZero), .DivStickyE(DivSticky), .XNaNE(XNaN), .YNaNE(YNaN),
.XInfE(XInf), .YInfE(YInf), .DivNegStickyE(DivNegSticky), .EarlyTermShiftDiv2E(EarlyTermShiftDiv2));
srtradix4 srtradix4(.clk, .FmtE(ModFmt), .X(DivX),.Dpreproc, .DivBusy, .XZeroCnt, .YZeroCnt, .WS, .WC, .WSN, .WCN, .DivStart, .XExpE(XExp), .YExpE(YExp), .XZeroE(XZero), .YZeroE(YZero),
.Quot, .Rem(), .DivCalcExpM(DivCalcExp));
divsqrt divsqrt(.clk, .reset, .FmtE(ModFmt), .XManE(XMan), .YManE(YMan), .XExpE(XExp), .YExpE(YExp),
.XInfE(XInf), .YInfE(YInf), .XZeroE(XZero), .YZeroE(YZero), .XNaNE(XNaN), .YNaNE(YNaN), .DivStartE(DivStart),
.StallE(1'b0), .StallM(1'b0), .DivStickyM(DivSticky), .DivBusy, .DivCalcExpM(DivCalcExp),
.EarlyTermShiftM(EarlyTermShift), .QuotM(Quot), .DivDone);
assign CmpFlg[3:0] = 0;
@ -854,7 +853,7 @@ end
// check if result is correct
// - wait till the division result is done or one extra cylcle for early termination (to simulate the EM pipline stage)
if(~((Res === Ans | NaNGood | NaNGood === 1'bx) & (ResFlg === AnsFlg | AnsFlg === 5'bx))&~(DivBusy|DivStart)&(UnitVal !== `CVTINTUNIT)&(UnitVal !== `CMPUNIT)) begin
if(~((Res === Ans | NaNGood | NaNGood === 1'bx) & (ResFlg === AnsFlg | AnsFlg === 5'bx))&~((DivBusy===1'b1)|DivStart)&(UnitVal !== `CVTINTUNIT)&(UnitVal !== `CMPUNIT)) begin
errors += 1;
$display("There is an error in %s", Tests[TestNum]);
$display("inputs: %h %h %h\nSrcA: %h\n Res: %h %h\n Ans: %h %h", X, Y, Z, SrcA, Res, ResFlg, Ans, AnsFlg);

View File

@ -114,6 +114,7 @@ logic [3:0] dummy;
"arch32f": if (`F_SUPPORTED) tests = arch32f;
"imperas32i": tests = imperas32i;
"imperas32f": if (`F_SUPPORTED) tests = imperas32f;
// "wally32d": if (`D_SUPPORTED) tests = wally32d;
"imperas32m": if (`M_SUPPORTED) tests = imperas32m;
"wally32a": if (`A_SUPPORTED) tests = wally32a;
"imperas32c": if (`C_SUPPORTED) tests = imperas32c;

View File

@ -34,7 +34,7 @@
string tvpaths[] = '{
"../../addins/imperas-riscv-tests/work/",
"../../tests/riscof/work/riscv-arch-test/",
"../../tests/wally-riscv-arch-test/work/", //"../../tests/riscof/work/wally-riscv-arch-test/",
"../../tests/wally-riscv-arch-test/work/", //"../../tests/riscof/work/wally-riscv-arch-test/", //
"../../tests/imperas-riscv-tests/work/",
"../../benchmarks/coremark/work/",
"../../addins/embench-iot/"

View File

@ -107,7 +107,7 @@ ifeq ($(SAIFPOWER), 1)
cp -f ../pipelined/regression/power.saif .
endif
dc_shell-xg-t -64bit -f scripts/$(NAME).tcl | tee $(OUTPUTDIR)/$(NAME).out
rm -rf $(OUTPUTDIR)/hdl
# rm -rf $(OUTPUTDIR)/hdl
rm -rf $(OUTPUTDIR)/WORK
rm -rf $(OUTPUTDIR)/alib-52

View File

@ -11,6 +11,7 @@ import numpy as np
from ppa.ppaAnalyze import noOutliers
from matplotlib import ticker
import argparse
import os
def synthsintocsv():
@ -59,6 +60,7 @@ def synthsintocsv():
writer.writerow([width, config, special, tech, freq, delay, area])
file.close()
def synthsfromcsv(filename):
Synth = namedtuple("Synth", "width config special tech freq delay area")
with open(filename, newline='') as csvfile:
@ -74,10 +76,16 @@ def synthsfromcsv(filename):
allSynths[i] = Synth(*allSynths[i])
return allSynths
def freqPlot(tech, width, config):
''' plots delay, area for syntheses with specified tech, module, width
'''
current_directory = os.getcwd()
final_directory = os.path.join(current_directory, 'plots/wally')
if not os.path.exists(final_directory):
os.makedirs(final_directory)
freqsL, delaysL, areasL = ([[], []] for i in range(3))
for oneSynth in allSynths:
if (width == oneSynth.width) & (config == oneSynth.config) & (tech == oneSynth.tech) & ('' == oneSynth.special):
@ -151,6 +159,7 @@ def areaDelay(tech, delays, areas, labels, fig, ax, norm=False):
return fig
def plotFeatures(tech, width, config):
delays, areas, labels = ([] for i in range(3))
freq = techdict[tech].targfreq
@ -168,7 +177,8 @@ def plotFeatures(tech, width, config):
titlestr = tech+'_'+width+config
plt.title(titlestr)
plt.savefig('./plots/wally/features_'+titlestr+'.png')
def plotConfigs(tech, special=''):
delays, areas, labels = ([] for i in range(3))
freq = techdict[tech].targfreq
@ -207,7 +217,8 @@ def normAreaDelay(special=''):
ax.set_ylabel('Area (add32)')
ax.legend(handles = fullLeg, loc='upper left')
plt.savefig('./plots/wally/normAreaDelay.png')
def addFO4axis(fig, ax, tech):
fo4 = techdict[tech].fo4

View File

@ -56,7 +56,7 @@ set vhdlout_show_unconnected_pins "true"
# Due to parameterized Verilog must use analyze/elaborate and not
# read_verilog/vhdl (change to pull in Verilog and/or VHDL)
#
set alib_library_analysis_path ./$outputDir
#set alib_library_analysis_path ./$outputDir
define_design_lib WORK -path ./$outputDir/WORK
analyze -f sverilog -lib WORK $my_verilog_files
elaborate $my_toplevel -lib WORK
@ -347,7 +347,7 @@ redirect -append $filename { report_timing -capacitance -transition_time -nets -
redirect -append $filename { echo "\n\n\n//// Critical paths through fma2 ////\n\n\n" }
redirect -append $filename { report_timing -capacitance -transition_time -nets -through {postprocess/*} -nworst 1 }
redirect -append $filename { echo "\n\n\n//// Critical paths through fpdiv ////\n\n\n" }
redirect -append $filename { report_timing -capacitance -transition_time -nets -through {fdivsqrt/*} -nworst 1 }
redirect -append $filename { report_timing -capacitance -transition_time -nets -through {divsqrt/*} -nworst 1 }
redirect -append $filename { echo "\n\n\n//// Critical paths through fcvt ////\n\n\n" }
redirect -append $filename { report_timing -capacitance -transition_time -nets -through {fcvt/*} -nworst 1 }

View File

@ -8,7 +8,7 @@ wally_workdir = $(work)/wally-riscv-arch-test
current_dir = $(shell pwd)
XLEN ?= 64
all: root build_arch # build_wally memfile
all: root build_arch #build_wally memfile
root:
mkdir -p $(work_dir)

View File

@ -108,7 +108,7 @@ class spike(pluginTemplate):
#TODO: The following assumes you are using the riscv-gcc toolchain. If
# not please change appropriately
self.compile_cmd = self.compile_cmd+' -mabi='+('lp64 ' if 64 in ispec['supported_xlen'] else 'ilp32 ')
self.compile_cmd = self.compile_cmd+' -mabi='+('lp64 ' if 64 in ispec['supported_xlen'] else ('ilp32e ' if "E" in ispec["ISA"] else 'ilp32 '))
def runTests(self, testList):
@ -158,7 +158,12 @@ class spike(pluginTemplate):
# echo statement.
if self.target_run:
# set up the simulation command. Template is for spike. Please change.
simcmd = self.dut_exe + ' --isa={0} +signature={1} +signature-granularity=4 {2}'.format(self.isa, sig_file, elf)
if ('NO_SAIL=True' in testentry['macros']):
# if the tests can't run on SAIL we copy the reference output to the src directory
reference_output = re.sub("/src/","/references/", re.sub(".S",".reference_output", test))
simcmd = 'cut -c-{0:g} {1} > {2}'.format(8, reference_output, sig_file) #use cut to remove comments when copying
else:
simcmd = self.dut_exe + ' --isa={0} +signature={1} +signature-granularity=4 {2}'.format(self.isa, sig_file, elf)
else:
simcmd = 'echo "NO RUN"'

View File

@ -1,11 +1,11 @@
hart_ids: [0]
hart0:
ISA: RV32IMAFCZicsr_Zifencei
ISA: RV32IMAFDCZicsr_Zifencei
physical_addr_sz: 32
User_Spec_Version: '2.3'
supported_xlen: [32]
misa:
reset-val: 0x40001125
reset-val: 0x4000112D
rv32:
accessible: true
mxl:
@ -23,6 +23,6 @@ hart0:
warl:
dependency_fields: []
legal:
- extensions[25:0] bitmask [0x0001125, 0x0000000]
- extensions[25:0] bitmask [0x000112D, 0x0000000]
wr_illegal:
- Unchanged