forked from Github_Repos/cvw
Merge branch 'main' of https://github.com/davidharrishmc/riscv-wally into main
This commit is contained in:
commit
a190bc4471
@ -39,7 +39,7 @@
|
||||
|
||||
// MISA RISC-V configuration per specification
|
||||
// ZYXWVUTSRQPONMLKJIHGFEDCBA
|
||||
`define MISA 32'b0000000000101000001000100100101
|
||||
`define MISA 32'b0000000000101000001000100101101
|
||||
`define ZICSR_SUPPORTED 1
|
||||
`define ZIFENCEI_SUPPORTED 1
|
||||
`define COUNTERS 32
|
||||
|
@ -95,11 +95,25 @@
|
||||
|
||||
// largest length in IEU/FPU
|
||||
`define CVTLEN ((`NF<`XLEN) ? (`XLEN) : (`NF))
|
||||
`define DIVLEN ((`NF < `XLEN) ? (`XLEN) : (`NF))
|
||||
`define LLEN ((`FLEN<`XLEN) ? (`XLEN) : (`FLEN))
|
||||
`define LOGCVTLEN $unsigned($clog2(`CVTLEN+1))
|
||||
`define NORMSHIFTSZ ((`DIVLEN+`NF+3) > (3*`NF+8) ? (`DIVLEN+`NF+3) : (3*`NF+9))
|
||||
`define CORRSHIFTSZ ((`DIVLEN+`NF+3) > (3*`NF+8) ? (`DIVLEN+`NF+3) : (3*`NF+6))
|
||||
`define NORMSHIFTSZ ((`QLEN+`NF+3) > (3*`NF+8) ? (`QLEN+`NF+1) : (3*`NF+9))
|
||||
`define CORRSHIFTSZ ((`DIVRESLEN+`NF) > (3*`NF+8) ? (`DIVRESLEN+`NF) : (3*`NF+6))
|
||||
|
||||
// division constants
|
||||
`define RADIX 32'h2
|
||||
`define DIVCOPIES 32'h1
|
||||
`define DIVLEN ((`NF < `XLEN) ? (`XLEN) : (`NF + 3))
|
||||
`define EXTRAFRACBITS ((`NF<(`XLEN)) ? (`XLEN - `NF) : 3)
|
||||
`define EXTRAINTBITS ((`NF<(`XLEN)) ? 0 : (`NF - `XLEN + 3))
|
||||
`define DIVRESLEN ((`NF>`XLEN) ? `NF+4 : `XLEN)
|
||||
`define LOGR ((`RADIX==2) ? 32'h1 : 32'h2)
|
||||
// FPDUR = ceil(DIVRESLEN/(LOGR*DIVCOPIES))
|
||||
// one iteration is required for the integer bit for minimally redundant radix-4
|
||||
`define FPDUR ((`DIVLEN+(`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES)+(`RADIX/4))
|
||||
`define DURLEN ($clog2(`FPDUR+1))
|
||||
`define QLEN (`FPDUR*`LOGR*`DIVCOPIES)
|
||||
|
||||
|
||||
`define USE_SRAM 0
|
||||
|
||||
|
@ -6,7 +6,7 @@
|
||||
# fma - test fma
|
||||
# sub - test subtraction
|
||||
# div - test division
|
||||
# sqrt - test square ro
|
||||
# sqrt - test square root
|
||||
# all - test everything
|
||||
|
||||
vsim -do "do testfloat.do rv64fp mul"
|
||||
vsim -do "do testfloat.do rv64fp $1"
|
||||
|
@ -1,7 +1,9 @@
|
||||
|
||||
# cvtint - test integer conversion unit (fcvtint)
|
||||
# cvtfp - test floating-point conversion unit (fcvtfp)
|
||||
# cmp - test comparison unit's LT, LE, EQ operations (fcmp)
|
||||
# add - test addition
|
||||
# fma - test fma
|
||||
# sub - test subtraction
|
||||
# div - test division
|
||||
# sqrt - test square root
|
||||
|
@ -1,2 +1,2 @@
|
||||
vsim -do "do wally-pipelined.do rv32gc arch32i"
|
||||
vsim -do "do wally-pipelined.do rv32gc wally32periph"
|
||||
|
||||
|
@ -1 +1 @@
|
||||
vsim -c -do "do wally-pipelined-batch.do rv64gc imperas64f"
|
||||
vsim -c -do "do wally-pipelined-batch.do rv32gc wally32d"
|
||||
|
@ -9,22 +9,31 @@ add wave -noupdate /testbenchfp/Res
|
||||
add wave -noupdate /testbenchfp/Ans
|
||||
add wave -noupdate /testbenchfp/DivStart
|
||||
add wave -noupdate /testbenchfp/DivBusy
|
||||
add wave -noupdate /testbenchfp/srtfsm/state
|
||||
add wave -noupdate /testbenchfp/divsqrt/srtfsm/state
|
||||
add wave -group {PostProc} -noupdate /testbenchfp/postprocess/*
|
||||
add wave -group {PostProc} -noupdate /testbenchfp/postprocess/resultselect/*
|
||||
add wave -group {PostProc} -noupdate /testbenchfp/postprocess/specialcase/*
|
||||
add wave -group {PostProc} -noupdate /testbenchfp/postprocess/flags/*
|
||||
add wave -group {PostProc} -noupdate /testbenchfp/postprocess/normshift/*
|
||||
add wave -group {PostProc} -noupdate /testbenchfp/postprocess/lzacorrection/*
|
||||
add wave -group {PostProc} -noupdate /testbenchfp/postprocess/shiftcorrection/*
|
||||
add wave -group {PostProc} -noupdate /testbenchfp/postprocess/resultsign/*
|
||||
add wave -group {PostProc} -noupdate /testbenchfp/postprocess/round/*
|
||||
add wave -group {PostProc} -noupdate /testbenchfp/postprocess/fmashiftcalc/*
|
||||
add wave -group {PostProc} -noupdate /testbenchfp/postprocess/divshiftcalc/*
|
||||
add wave -group {PostProc} -noupdate /testbenchfp/postprocess/cvtshiftcalc/*
|
||||
add wave -group {Divide} -noupdate /testbenchfp/srtradix4/*
|
||||
add wave -group {Divide} -noupdate /testbenchfp/srtradix4/qsel4/*
|
||||
add wave -group {Divide} -noupdate /testbenchfp/srtradix4/otfc4/*
|
||||
add wave -group {Divide} -noupdate /testbenchfp/srtpreproc/*
|
||||
add wave -group {Divide} -noupdate /testbenchfp/srtradix4/expcalc/*
|
||||
add wave -group {Divide} -noupdate /testbenchfp/srtfsm/*
|
||||
add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/WC
|
||||
add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/WS
|
||||
add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/WCA
|
||||
add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/WSA
|
||||
add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/Q
|
||||
add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/QM
|
||||
add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/QNext
|
||||
add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/QMNext
|
||||
add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/*
|
||||
add wave -group {Divide} -group inter0 -noupdate /testbenchfp/divsqrt/srt/interations[0]/divinteration/*
|
||||
# add wave -group {Divide} -group inter0 -noupdate /testbenchfp/divsqrt/srt/interations[0]/divinteration/otfc/otfc2/*
|
||||
# add wave -group {Divide} -group inter0 -noupdate /testbenchfp/divsqrt/srt/interations[0]/divinteration/qsel/qsel2/*
|
||||
add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srtpreproc/*
|
||||
add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/expcalc/*
|
||||
add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srtfsm/*
|
||||
add wave -group {Testbench} -noupdate /testbenchfp/*
|
||||
add wave -group {Testbench} -noupdate /testbenchfp/readvectors/*
|
||||
|
18
pipelined/src/cache/cache.sv
vendored
18
pipelined/src/cache/cache.sv
vendored
@ -42,10 +42,8 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGWPL, WORDLEN, MUXINTER
|
||||
input logic [11:0] NextAdr, // virtual address, but we only use the lower 12 bits.
|
||||
input logic [`PA_BITS-1:0] PAdr, // physical address
|
||||
input logic [(`XLEN-1)/8:0] ByteMask,
|
||||
input logic [`XLEN-1:0] FinalWriteData,
|
||||
input logic [`FLEN-1:0] FWriteDataM,
|
||||
input logic FLoad2,
|
||||
input logic FpLoadStoreM,
|
||||
input logic [WORDLEN-1:0] FinalWriteData,
|
||||
input logic FStore2,
|
||||
output logic CacheCommitted,
|
||||
output logic CacheStall,
|
||||
// to performance counters to cpu
|
||||
@ -72,7 +70,7 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGWPL, WORDLEN, MUXINTER
|
||||
localparam SETLEN = $clog2(NUMLINES);
|
||||
localparam SETTOP = SETLEN+OFFSETLEN;
|
||||
localparam TAGLEN = `PA_BITS - SETTOP;
|
||||
localparam WORDSPERLINE = LINELEN/`XLEN;
|
||||
localparam WORDSPERLINE = LINELEN/WORDLEN;
|
||||
localparam FlushAdrThreshold = NUMLINES - 1;
|
||||
|
||||
logic SelAdr;
|
||||
@ -123,7 +121,7 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGWPL, WORDLEN, MUXINTER
|
||||
|
||||
// Array of cache ways, along with victim, hit, dirty, and read merging logic
|
||||
cacheway #(NUMLINES, LINELEN, TAGLEN, OFFSETLEN, SETLEN)
|
||||
CacheWays[NUMWAYS-1:0](.clk, .reset, .RAdr, .PAdr, .CacheWriteData, .ByteMask, .FLoad2,
|
||||
CacheWays[NUMWAYS-1:0](.clk, .reset, .RAdr, .PAdr, .CacheWriteData, .ByteMask, .FStore2,
|
||||
.SetValidWay, .ClearValidWay, .SetDirtyWay, .ClearDirtyWay, .SelEvict, .VictimWay,
|
||||
.FlushWay, .SelFlush, .ReadDataLineWay, .HitWay, .VictimDirtyWay, .VictimTagWay,
|
||||
.Invalidate(InvalidateCacheM));
|
||||
@ -162,12 +160,8 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGWPL, WORDLEN, MUXINTER
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Write Path: Write data and address. Muxes between writes from bus and writes from CPU.
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////
|
||||
if (`LLEN>`XLEN)
|
||||
mux3 #(LINELEN) WriteDataMux(.d0({WORDSPERLINE{FinalWriteData}}),
|
||||
.d1({WORDSPERLINE/2{FWriteDataM}}), .d2(CacheBusWriteData), .s({SetValid,FpLoadStoreM&~SetValid}), .y(CacheWriteData));
|
||||
else
|
||||
mux2 #(LINELEN) WriteDataMux(.d0({WORDSPERLINE{FinalWriteData}}),
|
||||
.d1(CacheBusWriteData), .s(SetValid), .y(CacheWriteData));
|
||||
mux2 #(LINELEN) WriteDataMux(.d0({WORDSPERLINE{FinalWriteData}}),
|
||||
.d1(CacheBusWriteData), .s(SetValid), .y(CacheWriteData));
|
||||
mux3 #(`PA_BITS) CacheBusAdrMux(.d0({PAdr[`PA_BITS-1:OFFSETLEN], {OFFSETLEN{1'b0}}}),
|
||||
.d1({VictimTag, PAdr[SETTOP-1:OFFSETLEN], {OFFSETLEN{1'b0}}}),
|
||||
.d2({VictimTag, FlushAdr, {OFFSETLEN{1'b0}}}),
|
||||
|
4
pipelined/src/cache/cacheway.sv
vendored
4
pipelined/src/cache/cacheway.sv
vendored
@ -38,7 +38,7 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26,
|
||||
input logic [$clog2(NUMLINES)-1:0] RAdr,
|
||||
input logic [`PA_BITS-1:0] PAdr,
|
||||
input logic [LINELEN-1:0] CacheWriteData,
|
||||
input logic FLoad2,
|
||||
input logic FStore2,
|
||||
input logic SetValidWay,
|
||||
input logic ClearValidWay,
|
||||
input logic SetDirtyWay,
|
||||
@ -79,7 +79,7 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26,
|
||||
logic [2**LOGWPL-1:0] MemPAdrDecodedtmp;
|
||||
onehotdecoder #(LOGWPL) adrdec(
|
||||
.bin(PAdr[LOGWPL+LOGXLENBYTES-1:LOGXLENBYTES]), .decoded(MemPAdrDecodedtmp));
|
||||
assign MemPAdrDecoded = MemPAdrDecodedtmp|{MemPAdrDecodedtmp[2**LOGWPL-2:0]&{2**LOGWPL-1{FLoad2}}, 1'b0};
|
||||
assign MemPAdrDecoded = MemPAdrDecodedtmp|{MemPAdrDecodedtmp[2**LOGWPL-2:0]&{2**LOGWPL-1{FStore2}}, 1'b0};
|
||||
end else
|
||||
onehotdecoder #(LOGWPL) adrdec(
|
||||
.bin(PAdr[LOGWPL+LOGXLENBYTES-1:LOGXLENBYTES]), .decoded(MemPAdrDecoded));
|
||||
|
@ -1,10 +1,10 @@
|
||||
`include "wally-config.vh"
|
||||
|
||||
module divshiftcalc(
|
||||
input logic [`DIVLEN+2:0] Quot,
|
||||
input logic [`QLEN-1-(`RADIX/4):0] DivQm,
|
||||
input logic [`FMTBITS-1:0] Fmt,
|
||||
input logic [$clog2(`DIVLEN/2+3)-1:0] DivEarlyTermShiftDiv2,
|
||||
input logic [`NE+1:0] DivCalcExp,
|
||||
input logic [`DURLEN-1:0] DivEarlyTermShift,
|
||||
input logic [`NE+1:0] DivQe,
|
||||
output logic [$clog2(`NORMSHIFTSZ)-1:0] DivShiftAmt,
|
||||
output logic [`NORMSHIFTSZ-1:0] DivShiftIn,
|
||||
output logic DivResDenorm,
|
||||
@ -14,27 +14,28 @@ module divshiftcalc(
|
||||
|
||||
// is the result denormalized
|
||||
// if the exponent is 1 then the result needs to be normalized, then the result is denormalized
|
||||
assign DivResDenorm = DivCalcExp[`NE+1]|(~|DivCalcExp[`NE+1:0]);
|
||||
assign DivResDenorm = DivQe[`NE+1]|(~|DivQe[`NE+1:0]);
|
||||
|
||||
// if the result is denormalized
|
||||
// 00000000x.xxxxxx... Exp = DivCalcExp
|
||||
// .00000000xxxxxxx... >> NF+1 Exp = DivCalcExp+NF+1
|
||||
// .00xxxxxxxxxxxxx... << DivCalcExp+NF+1 Exp = +1
|
||||
// 00000000x.xxxxxx... Exp = DivQe
|
||||
// .00000000xxxxxxx... >> NF+1 Exp = DivQe+NF+1
|
||||
// .00xxxxxxxxxxxxx... << DivQe+NF+1 Exp = +1
|
||||
// .0000xxxxxxxxxxx... >> 1 Exp = 1
|
||||
// Left shift amount = DivCalcExp+NF+1-1
|
||||
assign DivDenormShift = (`NE+2)'(`NF)+DivCalcExp;
|
||||
// Left shift amount = DivQe+NF+1-1
|
||||
assign DivDenormShift = (`NE+2)'(`NF)+DivQe;
|
||||
// if the result is normalized
|
||||
// 00000000x.xxxxxx... Exp = DivCalcExp
|
||||
// .00000000xxxxxxx... >> NF+1 Exp = DivCalcExp+NF+1
|
||||
// 00000000.xxxxxxx... << NF Exp = DivCalcExp+1
|
||||
// 00000000x.xxxxxx... << NF Exp = DivCalcExp (extra shift done afterwards)
|
||||
// 00000000xx.xxxxx... << 1? Exp = DivCalcExp-1 (determined after)
|
||||
// 00000000x.xxxxxx... Exp = DivQe
|
||||
// .00000000xxxxxxx... >> NF+1 Exp = DivQe+NF+1
|
||||
// 00000000.xxxxxxx... << NF Exp = DivQe+1
|
||||
// 00000000x.xxxxxx... << NF Exp = DivQe (extra shift done afterwards)
|
||||
// 00000000xx.xxxxx... << 1? Exp = DivQe-1 (determined after)
|
||||
// initial left shift amount = NF
|
||||
// shift one more if it's a minimally redundant radix 4 - one entire cycle needed for integer bit
|
||||
assign NormShift = (`NE+2)'(`NF);
|
||||
// if the shift amount is negative then don't shift (keep sticky bit)
|
||||
assign DivShiftAmt = (DivResDenorm ? DivDenormShift[$clog2(`NORMSHIFTSZ)-1:0]&{$clog2(`NORMSHIFTSZ){~DivDenormShift[`NE+1]}} : NormShift[$clog2(`NORMSHIFTSZ)-1:0])+{{$clog2(`NORMSHIFTSZ)-$clog2(`DIVLEN/2+3)-1{1'b0}}, DivEarlyTermShiftDiv2&{$clog2(`DIVLEN/2+3){~DivDenormShift[`NE+1]}}, 1'b0};
|
||||
// need to multiply the early termination shift by LOGR*DIVCOPIES = left shift of log2(LOGR*DIVCOPIES)
|
||||
assign DivShiftAmt = (DivResDenorm ? DivDenormShift[$clog2(`NORMSHIFTSZ)-1:0]&{$clog2(`NORMSHIFTSZ){~DivDenormShift[`NE+1]}} : NormShift[$clog2(`NORMSHIFTSZ)-1:0])+{{$clog2(`NORMSHIFTSZ)-`DURLEN-$clog2(`LOGR*`DIVCOPIES){1'b0}}, DivEarlyTermShift&{`DURLEN{~DivDenormShift[`NE+1]}}, {$clog2(`LOGR*`DIVCOPIES){1'b0}}};
|
||||
|
||||
// *** may be able to reduce shifter size
|
||||
assign DivShiftIn = {{`NF{1'b0}}, Quot[`DIVLEN+2:0], {`NORMSHIFTSZ-`DIVLEN-3-`NF{1'b0}}};
|
||||
assign DivShiftIn = {{`NF{1'b0}}, DivQm, {`NORMSHIFTSZ-`QLEN+(`RADIX/4)-`NF{1'b0}}};
|
||||
|
||||
endmodule
|
||||
|
@ -43,26 +43,27 @@ module divsqrt(
|
||||
input logic StallM,
|
||||
input logic StallE,
|
||||
output logic DivStickyM,
|
||||
output logic DivNegStickyM,
|
||||
output logic DivBusy,
|
||||
output logic DivDone,
|
||||
output logic [`NE+1:0] DivCalcExpM,
|
||||
output logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2M,
|
||||
output logic [`DIVLEN+2:0] QuotM
|
||||
output logic [`DURLEN-1:0] EarlyTermShiftM,
|
||||
output logic [`QLEN-1-(`RADIX/4):0] QuotM
|
||||
// output logic [`XLEN-1:0] RemM,
|
||||
);
|
||||
|
||||
logic [`DIVLEN+3:0] WSN, WCN;
|
||||
logic [`DIVLEN+3:0] NextWSN, NextWCN;
|
||||
logic [`DIVLEN+3:0] WS, WC;
|
||||
logic [`DIVLEN+3:0] StickyWSA;
|
||||
logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt;
|
||||
logic [`DIVLEN-1:0] X;
|
||||
logic [`DIVLEN-1:0] Dpreproc;
|
||||
logic [$clog2(`DIVLEN/2+3)-1:0] Dur;
|
||||
logic [`DURLEN-1:0] Dur;
|
||||
logic NegSticky;
|
||||
|
||||
srtpreproc srtpreproc(.XManE, .Dur, .YManE,.X,.Dpreproc, .XZeroCnt, .YZeroCnt);
|
||||
srtpreproc srtpreproc(.Xm(XManE), .Dur, .Ym(YManE), .X,.Dpreproc, .XZeroCnt, .YZeroCnt);
|
||||
|
||||
srtfsm srtfsm(.reset, .WSN, .WCN, .WS, .WC, .Dur, .DivBusy, .clk, .DivStart(DivStartE),.StallE, .StallM, .DivDone, .XZeroE, .YZeroE, .DivStickyE(DivStickyM), .XNaNE, .YNaNE,
|
||||
.XInfE, .YInfE, .DivNegStickyE(DivNegStickyM), .EarlyTermShiftDiv2E(EarlyTermShiftDiv2M));
|
||||
srtradix4 srtradix4(.clk, .FmtE, .X,.Dpreproc, .XZeroCnt, .YZeroCnt, .WS, .WC, .WSN, .WCN, .DivStart(DivStartE), .XExpE, .YExpE, .XZeroE, .YZeroE,
|
||||
.DivBusy, .Quot(QuotM), .Rem(), .DivCalcExpM);
|
||||
srtfsm srtfsm(.reset, .NextWSN, .NextWCN, .WS, .WC, .Dur, .DivBusy, .clk, .DivStart(DivStartE),.StallE, .StallM, .DivDone, .XZeroE, .YZeroE, .DivStickyE(DivStickyM), .XNaNE, .YNaNE,
|
||||
.StickyWSA, .XInfE, .YInfE, .NegSticky(NegSticky), .EarlyTermShiftE(EarlyTermShiftM));
|
||||
srt srt(.clk, .FmtE, .X,.Dpreproc, .NegSticky, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN, .DivStart(DivStartE), .Xe(XExpE), .Ye(YExpE), .XZeroE, .YZeroE,
|
||||
.StickyWSA, .DivBusy, .Quot(QuotM), .Rem(), .DivCalcExpM);
|
||||
endmodule
|
@ -68,7 +68,8 @@ module fcvt (
|
||||
logic Signed; // is the opperation with a signed integer?
|
||||
logic Int64; // is the integer 64 bits?
|
||||
logic IntToFp; // is the opperation an int->fp conversion?
|
||||
logic [`LOGCVTLEN-1:0] LeadingZeros; // output from the LZC
|
||||
logic [`CVTLEN:0] LzcInFull; // input to the Leading Zero Counter (priority encoder)
|
||||
logic [`LOGCVTLEN-1:0] LeadingZeros; // output from the LZC
|
||||
|
||||
|
||||
// separate OpCtrl for code readability
|
||||
@ -102,10 +103,11 @@ module fcvt (
|
||||
// choose the input to the leading zero counter i.e. priority encoder
|
||||
// int -> fp : | positive integer | 00000... (if needed) |
|
||||
// fp -> fp : | fraction | 00000... (if needed) |
|
||||
assign LzcIn = IntToFp ? {TrimInt, {`CVTLEN-`XLEN{1'b0}}} :
|
||||
{Xm[`NF-1:0], {`CVTLEN-`NF{1'b0}}};
|
||||
assign LzcInFull = IntToFp ? {1'b0, TrimInt, {`CVTLEN-`XLEN{1'b0}}} :
|
||||
{Xm, {`CVTLEN-`NF{1'b0}}};
|
||||
assign LzcIn = LzcInFull[`CVTLEN-1:0];
|
||||
|
||||
lzc #(`CVTLEN) lzc (.num(LzcIn), .ZeroCnt(LeadingZeros));
|
||||
lzc #(`CVTLEN+1) lzc (.num(LzcInFull), .ZeroCnt(LeadingZeros));
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// shifter
|
||||
@ -119,13 +121,13 @@ module fcvt (
|
||||
// denormalized/underflowed result fp -> fp:
|
||||
// - shift left by NF-1+CalcExp - to shift till the biased exponent is 0
|
||||
// ??? -> fp:
|
||||
// - shift left by LeadingZeros+1 - to shift till the result is normalized
|
||||
// - shift left by LeadingZeros - to shift till the result is normalized
|
||||
// - only shift fp -> fp if the initial value is denormalized
|
||||
// - this is a problem because the input to the lzc was the fraction rather than the mantissa
|
||||
// - rather have a few and-gates than an extra bit in the priority encoder??? *** is this true?
|
||||
assign ShiftAmt = ToInt ? Ce[`LOGCVTLEN-1:0]&{`LOGCVTLEN{~Ce[`NE]}} :
|
||||
ResDenormUf&~IntToFp ? (`LOGCVTLEN)'(`NF-1)+Ce[`LOGCVTLEN-1:0] :
|
||||
(LeadingZeros+1)&{`LOGCVTLEN{XDenorm|IntToFp}};
|
||||
(LeadingZeros);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// exp calculations
|
||||
@ -197,14 +199,14 @@ module fcvt (
|
||||
// | 0's | Mantissa | 0's if nessisary |
|
||||
// | keep |
|
||||
//
|
||||
// - if the input is denormalized then we dont shift... so the "- (LeadingZeros+1)" is just leftovers from other options
|
||||
// int -> fp : largest bias + XLEN - Largest bias + new bias - 1 - LeadingZeros = XLEN + NewBias - 1 - LeadingZeros
|
||||
// - if the input is denormalized then we dont shift... so the "- LeadingZeros" is just leftovers from other options
|
||||
// int -> fp : largest bias + XLEN - Largest bias + new bias - LeadingZeros = XLEN + NewBias - LeadingZeros
|
||||
// Process:
|
||||
// - shifted right by XLEN (XLEN)
|
||||
// - shift left to normilize (-1-LeadingZeros)
|
||||
// - shift left to normilize (-LeadingZeros)
|
||||
// - newBias to make the biased exponent
|
||||
// oldexp - biasold +newbias - (LeadingZeros+1)&(XDenorm|IntToFp)
|
||||
assign Ce = {1'b0, OldExp} - (`NE+1)'(`BIAS) + {2'b0, NewBias} - {{`NE{1'b0}}, XDenorm|IntToFp} - {{`NE-`LOGCVTLEN+1{1'b0}}, (LeadingZeros&{`LOGCVTLEN{XDenorm|IntToFp}})};
|
||||
// oldexp - biasold +newbias - LeadingZeros&(XDenorm|IntToFp)
|
||||
assign Ce = {1'b0, OldExp} - (`NE+1)'(`BIAS) + {2'b0, NewBias} - {{`NE-`LOGCVTLEN+1{1'b0}}, (LeadingZeros&{`LOGCVTLEN{XDenorm|IntToFp}})};
|
||||
// find if the result is denormal or underflows
|
||||
// - if calculated exponent is 0 or negative (and the input/result is not exactly 0)
|
||||
// - can't underflow an integer to Fp conversion
|
||||
|
@ -34,24 +34,24 @@ module flags(
|
||||
input logic XInf, YInf, ZInf, // inputs are infinity
|
||||
input logic Plus1,
|
||||
input logic InfIn, // is a Inf input being used
|
||||
input logic NaNIn, // is a NaN input being used
|
||||
input logic [`FMTBITS-1:0] OutFmt, // output format
|
||||
input logic XZero, YZero, // inputs are zero
|
||||
input logic XNaN, YNaN, // inputs are NaN
|
||||
input logic NaNIn, // is a NaN input being used
|
||||
input logic Sqrt, // Sqrt?
|
||||
input logic ToInt, // convert to integer
|
||||
input logic IntToFp, // convert integer to floating point
|
||||
input logic Int64, // convert to 64 bit integer
|
||||
input logic Signed, // convert to a signed integer
|
||||
input logic [`FMTBITS-1:0] OutFmt, // output format
|
||||
input logic [`NE:0] CvtCe, // the calculated expoent - Cvt
|
||||
input logic CvtOp, // conversion opperation?
|
||||
input logic DivOp, // conversion opperation?
|
||||
input logic FmaOp, // Fma opperation?
|
||||
input logic [`NE+1:0] FullResExp, // Re with bits to determine sign and overflow
|
||||
input logic [`NE+1:0] Nexp, // exponent of the normalized sum
|
||||
input logic [`NE+1:0] FullRe, // Re with bits to determine sign and overflow
|
||||
input logic [`NE+1:0] Me, // exponent of the normalized sum
|
||||
input logic [1:0] CvtNegResMsbs, // the negitive integer result's most significant bits
|
||||
input logic FmaAs, FmaPs, // the product and modified Z signs
|
||||
input logic R, UfLSBRes, S, UfPlus1, // bits used to determine rounding
|
||||
input logic R, UfL, S, UfPlus1, // bits used to determine rounding
|
||||
output logic DivByZero,
|
||||
output logic IntInvalid, Invalid, Overflow, // flags used to select the res
|
||||
output logic [4:0] PostProcFlg // flags
|
||||
@ -73,30 +73,30 @@ module flags(
|
||||
|
||||
|
||||
if (`FPSIZES == 1) begin
|
||||
assign ResExpGteMax = &FullResExp[`NE-1:0] | FullResExp[`NE];
|
||||
assign ShiftGtIntSz = (|FullResExp[`NE:7]|(FullResExp[6]&~Int64)) | ((|FullResExp[4:0]|(FullResExp[5]&Int64))&((FullResExp[5]&~Int64) | FullResExp[6]&Int64));
|
||||
assign ResExpGteMax = &FullRe[`NE-1:0] | FullRe[`NE];
|
||||
assign ShiftGtIntSz = (|FullRe[`NE:7]|(FullRe[6]&~Int64)) | ((|FullRe[4:0]|(FullRe[5]&Int64))&((FullRe[5]&~Int64) | FullRe[6]&Int64));
|
||||
|
||||
end else if (`FPSIZES == 2) begin
|
||||
assign ResExpGteMax = OutFmt ? &FullResExp[`NE-1:0] | FullResExp[`NE] : &FullResExp[`NE1-1:0] | (|FullResExp[`NE:`NE1]);
|
||||
assign ResExpGteMax = OutFmt ? &FullRe[`NE-1:0] | FullRe[`NE] : &FullRe[`NE1-1:0] | (|FullRe[`NE:`NE1]);
|
||||
|
||||
assign ShiftGtIntSz = (|FullResExp[`NE:7]|(FullResExp[6]&~Int64)) | ((|FullResExp[4:0]|(FullResExp[5]&Int64))&((FullResExp[5]&~Int64) | FullResExp[6]&Int64));
|
||||
assign ShiftGtIntSz = (|FullRe[`NE:7]|(FullRe[6]&~Int64)) | ((|FullRe[4:0]|(FullRe[5]&Int64))&((FullRe[5]&~Int64) | FullRe[6]&Int64));
|
||||
end else if (`FPSIZES == 3) begin
|
||||
always_comb
|
||||
case (OutFmt)
|
||||
`FMT: ResExpGteMax = &FullResExp[`NE-1:0] | FullResExp[`NE];
|
||||
`FMT1: ResExpGteMax = &FullResExp[`NE1-1:0] | (|FullResExp[`NE:`NE1]);
|
||||
`FMT2: ResExpGteMax = &FullResExp[`NE2-1:0] | (|FullResExp[`NE:`NE2]);
|
||||
`FMT: ResExpGteMax = &FullRe[`NE-1:0] | FullRe[`NE];
|
||||
`FMT1: ResExpGteMax = &FullRe[`NE1-1:0] | (|FullRe[`NE:`NE1]);
|
||||
`FMT2: ResExpGteMax = &FullRe[`NE2-1:0] | (|FullRe[`NE:`NE2]);
|
||||
default: ResExpGteMax = 1'bx;
|
||||
endcase
|
||||
assign ShiftGtIntSz = (|FullResExp[`NE:7]|(FullResExp[6]&~Int64)) | ((|FullResExp[4:0]|(FullResExp[5]&Int64))&((FullResExp[5]&~Int64) | FullResExp[6]&Int64));
|
||||
assign ShiftGtIntSz = (|FullRe[`NE:7]|(FullRe[6]&~Int64)) | ((|FullRe[4:0]|(FullRe[5]&Int64))&((FullRe[5]&~Int64) | FullRe[6]&Int64));
|
||||
|
||||
end else if (`FPSIZES == 4) begin
|
||||
always_comb
|
||||
case (OutFmt)
|
||||
`Q_FMT: ResExpGteMax = &FullResExp[`Q_NE-1:0] | FullResExp[`Q_NE];
|
||||
`D_FMT: ResExpGteMax = &FullResExp[`D_NE-1:0] | (|FullResExp[`Q_NE:`D_NE]);
|
||||
`S_FMT: ResExpGteMax = &FullResExp[`S_NE-1:0] | (|FullResExp[`Q_NE:`S_NE]);
|
||||
`H_FMT: ResExpGteMax = &FullResExp[`H_NE-1:0] | (|FullResExp[`Q_NE:`H_NE]);
|
||||
`Q_FMT: ResExpGteMax = &FullRe[`Q_NE-1:0] | FullRe[`Q_NE];
|
||||
`D_FMT: ResExpGteMax = &FullRe[`D_NE-1:0] | (|FullRe[`Q_NE:`D_NE]);
|
||||
`S_FMT: ResExpGteMax = &FullRe[`S_NE-1:0] | (|FullRe[`Q_NE:`S_NE]);
|
||||
`H_FMT: ResExpGteMax = &FullRe[`H_NE-1:0] | (|FullRe[`Q_NE:`H_NE]);
|
||||
endcase
|
||||
// a left shift of intlen+1 is still in range but any more than that is an overflow
|
||||
// inital: | 64 0's | XLEN |
|
||||
@ -110,14 +110,14 @@ module flags(
|
||||
// - any of the bits after the most significant 1 is one
|
||||
// - the most significant in 65 or 33 is still a one in the number and
|
||||
// one of the later bits is one
|
||||
assign ShiftGtIntSz = (|FullResExp[`Q_NE:7]|(FullResExp[6]&~Int64)) | ((|FullResExp[4:0]|(FullResExp[5]&Int64))&((FullResExp[5]&~Int64) | FullResExp[6]&Int64));
|
||||
assign ShiftGtIntSz = (|FullRe[`Q_NE:7]|(FullRe[6]&~Int64)) | ((|FullRe[4:0]|(FullRe[5]&Int64))&((FullRe[5]&~Int64) | FullRe[6]&Int64));
|
||||
end
|
||||
|
||||
// if the result is greater than or equal to the max exponent(not taking into account sign)
|
||||
// | and the exponent isn't negitive
|
||||
// | | if the input isnt infinity or NaN
|
||||
// | | |
|
||||
assign Overflow = ResExpGteMax & ~FullResExp[`NE+1]&~(InfIn|NaNIn|DivByZero);
|
||||
assign Overflow = ResExpGteMax & ~FullRe[`NE+1]&~(InfIn|NaNIn|DivByZero);
|
||||
|
||||
// detecting tininess after rounding
|
||||
// the exponent is negitive
|
||||
@ -127,11 +127,11 @@ module flags(
|
||||
// | | | | and if the result is not exact
|
||||
// | | | | | and if the input isnt infinity or NaN
|
||||
// | | | | | |
|
||||
assign Underflow = ((FullResExp[`NE+1] | (FullResExp == 0) | ((FullResExp == 1) & (Nexp == 0) & ~(UfPlus1&UfLSBRes)))&(R|S))&~(InfIn|NaNIn|DivByZero);
|
||||
assign Underflow = ((FullRe[`NE+1] | (FullRe == 0) | ((FullRe == 1) & (Me == 0) & ~(UfPlus1&UfL)))&(R|S))&~(InfIn|NaNIn|DivByZero);
|
||||
|
||||
// Set Inexact flag if the res is different from what would be output given infinite precision
|
||||
// - Don't set the underflow flag if an underflowed res isn't output
|
||||
assign FpInexact = (S|Overflow|R|Underflow)&~(InfIn|NaNIn|DivByZero);
|
||||
assign FpInexact = (S|Overflow|R)&~(InfIn|NaNIn|DivByZero);
|
||||
|
||||
// if the res is too small to be represented and not 0
|
||||
// | and if the res is not invalid (outside the integer bounds)
|
||||
@ -153,7 +153,7 @@ module flags(
|
||||
// | | | | or the res rounds up out of bounds
|
||||
// | | | | and the res didn't underflow
|
||||
// | | | | |
|
||||
assign IntInvalid = XNaN|XInf|(ShiftGtIntSz&~FullResExp[`NE+1])|((Xs&~Signed)&(~((CvtCe[`NE]|(~|CvtCe))&~Plus1)))|(CvtNegResMsbs[1]^CvtNegResMsbs[0]);
|
||||
assign IntInvalid = XNaN|XInf|(ShiftGtIntSz&~FullRe[`NE+1])|((Xs&~Signed)&(~((CvtCe[`NE]|(~|CvtCe))&~Plus1)))|(CvtNegResMsbs[1]^CvtNegResMsbs[0]);
|
||||
// |
|
||||
// or when the positive res rounds up out of range
|
||||
assign SigNaN = (XSNaN&~(IntToFp&CvtOp)) | (YSNaN&~CvtOp) | (ZSNaN&FmaOp);
|
||||
|
@ -51,7 +51,6 @@ module fma(
|
||||
logic [3*`NF+5:0] Am; // addend aligned's mantissa for addition in U(NF+5.2NF+1)
|
||||
logic [3*`NF+6:0] AmInv; // aligned addend's mantissa possibly inverted
|
||||
logic [2*`NF+1:0] PmKilled; // the product's mantissa possibly killed
|
||||
logic [3*`NF+6:0] PreSum, NegPreSum; // positive and negitve versions of the sum
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Calculate the product
|
||||
// - When multiplying two fp numbers, add the exponents
|
||||
@ -70,20 +69,21 @@ module fma(
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Alignment shifter
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
align align(.Ze, .Zm, .XZero, .YZero, .ZZero, .Xe, .Ye,
|
||||
.Am, .ZmSticky, .KillProd);
|
||||
|
||||
// calculate the signs and take the operation into account
|
||||
sign sign(.FOpCtrl, .Xs, .Ys, .Zs, .Ps, .As);
|
||||
|
||||
align align(.Ze, .Zm, .XZero, .YZero, .ZZero, .Xe, .Ye,
|
||||
.Am, .ZmSticky, .KillProd);
|
||||
|
||||
|
||||
|
||||
// ///////////////////////////////////////////////////////////////////////////////
|
||||
// // Addition/LZA
|
||||
// ///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
add add(.Am, .Pm, .Ps, .As, .KillProd, .AmInv, .PmKilled, .NegSum, .PreSum, .NegPreSum, .InvA, .XZero, .YZero, .Sm);
|
||||
add add(.Am, .Pm, .Ps, .As, .KillProd, .ZmSticky, .AmInv, .PmKilled, .NegSum, .InvA, .Sm);
|
||||
|
||||
loa loa(.A(AmInv+{(3*`NF+6)'(0),InvA}), .P(PmKilled), .NCnt);
|
||||
loa loa(.A(AmInv+{(3*`NF+6)'(0),InvA&~((ZmSticky&~KillProd))}), .P({PmKilled, 1'b0, InvA&Ps&ZmSticky&KillProd}), .NCnt);
|
||||
endmodule
|
||||
|
||||
|
||||
@ -172,7 +172,7 @@ module align(
|
||||
// the 1'b0 before the added is because the product's mantissa has two bits before the binary point (xx.xxxxxxxxxx...)
|
||||
assign ZmPreshifted = {Zm,(3*`NF+5)'(0)};
|
||||
|
||||
assign KillProd = ACnt[`NE+1]|XZero|YZero;
|
||||
assign KillProd = (ACnt[`NE+1]&~ZZero)|XZero|YZero;
|
||||
assign KillZ = $signed(ACnt)>$signed((`NE+2)'(3)*(`NE+2)'(`NF)+(`NE+2)'(5));
|
||||
|
||||
always_comb
|
||||
@ -183,7 +183,7 @@ module align(
|
||||
// | 54'b0 | 106'b(product) | 2'b0 |
|
||||
// | addnend |
|
||||
if (KillProd) begin
|
||||
ZmShifted = ZmPreshifted;
|
||||
ZmShifted = {(`NF+3)'(0), Zm, (2*`NF+2)'(0)};
|
||||
ZmSticky = ~(XZero|YZero);
|
||||
|
||||
// If the addend is too small to affect the addition
|
||||
@ -221,14 +221,14 @@ module add(
|
||||
input logic [2*`NF+1:0] Pm, // the product's mantissa
|
||||
input logic Ps, As,// the product sign and the alligend addeded's sign (Modified Z sign for other opperations)
|
||||
input logic KillProd, // should the product be set to 0
|
||||
input logic XZero, YZero, // is the input zero
|
||||
input logic ZmSticky,
|
||||
output logic [3*`NF+6:0] AmInv, // aligned addend possibly inverted
|
||||
output logic [2*`NF+1:0] PmKilled, // the product's mantissa possibly killed
|
||||
output logic NegSum, // was the sum negitive
|
||||
output logic InvA, // do you invert the aligned addend
|
||||
output logic [3*`NF+5:0] Sm, // the positive sum
|
||||
output logic [3*`NF+6:0] PreSum, NegPreSum// possibly negitive sum
|
||||
output logic [3*`NF+5:0] Sm // the positive sum
|
||||
);
|
||||
logic [3*`NF+6:0] PreSum, NegPreSum; // possibly negitive sum
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Addition
|
||||
@ -243,13 +243,14 @@ module add(
|
||||
assign AmInv = InvA ? {1'b1, ~Am} : {1'b0, Am};
|
||||
// Kill the product if the product is too small to affect the addition (determined in fma1.sv)
|
||||
assign PmKilled = Pm&{2*`NF+2{~KillProd}};
|
||||
|
||||
|
||||
|
||||
// Do the addition
|
||||
// - calculate a positive and negative sum in parallel
|
||||
assign PreSum = {{`NF+3{1'b0}}, PmKilled, 2'b0} + AmInv + {{3*`NF+6{1'b0}}, InvA};
|
||||
assign NegPreSum = {1'b0, Am} + {{`NF+3{1'b1}}, ~PmKilled, 2'b0} + {(3*`NF+7)'(4)};
|
||||
// Zsticky Psticky
|
||||
// PreSum -1 = don't add 1 +1 = add 2
|
||||
// NegPreSum +1 = add 2 -1 = don't add 1
|
||||
// for NegPreSum the product is set to -1 whenever the product is killed, therefore add 1, 2 or 0
|
||||
assign PreSum = {{`NF+3{1'b0}}, PmKilled, 1'b0, InvA&ZmSticky&KillProd} + AmInv + {{3*`NF+6{1'b0}}, InvA&~((ZmSticky&~KillProd))};
|
||||
assign NegPreSum = {1'b0, Am} + {{`NF+3{1'b1}}, ~PmKilled, 2'b11} + {(3*`NF+5)'(0), ZmSticky&~KillProd, ~(ZmSticky)};
|
||||
|
||||
// Is the sum negative
|
||||
assign NegSum = PreSum[3*`NF+6];
|
||||
@ -261,7 +262,7 @@ endmodule
|
||||
|
||||
module loa( // [Schmookler & Nowka, Leading zero anticipation and detection, IEEE Sym. Computer Arithmetic, 2001]
|
||||
input logic [3*`NF+6:0] A, // addend
|
||||
input logic [2*`NF+1:0] P, // product
|
||||
input logic [2*`NF+3:0] P, // product
|
||||
output logic [$clog2(3*`NF+7)-1:0] NCnt // normalization shift count for the positive result
|
||||
);
|
||||
|
||||
@ -273,12 +274,9 @@ module loa( // [Schmookler & Nowka, Leading zero anticipation and detection, IEE
|
||||
assign T[3*`NF+6:2*`NF+4] = A[3*`NF+6:2*`NF+4];
|
||||
assign G[3*`NF+6:2*`NF+4] = 0;
|
||||
assign Z[3*`NF+6:2*`NF+4] = ~A[3*`NF+6:2*`NF+4];
|
||||
assign T[2*`NF+3:2] = A[2*`NF+3:2]^P;
|
||||
assign G[2*`NF+3:2] = A[2*`NF+3:2]&P;
|
||||
assign Z[2*`NF+3:2] = ~A[2*`NF+3:2]&~P;
|
||||
assign T[1:0] = A[1:0];
|
||||
assign G[1:0] = 0;
|
||||
assign Z[1:0] = ~A[1:0];
|
||||
assign T[2*`NF+3:0] = A[2*`NF+3:0]^P;
|
||||
assign G[2*`NF+3:0] = A[2*`NF+3:0]&P;
|
||||
assign Z[2*`NF+3:0] = ~A[2*`NF+3:0]&~P;
|
||||
|
||||
|
||||
// Apply function to determine Leading pattern
|
||||
|
@ -35,9 +35,8 @@ module fmashiftcalc(
|
||||
input logic [$clog2(3*`NF+7)-1:0] FmaNCnt, // normalization shift count
|
||||
input logic [`FMTBITS-1:0] Fmt, // precision 1 = double 0 = single
|
||||
input logic FmaKillProd, // is the product set to zero
|
||||
input logic ZDenorm,
|
||||
output logic [`NE+1:0] FmaConvNormSumExp, // exponent of the normalized sum not taking into account denormal or zero results
|
||||
output logic FmaSmZero, // is the result denormalized - calculated before LZA corection
|
||||
output logic [`NE+1:0] FmaNe, // exponent of the normalized sum not taking into account denormal or zero results
|
||||
output logic FmaSZero, // is the result denormalized - calculated before LZA corection
|
||||
output logic FmaPreResultDenorm, // is the result denormalized - calculated before LZA corection
|
||||
output logic [$clog2(3*`NF+7)-1:0] FmaShiftAmt, // normalization shift count
|
||||
output logic [3*`NF+8:0] FmaShiftIn // is the sum zero
|
||||
@ -50,35 +49,36 @@ module fmashiftcalc(
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
//*** insert bias-bias simplification in fcvt.sv/phone pictures
|
||||
// Determine if the sum is zero
|
||||
assign FmaSmZero = ~(|FmaSm);
|
||||
assign FmaSZero = ~(|FmaSm);
|
||||
|
||||
// calculate the sum's exponent
|
||||
assign NormSumExp = FmaKillProd ? {2'b0, Ze[`NE-1:1], Ze[0]&~ZDenorm} : FmaPe + -{{`NE+2-$unsigned($clog2(3*`NF+7)){1'b0}}, FmaNCnt} - 1 + (`NE+2)'(`NF+4);
|
||||
// ProdExp - NormCnt - 1 + NF+4 = ProdExp + ~NormCnt + 1 - 1 + NF+4 = ProdExp + ~NormCnt + NF+4
|
||||
assign NormSumExp = (FmaKillProd ? {2'b0, Ze} : FmaPe) + {{`NE+2-$unsigned($clog2(3*`NF+7)){1'b1}}, ~FmaNCnt} + (`NE+2)'(`NF+4);
|
||||
|
||||
// convert the sum's exponent into the proper precision
|
||||
if (`FPSIZES == 1) begin
|
||||
assign FmaConvNormSumExp = NormSumExp;
|
||||
assign FmaNe = NormSumExp;
|
||||
|
||||
end else if (`FPSIZES == 2) begin
|
||||
assign FmaConvNormSumExp = Fmt ? NormSumExp : (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS1))&{`NE+2{|NormSumExp}};
|
||||
assign FmaNe = Fmt ? NormSumExp : (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS1))&{`NE+2{|NormSumExp}};
|
||||
|
||||
end else if (`FPSIZES == 3) begin
|
||||
always_comb begin
|
||||
case (Fmt)
|
||||
`FMT: FmaConvNormSumExp = NormSumExp;
|
||||
`FMT1: FmaConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS1))&{`NE+2{|NormSumExp}};
|
||||
`FMT2: FmaConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS2))&{`NE+2{|NormSumExp}};
|
||||
default: FmaConvNormSumExp = {`NE+2{1'bx}};
|
||||
`FMT: FmaNe = NormSumExp;
|
||||
`FMT1: FmaNe = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS1))&{`NE+2{|NormSumExp}};
|
||||
`FMT2: FmaNe = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS2))&{`NE+2{|NormSumExp}};
|
||||
default: FmaNe = {`NE+2{1'bx}};
|
||||
endcase
|
||||
end
|
||||
|
||||
end else if (`FPSIZES == 4) begin
|
||||
always_comb begin
|
||||
case (Fmt)
|
||||
2'h3: FmaConvNormSumExp = NormSumExp;
|
||||
2'h1: FmaConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`D_BIAS))&{`NE+2{|NormSumExp}};
|
||||
2'h0: FmaConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`S_BIAS))&{`NE+2{|NormSumExp}};
|
||||
2'h2: FmaConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`H_BIAS))&{`NE+2{|NormSumExp}};
|
||||
2'h3: FmaNe = NormSumExp;
|
||||
2'h1: FmaNe = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`D_BIAS))&{`NE+2{|NormSumExp}};
|
||||
2'h0: FmaNe = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`S_BIAS))&{`NE+2{|NormSumExp}};
|
||||
2'h2: FmaNe = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`H_BIAS))&{`NE+2{|NormSumExp}};
|
||||
endcase
|
||||
end
|
||||
|
||||
@ -90,7 +90,7 @@ module fmashiftcalc(
|
||||
logic Sum0LEZ, Sum0GEFL;
|
||||
assign Sum0LEZ = NormSumExp[`NE+1] | ~|NormSumExp;
|
||||
assign Sum0GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF)-(`NE+2)'(2));
|
||||
assign FmaPreResultDenorm = Sum0LEZ & Sum0GEFL & ~FmaSmZero;
|
||||
assign FmaPreResultDenorm = Sum0LEZ & Sum0GEFL & ~FmaSZero;
|
||||
|
||||
end else if (`FPSIZES == 2) begin
|
||||
logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL;
|
||||
@ -98,7 +98,7 @@ module fmashiftcalc(
|
||||
assign Sum0GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF)-(`NE+2)'(2));
|
||||
assign Sum1LEZ = $signed(NormSumExp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1));
|
||||
assign Sum1GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF1+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1)) | ~|NormSumExp;
|
||||
assign FmaPreResultDenorm = (Fmt ? Sum0LEZ : Sum1LEZ) & (Fmt ? Sum0GEFL : Sum1GEFL) & ~FmaSmZero;
|
||||
assign FmaPreResultDenorm = (Fmt ? Sum0LEZ : Sum1LEZ) & (Fmt ? Sum0GEFL : Sum1GEFL) & ~FmaSZero;
|
||||
|
||||
end else if (`FPSIZES == 3) begin
|
||||
logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL, Sum2LEZ, Sum2GEFL;
|
||||
@ -110,9 +110,9 @@ module fmashiftcalc(
|
||||
assign Sum2GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF2+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`BIAS2)) | ~|NormSumExp;
|
||||
always_comb begin
|
||||
case (Fmt)
|
||||
`FMT: FmaPreResultDenorm = Sum0LEZ & Sum0GEFL & ~FmaSmZero;
|
||||
`FMT1: FmaPreResultDenorm = Sum1LEZ & Sum1GEFL & ~FmaSmZero;
|
||||
`FMT2: FmaPreResultDenorm = Sum2LEZ & Sum2GEFL & ~FmaSmZero;
|
||||
`FMT: FmaPreResultDenorm = Sum0LEZ & Sum0GEFL & ~FmaSZero;
|
||||
`FMT1: FmaPreResultDenorm = Sum1LEZ & Sum1GEFL & ~FmaSZero;
|
||||
`FMT2: FmaPreResultDenorm = Sum2LEZ & Sum2GEFL & ~FmaSZero;
|
||||
default: FmaPreResultDenorm = 1'bx;
|
||||
endcase
|
||||
end
|
||||
@ -129,10 +129,10 @@ module fmashiftcalc(
|
||||
assign Sum3GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`H_NF+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`H_BIAS)) | ~|NormSumExp;
|
||||
always_comb begin
|
||||
case (Fmt)
|
||||
2'h3: FmaPreResultDenorm = Sum0LEZ & Sum0GEFL & ~FmaSmZero;
|
||||
2'h1: FmaPreResultDenorm = Sum1LEZ & Sum1GEFL & ~FmaSmZero;
|
||||
2'h0: FmaPreResultDenorm = Sum2LEZ & Sum2GEFL & ~FmaSmZero;
|
||||
2'h2: FmaPreResultDenorm = Sum3LEZ & Sum3GEFL & ~FmaSmZero;
|
||||
2'h3: FmaPreResultDenorm = Sum0LEZ & Sum0GEFL & ~FmaSZero;
|
||||
2'h1: FmaPreResultDenorm = Sum1LEZ & Sum1GEFL & ~FmaSZero;
|
||||
2'h0: FmaPreResultDenorm = Sum2LEZ & Sum2GEFL & ~FmaSZero;
|
||||
2'h2: FmaPreResultDenorm = Sum3LEZ & Sum3GEFL & ~FmaSZero;
|
||||
endcase // *** remove checking to see if it's underflowed and only check for less than zero for denorm checking
|
||||
end
|
||||
|
||||
@ -144,13 +144,13 @@ module fmashiftcalc(
|
||||
// - if kill prod dont add to exp
|
||||
|
||||
// Determine if the result is denormal
|
||||
// assign FmaPreResultDenorm = $signed(FmaConvNormSumExp)<=0 & ($signed(FmaConvNormSumExp)>=$signed(-FracLen)) & ~FmaSmZero;
|
||||
// assign FmaPreResultDenorm = $signed(FmaNe)<=0 & ($signed(FmaNe)>=$signed(-FracLen)) & ~FmaSZero;
|
||||
|
||||
// Determine the shift needed for denormal results
|
||||
// - if not denorm add 1 to shift out the leading 1
|
||||
assign DenormShift = FmaPreResultDenorm&~FmaKillProd ? FmaConvNormSumExp[$clog2(3*`NF+7)-1:0] : 1;
|
||||
assign DenormShift = FmaPreResultDenorm ? FmaNe[$clog2(3*`NF+7)-1:0] : 1;
|
||||
// set and calculate the shift input and amount
|
||||
// - shift once if killing a product and the result is denormalized
|
||||
assign FmaShiftIn = {3'b0, FmaSm};
|
||||
assign FmaShiftAmt = (FmaNCnt&{$clog2(3*`NF+7){~FmaKillProd}})+DenormShift;
|
||||
assign FmaShiftAmt = FmaNCnt+DenormShift;
|
||||
endmodule
|
||||
|
@ -42,7 +42,7 @@ module fpu (
|
||||
input logic [1:0] STATUS_FS, // Is floating-point enabled?
|
||||
output logic FRegWriteM, // FP register write enable
|
||||
output logic FpLoadStoreM, // Fp load instruction?
|
||||
output logic FLoad2,
|
||||
output logic FStore2,
|
||||
output logic FStallD, // Stall the decode stage
|
||||
output logic FWriteIntE, // integer register write enables
|
||||
output logic [`XLEN-1:0] FWriteDataE, // Data to be written to memory
|
||||
@ -125,12 +125,11 @@ module fpu (
|
||||
logic [`CVTLEN-1:0] CvtLzcInE, CvtLzcInM; // input to the Leading Zero Counter (priority encoder)
|
||||
|
||||
//divide signals
|
||||
logic [`DIVLEN+2:0] QuotE, QuotM;
|
||||
logic [`QLEN-1-(`RADIX/4):0] QuotM;
|
||||
logic [`NE+1:0] DivCalcExpE, DivCalcExpM;
|
||||
logic DivNegStickyE, DivNegStickyM;
|
||||
logic DivStickyE, DivStickyM;
|
||||
logic DivDoneM;
|
||||
logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2E, EarlyTermShiftDiv2M;
|
||||
logic [`DURLEN-1:0] EarlyTermShiftM;
|
||||
|
||||
// result and flag signals
|
||||
logic [63:0] FDivResM, FDivResW; // divide/squareroot result
|
||||
@ -288,8 +287,8 @@ module fpu (
|
||||
// .FDivBusyE, .done(FDivSqrtDoneE), .AS_Result(FDivResM), .Flags(FDivFlgM));
|
||||
divsqrt divsqrt(.clk, .reset, .FmtE, .XManE, .YManE, .XExpE, .YExpE,
|
||||
.XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .DivStartE(FDivStartE),
|
||||
.StallE, .StallM, .DivStickyM, .DivNegStickyM, .DivBusy(FDivBusyE), .DivCalcExpM, //***change divbusyE to M signal
|
||||
.EarlyTermShiftDiv2M, .QuotM, .DivDone(DivDoneM));
|
||||
.StallE, .StallM, .DivStickyM, .DivBusy(FDivBusyE), .DivCalcExpM, //***change divbusyE to M signal
|
||||
.EarlyTermShiftM, .QuotM, .DivDone(DivDoneM));
|
||||
// other FP execution units
|
||||
fcmp fcmp (.FmtE, .FOpCtrlE, .XSgnE, .YSgnE, .XExpE, .YExpE, .XManE, .YManE,
|
||||
.XZeroE, .YZeroE, .XNaNE, .YNaNE, .XSNaNE, .YSNaNE, .FSrcXE, .FSrcYE, .CmpNVE, .CmpFpResE, .CmpIntResE);
|
||||
@ -308,8 +307,8 @@ module fpu (
|
||||
assign FWriteDataE = FSrcYE[`XLEN-1:0];
|
||||
end else begin
|
||||
logic [`FLEN-1:0] FWriteDataE;
|
||||
if(`FMTBITS == 2) assign FLoad2 = FmtM == `FMT;
|
||||
else assign FLoad2 = FmtM;
|
||||
if(`FMTBITS == 2) assign FStore2 = FmtM == `FMT;
|
||||
else assign FStore2 = FmtM;
|
||||
|
||||
if (`FPSIZES==1) assign FWriteDataE = FSrcYE;
|
||||
else if (`FPSIZES==2) assign FWriteDataE = FmtE ? FSrcYE : {2{FSrcYE[`LEN1-1:0]}};
|
||||
@ -381,12 +380,12 @@ module fpu (
|
||||
|
||||
assign FpLoadStoreM = FResSelM[1];
|
||||
|
||||
postprocess postprocess(.Xs(XSgnM), .Ys(YSgnM), .Ze(ZExpM), .Xm(XManM), .Ym(YManM), .Zm(ZManM), .Frm(FrmM), .Fmt(FmtM), .FmaPe(ProdExpM), .DivEarlyTermShiftDiv2(EarlyTermShiftDiv2M),
|
||||
.FmaZmSticky(AddendStickyM), .FmaKillProd(KillProdM), .XZero(XZeroM), .YZero(YZeroM), .ZZero(ZZeroM), .XInf(XInfM), .YInf(YInfM), .Quot(QuotM),
|
||||
.ZInf(ZInfM), .XNaN(XNaNM), .YNaN(YNaNM), .ZNaN(ZNaNM), .XSNaN(XSNaNM), .YSNaN(YSNaNM), .ZSNaN(ZSNaNM), .FmaSm(SumM), .DivCalcExp(DivCalcExpM), .DivDone(DivDoneM),
|
||||
.FmaNegSum(NegSumM), .FmaInvA(InvAM), .ZDenorm(ZDenormM), .FmaAs(ZSgnEffM), .FmaPs(PSgnM), .FOpCtrl(FOpCtrlM), .FmaNCnt(FmaNormCntM), .DivNegSticky(DivNegStickyM),
|
||||
.CvtCe(CvtCalcExpM), .CvtResDenormUf(CvtResDenormUfM),.CvtShiftAmt(CvtShiftAmtM), .CvtCs(CvtResSgnM), .ToInt(FWriteIntM), .DivSticky(DivStickyM),
|
||||
.CvtLzcIn(CvtLzcInM), .IntZero(IntZeroM), .PostProcSel(PostProcSelM), .W(PostProcResM), .PostProcFlg(PostProcFlgM), .FCvtIntRes(FCvtIntResM));
|
||||
postprocess postprocess(.Xs(XSgnM), .Ys(YSgnM), .Ze(ZExpM), .Xm(XManM), .Ym(YManM), .Zm(ZManM), .Frm(FrmM), .Fmt(FmtM), .FmaPe(ProdExpM), .DivEarlyTermShift(EarlyTermShiftM),
|
||||
.FmaZmS(AddendStickyM), .FmaKillProd(KillProdM), .XZero(XZeroM), .YZero(YZeroM), .ZZero(ZZeroM), .XInf(XInfM), .YInf(YInfM), .DivQm(QuotM),
|
||||
.ZInf(ZInfM), .XNaN(XNaNM), .YNaN(YNaNM), .ZNaN(ZNaNM), .XSNaN(XSNaNM), .YSNaN(YSNaNM), .ZSNaN(ZSNaNM), .FmaSm(SumM), .DivQe(DivCalcExpM), .DivDone(DivDoneM),
|
||||
.FmaNegSum(NegSumM), .FmaInvA(InvAM), .ZDenorm(ZDenormM), .FmaAs(ZSgnEffM), .FmaPs(PSgnM), .FOpCtrl(FOpCtrlM), .FmaNCnt(FmaNormCntM),
|
||||
.CvtCe(CvtCalcExpM), .CvtResDenormUf(CvtResDenormUfM),.CvtShiftAmt(CvtShiftAmtM), .CvtCs(CvtResSgnM), .ToInt(FWriteIntM), .DivS(DivStickyM),
|
||||
.CvtLzcIn(CvtLzcInM), .IntZero(IntZeroM), .PostProcSel(PostProcSelM), .PostProcRes(PostProcResM), .PostProcFlg(PostProcFlgM), .FCvtIntRes(FCvtIntResM));
|
||||
|
||||
// FPU flag selection - to privileged
|
||||
mux2 #(5) FPUFlgMux ({PreNVM&~FResSelM[1], 4'b0}, PostProcFlgM, ~FResSelM[1]&FResSelM[0], SetFflagsM);
|
||||
|
112
pipelined/src/fpu/otfc.sv
Normal file
112
pipelined/src/fpu/otfc.sv
Normal file
@ -0,0 +1,112 @@
|
||||
///////////////////////////////////////////
|
||||
// otfc.sv
|
||||
//
|
||||
// Written: me@KatherineParry.com, cturek@hmc.edu
|
||||
// Modified:7/14/2022
|
||||
//
|
||||
// Purpose: On the fly conversion
|
||||
//
|
||||
// A component of the Wally configurable RISC-V project.
|
||||
//
|
||||
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// MIT LICENSE
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
|
||||
// software and associated documentation files (the "Software"), to deal in the Software
|
||||
// without restriction, including without limitation the rights to use, copy, modify, merge,
|
||||
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
|
||||
// to whom the Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all copies or
|
||||
// substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
|
||||
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
|
||||
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
|
||||
// OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
module otfc2 ( // radix-2 on-the-fly conversion step: appends one quotient digit (+1, 0, or -1) to Q and QM
|
||||
input logic qp, qz, // digit selects: qp = +1, qz = 0; when neither is set the digit is treated as -1
|
||||
input logic [`QLEN-1:0] Q, QM, // current quotient Q and its companion QM
|
||||
output logic [`QLEN-1:0] QNext, QMNext // Q and QM after the new digit has been shifted in
|
||||
);
|
||||
// The on-the-fly converter transfers the quotient
|
||||
// bits to the quotient as they come.
|
||||
// Use this otfc for division only.
|
||||
logic [`QLEN-2:0] QR, QMR; // low QLEN-1 bits of Q and QM; the top bit is dropped to make room for the new digit
|
||||
|
||||
assign QR = Q[`QLEN-2:0];
|
||||
assign QMR = QM[`QLEN-2:0]; // Shifted Q and QM
|
||||
|
||||
always_comb begin
|
||||
if (qp) begin // digit +1: both results extend Q
|
||||
QNext = {QR, 1'b1};
|
||||
QMNext = {QR, 1'b0};
|
||||
end else if (qz) begin // digit 0: Q extends Q, QM extends QM
|
||||
QNext = {QR, 1'b0};
|
||||
QMNext = {QMR, 1'b1};
|
||||
end else begin // If qp and qz are not true, then qn is (digit -1: both results extend QM)
|
||||
QNext = {QMR, 1'b1};
|
||||
QMNext = {QMR, 1'b0};
|
||||
end
|
||||
end
|
||||
|
||||
endmodule
|
||||
|
||||
|
||||
module otfc4 ( // radix-4 on-the-fly conversion step: appends one two-bit quotient digit to Q and QM
|
||||
input logic [3:0] q, // digit select, tested in priority order: q[3] = +2, q[2] = +1, q[1] = -1, q[0] = -2, none set = 0
|
||||
input logic [`QLEN-1:0] Q, QM, // current quotient Q and QM (Q-1)
|
||||
output logic [`QLEN-1:0] QNext, QMNext // Q and QM after the new digit has been shifted in
|
||||
);
|
||||
|
||||
// The on-the-fly converter transfers the quotient
|
||||
// bits to the quotient as they come.
|
||||
//
|
||||
// This code follows the pseudocode presented in the
|
||||
// floating point chapter of the book. Right now,
|
||||
// it is written for Radix-4 division.
|
||||
//
|
||||
// QM is Q-1. It allows us to write negative bits
|
||||
// without using a costly CPA.
|
||||
|
||||
// QR and QMR are the shifted versions of Q and QM.
|
||||
// They are treated as [N-1:r] size signals, and
|
||||
// discard the r most significant bits of Q and QM.
|
||||
logic [`QLEN-3:0] QR, QMR;
|
||||
|
||||
// shift Q (quotient) and QM (quotient-1)
|
||||
// if q = 2 Q = {Q, 10} QM = {Q, 01}
|
||||
// else if q = 1 Q = {Q, 01} QM = {Q, 00}
|
||||
// else if q = 0 Q = {Q, 00} QM = {QM, 11}
|
||||
// else if q = -1 Q = {QM, 11} QM = {QM, 10}
|
||||
// else if q = -2 Q = {QM, 10} QM = {QM, 01}
|
||||
|
||||
assign QR = Q[`QLEN-3:0];
|
||||
assign QMR = QM[`QLEN-3:0]; // Shifted Q and QM
|
||||
always_comb begin
|
||||
if (q[3]) begin // +2
|
||||
QNext = {QR, 2'b10};
|
||||
QMNext = {QR, 2'b01};
|
||||
end else if (q[2]) begin // +1
|
||||
QNext = {QR, 2'b01};
|
||||
QMNext = {QR, 2'b00};
|
||||
end else if (q[1]) begin // -1
|
||||
QNext = {QMR, 2'b11};
|
||||
QMNext = {QMR, 2'b10};
|
||||
end else if (q[0]) begin // -2
|
||||
QNext = {QMR, 2'b10};
|
||||
QMNext = {QMR, 2'b01};
|
||||
end else begin // 0
|
||||
QNext = {QR, 2'b00};
|
||||
QMNext = {QMR, 2'b11};
|
||||
end
|
||||
end
|
||||
// Final Quotient is in the range [.5, 2)
|
||||
|
||||
endmodule
|
@ -29,7 +29,7 @@
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
module postprocess(
|
||||
module postprocess (
|
||||
// general signals
|
||||
input logic Xs, Ys, // input signs
|
||||
input logic [`NE-1:0] Ze, // input exponents
|
||||
@ -48,18 +48,17 @@ module postprocess(
|
||||
input logic FmaPs, // the product's sign
|
||||
input logic [`NE+1:0] FmaPe, // Product exponent
|
||||
input logic [3*`NF+5:0] FmaSm, // the positive sum
|
||||
input logic FmaZmSticky, // sticky bit that is calculated during alignment
|
||||
input logic FmaZmS, // sticky bit that is calculated during alignment
|
||||
input logic FmaKillProd, // set the product to zero before addition if the product is too small to matter
|
||||
input logic FmaNegSum, // was the sum negitive
|
||||
input logic FmaInvA, // do you invert Z
|
||||
input logic [$clog2(3*`NF+7)-1:0] FmaNCnt, // the normalization shift count
|
||||
//divide signals
|
||||
input logic [$clog2(`DIVLEN/2+3)-1:0] DivEarlyTermShiftDiv2,
|
||||
input logic DivSticky,
|
||||
input logic DivNegSticky,
|
||||
input logic [`DURLEN-1:0] DivEarlyTermShift,
|
||||
input logic DivS,
|
||||
input logic DivDone,
|
||||
input logic [`NE+1:0] DivCalcExp,
|
||||
input logic [`DIVLEN+2:0] Quot,
|
||||
input logic [`NE+1:0] DivQe,
|
||||
input logic [`QLEN-1-(`RADIX/4):0] DivQm,
|
||||
// conversion signals
|
||||
input logic CvtCs, // the result's sign
|
||||
input logic [`NE:0] CvtCe, // the calculated expoent
|
||||
@ -69,7 +68,7 @@ module postprocess(
|
||||
input logic [`CVTLEN-1:0] CvtLzcIn, // input to the Leading Zero Counter (priority encoder)
|
||||
input logic IntZero, // is the input zero
|
||||
// final results
|
||||
output logic [`FLEN-1:0] W, // FMA final result
|
||||
output logic [`FLEN-1:0] PostProcRes, // FMA final result
|
||||
output logic [4:0] PostProcFlg,
|
||||
output logic [`XLEN-1:0] FCvtIntRes // the int conversion result
|
||||
);
|
||||
@ -78,32 +77,31 @@ module postprocess(
|
||||
logic Ws;
|
||||
logic [`NF-1:0] Rf; // Result fraction
|
||||
logic [`NE-1:0] Re; // Result exponent
|
||||
logic Nsgn;
|
||||
logic [`NE+1:0] Nexp;
|
||||
logic [`CORRSHIFTSZ-1:0] Nfrac; // corectly shifted fraction
|
||||
logic [`NE+1:0] FullResExp; // Re with bits to determine sign and overflow
|
||||
logic Ms;
|
||||
logic [`NE+1:0] Me;
|
||||
logic [`CORRSHIFTSZ-1:0] Mf; // corectly shifted fraction
|
||||
logic [`NE+1:0] FullRe; // Re with bits to determine sign and overflow
|
||||
logic S; // S bit
|
||||
logic UfPlus1; // do you add one (for determining underflow flag)
|
||||
logic R; // bits needed to determine rounding
|
||||
logic [`FLEN:0] RoundAdd; // how much to add to the result
|
||||
logic [$clog2(`NORMSHIFTSZ)-1:0] ShiftAmt; // normalization shift count
|
||||
logic [`NORMSHIFTSZ-1:0] ShiftIn; // is the sum zero
|
||||
logic [`NORMSHIFTSZ-1:0] Shifted; // the shifted result
|
||||
logic Plus1; // add one to the final result?
|
||||
logic IntInvalid, Overflow, Invalid; // flags
|
||||
logic UfLSBRes;
|
||||
logic UfL;
|
||||
logic [`FMTBITS-1:0] OutFmt;
|
||||
// fma signals
|
||||
logic [`NE+1:0] FmaSe; // exponent of the normalized sum
|
||||
logic FmaSmZero; // is the sum zero
|
||||
logic FmaSZero; // is the sum zero
|
||||
logic [3*`NF+8:0] FmaShiftIn; // shift input
|
||||
logic [`NE+1:0] FmaConvNormSumExp; // exponent of the normalized sum not taking into account denormal or zero results
|
||||
logic [`NE+1:0] FmaNe; // exponent of the normalized sum not taking into account denormal or zero results
|
||||
logic FmaPreResultDenorm; // is the result denormalized - calculated before LZA corection
|
||||
logic [$clog2(3*`NF+7)-1:0] FmaShiftAmt; // normalization shift count
|
||||
// division signals
|
||||
logic [$clog2(`NORMSHIFTSZ)-1:0] DivShiftAmt;
|
||||
logic [`NORMSHIFTSZ-1:0] DivShiftIn;
|
||||
logic [`NE+1:0] DivCorrExp;
|
||||
logic [`NE+1:0] Qe;
|
||||
logic DivByZero;
|
||||
logic DivResDenorm;
|
||||
logic [`NE+1:0] DivDenormShift;
|
||||
@ -152,9 +150,9 @@ module postprocess(
|
||||
|
||||
cvtshiftcalc cvtshiftcalc(.ToInt, .CvtCe, .CvtResDenormUf, .Xm, .CvtLzcIn,
|
||||
.XZero, .IntToFp, .OutFmt, .CvtResUf, .CvtShiftIn);
|
||||
fmashiftcalc fmashiftcalc(.FmaSm, .Ze, .FmaPe, .FmaNCnt, .Fmt, .FmaKillProd, .FmaConvNormSumExp,
|
||||
.ZDenorm, .FmaSmZero, .FmaPreResultDenorm, .FmaShiftAmt, .FmaShiftIn);
|
||||
divshiftcalc divshiftcalc(.Fmt, .DivCalcExp, .Quot, .DivEarlyTermShiftDiv2, .DivResDenorm, .DivDenormShift, .DivShiftAmt, .DivShiftIn);
|
||||
fmashiftcalc fmashiftcalc(.FmaSm, .Ze, .FmaPe, .FmaNCnt, .Fmt, .FmaKillProd, .FmaNe,
|
||||
.FmaSZero, .FmaPreResultDenorm, .FmaShiftAmt, .FmaShiftIn);
|
||||
divshiftcalc divshiftcalc(.Fmt, .DivQe, .DivQm, .DivEarlyTermShift, .DivResDenorm, .DivDenormShift, .DivShiftAmt, .DivShiftIn);
|
||||
|
||||
always_comb
|
||||
case(PostProcSel)
|
||||
@ -183,9 +181,9 @@ module postprocess(
|
||||
|
||||
normshift normshift (.ShiftIn, .ShiftAmt, .Shifted);
|
||||
|
||||
lzacorrection lzacorrection(.FmaOp, .FmaKillProd, .FmaPreResultDenorm, .FmaConvNormSumExp,
|
||||
.DivResDenorm, .DivDenormShift, .DivOp, .DivCalcExp,
|
||||
.DivCorrExp, .FmaSmZero, .Shifted, .FmaSe, .Nfrac);
|
||||
shiftcorrection shiftcorrection(.FmaOp, .FmaPreResultDenorm, .FmaNe,
|
||||
.DivResDenorm, .DivDenormShift, .DivOp, .DivQe,
|
||||
.Qe, .FmaSZero, .Shifted, .FmaSe, .Mf);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Rounding
|
||||
@ -199,19 +197,19 @@ module postprocess(
|
||||
|
||||
|
||||
roundsign roundsign(.FmaPs, .FmaAs, .FmaInvA, .FmaOp, .DivOp, .CvtOp, .FmaNegSum,
|
||||
.Xs, .Ys, .CvtCs, .Nsgn);
|
||||
.Xs, .Ys, .CvtCs, .Ms);
|
||||
|
||||
round round(.OutFmt, .Frm, .S, .FmaZmSticky, .ZZero, .Plus1, .PostProcSel, .CvtCe, .DivCorrExp,
|
||||
.FmaInvA, .Nsgn, .FmaSe, .FmaOp, .CvtOp, .CvtResDenormUf, .Nfrac, .ToInt, .CvtResUf,
|
||||
.DivSticky, .DivNegSticky, .DivDone,
|
||||
.DivOp, .UfPlus1, .FullResExp, .Rf, .Re, .R, .RoundAdd, .UfLSBRes, .Nexp);
|
||||
round round(.OutFmt, .Frm, .S, .FmaZmS, .Plus1, .PostProcSel, .CvtCe, .Qe,
|
||||
.Ms, .FmaSe, .FmaOp, .CvtOp, .CvtResDenormUf, .Mf, .ToInt, .CvtResUf,
|
||||
.DivS, .DivDone,
|
||||
.DivOp, .UfPlus1, .FullRe, .Rf, .Re, .R, .UfL, .Me);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Sign calculation
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
resultsign resultsign(.Frm, .FmaPs, .FmaAs, .FmaSe, .R, .S,
|
||||
.FmaOp, .ZInf, .InfIn, .FmaSmZero, .Mult, .Nsgn, .Ws);
|
||||
.FmaOp, .ZInf, .InfIn, .FmaSZero, .Mult, .Ms, .Ws);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Flags
|
||||
@ -220,18 +218,18 @@ module postprocess(
|
||||
flags flags(.XSNaN, .YSNaN, .ZSNaN, .XInf, .YInf, .ZInf, .InfIn, .XZero, .YZero,
|
||||
.Xs, .Sqrt, .ToInt, .IntToFp, .Int64, .Signed, .OutFmt, .CvtCe,
|
||||
.XNaN, .YNaN, .NaNIn, .FmaAs, .FmaPs, .R, .IntInvalid, .DivByZero,
|
||||
.UfLSBRes, .S, .UfPlus1, .CvtOp, .DivOp, .FmaOp, .FullResExp, .Plus1,
|
||||
.Nexp, .CvtNegResMsbs, .Invalid, .Overflow, .PostProcFlg);
|
||||
.UfL, .S, .UfPlus1, .CvtOp, .DivOp, .FmaOp, .FullRe, .Plus1,
|
||||
.Me, .CvtNegResMsbs, .Invalid, .Overflow, .PostProcFlg);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Select the result
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
negateintres negateintres(.Xs, .Shifted, .Signed, .Int64, .Plus1, .CvtNegResMsbs, .CvtNegRes);
|
||||
resultselect resultselect(.Xs, .Xm, .Ym, .Zm, .XZero, .IntInvalid,
|
||||
specialcase specialcase(.Xs, .Xm, .Ym, .Zm, .XZero, .IntInvalid,
|
||||
.IntZero, .Frm, .OutFmt, .XNaN, .YNaN, .ZNaN, .CvtResUf,
|
||||
.NaNIn, .IntToFp, .Int64, .Signed, .CvtOp, .FmaOp, .Plus1, .Invalid, .Overflow, .InfIn, .CvtNegRes,
|
||||
.XInf, .YInf, .DivOp,
|
||||
.DivByZero, .FullResExp, .CvtCe, .Ws, .Re, .Rf, .W, .FCvtIntRes);
|
||||
.DivByZero, .FullRe, .CvtCe, .Ws, .Re, .Rf, .PostProcRes, .FCvtIntRes);
|
||||
|
||||
endmodule
|
||||
|
135
pipelined/src/fpu/qsel.sv
Normal file
135
pipelined/src/fpu/qsel.sv
Normal file
@ -0,0 +1,135 @@
|
||||
///////////////////////////////////////////
|
||||
// qsel.sv
|
||||
//
|
||||
// Written: David_Harris@hmc.edu, me@KatherineParry.com, cturek@hmc.edu
|
||||
// Modified:13 January 2022
|
||||
//
|
||||
// Purpose: Combined Divide and Square Root Floating Point and Integer Unit
|
||||
//
|
||||
// A component of the Wally configurable RISC-V project.
|
||||
//
|
||||
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// MIT LICENSE
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
|
||||
// software and associated documentation files (the "Software"), to deal in the Software
|
||||
// without restriction, including without limitation the rights to use, copy, modify, merge,
|
||||
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
|
||||
// to whom the Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all copies or
|
||||
// substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
|
||||
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
|
||||
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
|
||||
// OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
module qsel2 ( // *** eventually just change to 4 bits
|
||||
input logic [`DIVLEN+3:`DIVLEN] ps, pc, // top four bits of two vectors that are added here (presumably the partial remainder in sum/carry form - confirm against the caller)
|
||||
output logic qp, qz//, qn
|
||||
);
|
||||
|
||||
logic [`DIVLEN+3:`DIVLEN] p, g; // per-bit propagate and generate of ps + pc
|
||||
logic magnitude, sign, cout;
|
||||
|
||||
// The quotient selection logic is presented for simplicity, not
|
||||
// for efficiency. You can probably optimize your logic to
|
||||
// select the proper divisor with less delay.
|
||||
|
||||
// Quotient equations from EE371 lecture notes 13-20
|
||||
assign p = ps ^ pc;
|
||||
assign g = ps & pc;
|
||||
|
||||
assign magnitude = ~(&p[`DIVLEN+2:`DIVLEN]); // 0 only when all three lower propagate bits are 1
|
||||
assign cout = g[`DIVLEN+2] | (p[`DIVLEN+2] & (g[`DIVLEN+1] | p[`DIVLEN+1] & g[`DIVLEN])); // carry out of the three lower bit positions (no carry in)
|
||||
assign sign = p[`DIVLEN+3] ^ cout; // top bit of the four-bit sum ps + pc
|
||||
/* assign #1 magnitude = ~((ps[54]^pc[54]) & (ps[53]^pc[53]) &
|
||||
(ps[52]^pc[52]));
|
||||
assign #1 sign = (ps[55]^pc[55])^
|
||||
(ps[54] & pc[54] | ((ps[54]^pc[54]) &
|
||||
(ps[53]&pc[53] | ((ps[53]^pc[53]) &
|
||||
(ps[52]&pc[52]))))); */
|
||||
|
||||
// Produce quotient = +1, 0, or -1
|
||||
assign qp = magnitude & ~sign; // +1 when magnitude is set and the sign bit is clear
|
||||
assign qz = ~magnitude; // 0 when magnitude is clear; qp and qz both low covers the remaining (-1) case
|
||||
// assign #1 qn = magnitude & sign;
|
||||
endmodule
|
||||
|
||||
module qsel4 ( // table-driven digit selection: picks q from three bits of D and the top bits of WS + WC
|
||||
input logic [`DIVLEN+3:0] D, // only bits DIVLEN-1 down to DIVLEN-3 are used here
|
||||
input logic [`DIVLEN+3:0] WS, WC, // only bits DIVLEN+3 down to DIVLEN-4 are used here
|
||||
output logic [3:0] q // table entry: 1000, 0100, 0000, 0010 or 0001 from the highest threshold band to the lowest (presumably +2, +1, 0, -1, -2 - confirm against the consumer)
|
||||
);
|
||||
logic [6:0] Wmsbs;
|
||||
logic [7:0] PreWmsbs;
|
||||
logic [2:0] Dmsbs;
|
||||
assign PreWmsbs = WC[`DIVLEN+3:`DIVLEN-4] + WS[`DIVLEN+3:`DIVLEN-4]; // eight-bit sum of the top eight bits of WC and WS
|
||||
assign Wmsbs = PreWmsbs[7:1]; // keep the upper seven bits of that sum
|
||||
assign Dmsbs = D[`DIVLEN-1:`DIVLEN-3]; // the three bits of D just below bit DIVLEN
|
||||
// D = 0001.xxx...
|
||||
// Dmsbs = | |
|
||||
// W = xxxx.xxx...
|
||||
// Wmsbs = | |
|
||||
|
||||
logic [3:0] QSel4[1023:0]; // lookup table indexed by {Dmsbs, Wmsbs}: 8 values of Dmsbs x 128 values of Wmsbs
|
||||
|
||||
always_comb begin
|
||||
integer d, w, i, w2;
|
||||
for(d=0; d<8; d++)
|
||||
for(w=0; w<128; w++)begin
|
||||
i = d*128+w;
|
||||
w2 = w-128*(w>=64); // convert to two's complement: reinterpret the 7-bit index as a signed value in -64..63
|
||||
case(d) // each value of Dmsbs has its own set of four thresholds on w2
|
||||
0: if($signed(w2)>=$signed(12)) QSel4[i] = 4'b1000;
|
||||
else if(w2>=4) QSel4[i] = 4'b0100;
|
||||
else if(w2>=-4) QSel4[i] = 4'b0000;
|
||||
else if(w2>=-13) QSel4[i] = 4'b0010;
|
||||
else QSel4[i] = 4'b0001;
|
||||
1: if(w2>=14) QSel4[i] = 4'b1000;
|
||||
else if(w2>=4) QSel4[i] = 4'b0100;
|
||||
else if(w2>=-6) QSel4[i] = 4'b0000;
|
||||
else if(w2>=-15) QSel4[i] = 4'b0010;
|
||||
else QSel4[i] = 4'b0001;
|
||||
2: if(w2>=15) QSel4[i] = 4'b1000;
|
||||
else if(w2>=4) QSel4[i] = 4'b0100;
|
||||
else if(w2>=-6) QSel4[i] = 4'b0000;
|
||||
else if(w2>=-16) QSel4[i] = 4'b0010;
|
||||
else QSel4[i] = 4'b0001;
|
||||
3: if(w2>=16) QSel4[i] = 4'b1000;
|
||||
else if(w2>=4) QSel4[i] = 4'b0100;
|
||||
else if(w2>=-6) QSel4[i] = 4'b0000;
|
||||
else if(w2>=-18) QSel4[i] = 4'b0010;
|
||||
else QSel4[i] = 4'b0001;
|
||||
4: if(w2>=18) QSel4[i] = 4'b1000;
|
||||
else if(w2>=6) QSel4[i] = 4'b0100;
|
||||
else if(w2>=-8) QSel4[i] = 4'b0000;
|
||||
else if(w2>=-20) QSel4[i] = 4'b0010;
|
||||
else QSel4[i] = 4'b0001;
|
||||
5: if(w2>=20) QSel4[i] = 4'b1000;
|
||||
else if(w2>=6) QSel4[i] = 4'b0100;
|
||||
else if(w2>=-8) QSel4[i] = 4'b0000;
|
||||
else if(w2>=-20) QSel4[i] = 4'b0010;
|
||||
else QSel4[i] = 4'b0001;
|
||||
6: if(w2>=20) QSel4[i] = 4'b1000;
|
||||
else if(w2>=8) QSel4[i] = 4'b0100;
|
||||
else if(w2>=-8) QSel4[i] = 4'b0000;
|
||||
else if(w2>=-22) QSel4[i] = 4'b0010;
|
||||
else QSel4[i] = 4'b0001;
|
||||
7: if(w2>=24) QSel4[i] = 4'b1000;
|
||||
else if(w2>=8) QSel4[i] = 4'b0100;
|
||||
else if(w2>=-8) QSel4[i] = 4'b0000;
|
||||
else if(w2>=-24) QSel4[i] = 4'b0010;
|
||||
else QSel4[i] = 4'b0001;
|
||||
endcase
|
||||
end
|
||||
end
|
||||
assign q = QSel4[{Dmsbs,Wmsbs}]; // Dmsbs selects the threshold set, Wmsbs the position within it
|
||||
|
||||
endmodule
|
@ -35,32 +35,29 @@ module resultsign(
|
||||
input logic InfIn,
|
||||
input logic FmaOp,
|
||||
input logic [`NE+1:0] FmaSe,
|
||||
input logic FmaSmZero,
|
||||
input logic FmaSZero,
|
||||
input logic Mult,
|
||||
input logic R,
|
||||
input logic S,
|
||||
input logic Nsgn,
|
||||
input logic Ms,
|
||||
output logic Ws
|
||||
);
|
||||
|
||||
logic ZeroSgn;
|
||||
logic InfSgn;
|
||||
logic Underflow;
|
||||
// logic ResultSgnTmp;
|
||||
logic Zeros;
|
||||
logic Infs;
|
||||
|
||||
// Determine the sign if the sum is zero
|
||||
// if cancelation then 0 unless round to -infinity
|
||||
// if multiply then Psgn
|
||||
// otherwise psign
|
||||
assign Underflow = FmaSe[`NE+1] | ((FmaSe == 0) & (R|S));
|
||||
assign ZeroSgn = (FmaPs^FmaAs)&~Underflow&~Mult ? Frm[1:0] == 2'b10 : FmaPs;
|
||||
assign Zeros = (FmaPs^FmaAs)&~(FmaSe[`NE+1] | ((FmaSe == 0) & (R|S)))&~Mult ? Frm[1:0] == 2'b10 : FmaPs;
|
||||
|
||||
|
||||
// is the result negative
|
||||
// if p - z is the Sum negative
|
||||
// if -p + z is the Sum positive
|
||||
// if -p - z then the Sum is negative
|
||||
assign InfSgn = ZInf ? FmaAs : FmaPs;
|
||||
assign Ws = InfIn&FmaOp ? InfSgn : FmaSmZero&FmaOp ? ZeroSgn : Nsgn;
|
||||
assign Infs = ZInf ? FmaAs : FmaPs;
|
||||
assign Ws = InfIn&FmaOp ? Infs : FmaSZero&FmaOp ? Zeros : Ms;
|
||||
|
||||
endmodule
|
@ -46,36 +46,32 @@ module round(
|
||||
input logic [1:0] PostProcSel,
|
||||
input logic CvtResDenormUf,
|
||||
input logic CvtResUf,
|
||||
input logic [`CORRSHIFTSZ-1:0] Nfrac,
|
||||
input logic FmaZmSticky, // addend's sticky bit
|
||||
input logic ZZero, // is Z zero
|
||||
input logic FmaInvA, // invert Z
|
||||
input logic [`CORRSHIFTSZ-1:0] Mf,
|
||||
input logic FmaZmS, // addend's sticky bit
|
||||
input logic [`NE+1:0] FmaSe, // exponent of the normalized sum
|
||||
input logic Nsgn, // the result's sign
|
||||
input logic Ms, // the result's sign
|
||||
input logic [`NE:0] CvtCe, // the calculated expoent
|
||||
input logic [`NE+1:0] DivCorrExp, // the calculated expoent
|
||||
input logic DivSticky, // sticky bit
|
||||
input logic DivNegSticky,
|
||||
input logic [`NE+1:0] Qe, // the calculated expoent
|
||||
input logic DivS, // sticky bit
|
||||
output logic UfPlus1, // do you add or subtract on from the result
|
||||
output logic [`NE+1:0] FullResExp, // Re with bits to determine sign and overflow
|
||||
output logic [`NE+1:0] FullRe, // Re with bits to determine sign and overflow
|
||||
output logic [`NF-1:0] Rf, // Result fraction
|
||||
output logic [`NE-1:0] Re, // Result exponent
|
||||
output logic S, // sticky bit
|
||||
output logic [`NE+1:0] Nexp,
|
||||
output logic [`NE+1:0] Me,
|
||||
output logic Plus1,
|
||||
output logic [`FLEN:0] RoundAdd, // how much to add to the result
|
||||
output logic R, UfLSBRes // bits needed to calculate rounding
|
||||
output logic R, UfL // bits needed to calculate rounding
|
||||
);
|
||||
logic LSBRes; // bit used for rounding - least significant bit of the normalized sum
|
||||
logic SubBySmallNum, UfSubBySmallNum; // was there supposed to be a subtraction by a small number
|
||||
logic UfCalcPlus1, CalcMinus1, Minus1; // do you add or subtract on from the result
|
||||
logic NormSumSticky; // normalized sum's sticky bit
|
||||
logic UfSticky; // sticky bit for underlow calculation
|
||||
logic L; // bit used for rounding - least significant bit of the normalized sum
|
||||
logic UfCalcPlus1;
|
||||
logic NormS; // normalized sum's sticky bit
|
||||
logic UfS; // sticky bit for underlow calculation
|
||||
logic [`NF-1:0] RoundFrac;
|
||||
logic FpRes, IntRes;
|
||||
logic UfRound;
|
||||
logic UfR;
|
||||
logic FpRound, FpLSBRes, FpUfRound;
|
||||
logic CalcPlus1, FpPlus1;
|
||||
logic [`FLEN:0] RoundAdd; // how much to add to the result
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Rounding
|
||||
@ -118,61 +114,61 @@ module round(
|
||||
// | NF |1|1|
|
||||
// ^ ^ if floating point result
|
||||
// ^ if not an FMA result
|
||||
if (`XLENPOS == 1)assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) |
|
||||
(|Nfrac[`CORRSHIFTSZ-`XLEN-2:0]);
|
||||
if (`XLENPOS == 1)assign NormS = (|Mf[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) |
|
||||
(|Mf[`CORRSHIFTSZ-`XLEN-2:0]);
|
||||
// 2: NF > XLEN
|
||||
if (`XLENPOS == 2)assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&IntRes) |
|
||||
(|Nfrac[`CORRSHIFTSZ-`NF-2:0]);
|
||||
if (`XLENPOS == 2)assign NormS = (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&IntRes) |
|
||||
(|Mf[`CORRSHIFTSZ-`NF-2:0]);
|
||||
|
||||
end else if (`FPSIZES == 2) begin
|
||||
// XLEN is either 64 or 32
|
||||
// so half and single are always smaller than XLEN
|
||||
|
||||
// 1: XLEN > NF > NF1
|
||||
if (`XLENPOS == 1) assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&FpRes&~OutFmt) |
|
||||
(|Nfrac[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) |
|
||||
(|Nfrac[`CORRSHIFTSZ-`XLEN-2:0]);
|
||||
if (`XLENPOS == 1) assign NormS = (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&FpRes&~OutFmt) |
|
||||
(|Mf[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) |
|
||||
(|Mf[`CORRSHIFTSZ-`XLEN-2:0]);
|
||||
// 2: NF > XLEN > NF1
|
||||
if (`XLENPOS == 2) assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~OutFmt) |
|
||||
(|Nfrac[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&(IntRes|~OutFmt)) |
|
||||
(|Nfrac[`CORRSHIFTSZ-`NF-2:0]);
|
||||
if (`XLENPOS == 2) assign NormS = (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~OutFmt) |
|
||||
(|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&(IntRes|~OutFmt)) |
|
||||
(|Mf[`CORRSHIFTSZ-`NF-2:0]);
|
||||
// 3: NF > NF1 > XLEN
|
||||
if (`XLENPOS == 3) assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF1-1]&IntRes) |
|
||||
(|Nfrac[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&(~OutFmt|IntRes)) |
|
||||
(|Nfrac[`CORRSHIFTSZ-`NF-2:0]);
|
||||
if (`XLENPOS == 3) assign NormS = (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF1-1]&IntRes) |
|
||||
(|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&(~OutFmt|IntRes)) |
|
||||
(|Mf[`CORRSHIFTSZ-`NF-2:0]);
|
||||
|
||||
end else if (`FPSIZES == 3) begin
|
||||
// 1: XLEN > NF > NF1
|
||||
if (`XLENPOS == 1) assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`NF1-1]&FpRes&(OutFmt==`FMT1)) |
|
||||
(|Nfrac[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&FpRes&~(OutFmt==`FMT)) |
|
||||
(|Nfrac[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) |
|
||||
(|Nfrac[`CORRSHIFTSZ-`XLEN-2:0]);
|
||||
if (`XLENPOS == 1) assign NormS = (|Mf[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`NF1-1]&FpRes&(OutFmt==`FMT1)) |
|
||||
(|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&FpRes&~(OutFmt==`FMT)) |
|
||||
(|Mf[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) |
|
||||
(|Mf[`CORRSHIFTSZ-`XLEN-2:0]);
|
||||
// 2: NF > XLEN > NF1
|
||||
if (`XLENPOS == 2) assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`NF1-1]&FpRes&(OutFmt==`FMT1)) |
|
||||
(|Nfrac[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~(OutFmt==`FMT)) |
|
||||
(|Nfrac[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&(IntRes|~(OutFmt==`FMT))) |
|
||||
(|Nfrac[`CORRSHIFTSZ-`NF-2:0]);
|
||||
if (`XLENPOS == 2) assign NormS = (|Mf[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`NF1-1]&FpRes&(OutFmt==`FMT1)) |
|
||||
(|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~(OutFmt==`FMT)) |
|
||||
(|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&(IntRes|~(OutFmt==`FMT))) |
|
||||
(|Mf[`CORRSHIFTSZ-`NF-2:0]);
|
||||
// 3: NF > NF1 > XLEN
|
||||
if (`XLENPOS == 3) assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&(OutFmt==`FMT1)) |
|
||||
(|Nfrac[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF1-1]&((OutFmt==`FMT1)|IntRes)) |
|
||||
(|Nfrac[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&(~(OutFmt==`FMT)|IntRes)) |
|
||||
(|Nfrac[`CORRSHIFTSZ-`NF-2:0]);
|
||||
if (`XLENPOS == 3) assign NormS = (|Mf[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&(OutFmt==`FMT1)) |
|
||||
(|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF1-1]&((OutFmt==`FMT1)|IntRes)) |
|
||||
(|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&(~(OutFmt==`FMT)|IntRes)) |
|
||||
(|Mf[`CORRSHIFTSZ-`NF-2:0]);
|
||||
|
||||
end else if (`FPSIZES == 4) begin
|
||||
// Quad precision will always be greater than XLEN
|
||||
// 2: NF > XLEN > NF1
|
||||
if (`XLENPOS == 2) assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`H_NF-2:`CORRSHIFTSZ-`S_NF-1]&FpRes&(OutFmt==`H_FMT)) |
|
||||
(|Nfrac[`CORRSHIFTSZ-`S_NF-2:`CORRSHIFTSZ-`D_NF-1]&FpRes&((OutFmt==`S_FMT)|(OutFmt==`H_FMT))) |
|
||||
(|Nfrac[`CORRSHIFTSZ-`D_NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~(OutFmt==`Q_FMT)) |
|
||||
(|Nfrac[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`Q_NF-1]&(~(OutFmt==`Q_FMT)|IntRes)) |
|
||||
(|Nfrac[`CORRSHIFTSZ-`Q_NF-2:0]);
|
||||
if (`XLENPOS == 2) assign NormS = (|Mf[`CORRSHIFTSZ-`H_NF-2:`CORRSHIFTSZ-`S_NF-1]&FpRes&(OutFmt==`H_FMT)) |
|
||||
(|Mf[`CORRSHIFTSZ-`S_NF-2:`CORRSHIFTSZ-`D_NF-1]&FpRes&((OutFmt==`S_FMT)|(OutFmt==`H_FMT))) |
|
||||
(|Mf[`CORRSHIFTSZ-`D_NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~(OutFmt==`Q_FMT)) |
|
||||
(|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`Q_NF-1]&(~(OutFmt==`Q_FMT)|IntRes)) |
|
||||
(|Mf[`CORRSHIFTSZ-`Q_NF-2:0]);
|
||||
// 3: NF > NF1 > XLEN
|
||||
// The extra XLEN bit will be ORed later when calculating the final sticky bit - the ufplus1 is not needed for integer
|
||||
if (`XLENPOS == 3) assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`H_NF-2:`CORRSHIFTSZ-`S_NF-1]&FpRes&(OutFmt==`H_FMT)) |
|
||||
(|Nfrac[`CORRSHIFTSZ-`S_NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&((OutFmt==`S_FMT)|(OutFmt==`H_FMT))) |
|
||||
(|Nfrac[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`D_NF-1]&((OutFmt==`S_FMT)|(OutFmt==`H_FMT)|IntRes)) |
|
||||
(|Nfrac[`CORRSHIFTSZ-`D_NF-2:`CORRSHIFTSZ-`Q_NF-1]&(~(OutFmt==`Q_FMT)|IntRes)) |
|
||||
(|Nfrac[`CORRSHIFTSZ-`Q_NF-2:0]);
|
||||
if (`XLENPOS == 3) assign NormS = (|Mf[`CORRSHIFTSZ-`H_NF-2:`CORRSHIFTSZ-`S_NF-1]&FpRes&(OutFmt==`H_FMT)) |
|
||||
(|Mf[`CORRSHIFTSZ-`S_NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&((OutFmt==`S_FMT)|(OutFmt==`H_FMT))) |
|
||||
(|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`D_NF-1]&((OutFmt==`S_FMT)|(OutFmt==`H_FMT)|IntRes)) |
|
||||
(|Mf[`CORRSHIFTSZ-`D_NF-2:`CORRSHIFTSZ-`Q_NF-1]&(~(OutFmt==`Q_FMT)|IntRes)) |
|
||||
(|Mf[`CORRSHIFTSZ-`Q_NF-2:0]);
|
||||
|
||||
end
|
||||
|
||||
@ -180,37 +176,37 @@ module round(
|
||||
|
||||
// only add the Addend sticky if doing an FMA operation
|
||||
// - the shifter shifts too far left when there's an underflow (shifting out all possible sticky bits)
|
||||
assign UfSticky = FmaZmSticky&FmaOp | NormSumSticky | CvtResUf&CvtOp | FmaSe[`NE+1]&FmaOp | DivSticky&DivOp;
|
||||
assign UfS = FmaZmS&FmaOp | NormS | CvtResUf&CvtOp | FmaSe[`NE+1]&FmaOp | DivS&DivOp;
|
||||
|
||||
// determine round and LSB of the rounded value
|
||||
// - underflow round bit is used to determine the underflow flag
|
||||
if (`FPSIZES == 1) begin
|
||||
assign FpRound = Nfrac[`CORRSHIFTSZ-`NF-1];
|
||||
assign FpLSBRes = Nfrac[`CORRSHIFTSZ-`NF];
|
||||
assign FpUfRound = Nfrac[`CORRSHIFTSZ-`NF-2];
|
||||
assign FpRound = Mf[`CORRSHIFTSZ-`NF-1];
|
||||
assign FpLSBRes = Mf[`CORRSHIFTSZ-`NF];
|
||||
assign FpUfRound = Mf[`CORRSHIFTSZ-`NF-2];
|
||||
|
||||
end else if (`FPSIZES == 2) begin
|
||||
assign FpRound = OutFmt ? Nfrac[`CORRSHIFTSZ-`NF-1] : Nfrac[`CORRSHIFTSZ-`NF1-1];
|
||||
assign FpLSBRes = OutFmt ? Nfrac[`CORRSHIFTSZ-`NF] : Nfrac[`CORRSHIFTSZ-`NF1];
|
||||
assign FpUfRound = OutFmt ? Nfrac[`CORRSHIFTSZ-`NF-2] : Nfrac[`CORRSHIFTSZ-`NF1-2];
|
||||
assign FpRound = OutFmt ? Mf[`CORRSHIFTSZ-`NF-1] : Mf[`CORRSHIFTSZ-`NF1-1];
|
||||
assign FpLSBRes = OutFmt ? Mf[`CORRSHIFTSZ-`NF] : Mf[`CORRSHIFTSZ-`NF1];
|
||||
assign FpUfRound = OutFmt ? Mf[`CORRSHIFTSZ-`NF-2] : Mf[`CORRSHIFTSZ-`NF1-2];
|
||||
|
||||
end else if (`FPSIZES == 3) begin
|
||||
always_comb
|
||||
case (OutFmt)
|
||||
`FMT: begin
|
||||
FpRound = Nfrac[`CORRSHIFTSZ-`NF-1];
|
||||
FpLSBRes = Nfrac[`CORRSHIFTSZ-`NF];
|
||||
FpUfRound = Nfrac[`CORRSHIFTSZ-`NF-2];
|
||||
FpRound = Mf[`CORRSHIFTSZ-`NF-1];
|
||||
FpLSBRes = Mf[`CORRSHIFTSZ-`NF];
|
||||
FpUfRound = Mf[`CORRSHIFTSZ-`NF-2];
|
||||
end
|
||||
`FMT1: begin
|
||||
FpRound = Nfrac[`CORRSHIFTSZ-`NF1-1];
|
||||
FpLSBRes = Nfrac[`CORRSHIFTSZ-`NF1];
|
||||
FpUfRound = Nfrac[`CORRSHIFTSZ-`NF1-2];
|
||||
FpRound = Mf[`CORRSHIFTSZ-`NF1-1];
|
||||
FpLSBRes = Mf[`CORRSHIFTSZ-`NF1];
|
||||
FpUfRound = Mf[`CORRSHIFTSZ-`NF1-2];
|
||||
end
|
||||
`FMT2: begin
|
||||
FpRound = Nfrac[`CORRSHIFTSZ-`NF2-1];
|
||||
FpLSBRes = Nfrac[`CORRSHIFTSZ-`NF2];
|
||||
FpUfRound = Nfrac[`CORRSHIFTSZ-`NF2-2];
|
||||
FpRound = Mf[`CORRSHIFTSZ-`NF2-1];
|
||||
FpLSBRes = Mf[`CORRSHIFTSZ-`NF2];
|
||||
FpUfRound = Mf[`CORRSHIFTSZ-`NF2-2];
|
||||
end
|
||||
default: begin
|
||||
FpRound = 1'bx;
|
||||
@ -222,130 +218,97 @@ module round(
|
||||
always_comb
|
||||
case (OutFmt)
|
||||
2'h3: begin
|
||||
FpRound = Nfrac[`CORRSHIFTSZ-`Q_NF-1];
|
||||
FpLSBRes = Nfrac[`CORRSHIFTSZ-`Q_NF];
|
||||
FpUfRound = Nfrac[`CORRSHIFTSZ-`Q_NF-2];
|
||||
FpRound = Mf[`CORRSHIFTSZ-`Q_NF-1];
|
||||
FpLSBRes = Mf[`CORRSHIFTSZ-`Q_NF];
|
||||
FpUfRound = Mf[`CORRSHIFTSZ-`Q_NF-2];
|
||||
end
|
||||
2'h1: begin
|
||||
FpRound = Nfrac[`CORRSHIFTSZ-`D_NF-1];
|
||||
FpLSBRes = Nfrac[`CORRSHIFTSZ-`D_NF];
|
||||
FpUfRound = Nfrac[`CORRSHIFTSZ-`D_NF-2];
|
||||
FpRound = Mf[`CORRSHIFTSZ-`D_NF-1];
|
||||
FpLSBRes = Mf[`CORRSHIFTSZ-`D_NF];
|
||||
FpUfRound = Mf[`CORRSHIFTSZ-`D_NF-2];
|
||||
end
|
||||
2'h0: begin
|
||||
FpRound = Nfrac[`CORRSHIFTSZ-`S_NF-1];
|
||||
FpLSBRes = Nfrac[`CORRSHIFTSZ-`S_NF];
|
||||
FpUfRound = Nfrac[`CORRSHIFTSZ-`S_NF-2];
|
||||
FpRound = Mf[`CORRSHIFTSZ-`S_NF-1];
|
||||
FpLSBRes = Mf[`CORRSHIFTSZ-`S_NF];
|
||||
FpUfRound = Mf[`CORRSHIFTSZ-`S_NF-2];
|
||||
end
|
||||
2'h2: begin
|
||||
FpRound = Nfrac[`CORRSHIFTSZ-`H_NF-1];
|
||||
FpLSBRes = Nfrac[`CORRSHIFTSZ-`H_NF];
|
||||
FpUfRound = Nfrac[`CORRSHIFTSZ-`H_NF-2];
|
||||
FpRound = Mf[`CORRSHIFTSZ-`H_NF-1];
|
||||
FpLSBRes = Mf[`CORRSHIFTSZ-`H_NF];
|
||||
FpUfRound = Mf[`CORRSHIFTSZ-`H_NF-2];
|
||||
end
|
||||
endcase
|
||||
end
|
||||
|
||||
assign R = ToInt&CvtOp ? Nfrac[`CORRSHIFTSZ-`XLEN-1] : FpRound;
|
||||
assign LSBRes = ToInt&CvtOp ? Nfrac[`CORRSHIFTSZ-`XLEN] : FpLSBRes;
|
||||
assign UfRound = ToInt&CvtOp ? Nfrac[`CORRSHIFTSZ-`XLEN-2] : FpUfRound;
|
||||
assign R = ToInt&CvtOp ? Mf[`CORRSHIFTSZ-`XLEN-1] : FpRound;
|
||||
assign L = ToInt&CvtOp ? Mf[`CORRSHIFTSZ-`XLEN] : FpLSBRes;
|
||||
assign UfR = ToInt&CvtOp ? Mf[`CORRSHIFTSZ-`XLEN-2] : FpUfRound;
|
||||
|
||||
// used to determine underflow flag
|
||||
assign UfLSBRes = FpRound;
|
||||
assign UfL = FpRound;
|
||||
// determine sticky
|
||||
assign S = UfSticky | UfRound;
|
||||
|
||||
|
||||
// Deterimine if a small number was supposed to be subtrated
|
||||
// - for FMA or if division has a negitive sticky bit
|
||||
assign SubBySmallNum = ((FmaZmSticky&FmaOp&~ZZero&FmaInvA) | (DivNegSticky&DivOp)) & ~(NormSumSticky|UfRound);
|
||||
assign UfSubBySmallNum = ((FmaZmSticky&FmaOp&~ZZero&FmaInvA) | (DivNegSticky&DivOp)) & ~NormSumSticky;
|
||||
assign S = UfS | UfR;
|
||||
|
||||
|
||||
always_comb begin
|
||||
// Determine if you add 1
|
||||
case (Frm)
|
||||
3'b000: CalcPlus1 = R & ((S| LSBRes)&~SubBySmallNum);//round to nearest even
|
||||
3'b000: CalcPlus1 = R & (S| L);//round to nearest even
|
||||
3'b001: CalcPlus1 = 0;//round to zero
|
||||
3'b010: CalcPlus1 = Nsgn & ~(SubBySmallNum & ~R);//round down
|
||||
3'b011: CalcPlus1 = ~Nsgn & ~(SubBySmallNum & ~R);//round up
|
||||
3'b100: CalcPlus1 = R & ~SubBySmallNum;//round to nearest max magnitude
|
||||
3'b010: CalcPlus1 = Ms;//round down
|
||||
3'b011: CalcPlus1 = ~Ms;//round up
|
||||
3'b100: CalcPlus1 = R;//round to nearest max magnitude
|
||||
default: CalcPlus1 = 1'bx;
|
||||
endcase
|
||||
// Determine if you add 1 (for underflow flag)
|
||||
case (Frm)
|
||||
3'b000: UfCalcPlus1 = UfRound & ((UfSticky| UfLSBRes)&~UfSubBySmallNum);//round to nearest even
|
||||
3'b000: UfCalcPlus1 = UfR & (UfS| UfL);//round to nearest even
|
||||
3'b001: UfCalcPlus1 = 0;//round to zero
|
||||
3'b010: UfCalcPlus1 = Nsgn & ~(UfSubBySmallNum & ~UfRound);//round down
|
||||
3'b011: UfCalcPlus1 = ~Nsgn & ~(UfSubBySmallNum & ~UfRound);//round up
|
||||
3'b100: UfCalcPlus1 = UfRound & ~UfSubBySmallNum;//round to nearest max magnitude
|
||||
3'b010: UfCalcPlus1 = Ms;//round down
|
||||
3'b011: UfCalcPlus1 = ~Ms;//round up
|
||||
3'b100: UfCalcPlus1 = UfR;//round to nearest max magnitude
|
||||
default: UfCalcPlus1 = 1'bx;
|
||||
endcase
|
||||
// Determine if you subtract 1
|
||||
case (Frm)
|
||||
3'b000: CalcMinus1 = 0;//round to nearest even
|
||||
3'b001: CalcMinus1 = SubBySmallNum & ~R;//round to zero
|
||||
3'b010: CalcMinus1 = ~Nsgn & ~R & SubBySmallNum;//round down
|
||||
3'b011: CalcMinus1 = Nsgn & ~R & SubBySmallNum;//round up
|
||||
3'b100: CalcMinus1 = 0;//round to nearest max magnitude
|
||||
default: CalcMinus1 = 1'bx;
|
||||
endcase
|
||||
|
||||
end
|
||||
|
||||
// If an answer is exact don't round
|
||||
assign Plus1 = CalcPlus1 & (S | R);
|
||||
assign FpPlus1 = Plus1&~(ToInt&CvtOp);
|
||||
assign UfPlus1 = UfCalcPlus1 & S; // UfRound is part of sticky
|
||||
assign Minus1 = CalcMinus1 & (S | R);
|
||||
assign UfPlus1 = UfCalcPlus1 & S; // UfR is part of sticky
|
||||
|
||||
// Compute rounded result
|
||||
if (`FPSIZES == 1) begin
|
||||
assign RoundAdd = Minus1 ? {`FLEN+1{1'b1}} : {{`FLEN{1'b0}}, FpPlus1};
|
||||
assign RoundAdd = {{`FLEN{1'b0}}, FpPlus1};
|
||||
|
||||
end else if (`FPSIZES == 2) begin
|
||||
// \/FLEN+1
|
||||
// | NE+2 | NF |
|
||||
// '-NE+2-^----NF1----^
|
||||
// `FLEN+1-`NE-2-`NF1 = FLEN-1-NE-NF1
|
||||
assign RoundAdd = OutFmt ? Minus1 ? {`FLEN+1{1'b1}} : {{{`FLEN{1'b0}}}, FpPlus1} :
|
||||
Minus1 ? {{`NE+2+`NF1{1'b1}}, (`FLEN-1-`NE-`NF1)'(0)} : {(`NE+1+`NF1)'(0), FpPlus1, (`FLEN-1-`NE-`NF1)'(0)};
|
||||
assign RoundAdd = {(`NE+1+`NF1)'(0), FpPlus1&~OutFmt, (`NF-`NF1-1)'(0), FpPlus1&OutFmt};
|
||||
|
||||
end else if (`FPSIZES == 3) begin
|
||||
always_comb begin
|
||||
case (OutFmt)
|
||||
`FMT: RoundAdd = Minus1 ? {`FLEN+1{1'b1}} : {{{`FLEN{1'b0}}}, FpPlus1};
|
||||
`FMT1: RoundAdd = Minus1 ? {{`NE+2+`NF1{1'b1}}, (`FLEN-1-`NE-`NF1)'(0)} : {(`NE+1+`NF1)'(0), FpPlus1, (`FLEN-1-`NE-`NF1)'(0)};
|
||||
`FMT2: RoundAdd = Minus1 ? {{`NE+2+`NF2{1'b1}}, (`FLEN-1-`NE-`NF2)'(0)} : {(`NE+1+`NF2)'(0), FpPlus1, (`FLEN-1-`NE-`NF2)'(0)};
|
||||
default: RoundAdd = (`FLEN+1)'(0);
|
||||
endcase
|
||||
end
|
||||
assign RoundAdd = {(`NE+1+`NF2)'(0), FpPlus1&(OutFmt==`FMT2), (`NF1-`NF2-1)'(0), FpPlus1&(OutFmt==`FMT1), (`NF-`NF1-1)'(0), FpPlus1&(OutFmt==`FMT)};
|
||||
|
||||
end else if (`FPSIZES == 4) begin
|
||||
always_comb begin
|
||||
case (OutFmt)
|
||||
2'h3: RoundAdd = Minus1 ? {`FLEN+1{1'b1}} : {{{`FLEN{1'b0}}}, FpPlus1};
|
||||
2'h1: RoundAdd = Minus1 ? {{`NE+2+`D_NF{1'b1}}, (`FLEN-1-`NE-`D_NF)'(0)} : {(`NE+1+`D_NF)'(0), FpPlus1, (`FLEN-1-`NE-`D_NF)'(0)};
|
||||
2'h0: RoundAdd = Minus1 ? {{`NE+2+`S_NF{1'b1}}, (`FLEN-1-`NE-`S_NF)'(0)} : {(`NE+1+`S_NF)'(0), FpPlus1, (`FLEN-1-`NE-`S_NF)'(0)};
|
||||
2'h2: RoundAdd = Minus1 ? {{`NE+2+`H_NF{1'b1}}, (`FLEN-1-`NE-`H_NF)'(0)} : {(`NE+1+`H_NF)'(0), FpPlus1, (`FLEN-1-`NE-`H_NF)'(0)};
|
||||
endcase
|
||||
end
|
||||
|
||||
end
|
||||
end else if (`FPSIZES == 4)
|
||||
assign RoundAdd = {(`Q_NE+1+`H_NF)'(0), FpPlus1&(OutFmt==`H_FMT), (`S_NF-`H_NF-1)'(0), FpPlus1&(OutFmt==`S_FMT), (`D_NF-`S_NF-1)'(0), FpPlus1&(OutFmt==`D_FMT), (`Q_NF-`D_NF-1)'(0), FpPlus1&(OutFmt==`Q_FMT)};
|
||||
|
||||
// determine the result to be rounded
|
||||
assign RoundFrac = Nfrac[`CORRSHIFTSZ-1:`CORRSHIFTSZ-`NF];
|
||||
assign RoundFrac = Mf[`CORRSHIFTSZ-1:`CORRSHIFTSZ-`NF];
|
||||
|
||||
always_comb
|
||||
case(PostProcSel)
|
||||
2'b10: Nexp = FmaSe; // fma
|
||||
2'b00: Nexp = {CvtCe[`NE], CvtCe}&{`NE+2{~CvtResDenormUf|CvtResUf}}; // cvt
|
||||
2'b01: Nexp = DivDone ? DivCorrExp : '0; // divide
|
||||
default: Nexp = '0;
|
||||
2'b10: Me = FmaSe; // fma
|
||||
2'b00: Me = {CvtCe[`NE], CvtCe}&{`NE+2{~CvtResDenormUf|CvtResUf}}; // cvt
|
||||
2'b01: Me = DivDone ? Qe : '0; // divide
|
||||
default: Me = '0;
|
||||
endcase
|
||||
|
||||
// round the result
|
||||
// - if the fraction overflows one should be added to the exponent
|
||||
assign {FullResExp, Rf} = {Nexp, RoundFrac} + RoundAdd;
|
||||
assign Re = FullResExp[`NE-1:0];
|
||||
assign {FullRe, Rf} = {Me, RoundFrac} + RoundAdd;
|
||||
assign Re = FullRe[`NE-1:0];
|
||||
|
||||
|
||||
endmodule
|
@ -38,11 +38,11 @@ module roundsign(
|
||||
input logic DivOp,
|
||||
input logic CvtOp,
|
||||
input logic CvtCs,
|
||||
output logic Nsgn
|
||||
output logic Ms
|
||||
);
|
||||
|
||||
logic FmaResSgnTmp;
|
||||
logic DivSgn;
|
||||
logic Qs;
|
||||
|
||||
// is the result negative
|
||||
// if p - z is the Sum negative
|
||||
@ -52,9 +52,9 @@ module roundsign(
|
||||
|
||||
// assign FmaResSgnTmp = FmaInvA&(FmaAs)&FmaNegSum | FmaInvA&FmaPs&~FmaNegSum | (FmaAs&FmaPs);
|
||||
|
||||
assign DivSgn = Xs^Ys;
|
||||
assign Qs = Xs^Ys;
|
||||
|
||||
// Sign for rounding calculation
|
||||
assign Nsgn = (FmaResSgnTmp&FmaOp) | (CvtCs&CvtOp) | (DivSgn&DivOp);
|
||||
assign Ms = (FmaResSgnTmp&FmaOp) | (CvtCs&CvtOp) | (Qs&DivOp);
|
||||
|
||||
endmodule
|
@ -28,23 +28,22 @@
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
`include "wally-config.vh"
|
||||
|
||||
module lzacorrection(
|
||||
module shiftcorrection(
|
||||
input logic [`NORMSHIFTSZ-1:0] Shifted, // the shifted sum before LZA correction
|
||||
input logic FmaOp,
|
||||
input logic DivOp,
|
||||
input logic DivResDenorm,
|
||||
input logic [`NE+1:0] DivCalcExp,
|
||||
input logic [`NE+1:0] DivQe,
|
||||
input logic [`NE+1:0] DivDenormShift,
|
||||
input logic [`NE+1:0] FmaConvNormSumExp, // exponent of the normalized sum not taking into account denormal or zero results
|
||||
input logic [`NE+1:0] FmaNe, // exponent of the normalized sum not taking into account denormal or zero results
|
||||
input logic FmaPreResultDenorm, // is the result denormalized - calculated before LZA corection
|
||||
input logic FmaKillProd, // is the product set to zero
|
||||
input logic FmaSmZero,
|
||||
output logic [`CORRSHIFTSZ-1:0] Nfrac, // the shifted sum before LZA correction
|
||||
output logic [`NE+1:0] DivCorrExp,
|
||||
input logic FmaSZero,
|
||||
output logic [`CORRSHIFTSZ-1:0] Mf, // the shifted sum before LZA correction
|
||||
output logic [`NE+1:0] Qe,
|
||||
output logic [`NE+1:0] FmaSe // exponent of the normalized sum
|
||||
);
|
||||
logic [3*`NF+5:0] CorrSumShifted; // the shifted sum after LZA correction
|
||||
logic [`CORRSHIFTSZ:0] CorrQuotShifted;
|
||||
logic [`CORRSHIFTSZ-1:0] CorrQuotShifted;
|
||||
logic ResDenorm; // is the result denormalized
|
||||
logic LZAPlus1, LZAPlus2; // add one or two to the sum's exponent due to LZA correction
|
||||
|
||||
@ -54,16 +53,16 @@ module lzacorrection(
|
||||
// the only possible mantissa for a plus two is all zeroes - a one has to propagate all the way through a sum. so we can leave the bottom statement alone
|
||||
assign CorrSumShifted = LZAPlus1 ? Shifted[`NORMSHIFTSZ-3:1] : Shifted[`NORMSHIFTSZ-4:0];
|
||||
// if the msb is 1 or the exponent was one, but the shifted quotient was < 1 (Denorm)
|
||||
assign CorrQuotShifted = {LZAPlus2|(DivCalcExp==1&~LZAPlus2) ? Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ] : {Shifted[`NORMSHIFTSZ-2:`NORMSHIFTSZ-`CORRSHIFTSZ], 1'b0}, 1'b0};
|
||||
assign CorrQuotShifted = (LZAPlus2|(DivQe==1&~LZAPlus2)) ? Shifted[`NORMSHIFTSZ-2:`NORMSHIFTSZ-`CORRSHIFTSZ-1] : Shifted[`NORMSHIFTSZ-3:`NORMSHIFTSZ-`CORRSHIFTSZ-2];
|
||||
// if the result of the divider was calculated to be denormalized, then the result was correctly normalized, so select the top shifted bits
|
||||
assign Nfrac = FmaOp ? {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+6){1'b0}}} : DivOp&~DivResDenorm ? CorrQuotShifted[`CORRSHIFTSZ-1:0] : Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ];
|
||||
assign Mf = FmaOp ? {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+6){1'b0}}} : DivOp&~DivResDenorm ? CorrQuotShifted : Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ];
|
||||
// Determine sum's exponent
|
||||
// if plus1 If plus2 if said denorm but norm plus 1 if said denorm but norm plus 2
|
||||
assign FmaSe = (FmaConvNormSumExp+{{`NE+1{1'b0}}, LZAPlus1&~FmaKillProd}+{{`NE{1'b0}}, LZAPlus2&~FmaKillProd, 1'b0}+{{`NE+1{1'b0}}, ~ResDenorm&FmaPreResultDenorm&~FmaKillProd}+{{`NE+1{1'b0}}, &FmaConvNormSumExp&Shifted[3*`NF+6]&~FmaKillProd}) & {`NE+2{~(FmaSmZero|ResDenorm)}};
|
||||
assign FmaSe = (FmaNe+{{`NE+1{1'b0}}, LZAPlus1}+{{`NE{1'b0}}, LZAPlus2, 1'b0}+{{`NE+1{1'b0}}, ~ResDenorm&FmaPreResultDenorm}+{{`NE+1{1'b0}}, &FmaNe&Shifted[3*`NF+6]}) & {`NE+2{~(FmaSZero|ResDenorm)}};
|
||||
// recalculate if the result is denormalized
|
||||
assign ResDenorm = FmaPreResultDenorm&~Shifted[`NORMSHIFTSZ-3]&~Shifted[`NORMSHIFTSZ-2];
|
||||
|
||||
// the quotient is in the range [.5,2) if there is no early termination
|
||||
// if the quotient < 1 and not denormal then subtract 1 to account for the normalization shift
|
||||
assign DivCorrExp = ((DivResDenorm)&~DivDenormShift[`NE+1]) ? (`NE+2)'(0) : DivCalcExp - {(`NE+1)'(0), ~LZAPlus2};
|
||||
assign Qe = ((DivResDenorm)&~DivDenormShift[`NE+1]) ? (`NE+2)'(0) : DivQe - {(`NE+1)'(0), ~LZAPlus2};
|
||||
endmodule
|
@ -29,17 +29,17 @@
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
module resultselect(
|
||||
module specialcase(
|
||||
input logic Xs, // input signs
|
||||
input logic [`NF:0] Xm, Ym, Zm, // input mantissas
|
||||
input logic XNaN, YNaN, ZNaN, // inputs are NaN
|
||||
input logic [2:0] Frm, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
|
||||
input logic [`FMTBITS-1:0] OutFmt, // output format
|
||||
input logic InfIn,
|
||||
input logic NaNIn,
|
||||
input logic XInf, YInf,
|
||||
input logic XZero,
|
||||
input logic IntZero,
|
||||
input logic NaNIn,
|
||||
input logic IntToFp,
|
||||
input logic Int64,
|
||||
input logic Signed,
|
||||
@ -53,10 +53,10 @@ module resultselect(
|
||||
input logic IntInvalid, Invalid, Overflow, // flags
|
||||
input logic CvtResUf,
|
||||
input logic [`NE-1:0] Re, // Res exponent
|
||||
input logic [`NE+1:0] FullResExp, // Res exponent
|
||||
input logic [`NE+1:0] FullRe, // Res exponent
|
||||
input logic [`NF-1:0] Rf, // Res fraction
|
||||
input logic [`XLEN+1:0] CvtNegRes, // the negation of the result
|
||||
output logic [`FLEN-1:0] W, // final res
|
||||
output logic [`FLEN-1:0] PostProcRes, // final res
|
||||
output logic [`XLEN-1:0] FCvtIntRes // final res
|
||||
);
|
||||
logic [`FLEN-1:0] XNaNRes, YNaNRes, ZNaNRes, InvalidRes, OfRes, UfRes, NormRes; // possible results
|
||||
@ -231,11 +231,11 @@ module resultselect(
|
||||
// - do so if the res underflows, is zero (the exp doesnt calculate correctly). or the integer input is 0
|
||||
// - dont set to zero if fp input is zero but not using the fp input
|
||||
// - dont set to zero if int input is zero but not using the int input
|
||||
assign KillRes = CvtOp ? (CvtResUf|(XZero&~IntToFp)|(IntZero&IntToFp)) : FullResExp[`NE+1] | (((YInf&~XInf)|XZero)&DivOp);//Underflow & ~ResDenorm & (Re!=1);
|
||||
assign KillRes = CvtOp ? (CvtResUf|(XZero&~IntToFp)|(IntZero&IntToFp)) : FullRe[`NE+1] | (((YInf&~XInf)|XZero)&DivOp);//Underflow & ~ResDenorm & (Re!=1);
|
||||
assign SelOfRes = Overflow|DivByZero|(InfIn&~(YInf&DivOp));
|
||||
// output infinity with result sign if divide by zero
|
||||
if(`IEEE754) begin
|
||||
assign W = XNaN&~(IntToFp&CvtOp) ? XNaNRes :
|
||||
assign PostProcRes = XNaN&~(IntToFp&CvtOp) ? XNaNRes :
|
||||
YNaN&~CvtOp ? YNaNRes :
|
||||
ZNaN&FmaOp ? ZNaNRes :
|
||||
Invalid ? InvalidRes :
|
||||
@ -243,7 +243,7 @@ module resultselect(
|
||||
KillRes ? UfRes :
|
||||
NormRes;
|
||||
end else begin
|
||||
assign W = NaNIn|Invalid ? InvalidRes :
|
||||
assign PostProcRes = NaNIn|Invalid ? InvalidRes :
|
||||
SelOfRes ? OfRes :
|
||||
KillRes ? UfRes :
|
||||
NormRes;
|
@ -1,312 +0,0 @@
|
||||
///////////////////////////////////////////
|
||||
// srt.sv
|
||||
//
|
||||
// Written: David_Harris@hmc.edu, me@KatherineParry.com, Cedar Turek
|
||||
// Modified:13 January 2022
|
||||
//
|
||||
// Purpose: Combined Divide and Square Root Floating Point and Integer Unit
|
||||
//
|
||||
// A component of the Wally configurable RISC-V project.
|
||||
//
|
||||
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// MIT LICENSE
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
|
||||
// software and associated documentation files (the "Software"), to deal in the Software
|
||||
// without restriction, including without limitation the rights to use, copy, modify, merge,
|
||||
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
|
||||
// to whom the Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all copies or
|
||||
// substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
|
||||
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
|
||||
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
|
||||
// OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
module srtradix4 (
|
||||
input logic clk,
|
||||
input logic DivStart,
|
||||
input logic DivBusy,
|
||||
input logic [`FMTBITS-1:0] FmtE,
|
||||
input logic [`NE-1:0] XExpE, YExpE,
|
||||
input logic XZeroE, YZeroE,
|
||||
input logic [`DIVLEN-1:0] X,
|
||||
input logic [`DIVLEN-1:0] Dpreproc,
|
||||
input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt,
|
||||
output logic [`DIVLEN+2:0] Quot,
|
||||
output logic [`DIVLEN+3:0] WSN, WCN,
|
||||
output logic [`DIVLEN+3:0] WS, WC,
|
||||
output logic [`NE+1:0] DivCalcExpM,
|
||||
output logic [`XLEN-1:0] Rem
|
||||
);
|
||||
|
||||
logic [3:0] q;
|
||||
logic [`DIVLEN+3:0] WSA;
|
||||
logic [`DIVLEN+3:0] WCA;
|
||||
logic [`DIVLEN+3:0] D, DBar, D2, DBar2, Dsel;
|
||||
logic [`NE+1:0] DivCalcExp;
|
||||
logic [$clog2(`XLEN+1)-1:0] intExp;
|
||||
logic intSign;
|
||||
|
||||
// Top Muxes and Registers
|
||||
// When start is asserted, the inputs are loaded into the divider.
|
||||
// Otherwise, the divisor is retained and the partial remainder
|
||||
// is fed back for the next iteration.
|
||||
// - when the start signal is asserted X and 0 are loaded into WS and WC
|
||||
// - otherwise load WSA into the flipflop
|
||||
// - the assumed one is added to D since it's always normalized (and X/0 is a special case handeled by result selection)
|
||||
// - XZeroE is used as the assumed one to avoid creating a sticky bit - all other numbers are normalized
|
||||
mux2 #(`DIVLEN+4) wsmux({WSA[`DIVLEN+1:0], 2'b0}, {3'b000, ~XZeroE, X}, DivStart, WSN);
|
||||
flop #(`DIVLEN+4) wsflop(clk, WSN, WS);
|
||||
mux2 #(`DIVLEN+4) wcmux({WCA[`DIVLEN+1:0], 2'b0}, {`DIVLEN+4{1'b0}}, DivStart, WCN);
|
||||
flop #(`DIVLEN+4) wcflop(clk, WCN, WC);
|
||||
flopen #(`DIVLEN+4) dflop(clk, DivStart, {4'b0001, Dpreproc}, D);
|
||||
flopen #(`NE+2) expflop(clk, DivStart, DivCalcExp, DivCalcExpM);
|
||||
|
||||
// Quotient Selection logic
|
||||
// Given partial remainder, select quotient of +1, 0, or -1 (qp, qz, pm)
|
||||
// *** change this for radix 4 - generate w/ stine code
|
||||
// q encoding:
|
||||
// 1000 = +2
|
||||
// 0100 = +1
|
||||
// 0000 = 0
|
||||
// 0010 = -1
|
||||
// 0001 = -2
|
||||
qsel4 qsel4(.D, .WS, .WC, .q);
|
||||
|
||||
// Divisor Selection logic
|
||||
// *** radix 4 change to choose -2 to 2
|
||||
// - choose the negitive version of what's being selected
|
||||
assign DBar = ~D;
|
||||
assign DBar2 = {~D[`DIVLEN+2:0], 1'b1};
|
||||
assign D2 = {D[`DIVLEN+2:0], 1'b0};
|
||||
|
||||
always_comb
|
||||
case (q)
|
||||
4'b1000: Dsel = DBar2;
|
||||
4'b0100: Dsel = DBar;
|
||||
4'b0000: Dsel = {(`DIVLEN+4){1'b0}};
|
||||
4'b0010: Dsel = D;
|
||||
4'b0001: Dsel = D2;
|
||||
default: Dsel = {`DIVLEN+4{1'bx}};
|
||||
endcase
|
||||
|
||||
// Partial Product Generation
|
||||
// WSA, WCA = WS + WC - qD
|
||||
csa #(`DIVLEN+4) csa(WS, WC, Dsel, |q[3:2], WSA, WCA);
|
||||
|
||||
//*** change for radix 4
|
||||
otfc4 otfc4(.clk, .DivStart, .DivBusy, .q, .Quot);
|
||||
|
||||
expcalc expcalc(.FmtE, .XExpE, .YExpE, .XZeroE, .XZeroCnt, .YZeroCnt, .DivCalcExp);
|
||||
|
||||
endmodule
|
||||
|
||||
////////////////
|
||||
// Submodules //
|
||||
////////////////
|
||||
|
||||
|
||||
|
||||
module qsel4 (
|
||||
input logic [`DIVLEN+3:0] D,
|
||||
input logic [`DIVLEN+3:0] WS, WC,
|
||||
output logic [3:0] q
|
||||
);
|
||||
logic [6:0] Wmsbs;
|
||||
logic [7:0] PreWmsbs;
|
||||
logic [2:0] Dmsbs;
|
||||
assign PreWmsbs = WC[`DIVLEN+3:`DIVLEN-4] + WS[`DIVLEN+3:`DIVLEN-4];
|
||||
assign Wmsbs = PreWmsbs[7:1];
|
||||
assign Dmsbs = D[`DIVLEN-1:`DIVLEN-3];
|
||||
// D = 0001.xxx...
|
||||
// Dmsbs = | |
|
||||
// W = xxxx.xxx...
|
||||
// Wmsbs = | |
|
||||
|
||||
logic [3:0] QSel4[1023:0];
|
||||
|
||||
initial begin
|
||||
integer d, w, i, w2;
|
||||
for(d=0; d<8; d++)
|
||||
for(w=0; w<128; w++)begin
|
||||
i = d*128+w;
|
||||
w2 = w-128*(w>=64); // convert to two's complement
|
||||
case(d)
|
||||
0: if($signed(w2)>=$signed(12)) QSel4[i] = 4'b1000;
|
||||
else if(w2>=4) QSel4[i] = 4'b0100;
|
||||
else if(w2>=-4) QSel4[i] = 4'b0000;
|
||||
else if(w2>=-13) QSel4[i] = 4'b0010;
|
||||
else QSel4[i] = 4'b0001;
|
||||
1: if(w2>=14) QSel4[i] = 4'b1000;
|
||||
else if(w2>=4) QSel4[i] = 4'b0100;
|
||||
else if(w2>=-6) QSel4[i] = 4'b0000;
|
||||
else if(w2>=-15) QSel4[i] = 4'b0010;
|
||||
else QSel4[i] = 4'b0001;
|
||||
2: if(w2>=15) QSel4[i] = 4'b1000;
|
||||
else if(w2>=4) QSel4[i] = 4'b0100;
|
||||
else if(w2>=-6) QSel4[i] = 4'b0000;
|
||||
else if(w2>=-16) QSel4[i] = 4'b0010;
|
||||
else QSel4[i] = 4'b0001;
|
||||
3: if(w2>=16) QSel4[i] = 4'b1000;
|
||||
else if(w2>=4) QSel4[i] = 4'b0100;
|
||||
else if(w2>=-6) QSel4[i] = 4'b0000;
|
||||
else if(w2>=-18) QSel4[i] = 4'b0010;
|
||||
else QSel4[i] = 4'b0001;
|
||||
4: if(w2>=18) QSel4[i] = 4'b1000;
|
||||
else if(w2>=6) QSel4[i] = 4'b0100;
|
||||
else if(w2>=-8) QSel4[i] = 4'b0000;
|
||||
else if(w2>=-20) QSel4[i] = 4'b0010;
|
||||
else QSel4[i] = 4'b0001;
|
||||
5: if(w2>=20) QSel4[i] = 4'b1000;
|
||||
else if(w2>=6) QSel4[i] = 4'b0100;
|
||||
else if(w2>=-8) QSel4[i] = 4'b0000;
|
||||
else if(w2>=-20) QSel4[i] = 4'b0010;
|
||||
else QSel4[i] = 4'b0001;
|
||||
6: if(w2>=20) QSel4[i] = 4'b1000;
|
||||
else if(w2>=8) QSel4[i] = 4'b0100;
|
||||
else if(w2>=-8) QSel4[i] = 4'b0000;
|
||||
else if(w2>=-22) QSel4[i] = 4'b0010;
|
||||
else QSel4[i] = 4'b0001;
|
||||
7: if(w2>=24) QSel4[i] = 4'b1000;
|
||||
else if(w2>=8) QSel4[i] = 4'b0100;
|
||||
else if(w2>=-8) QSel4[i] = 4'b0000;
|
||||
else if(w2>=-24) QSel4[i] = 4'b0010;
|
||||
else QSel4[i] = 4'b0001;
|
||||
endcase
|
||||
end
|
||||
end
|
||||
assign q = QSel4[{Dmsbs,Wmsbs}];
|
||||
|
||||
endmodule
|
||||
|
||||
///////////////////////////////////
|
||||
// On-The-Fly Converter, Radix 4 //
|
||||
///////////////////////////////////
|
||||
module otfc4 (
|
||||
input logic clk,
|
||||
input logic DivStart,
|
||||
input logic DivBusy,
|
||||
input logic [3:0] q,
|
||||
output logic [`DIVLEN+2:0] Quot
|
||||
);
|
||||
|
||||
// The on-the-fly converter transfers the quotient
|
||||
// bits to the quotient as they come.
|
||||
//
|
||||
// This code follows the pseudocode presented in the
|
||||
// floating point chapter of the book. Right now,
|
||||
// it is written for Radix-4 division.
|
||||
//
|
||||
// QM is Q-1. It allows us to write negative bits
|
||||
// without using a costly CPA.
|
||||
logic [`DIVLEN+2:0] QM, QNext, QMNext, QMux, QMMux;
|
||||
// QR and QMR are the shifted versions of Q and QM.
|
||||
// They are treated as [N-1:r] size signals, and
|
||||
// discard the r most significant bits of Q and QM.
|
||||
logic [`DIVLEN:0] QR, QMR;
|
||||
// if starting a new division set Q to 0 and QM to -1
|
||||
mux2 #(`DIVLEN+3) Qmux(QNext, {`DIVLEN+3{1'b0}}, DivStart, QMux);
|
||||
mux2 #(`DIVLEN+3) QMmux(QMNext, {`DIVLEN+3{1'b1}}, DivStart, QMMux);
|
||||
flopen #(`DIVLEN+3) Qreg(clk, DivBusy|DivStart, QMux, Quot); // *** have to connect Quot directly to M stage
|
||||
flop #(`DIVLEN+3) QMreg(clk, QMMux, QM);
|
||||
|
||||
// shift Q (quotient) and QM (quotient-1)
|
||||
// if q = 2 Q = {Q, 10} QM = {Q, 01}
|
||||
// else if q = 1 Q = {Q, 01} QM = {Q, 00}
|
||||
// else if q = 0 Q = {Q, 00} QM = {QM, 11}
|
||||
// else if q = -1 Q = {QM, 11} QM = {QM, 10}
|
||||
// else if q = -2 Q = {QM, 10} QM = {QM, 01}
|
||||
// *** how do the 0 concatenation numbers work?
|
||||
|
||||
always_comb begin
|
||||
QR = Quot[`DIVLEN:0];
|
||||
QMR = QM[`DIVLEN:0]; // Shift Q and QM
|
||||
if (q[3]) begin // +2
|
||||
QNext = {QR, 2'b10};
|
||||
QMNext = {QR, 2'b01};
|
||||
end else if (q[2]) begin // +1
|
||||
QNext = {QR, 2'b01};
|
||||
QMNext = {QR, 2'b00};
|
||||
end else if (q[1]) begin // -1
|
||||
QNext = {QMR, 2'b11};
|
||||
QMNext = {QMR, 2'b10};
|
||||
end else if (q[0]) begin // -2
|
||||
QNext = {QMR, 2'b10};
|
||||
QMNext = {QMR, 2'b01};
|
||||
end else begin // 0
|
||||
QNext = {QR, 2'b00};
|
||||
QMNext = {QMR, 2'b11};
|
||||
end
|
||||
end
|
||||
// Final Quotient is in the range [.5, 2)
|
||||
|
||||
endmodule
|
||||
|
||||
|
||||
|
||||
/////////
|
||||
// csa //
|
||||
/////////
|
||||
module csa #(parameter N=69) (
|
||||
input logic [N-1:0] in1, in2, in3,
|
||||
input logic cin,
|
||||
output logic [N-1:0] out1, out2
|
||||
);
|
||||
|
||||
// This block adds in1, in2, in3, and cin to produce
|
||||
// a result out1 / out2 in carry-save redundant form.
|
||||
// cin is just added to the least significant bit and
|
||||
// is required to handle adding a negative divisor.
|
||||
// Fortunately, the carry (out2) is shifted left by one
|
||||
// bit, leaving room in the least significant bit to
|
||||
// insert cin.
|
||||
|
||||
assign out1 = in1 ^ in2 ^ in3;
|
||||
assign out2 = {in1[N-2:0] & (in2[N-2:0] | in3[N-2:0]) |
|
||||
(in2[N-2:0] & in3[N-2:0]), cin};
|
||||
endmodule
|
||||
|
||||
module expcalc(
|
||||
input logic [`FMTBITS-1:0] FmtE,
|
||||
input logic [`NE-1:0] XExpE, YExpE,
|
||||
input logic XZeroE,
|
||||
input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt,
|
||||
output logic [`NE+1:0] DivCalcExp
|
||||
);
|
||||
logic [`NE-2:0] Bias;
|
||||
|
||||
if (`FPSIZES == 1) begin
|
||||
assign Bias = (`NE-1)'(`BIAS);
|
||||
|
||||
end else if (`FPSIZES == 2) begin
|
||||
assign Bias = FmtE ? (`NE-1)'(`BIAS) : (`NE-1)'(`BIAS1);
|
||||
|
||||
end else if (`FPSIZES == 3) begin
|
||||
always_comb
|
||||
case (FmtE)
|
||||
`FMT: Bias = (`NE-1)'(`BIAS);
|
||||
`FMT1: Bias = (`NE-1)'(`BIAS1);
|
||||
`FMT2: Bias = (`NE-1)'(`BIAS2);
|
||||
default: Bias = 'x;
|
||||
endcase
|
||||
|
||||
end else if (`FPSIZES == 4) begin
|
||||
always_comb
|
||||
case (FmtE)
|
||||
2'h3: Bias = (`NE-1)'(`Q_BIAS);
|
||||
2'h1: Bias = (`NE-1)'(`D_BIAS);
|
||||
2'h0: Bias = (`NE-1)'(`S_BIAS);
|
||||
2'h2: Bias = (`NE-1)'(`H_BIAS);
|
||||
endcase
|
||||
end
|
||||
// correct exponent for denormalized input's normalization shifts
|
||||
assign DivCalcExp = ({2'b0, XExpE} - {{`NE+1-$clog2(`NF+2){1'b0}}, XZeroCnt} - {2'b0, YExpE} + {{`NE+1-$clog2(`NF+2){1'b0}}, YZeroCnt} + {3'b0, Bias})&{`NE+2{~XZeroE}};
|
||||
endmodule
|
259
pipelined/src/fpu/srt.sv
Normal file
259
pipelined/src/fpu/srt.sv
Normal file
@ -0,0 +1,259 @@
|
||||
///////////////////////////////////////////
|
||||
// srt.sv
|
||||
//
|
||||
// Written: David_Harris@hmc.edu, me@KatherineParry.com, cturek@hmc.edu
|
||||
// Modified:13 January 2022
|
||||
//
|
||||
// Purpose: Combined Divide and Square Root Floating Point and Integer Unit
|
||||
//
|
||||
// A component of the Wally configurable RISC-V project.
|
||||
//
|
||||
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// MIT LICENSE
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
|
||||
// software and associated documentation files (the "Software"), to deal in the Software
|
||||
// without restriction, including without limitation the rights to use, copy, modify, merge,
|
||||
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
|
||||
// to whom the Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all copies or
|
||||
// substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
|
||||
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
|
||||
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
|
||||
// OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
module srt(
|
||||
input logic clk,
|
||||
input logic DivStart,
|
||||
input logic DivBusy,
|
||||
input logic [`FMTBITS-1:0] FmtE,
|
||||
input logic [`NE-1:0] Xe, Ye,
|
||||
input logic XZeroE, YZeroE,
|
||||
input logic [`DIVLEN-1:0] X,
|
||||
input logic [`DIVLEN-1:0] Dpreproc,
|
||||
input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt,
|
||||
input logic NegSticky,
|
||||
output logic [`QLEN-1-(`RADIX/4):0] Quot,
|
||||
output logic [`DIVLEN+3:0] NextWSN, NextWCN,
|
||||
output logic [`DIVLEN+3:0] StickyWSA,
|
||||
output logic [`DIVLEN+3:0] FirstWS, FirstWC,
|
||||
output logic [`NE+1:0] DivCalcExpM,
|
||||
output logic [`XLEN-1:0] Rem
|
||||
);
|
||||
|
||||
|
||||
/* verilator lint_off UNOPTFLAT */
|
||||
logic [`DIVLEN+3:0] WSA[`DIVCOPIES-1:0];
|
||||
logic [`DIVLEN+3:0] WCA[`DIVCOPIES-1:0];
|
||||
logic [`DIVLEN+3:0] WS[`DIVCOPIES-1:0];
|
||||
logic [`DIVLEN+3:0] WC[`DIVCOPIES-1:0];
|
||||
logic [`QLEN-1:0] Q[`DIVCOPIES-1:0];
|
||||
logic [`QLEN-1:0] QM[`DIVCOPIES-1:0];
|
||||
logic [`QLEN-1:0] QNext[`DIVCOPIES-1:0];
|
||||
logic [`QLEN-1:0] QMNext[`DIVCOPIES-1:0];
|
||||
/* verilator lint_on UNOPTFLAT */
|
||||
logic [`DIVLEN+3:0] WSN, WCN;
|
||||
logic [`DIVLEN+3:0] D, DBar, D2, DBar2;
|
||||
logic [`NE+1:0] DivCalcExp;
|
||||
logic [$clog2(`XLEN+1)-1:0] intExp;
|
||||
logic intSign;
|
||||
logic [`QLEN-1:0] QMMux;
|
||||
|
||||
// Top Muxes and Registers
|
||||
// When start is asserted, the inputs are loaded into the divider.
|
||||
// Otherwise, the divisor is retained and the partial remainder
|
||||
// is fed back for the next iteration.
|
||||
// - when the start signal is asserted X and 0 are loaded into WS and WC
|
||||
// - otherwise load WSA into the flipflop
|
||||
// - the assumed one is added to D since it's always normalized (and X/0 is a special case handled by result selection)
|
||||
// - XZeroE is used as the assumed one to avoid creating a sticky bit - all other numbers are normalized
|
||||
if (`RADIX == 2) begin : nextw
|
||||
assign NextWSN = {WSA[`DIVCOPIES-1][`DIVLEN+2:0], 1'b0};
|
||||
assign NextWCN = {WCA[`DIVCOPIES-1][`DIVLEN+2:0], 1'b0};
|
||||
end else begin
|
||||
assign NextWSN = {WSA[`DIVCOPIES-1][`DIVLEN+1:0], 2'b0};
|
||||
assign NextWCN = {WCA[`DIVCOPIES-1][`DIVLEN+1:0], 2'b0};
|
||||
end
|
||||
|
||||
mux2 #(`DIVLEN+4) wsmux(NextWSN, {3'b000, ~XZeroE, X}, DivStart, WSN);
|
||||
flopen #(`DIVLEN+4) wsflop(clk, DivStart|DivBusy, WSN, WS[0]);
|
||||
mux2 #(`DIVLEN+4) wcmux(NextWCN, {`DIVLEN+4{1'b0}}, DivStart, WCN);
|
||||
flopen #(`DIVLEN+4) wcflop(clk, DivStart|DivBusy, WCN, WC[0]);
|
||||
flopen #(`DIVLEN+4) dflop(clk, DivStart, {4'b0001, Dpreproc}, D);
|
||||
flopen #(`NE+2) expflop(clk, DivStart, DivCalcExp, DivCalcExpM);
|
||||
|
||||
|
||||
// Divisor Selections
|
||||
// - choose the negative version of what's being selected
|
||||
assign DBar = ~D;
|
||||
if(`RADIX == 4) begin : d2
|
||||
assign DBar2 = {~D[`DIVLEN+2:0], 1'b1};
|
||||
assign D2 = {D[`DIVLEN+2:0], 1'b0};
|
||||
end
|
||||
|
||||
genvar i;
|
||||
generate
|
||||
for(i=0; $unsigned(i)<`DIVCOPIES; i++) begin : interations
|
||||
divinteration divinteration(.D, .DBar, .D2, .DBar2,
|
||||
.WS(WS[i]), .WC(WC[i]), .WSA(WSA[i]), .WCA(WCA[i]), .Q(Q[i]), .QM(QM[i]), .QNext(QNext[i]), .QMNext(QMNext[i]));
|
||||
if(i<(`DIVCOPIES-1)) begin
|
||||
if (`RADIX==2)begin
|
||||
assign WS[i+1] = {WSA[i][`DIVLEN+1:0], 1'b0};
|
||||
assign WC[i+1] = {WCA[i][`DIVLEN+1:0], 1'b0};
|
||||
end else begin
|
||||
assign WS[i+1] = {WSA[i][`DIVLEN+1:0], 2'b0};
|
||||
assign WC[i+1] = {WCA[i][`DIVLEN+1:0], 2'b0};
|
||||
end
|
||||
assign Q[i+1] = QNext[i];
|
||||
assign QM[i+1] = QMNext[i];
|
||||
end
|
||||
end
|
||||
endgenerate
|
||||
|
||||
// if starting a new division set Q to 0 and QM to -1
|
||||
mux2 #(`QLEN) QMmux(QMNext[`DIVCOPIES-1], {`QLEN{1'b1}}, DivStart, QMMux);
|
||||
flopenr #(`QLEN) Qreg(clk, DivStart, DivBusy, QNext[`DIVCOPIES-1], Q[0]);
|
||||
flopen #(`QLEN) QMreg(clk, DivBusy, QMMux, QM[0]);
|
||||
|
||||
assign Quot = NegSticky ? QM[0][`QLEN-1-(`RADIX/4):0] : Q[0][`QLEN-1-(`RADIX/4):0];
|
||||
assign FirstWS = WS[0];
|
||||
assign FirstWC = WC[0];
|
||||
if(`RADIX==2)
|
||||
if (`DIVCOPIES == 1)
|
||||
assign StickyWSA = {WSA[0][`DIVLEN+2:0], 1'b0};
|
||||
else
|
||||
assign StickyWSA = {WSA[1][`DIVLEN+2:0], 1'b0};
|
||||
|
||||
expcalc expcalc(.FmtE, .Xe, .Ye, .XZeroE, .XZeroCnt, .YZeroCnt, .DivCalcExp);
|
||||
|
||||
endmodule
|
||||
|
||||
////////////////
|
||||
// Submodules //
|
||||
////////////////
|
||||
|
||||
/* verilator lint_off UNOPTFLAT */
|
||||
module divinteration (
|
||||
input logic [`DIVLEN+3:0] D,
|
||||
input logic [`DIVLEN+3:0] DBar, D2, DBar2,
|
||||
input logic [`QLEN-1:0] Q, QM,
|
||||
input logic [`DIVLEN+3:0] WS, WC,
|
||||
output logic [`QLEN-1:0] QNext, QMNext,
|
||||
output logic [`DIVLEN+3:0] WSA, WCA
|
||||
);
|
||||
/* verilator lint_on UNOPTFLAT */
|
||||
|
||||
logic [`DIVLEN+3:0] Dsel;
|
||||
logic [3:0] q;
|
||||
logic qp, qz;//, qn;
|
||||
|
||||
// Quotient Selection logic
|
||||
// Given partial remainder, select the quotient digit: +1, 0, or -1 for radix 2 (qp, qz), or +2 down to -2 for radix 4 (q)
|
||||
// q encoding:
|
||||
// 1000 = +2
|
||||
// 0100 = +1
|
||||
// 0000 = 0
|
||||
// 0010 = -1
|
||||
// 0001 = -2
|
||||
if(`RADIX == 2) begin : qsel
|
||||
qsel2 qsel2(WS[`DIVLEN+3:`DIVLEN], WC[`DIVLEN+3:`DIVLEN], qp, qz);//, qn);
|
||||
end else begin
|
||||
qsel4 qsel4(.D, .WS, .WC, .q);
|
||||
end
|
||||
|
||||
if(`RADIX == 2) begin : dsel
|
||||
assign Dsel = {`DIVLEN+4{~qz}}&(qp ? DBar : D);
|
||||
end else begin
|
||||
always_comb
|
||||
case (q)
|
||||
4'b1000: Dsel = DBar2;
|
||||
4'b0100: Dsel = DBar;
|
||||
4'b0000: Dsel = '0;
|
||||
4'b0010: Dsel = D;
|
||||
4'b0001: Dsel = D2;
|
||||
default: Dsel = 'x;
|
||||
endcase
|
||||
end
|
||||
// Partial Product Generation
|
||||
// WSA, WCA = WS + WC - qD
|
||||
if (`RADIX == 2) begin : csa
|
||||
csa #(`DIVLEN+4) csa(WS, WC, Dsel, qp, WSA, WCA);
|
||||
end else begin
|
||||
csa #(`DIVLEN+4) csa(WS, WC, Dsel, |q[3:2], WSA, WCA);
|
||||
end
|
||||
|
||||
if (`RADIX == 2) begin : otfc
|
||||
otfc2 otfc2(.qp, .qz, .Q, .QM, .QNext, .QMNext);
|
||||
end else begin
|
||||
otfc4 otfc4(.q, .Q, .QM, .QNext, .QMNext);
|
||||
end
|
||||
|
||||
endmodule
|
||||
|
||||
|
||||
/////////
|
||||
// csa //
|
||||
/////////
|
||||
module csa #(parameter N=69) (
|
||||
input logic [N-1:0] in1, in2, in3,
|
||||
input logic cin,
|
||||
output logic [N-1:0] out1, out2
|
||||
);
|
||||
|
||||
// This block adds in1, in2, in3, and cin to produce
|
||||
// a result out1 / out2 in carry-save redundant form.
|
||||
// cin is just added to the least significant bit and
|
||||
// is required to handle adding a negative divisor.
|
||||
// Fortunately, the carry (out2) is shifted left by one
|
||||
// bit, leaving room in the least significant bit to
|
||||
// insert cin.
|
||||
|
||||
assign out1 = in1 ^ in2 ^ in3;
|
||||
assign out2 = {in1[N-2:0] & (in2[N-2:0] | in3[N-2:0]) |
|
||||
(in2[N-2:0] & in3[N-2:0]), cin};
|
||||
endmodule
|
||||
|
||||
module expcalc(
|
||||
input logic [`FMTBITS-1:0] FmtE,
|
||||
input logic [`NE-1:0] Xe, Ye,
|
||||
input logic XZeroE,
|
||||
input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt,
|
||||
output logic [`NE+1:0] DivCalcExp
|
||||
);
|
||||
logic [`NE-2:0] Bias;
|
||||
|
||||
if (`FPSIZES == 1) begin
|
||||
assign Bias = (`NE-1)'(`BIAS);
|
||||
|
||||
end else if (`FPSIZES == 2) begin
|
||||
assign Bias = FmtE ? (`NE-1)'(`BIAS) : (`NE-1)'(`BIAS1);
|
||||
|
||||
end else if (`FPSIZES == 3) begin
|
||||
always_comb
|
||||
case (FmtE)
|
||||
`FMT: Bias = (`NE-1)'(`BIAS);
|
||||
`FMT1: Bias = (`NE-1)'(`BIAS1);
|
||||
`FMT2: Bias = (`NE-1)'(`BIAS2);
|
||||
default: Bias = 'x;
|
||||
endcase
|
||||
|
||||
end else if (`FPSIZES == 4) begin
|
||||
always_comb
|
||||
case (FmtE)
|
||||
2'h3: Bias = (`NE-1)'(`Q_BIAS);
|
||||
2'h1: Bias = (`NE-1)'(`D_BIAS);
|
||||
2'h0: Bias = (`NE-1)'(`S_BIAS);
|
||||
2'h2: Bias = (`NE-1)'(`H_BIAS);
|
||||
endcase
|
||||
end
|
||||
// correct exponent for denormalized input's normalization shifts
|
||||
assign DivCalcExp = ({2'b0, Xe} - {{`NE+1-$unsigned($clog2(`NF+2)){1'b0}}, XZeroCnt} - {2'b0, Ye} + {{`NE+1-$unsigned($clog2(`NF+2)){1'b0}}, YZeroCnt} + {3'b0, Bias})&{`NE+2{~XZeroE}};
|
||||
endmodule
|
@ -33,37 +33,45 @@
|
||||
module srtfsm(
|
||||
input logic clk,
|
||||
input logic reset,
|
||||
input logic [`DIVLEN+3:0] WSN, WCN, WS, WC,
|
||||
input logic [`DIVLEN+3:0] NextWSN, NextWCN, WS, WC,
|
||||
input logic XInfE, YInfE,
|
||||
input logic XZeroE, YZeroE,
|
||||
input logic XNaNE, YNaNE,
|
||||
input logic DivStart,
|
||||
input logic StallE,
|
||||
input logic StallM,
|
||||
input logic [$clog2(`DIVLEN/2+3)-1:0] Dur,
|
||||
output logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2E,
|
||||
input logic StallE,
|
||||
input logic StallM,
|
||||
input logic [`DIVLEN+3:0] StickyWSA,
|
||||
input logic [`DURLEN-1:0] Dur,
|
||||
output logic [`DURLEN-1:0] EarlyTermShiftE,
|
||||
output logic DivStickyE,
|
||||
output logic DivDone,
|
||||
output logic DivNegStickyE,
|
||||
output logic NegSticky,
|
||||
output logic DivBusy
|
||||
);
|
||||
|
||||
typedef enum logic [1:0] {IDLE, BUSY, DONE} statetype;
|
||||
statetype state;
|
||||
|
||||
logic [$clog2(`DIVLEN/2+3)-1:0] step;
|
||||
logic [`DURLEN-1:0] step;
|
||||
logic WZero;
|
||||
//logic [$clog2(`DIVLEN/2+3)-1:0] Dur;
|
||||
logic [`DIVLEN+3:0] W;
|
||||
|
||||
//flopen #($clog2(`DIVLEN/2+3)) durflop(clk, DivStart, CalcDur, Dur);
|
||||
assign DivBusy = (state == BUSY);
|
||||
assign WZero = ((WSN^WCN)=={WSN[`DIVLEN+2:0]|WCN[`DIVLEN+2:0], 1'b0});
|
||||
assign DivStickyE = ~WZero;
|
||||
assign WZero = ((NextWSN^NextWCN)=={NextWSN[`DIVLEN+2:0]|NextWCN[`DIVLEN+2:0], 1'b0});
|
||||
// calculate sticky bit
|
||||
// - there is a chance that a value is subtracted infinitely, resulting in an exact QM result
|
||||
// this is only a problem on radix 2 (and possibly maximally redundant 4) since minimally redundant
|
||||
// radix-4 division can't create a QM that continually adds 0's
|
||||
if (`RADIX == 2)
|
||||
assign DivStickyE = |W&~(StickyWSA == WS);
|
||||
else
|
||||
assign DivStickyE = |W;
|
||||
assign DivDone = (state == DONE);
|
||||
assign W = WC+WS;
|
||||
assign DivNegStickyE = W[`DIVLEN+3]; //*** is there a better way to do this???
|
||||
assign EarlyTermShiftDiv2E = step;
|
||||
assign NegSticky = W[`DIVLEN+3]; //*** is there a better way to do this???
|
||||
assign EarlyTermShiftE = step;
|
||||
|
||||
always_ff @(posedge clk) begin
|
||||
if (reset) begin
|
||||
@ -73,7 +81,7 @@ module srtfsm(
|
||||
if (XZeroE|YZeroE|XInfE|YInfE|XNaNE|YNaNE) state <= #1 DONE;
|
||||
else state <= #1 BUSY;
|
||||
end else if (state == BUSY) begin
|
||||
if ((~|step[$clog2(`DIVLEN/2+3)-1:1]&step[0])|WZero) begin
|
||||
if ((~|step[`DURLEN-1:1]&step[0])|WZero) begin
|
||||
state <= #1 DONE;
|
||||
end
|
||||
step <= step - 1;
|
||||
|
@ -31,11 +31,11 @@
|
||||
`include "wally-config.vh"
|
||||
|
||||
module srtpreproc (
|
||||
input logic [`NF:0] XManE, YManE,
|
||||
input logic [`NF:0] Xm, Ym,
|
||||
output logic [`DIVLEN-1:0] X,
|
||||
output logic [`DIVLEN-1:0] Dpreproc,
|
||||
output logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt,
|
||||
output logic [$clog2(`DIVLEN/2+3)-1:0] Dur
|
||||
output logic [`DURLEN-1:0] Dur
|
||||
);
|
||||
// logic [`XLEN-1:0] PosA, PosB;
|
||||
// logic [`DIVLEN-1:0] ExtraA, ExtraB, PreprocA, PreprocB, PreprocX, PreprocY;
|
||||
@ -49,24 +49,33 @@ module srtpreproc (
|
||||
|
||||
// ***can probably merge X LZC with conversion
|
||||
// count the number of leading zeros
|
||||
lzc #(`NF+1) lzcA (XManE, XZeroCnt);
|
||||
lzc #(`NF+1) lzcB (YManE, YZeroCnt);
|
||||
lzc #(`NF+1) lzcA (Xm, XZeroCnt);
|
||||
lzc #(`NF+1) lzcB (Ym, YZeroCnt);
|
||||
|
||||
// assign ExtraA = {PosA, {`DIVLEN-`XLEN{1'b0}}};
|
||||
// assign ExtraB = {PosB, {`DIVLEN-`XLEN{1'b0}}};
|
||||
|
||||
// assign PreprocA = ExtraA << zeroCntA;
|
||||
// assign PreprocB = ExtraB << (zeroCntB + 1);
|
||||
assign PreprocX = {XManE[`NF-1:0]<<XZeroCnt, {`DIVLEN-`NF{1'b0}}};
|
||||
assign PreprocY = {YManE[`NF-1:0]<<YZeroCnt, {`DIVLEN-`NF{1'b0}}};
|
||||
assign PreprocX = {Xm[`NF-1:0]<<XZeroCnt, {`DIVLEN-`NF{1'b0}}};
|
||||
assign PreprocY = {Ym[`NF-1:0]<<YZeroCnt, {`DIVLEN-`NF{1'b0}}};
|
||||
|
||||
|
||||
assign X = PreprocX;
|
||||
assign Dpreproc = PreprocY;
|
||||
|
||||
assign Dur = ($clog2(`DIVLEN/2+3))'(`DIVLEN/2+2);
|
||||
assign Dur = (`DURLEN)'(`FPDUR);
|
||||
// assign intExp = zeroCntB - zeroCntA + 1;
|
||||
// assign intSign = Signed & (SrcA[`XLEN - 1] ^ SrcB[`XLEN - 1]);
|
||||
|
||||
// radix 2 radix 4
|
||||
// 1 copies DIVLEN+2 DIVLEN+2/2
|
||||
// 2 copies DIVLEN+2/2 DIVLEN+2/2*2
|
||||
// 4 copies DIVLEN+2/4 DIVLEN+2/2*4
|
||||
// 8 copies DIVLEN+2/8 DIVLEN+2/2*8
|
||||
|
||||
// DIVRESLEN = DIVLEN or DIVLEN+2
|
||||
// r = 1 or 2
|
||||
// DIVRESLEN/(r*`DIVCOPIES)
|
||||
|
||||
|
||||
endmodule
|
@ -34,7 +34,7 @@ module lzc #(parameter WIDTH = 1) (
|
||||
/* verilator lint_off CMPCONST */
|
||||
/* verilator lint_off WIDTH */
|
||||
|
||||
int i;
|
||||
logic [31:0] i;
|
||||
always_comb begin
|
||||
i = 0;
|
||||
while (~num[WIDTH-1-i] & (i < WIDTH)) i = i+1; // search for leading one
|
||||
|
@ -226,7 +226,7 @@ module ifu (
|
||||
icache(.clk, .reset, .CPUBusy, .IgnoreRequestTLB(ITLBMissF), .TrapM(TrapM), .IgnoreRequestTrapM('0),
|
||||
.CacheBusWriteData(ICacheBusWriteData), .CacheBusAck(ICacheBusAck),
|
||||
.CacheBusAdr(ICacheBusAdr), .CacheStall(ICacheStallF),
|
||||
.CacheFetchLine(ICacheFetchLine), .FWriteDataM(), .FpLoadStoreM(), .FLoad2(),
|
||||
.CacheFetchLine(ICacheFetchLine), .FStore2(),
|
||||
.CacheWriteLine(), .ReadDataWord(FinalInstrRawF),
|
||||
.Cacheable(CacheableF),
|
||||
.CacheMiss(ICacheMiss), .CacheAccess(ICacheAccess),
|
||||
|
@ -58,7 +58,7 @@ module lsu (
|
||||
input logic sfencevmaM,
|
||||
// fpu
|
||||
input logic [`FLEN-1:0] FWriteDataM,
|
||||
input logic FLoad2,
|
||||
input logic FStore2,
|
||||
input logic FpLoadStoreM,
|
||||
// faults
|
||||
output logic LoadPageFaultM, StoreAmoPageFaultM,
|
||||
@ -192,7 +192,8 @@ module lsu (
|
||||
// Memory System
|
||||
// Either Data Cache or Data Tightly Integrated Memory or just bus interface
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////
|
||||
logic [`XLEN-1:0] AMOWriteDataM, FinalWriteDataM, LittleEndianWriteDataM;
|
||||
logic [`XLEN-1:0] AMOWriteDataM, IEUWriteDataM, LittleEndianWriteDataM;
|
||||
logic [`LLEN-1:0] FinalWriteDataM;
|
||||
logic [`LLEN-1:0] ReadDataWordM, LittleEndianReadDataWordM;
|
||||
logic [`LLEN-1:0] ReadDataWordMuxM;
|
||||
logic IgnoreRequest;
|
||||
@ -202,7 +203,7 @@ module lsu (
|
||||
if (`DMEM == `MEM_TIM) begin : dtim
|
||||
// *** directly instantiate RAM or ROM here. Instantiate SRAM1P1RW.
|
||||
// Merge SimpleRAM and SRAM1p1rw into one that is good for synthesis and RAM libraries and flops
|
||||
dtim dtim(.clk, .reset, .CPUBusy, .LSURWM, .IEUAdrM, .IEUAdrE, .TrapM, .FinalWriteDataM,
|
||||
dtim dtim(.clk, .reset, .CPUBusy, .LSURWM, .IEUAdrM, .IEUAdrE, .TrapM, .FinalWriteDataM(IEUWriteDataM), //*** fix the dtim FinalWriteData
|
||||
.ReadDataWordM(ReadDataWordM[`XLEN-1:0]), .BusStall, .LSUBusWrite,.LSUBusRead, .BusCommittedM,
|
||||
.DCacheStallM, .DCacheCommittedM, .ByteMaskM, .Cacheable(CacheableM),
|
||||
.DCacheMiss, .DCacheAccess);
|
||||
@ -230,15 +231,19 @@ module lsu (
|
||||
|
||||
mux2 #(`LLEN) UnCachedDataMux(.d0(LittleEndianReadDataWordM), .d1({{`LLEN-`XLEN{1'b0}}, DCacheBusWriteData[`XLEN-1:0]}),
|
||||
.s(SelUncachedAdr), .y(ReadDataWordMuxM));
|
||||
mux2 #(`XLEN) LsuBushwdataMux(.d0(ReadDataWordM[`XLEN-1:0]), .d1(FinalWriteDataM),
|
||||
mux2 #(`XLEN) LsuBushwdataMux(.d0(ReadDataWordM[`XLEN-1:0]), .d1(IEUWriteDataM),
|
||||
.s(SelUncachedAdr), .y(LSUBusHWDATA));
|
||||
|
||||
if(CACHE_ENABLED) begin : dcache
|
||||
if (`LLEN>`XLEN)
|
||||
mux2 #(`LLEN) datamux({IEUWriteDataM, IEUWriteDataM}, FWriteDataM, FpLoadStoreM, FinalWriteDataM);
|
||||
else
|
||||
assign FinalWriteDataM = {{`LLEN-`XLEN{1'b0}}, IEUWriteDataM};
|
||||
cache #(.LINELEN(`DCACHE_LINELENINBITS), .NUMLINES(`DCACHE_WAYSIZEINBYTES*8/LINELEN),
|
||||
.NUMWAYS(`DCACHE_NUMWAYS), .LOGWPL(LOGWPL), .WORDLEN(`LLEN), .MUXINTERVAL(`XLEN), .DCACHE(1)) dcache(
|
||||
.clk, .reset, .CPUBusy, .LSUBusWriteCrit, .RW(LSURWM), .Atomic(LSUAtomicM),
|
||||
.FlushCache(FlushDCacheM), .NextAdr(LSUAdrE), .PAdr(LSUPAdrM),
|
||||
.ByteMask(ByteMaskM), .WordCount, .FpLoadStoreM, .FWriteDataM, .FLoad2,
|
||||
.ByteMask(ByteMaskM), .WordCount, .FStore2,
|
||||
.FinalWriteData(FinalWriteDataM), .Cacheable(CacheableM),
|
||||
.CacheStall(DCacheStallM), .CacheMiss(DCacheMiss), .CacheAccess(DCacheAccess),
|
||||
.IgnoreRequestTLB, .IgnoreRequestTrapM, .TrapM(1'b0), .CacheCommitted(DCacheCommittedM),
|
||||
@ -286,10 +291,10 @@ module lsu (
|
||||
// swap the bytes when read from big-endian memory
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////
|
||||
if (`BIGENDIAN_SUPPORTED) begin:endian
|
||||
bigendianswap #(`XLEN) storeswap(.BigEndianM, .a(LittleEndianWriteDataM), .y(FinalWriteDataM));
|
||||
bigendianswap #(`XLEN) storeswap(.BigEndianM, .a(LittleEndianWriteDataM), .y(IEUWriteDataM));
|
||||
bigendianswap #(`LLEN) loadswap(.BigEndianM, .a(ReadDataWordM), .y(LittleEndianReadDataWordM));
|
||||
end else begin
|
||||
assign FinalWriteDataM = LittleEndianWriteDataM;
|
||||
assign IEUWriteDataM = LittleEndianWriteDataM;
|
||||
assign LittleEndianReadDataWordM = ReadDataWordM;
|
||||
end
|
||||
|
||||
|
@ -172,8 +172,8 @@ module plic_apb (
|
||||
end
|
||||
|
||||
// pending interrupt requests
|
||||
//assign nextIntPending = (intPending | requests) & ~intInProgress; //
|
||||
assign nextIntPending = requests; // DH: RT made this change May 2022, but it seems to be a bug to not consider intInProgress; see May 23, 2022 slack discussion
|
||||
assign nextIntPending = (intPending | requests) & ~intInProgress; // dh changed back 7/9/22 see if Buildroot still boots. Confirmed to boot successfully.
|
||||
//assign nextIntPending = requests; // DH: RT made this change May 2022, but it seems to be a bug to not consider intInProgress; see May 23, 2022 slack discussion
|
||||
flopr #(`N) intPendingFlop(PCLK,~PRESETn,nextIntPending,intPending);
|
||||
|
||||
// context-dependent signals
|
||||
|
@ -93,7 +93,7 @@ module wallypipelinedcore (
|
||||
logic FStallD;
|
||||
logic FWriteIntE;
|
||||
logic [`XLEN-1:0] FWriteDataE;
|
||||
logic FLoad2;
|
||||
logic FStore2;
|
||||
logic [`FLEN-1:0] FWriteDataM;
|
||||
logic [`XLEN-1:0] FIntResM;
|
||||
logic [`XLEN-1:0] FCvtIntResW;
|
||||
@ -259,7 +259,7 @@ module wallypipelinedcore (
|
||||
.CommittedM, .DCacheMiss, .DCacheAccess,
|
||||
.SquashSCW,
|
||||
.FpLoadStoreM,
|
||||
.FWriteDataM, .FLoad2,
|
||||
.FWriteDataM, .FStore2,
|
||||
//.DataMisalignedM(DataMisalignedM),
|
||||
.IEUAdrE, .IEUAdrM, .WriteDataE,
|
||||
.ReadDataW, .FlushDCacheM,
|
||||
@ -400,7 +400,7 @@ module wallypipelinedcore (
|
||||
.STATUS_FS, // is floating-point enabled?
|
||||
.FRegWriteM, // FP register write enable
|
||||
.FpLoadStoreM,
|
||||
.FLoad2,
|
||||
.FStore2,
|
||||
.FStallD, // Stall the decode stage
|
||||
.FWriteIntE, // integer register write enable
|
||||
.FWriteDataE, // Data to be written to memory
|
||||
|
@ -96,6 +96,10 @@ void output(FILE *fptr, int aSign, int aExp, double aFrac, int bSign, int bExp,
|
||||
// Print r in standard double format
|
||||
fprintf(fptr, "%03x", rExp|(rSign<<11));
|
||||
printhex(fptr, rFrac);
|
||||
fprintf(fptr, "_");
|
||||
|
||||
// Spacing for testbench, value doesn't matter
|
||||
fprintf(fptr, "%016x", 0);
|
||||
fprintf(fptr, "\n");
|
||||
}
|
||||
|
||||
|
@ -1,2 +1 @@
|
||||
verilator --lint-only --top-module srt srt.sv -I../config/rv64gc -I../config/shared ../src/generic/*.sv ../src/generic/flop/*.sv
|
||||
verilator --lint-only --top-module srtradix4 srt-radix4.sv qsel4.sv -I../config/rv64gc -I../config/shared ../src/generic/*.sv ../src/generic/flop/*.sv
|
||||
|
@ -1,198 +0,0 @@
|
||||
/*
|
||||
Program: qslc_r4a2.c
|
||||
Description: Prints out Quotient Selection Table (assumes CPA is utilized to reduce memory)
|
||||
User: James E. Stine
|
||||
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
|
||||
#define DIVISOR_SIZE 3
|
||||
#define CARRY_SIZE 7
|
||||
#define SUM_SIZE 7
|
||||
#define TOT_SIZE 7
|
||||
|
||||
void disp_binary(double, int, int);
|
||||
|
||||
struct bits {
|
||||
unsigned int divisor : DIVISOR_SIZE;
|
||||
int tot : TOT_SIZE;
|
||||
} pla;
|
||||
|
||||
/*
|
||||
|
||||
Function: disp_binary
|
||||
Description: This function prints x to stdout as a fixed-point
|
||||
binary string, one '0' or '1' character per bit;
|
||||
negative values are printed in two's complement
|
||||
Argument List: double x The value to be converted
|
||||
int bits_to_left Number of bits left of radix point
|
||||
int bits_to_right Number of bits right of radix point
|
||||
Return value: none
|
||||
|
||||
*/
|
||||
void disp_binary(double x, int bits_to_left, int bits_to_right) {
|
||||
int i;
|
||||
double diff;
|
||||
|
||||
if (fabs(x) < pow(2.0, ((double) -bits_to_right)) ) {
|
||||
for (i = -bits_to_left + 1; i <= bits_to_right; i++) {
|
||||
printf("0");
|
||||
}
|
||||
if (i == bits_to_right+1)
|
||||
;
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
if (x < 0.0)
|
||||
x = pow(2.0, ((double) bits_to_left)) + x;
|
||||
|
||||
for (i = -bits_to_left + 1; i <= bits_to_right; i++) {
|
||||
diff = pow(2.0, ((double) -i) );
|
||||
if (x < diff)
|
||||
printf("0");
|
||||
else {
|
||||
printf("1");
|
||||
x -= diff;
|
||||
}
|
||||
if (i == 0)
|
||||
;
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
int main() {
|
||||
int m;
|
||||
int n;
|
||||
int o;
|
||||
pla.divisor = 0;
|
||||
pla.tot = 0;
|
||||
printf("\tcase({D[5:3],Wmsbs})\n");
|
||||
for (o=0; o < pow(2.0, DIVISOR_SIZE); o++) {
|
||||
for (m=0; m < pow(2.0, TOT_SIZE); m++) {
|
||||
printf("\t\t10'b");
|
||||
disp_binary((double) pla.divisor, DIVISOR_SIZE, 0);
|
||||
printf("_");
|
||||
disp_binary((double) pla.tot, TOT_SIZE, 0);
|
||||
printf(": q = 4'b");
|
||||
|
||||
/*
|
||||
4 bits for Radix 4 (a=2)
|
||||
1000 = +2
|
||||
0100 = +1
|
||||
0000 = 0
|
||||
0010 = -1
|
||||
0001 = -2
|
||||
*/
|
||||
switch (pla.divisor) {
|
||||
case 0:
|
||||
if ((pla.tot) >= 12)
|
||||
printf("1000");
|
||||
else if ((pla.tot) >= 4)
|
||||
printf("0100");
|
||||
else if ((pla.tot) >= -4)
|
||||
printf("0000");
|
||||
else if ((pla.tot) >= -13)
|
||||
printf("0010");
|
||||
else
|
||||
printf("0001");
|
||||
break;
|
||||
case 1:
|
||||
if ((pla.tot) >= 14)
|
||||
printf("1000");
|
||||
else if ((pla.tot) >= 4)
|
||||
printf("0100");
|
||||
else if ((pla.tot) >= -6)
|
||||
printf("0000");
|
||||
else if ((pla.tot) >= -15)
|
||||
printf("0010");
|
||||
else
|
||||
printf("0001");
|
||||
break;
|
||||
case 2:
|
||||
if ((pla.tot) >= 15)
|
||||
printf("1000");
|
||||
else if ((pla.tot) >= 4)
|
||||
printf("0100");
|
||||
else if ((pla.tot) >= -6)
|
||||
printf("0000");
|
||||
else if ((pla.tot) >= -16)
|
||||
printf("0010");
|
||||
else
|
||||
printf("0001");
|
||||
break;
|
||||
case 3:
|
||||
if ((pla.tot) >= 16)
|
||||
printf("1000");
|
||||
else if ((pla.tot) >= 4)
|
||||
printf("0100");
|
||||
else if ((pla.tot) >= -6)
|
||||
printf("0000");
|
||||
else if ((pla.tot) >= -18)
|
||||
printf("0010");
|
||||
else
|
||||
printf("0001");
|
||||
break;
|
||||
case 4:
|
||||
if ((pla.tot) >= 18)
|
||||
printf("1000");
|
||||
else if ((pla.tot) >= 6)
|
||||
printf("0100");
|
||||
else if ((pla.tot) >= -8)
|
||||
printf("0000");
|
||||
else if ((pla.tot) >= -20)
|
||||
printf("0010");
|
||||
else
|
||||
printf("0001");
|
||||
break;
|
||||
case 5:
|
||||
if ((pla.tot) >= 20)
|
||||
printf("1000");
|
||||
else if ((pla.tot) >= 6)
|
||||
printf("0100");
|
||||
else if ((pla.tot) >= -8)
|
||||
printf("0000");
|
||||
else if ((pla.tot) >= -20)
|
||||
printf("0010");
|
||||
else
|
||||
printf("0001");
|
||||
break;
|
||||
case 6:
|
||||
if ((pla.tot) >= 20)
|
||||
printf("1000");
|
||||
else if ((pla.tot) >= 8)
|
||||
printf("0100");
|
||||
else if ((pla.tot) >= -8)
|
||||
printf("0000");
|
||||
else if ((pla.tot) >= -22)
|
||||
printf("0010");
|
||||
else
|
||||
printf("0001");
|
||||
break;
|
||||
case 7:
|
||||
if ((pla.tot) >= 24)
|
||||
printf("1000");
|
||||
else if ((pla.tot) >= 8)
|
||||
printf("0100");
|
||||
else if ((pla.tot) >= -8)
|
||||
printf("0000");
|
||||
else if ((pla.tot) >= -24)
|
||||
printf("0010");
|
||||
else
|
||||
printf("0001");
|
||||
break;
|
||||
default: printf ("XXX");
|
||||
|
||||
}
|
||||
|
||||
printf(";\n");
|
||||
(pla.tot)++;
|
||||
}
|
||||
(pla.divisor)++;
|
||||
}
|
||||
printf("\tendcase\n");
|
||||
|
||||
}
|
Binary file not shown.
@ -1,190 +0,0 @@
|
||||
/*
|
||||
Program: qslc_r4a2.c
|
||||
Description: Prints out Quotient Selection Table (assumes CPA is utilized to reduce memory)
|
||||
User: James E. Stine
|
||||
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
|
||||
/* Bit widths of the table inputs.  Only DIVISOR_SIZE and TOT_SIZE are used
   by the code visible in this file; CARRY_SIZE and SUM_SIZE are retained
   unchanged. */
#define DIVISOR_SIZE 3
#define CARRY_SIZE 7
#define SUM_SIZE 7
#define TOT_SIZE 7

void disp_binary(double, int, int);

/*
 * Table-walk state used by main().
 *
 * divisor : unsigned DIVISOR_SIZE-bit index; incrementing past the largest
 *           value wraps back to 0.
 * tot     : TOT_SIZE-bit value that main() compares against negative
 *           thresholds, so it has to be signed.  Whether a plain `int`
 *           bit-field is signed is implementation-defined in C, hence the
 *           explicit `signed`.
 */
struct bits {
  unsigned int divisor : DIVISOR_SIZE;
  signed int tot : TOT_SIZE;
} pla;
|
||||
|
||||
/*

  Function: disp_binary
  Description: Prints x to stdout as a string of '0'/'1' characters in
               fixed-point binary, most significant bit first.  No radix
               point and no newline are printed.  Exactly
               bits_to_left + bits_to_right digits are produced.
               Negative values are printed in two's complement by adding
               2^bits_to_left before conversion.
  Argument List: double x            The value to be converted
                 int bits_to_left    Number of bits left of radix point
                 int bits_to_right   Number of bits right of radix point
  Return value: none

*/
void disp_binary(double x, int bits_to_left, int bits_to_right) {
  int i;
  double weight;

  /* Magnitude smaller than the least significant bit: all digits are 0. */
  if (fabs(x) < pow(2.0, (double) -bits_to_right)) {
    for (i = -bits_to_left + 1; i <= bits_to_right; i++)
      printf("0");
    return;
  }

  /* Map a negative value onto its two's complement bit pattern. */
  if (x < 0.0)
    x = pow(2.0, (double) bits_to_left) + x;

  /* Peel off one bit per position, from weight 2^(bits_to_left-1) down to
     2^(-bits_to_right). */
  for (i = -bits_to_left + 1; i <= bits_to_right; i++) {
    weight = pow(2.0, (double) -i);
    if (x < weight)
      printf("0");
    else {
      printf("1");
      x -= weight;
    }
  }
}
|
||||
|
||||
int main() {
|
||||
int m;
|
||||
int n;
|
||||
int o;
|
||||
pla.divisor = 0;
|
||||
pla.tot = 0;
|
||||
for (o=0; o < pow(2.0, DIVISOR_SIZE); o++) {
|
||||
for (m=0; m < pow(2.0, TOT_SIZE); m++) {
|
||||
/*
|
||||
4 bits for Radix 4 (a=2)
|
||||
1000 = +2
|
||||
0100 = +1
|
||||
0000 = 0
|
||||
0010 = -1
|
||||
0001 = -2
|
||||
*/
|
||||
switch (pla.divisor) {
|
||||
case 0:
|
||||
if ((pla.tot) >= 12)
|
||||
printf("8");
|
||||
else if ((pla.tot) >= 4)
|
||||
printf("4");
|
||||
else if ((pla.tot) >= -4)
|
||||
printf("0");
|
||||
else if ((pla.tot) >= -13)
|
||||
printf("2");
|
||||
else
|
||||
printf("1");
|
||||
break;
|
||||
case 1:
|
||||
if ((pla.tot) >= 14)
|
||||
printf("8");
|
||||
else if ((pla.tot) >= 4)
|
||||
printf("4");
|
||||
else if ((pla.tot) >= -6)
|
||||
printf("0");
|
||||
else if ((pla.tot) >= -15)
|
||||
printf("2");
|
||||
else
|
||||
printf("1");
|
||||
break;
|
||||
case 2:
|
||||
if ((pla.tot) >= 15)
|
||||
printf("8");
|
||||
else if ((pla.tot) >= 4)
|
||||
printf("4");
|
||||
else if ((pla.tot) >= -6)
|
||||
printf("0");
|
||||
else if ((pla.tot) >= -16)
|
||||
printf("2");
|
||||
else
|
||||
printf("1");
|
||||
break;
|
||||
case 3:
|
||||
if ((pla.tot) >= 16)
|
||||
printf("8");
|
||||
else if ((pla.tot) >= 4)
|
||||
printf("4");
|
||||
else if ((pla.tot) >= -6)
|
||||
printf("0");
|
||||
else if ((pla.tot) >= -18)
|
||||
printf("2");
|
||||
else
|
||||
printf("1");
|
||||
break;
|
||||
case 4:
|
||||
if ((pla.tot) >= 18)
|
||||
printf("8");
|
||||
else if ((pla.tot) >= 6)
|
||||
printf("4");
|
||||
else if ((pla.tot) >= -8)
|
||||
printf("0");
|
||||
else if ((pla.tot) >= -20)
|
||||
printf("2");
|
||||
else
|
||||
printf("1");
|
||||
break;
|
||||
case 5:
|
||||
if ((pla.tot) >= 20)
|
||||
printf("8");
|
||||
else if ((pla.tot) >= 6)
|
||||
printf("4");
|
||||
else if ((pla.tot) >= -8)
|
||||
printf("0");
|
||||
else if ((pla.tot) >= -20)
|
||||
printf("2");
|
||||
else
|
||||
printf("1");
|
||||
break;
|
||||
case 6:
|
||||
if ((pla.tot) >= 20)
|
||||
printf("8");
|
||||
else if ((pla.tot) >= 8)
|
||||
printf("4");
|
||||
else if ((pla.tot) >= -8)
|
||||
printf("0");
|
||||
else if ((pla.tot) >= -22)
|
||||
printf("2");
|
||||
else
|
||||
printf("1");
|
||||
break;
|
||||
case 7:
|
||||
if ((pla.tot) >= 24)
|
||||
printf("8");
|
||||
else if ((pla.tot) >= 8)
|
||||
printf("4");
|
||||
else if ((pla.tot) >= -8)
|
||||
printf("0");
|
||||
else if ((pla.tot) >= -24)
|
||||
printf("2");
|
||||
else
|
||||
printf("1");
|
||||
break;
|
||||
default: printf ("X");
|
||||
|
||||
}
|
||||
|
||||
printf("\n");
|
||||
(pla.tot)++;
|
||||
}
|
||||
(pla.divisor)++;
|
||||
}
|
||||
|
||||
}
|
File diff suppressed because it is too large
Load Diff
Binary file not shown.
@ -1,198 +0,0 @@
|
||||
/*
|
||||
Program: qslc_r4a2.c
|
||||
Description: Prints out Quotient Selection Table (assumes CPA is utilized to reduce memory)
|
||||
User: James E. Stine
|
||||
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
|
||||
/* Bit widths of the table inputs.  Only DIVISOR_SIZE and TOT_SIZE are used
   by the code visible in this file; CARRY_SIZE and SUM_SIZE are retained
   unchanged. */
#define DIVISOR_SIZE 3
#define CARRY_SIZE 7
#define SUM_SIZE 7
#define TOT_SIZE 7

void disp_binary(double, int, int);

/*
 * Table-walk state used by main().
 *
 * divisor : unsigned DIVISOR_SIZE-bit index; incrementing past the largest
 *           value wraps back to 0.
 * tot     : TOT_SIZE-bit value that main() compares against negative
 *           thresholds, so it has to be signed.  Whether a plain `int`
 *           bit-field is signed is implementation-defined in C, hence the
 *           explicit `signed`.
 */
struct bits {
  unsigned int divisor : DIVISOR_SIZE;
  signed int tot : TOT_SIZE;
} pla;
|
||||
|
||||
/*

  Function: disp_binary
  Description: Prints x to stdout as a string of '0'/'1' characters in
               fixed-point binary, most significant bit first.  No radix
               point and no newline are printed.  Exactly
               bits_to_left + bits_to_right digits are produced.
               Negative values are printed in two's complement by adding
               2^bits_to_left before conversion.
  Argument List: double x            The value to be converted
                 int bits_to_left    Number of bits left of radix point
                 int bits_to_right   Number of bits right of radix point
  Return value: none

*/
void disp_binary(double x, int bits_to_left, int bits_to_right) {
  int i;
  double weight;

  /* Magnitude smaller than the least significant bit: all digits are 0. */
  if (fabs(x) < pow(2.0, (double) -bits_to_right)) {
    for (i = -bits_to_left + 1; i <= bits_to_right; i++)
      printf("0");
    return;
  }

  /* Map a negative value onto its two's complement bit pattern. */
  if (x < 0.0)
    x = pow(2.0, (double) bits_to_left) + x;

  /* Peel off one bit per position, from weight 2^(bits_to_left-1) down to
     2^(-bits_to_right). */
  for (i = -bits_to_left + 1; i <= bits_to_right; i++) {
    weight = pow(2.0, (double) -i);
    if (x < weight)
      printf("0");
    else {
      printf("1");
      x -= weight;
    }
  }
}
|
||||
|
||||
int main() {
|
||||
int m;
|
||||
int n;
|
||||
int o;
|
||||
pla.divisor = 0;
|
||||
pla.tot = 0;
|
||||
printf("\tcase({D[5:3],Wmsbs})\n");
|
||||
for (o=0; o < pow(2.0, DIVISOR_SIZE); o++) {
|
||||
for (m=0; m < pow(2.0, TOT_SIZE); m++) {
|
||||
printf("\t\t11'b");
|
||||
disp_binary((double) pla.divisor, DIVISOR_SIZE, 0);
|
||||
printf("_");
|
||||
disp_binary((double) pla.tot, TOT_SIZE, 0);
|
||||
printf(": q = 4'b");
|
||||
|
||||
/*
|
||||
4 bits for Radix 4 (a=2)
|
||||
1000 = +2
|
||||
0100 = +1
|
||||
0000 = 0
|
||||
0010 = -1
|
||||
0001 = -2
|
||||
*/
|
||||
switch (pla.divisor) {
|
||||
case 0:
|
||||
if ((pla.tot) >= 24)
|
||||
printf("1000");
|
||||
else if ((pla.tot) >= 8)
|
||||
printf("0100");
|
||||
else if ((pla.tot) >= -8)
|
||||
printf("0000");
|
||||
else if ((pla.tot) >= -26)
|
||||
printf("0010");
|
||||
else
|
||||
printf("0001");
|
||||
break;
|
||||
case 1:
|
||||
if ((pla.tot) >= 28)
|
||||
printf("1000");
|
||||
else if ((pla.tot) >= 8)
|
||||
printf("0100");
|
||||
else if ((pla.tot) >= -10)
|
||||
printf("0000");
|
||||
else if ((pla.tot) >= -28)
|
||||
printf("0010");
|
||||
else
|
||||
printf("0001");
|
||||
break;
|
||||
case 2:
|
||||
if ((pla.tot) >= 32)
|
||||
printf("1000");
|
||||
else if ((pla.tot) >= 8)
|
||||
printf("0100");
|
||||
else if ((pla.tot) >= -12)
|
||||
printf("0000");
|
||||
else if ((pla.tot) >= -32)
|
||||
printf("0010");
|
||||
else
|
||||
printf("0001");
|
||||
break;
|
||||
case 3:
|
||||
if ((pla.tot) >= 32)
|
||||
printf("1000");
|
||||
else if ((pla.tot) >= 8)
|
||||
printf("0100");
|
||||
else if ((pla.tot) >= -12)
|
||||
printf("0000");
|
||||
else if ((pla.tot) >= -34)
|
||||
printf("0010");
|
||||
else
|
||||
printf("0001");
|
||||
break;
|
||||
case 4:
|
||||
if ((pla.tot) >= 36)
|
||||
printf("1000");
|
||||
else if ((pla.tot) >= 12)
|
||||
printf("0100");
|
||||
else if ((pla.tot) >= -12)
|
||||
printf("0000");
|
||||
else if ((pla.tot) >= -36)
|
||||
printf("0010");
|
||||
else
|
||||
printf("0001");
|
||||
break;
|
||||
case 5:
|
||||
if ((pla.tot) >= 40)
|
||||
printf("1000");
|
||||
else if ((pla.tot) >= 12)
|
||||
printf("0100");
|
||||
else if ((pla.tot) >= -16)
|
||||
printf("0000");
|
||||
else if ((pla.tot) >= -40)
|
||||
printf("0010");
|
||||
else
|
||||
printf("0001");
|
||||
break;
|
||||
case 6:
|
||||
if ((pla.tot) >= 40)
|
||||
printf("1000");
|
||||
else if ((pla.tot) >= 16)
|
||||
printf("0100");
|
||||
else if ((pla.tot) >= -16)
|
||||
printf("0000");
|
||||
else if ((pla.tot) >= -44)
|
||||
printf("0010");
|
||||
else
|
||||
printf("0001");
|
||||
break;
|
||||
case 7:
|
||||
if ((pla.tot) >= 44)
|
||||
printf("1000");
|
||||
else if ((pla.tot) >= 16)
|
||||
printf("0100");
|
||||
else if ((pla.tot) >= -16)
|
||||
printf("0000");
|
||||
else if ((pla.tot) >= -46)
|
||||
printf("0010");
|
||||
else
|
||||
printf("0001");
|
||||
break;
|
||||
default: printf ("XXX");
|
||||
|
||||
}
|
||||
|
||||
printf(";\n");
|
||||
(pla.tot)++;
|
||||
}
|
||||
(pla.divisor)++;
|
||||
}
|
||||
printf("\tendcase\n");
|
||||
|
||||
}
|
File diff suppressed because it is too large
Load Diff
Binary file not shown.
@ -30,15 +30,11 @@ void main(void)
|
||||
FILE *fptr;
|
||||
double aFrac, rFrac;
|
||||
int aExp, rExp;
|
||||
double mans[ENTRIES] = {1, 1.5, 1.25, 1.125, 1.0625,
|
||||
double mans[ENTRIES] = {1, 1849.0/1024, 1.25, 1.125, 1.0625,
|
||||
1.75, 1.875, 1.99999,
|
||||
1.1, 1.2, 1.01, 1.001, 1.0001,
|
||||
<<<<<<< Updated upstream
|
||||
1/1.1, 1/1.5, 1/1.25, 1/1.125};
|
||||
=======
|
||||
1.1, 1.5, 1.01, 1.001, 1.0001,
|
||||
2/1.1, 2/1.5, 2/1.25, 2/1.125};
|
||||
>>>>>>> Stashed changes
|
||||
double exps[ENTRIES] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
|
||||
double exps[ENTRIES] = {0, 0, 2, 3, 4, 5, 6, 7, 8, 1, 10,
|
||||
11, 12, 13, 14, 15, 16};
|
||||
int i;
|
||||
int bias = 1023;
|
||||
@ -51,10 +47,19 @@ void main(void)
|
||||
for (i=0; i<ENTRIES; i++) {
|
||||
aFrac = mans[i];
|
||||
aExp = exps[i] + bias;
|
||||
rFrac = sqrt(aFrac * pow(2, aExp - bias));
|
||||
rFrac = sqrt(aFrac * pow(2, exps[i]));
|
||||
rExp = (int) (log(rFrac)/log(2) + bias);
|
||||
output(fptr, aExp, aFrac, rExp, rFrac);
|
||||
}
|
||||
|
||||
// WS
|
||||
// Test 1: sqrt(1) = 1 0000 0000 0000 00
|
||||
// Test 2: sqrt(1849/1024) = 43/32 0000 1100 1110 01
|
||||
// Test 3: sqrt(5) 0000 0100 0000 00
|
||||
// Test 4: sqrt(9) = 3 1111 1001 0000 00
|
||||
// Test 5: sqrt(17) 0000 0001 0000 00
|
||||
// Test 6: sqrt(56) 1111 1110 0000 00
|
||||
// Test 7: sqrt(120) 0000 1110 0000 00
|
||||
|
||||
// for (i = 0; i< RANDOM_VECS; i++) {
|
||||
// a = random_input();
|
||||
@ -69,14 +74,23 @@ void main(void)
|
||||
|
||||
void output(FILE *fptr, int aExp, double aFrac, int rExp, double rFrac)
|
||||
{
|
||||
// Print a in standard double format
|
||||
fprintf(fptr, "%03x", aExp);
|
||||
printhex(fptr, aFrac);
|
||||
fprintf(fptr, "_");
|
||||
|
||||
// Spacing for testbench, value doesn't matter
|
||||
fprintf(fptr, "%016x", 0);
|
||||
fprintf(fptr, "_");
|
||||
|
||||
// Print r in standard double format
|
||||
fprintf(fptr, "%03x", rExp);
|
||||
printhex(fptr, rFrac);
|
||||
fprintf(fptr, "_");
|
||||
|
||||
// Spacing for testbench, value doesn't matter
|
||||
fprintf(fptr, "%016x", 0);
|
||||
fprintf(fptr, "\n");
|
||||
|
||||
|
||||
}
|
||||
|
||||
void printhex(FILE *fptr, double m)
|
||||
|
@ -1,5 +1,5 @@
|
||||
add wave -noupdate /testbench/*
|
||||
add wave -noupdate /testbench/srt/*
|
||||
add wave -noupdate /testbench/srt/otfc2/*
|
||||
add wave -noupdate /testbench/srt/sotfc2/*
|
||||
add wave -noupdate /testbench/srt/preproc/*
|
||||
add wave -noupdate /testbench/srt/divcounter/*
|
||||
|
@ -29,8 +29,6 @@
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
`define EXTRAFRACBITS ((`NF<(`XLEN)) ? (`XLEN - `NF) : 0)
|
||||
`define EXTRAINTBITS ((`NF<(`XLEN)) ? 0 : (`NF - `XLEN))
|
||||
|
||||
module srt (
|
||||
input logic clk,
|
||||
@ -49,18 +47,19 @@ module srt (
|
||||
input logic Int, // Choose integer inputs
|
||||
input logic Sqrt, // perform square root, not divide
|
||||
output logic rsign, done,
|
||||
output logic [`DIVLEN-1:0] Rem, Quot, // *** later handle integers
|
||||
output logic [`DIVLEN-2:0] Rem, Quot, // *** later handle integers
|
||||
output logic [`NE-1:0] rExp,
|
||||
output logic [3:0] Flags
|
||||
);
|
||||
|
||||
logic qp, qz, qm; // quotient is +1, 0, or -1
|
||||
logic [`NE-1:0] calcExp;
|
||||
logic calcSign;
|
||||
logic [`DIVLEN+3:0] X, Dpreproc;
|
||||
logic [`DIVLEN+3:0] WS, WSA, WSN, WC, WCA, WCN, D, Db, Dsel;
|
||||
logic qp, qz, qn; // quotient is +1, 0, or -1
|
||||
logic [`NE-1:0] calcExp;
|
||||
logic calcSign;
|
||||
logic [`DIVLEN+3:0] X, Dpreproc, C, F, AddIn;
|
||||
logic [`DIVLEN+3:0] WS, WSA, WSN, WC, WCA, WCN, D, Db, Dsel;
|
||||
logic [$clog2(`XLEN+1)-1:0] intExp, dur, calcDur;
|
||||
logic intSign;
|
||||
logic intSign;
|
||||
logic cin;
|
||||
|
||||
srtpreproc preproc(SrcA, SrcB, SrcXFrac, SrcYFrac, XExp, Fmt, W64, Signed, Int, Sqrt, X, Dpreproc, intExp, calcDur, intSign);
|
||||
|
||||
@ -76,23 +75,31 @@ module srt (
|
||||
|
||||
// Quotient Selection logic
|
||||
// Given partial remainder, select quotient of +1, 0, or -1 (qp, qz, qn)
|
||||
qsel2 qsel2(WS[`DIVLEN+3:`DIVLEN], WC[`DIVLEN+3:`DIVLEN], qp, qz, qm);
|
||||
qsel2 qsel2(WS[`DIVLEN+3:`DIVLEN-1], WC[`DIVLEN+3:`DIVLEN-1], Sqrt, qp, qz, qn);
|
||||
|
||||
flopen #(`NE) expflop(clk, Start, calcExp, rExp);
|
||||
flopen #(1) signflop(clk, Start, calcSign, rsign);
|
||||
flopen #(7) durflop(clk, Start, calcDur, dur);
|
||||
|
||||
counter divcounter(clk, Start, dur, done);
|
||||
srtcounter divcounter(clk, Start, dur, done);
|
||||
|
||||
// Divisor Selection logic
|
||||
assign Db = ~D;
|
||||
mux3onehot #(`DIVLEN) divisorsel(Db, {(`DIVLEN+4){1'b0}}, D, qp, qz, qm, Dsel);
|
||||
mux3onehot #(`DIVLEN) divisorsel(Db, {(`DIVLEN+4){1'b0}}, D, qp, qz, qn, Dsel);
|
||||
|
||||
// If only implementing division, use divide otfc
|
||||
// otfc2 #(`DIVLEN) otfc2(clk, Start, qp, qz, qn, Quot);
|
||||
// otherwise use sotfc
|
||||
creg sotfcC(clk, Start, C);
|
||||
sotfc2 sotfc2(clk, Start, qp, qn, C, Quot, F);
|
||||
|
||||
// Adder input selection
|
||||
assign AddIn = Sqrt ? F : Dsel;
|
||||
|
||||
// Partial Product Generation
|
||||
csa #(`DIVLEN+4) csa(WS, WC, Dsel, qp, WSA, WCA);
|
||||
assign cin = ~Sqrt & qp;
|
||||
csa #(`DIVLEN+4) csa(WS, WC, AddIn, cin, WSA, WCA);
|
||||
|
||||
otfc2 #(`DIVLEN) otfc2(clk, Start, qp, qz, qm, Quot);
|
||||
|
||||
expcalc expcalc(.XExp, .YExp, .calcExp, .Sqrt);
|
||||
|
||||
signcalc signcalc(.XSign, .YSign, .calcSign);
|
||||
@ -121,42 +128,53 @@ module srtpreproc (
|
||||
|
||||
logic [$clog2(`XLEN+1)-1:0] zeroCntA, zeroCntB;
|
||||
logic [`XLEN-1:0] PosA, PosB;
|
||||
logic [`DIVLEN-1:0] ExtraA, ExtraB, PreprocA, PreprocB, PreprocX, PreprocY, DivX, SqrtX;
|
||||
logic [`DIVLEN-1:0] ExtraA, ExtraB, PreprocA, PreprocB, PreprocX, PreprocY, DivX;
|
||||
logic [`NF+4:0] SqrtX;
|
||||
|
||||
// Generate positive integer inputs if they are signed
|
||||
assign PosA = (Signed & SrcA[`XLEN - 1]) ? -SrcA : SrcA;
|
||||
assign PosB = (Signed & SrcB[`XLEN - 1]) ? -SrcB : SrcB;
|
||||
|
||||
// Calculate leading zeros of integer inputs
|
||||
lzc #(`XLEN) lzcA (PosA, zeroCntA);
|
||||
lzc #(`XLEN) lzcB (PosB, zeroCntB);
|
||||
|
||||
// Make integers have DIVLEN bits
|
||||
assign ExtraA = {PosA, {`EXTRAINTBITS{1'b0}}};
|
||||
assign ExtraB = {PosB, {`EXTRAINTBITS{1'b0}}};
|
||||
|
||||
// Shift integers to have leading ones
|
||||
assign PreprocA = ExtraA << (zeroCntA + 1);
|
||||
assign PreprocB = ExtraB << (zeroCntB + 1);
|
||||
|
||||
// Make mantissas have DIVLEN bits
|
||||
assign PreprocX = {SrcXFrac, {`EXTRAFRACBITS{1'b0}}};
|
||||
assign PreprocY = {SrcYFrac, {`EXTRAFRACBITS{1'b0}}};
|
||||
|
||||
// Selecting correct divider inputs
|
||||
assign DivX = Int ? PreprocA : PreprocX;
|
||||
assign SqrtX = {XExp[0] ? 4'b0000 : 4'b1111, SrcXFrac};
|
||||
|
||||
assign X = Sqrt ? SqrtX : {4'b0001, DivX};
|
||||
assign SqrtX = XExp[0] ? {4'b0000, SrcXFrac, 1'b0} : {5'b11111, SrcXFrac};
|
||||
assign X = Sqrt ? {SqrtX, {(`EXTRAFRACBITS-1){1'b0}}} : {4'b0001, DivX};
|
||||
assign D = {4'b0001, Int ? PreprocB : PreprocY};
|
||||
|
||||
// Integer exponent and sign calculations
|
||||
assign intExp = zeroCntB - zeroCntA + 1;
|
||||
assign intSign = Signed & (SrcA[`XLEN - 1] ^ SrcB[`XLEN - 1]);
|
||||
|
||||
assign dur = Int ? (intExp & {7{~intExp[6]}}) : (`DIVLEN + 2);
|
||||
// Number of cycles of divider
|
||||
assign dur = Int ? (intExp & {7{~intExp[6]}}) : (7)'(`DIVLEN);
|
||||
endmodule
|
||||
|
||||
/////////////////////////////////
|
||||
// Quotient Selection, Radix 2 //
|
||||
/////////////////////////////////
|
||||
module qsel2 ( // *** eventually just change to 4 bits
|
||||
input logic [`DIVLEN+3:`DIVLEN] ps, pc,
|
||||
output logic qp, qz, qm
|
||||
input logic [`DIVLEN+3:`DIVLEN-1] ps, pc,
|
||||
input logic Sqrt,
|
||||
output logic qp, qz, qn
|
||||
);
|
||||
|
||||
logic [`DIVLEN+3:`DIVLEN] p, g;
|
||||
logic [`DIVLEN+3:`DIVLEN-1] p, g;
|
||||
logic magnitude, sign, cout;
|
||||
|
||||
// The quotient selection logic is presented for simplicity, not
|
||||
@ -167,8 +185,8 @@ module qsel2 ( // *** eventually just change to 4 bits
|
||||
assign p = ps ^ pc;
|
||||
assign g = ps & pc;
|
||||
|
||||
assign #1 magnitude = ~(&p[`DIVLEN+2:`DIVLEN]);
|
||||
assign #1 cout = g[`DIVLEN+2] | (p[`DIVLEN+2] & (g[`DIVLEN+1] | p[`DIVLEN+1] & g[`DIVLEN]));
|
||||
assign #1 magnitude = ~(&p[`DIVLEN+2:`DIVLEN-1]);
|
||||
assign #1 cout = g[`DIVLEN+2] | (p[`DIVLEN+2] & (g[`DIVLEN+1] | p[`DIVLEN+1] & (g[`DIVLEN] | (Sqrt & (p[`DIVLEN] & g[`DIVLEN-1])))));
|
||||
assign #1 sign = p[`DIVLEN+3] ^ cout;
|
||||
/* assign #1 magnitude = ~((ps[54]^pc[54]) & (ps[53]^pc[53]) &
|
||||
(ps[52]^pc[52]));
|
||||
@ -180,7 +198,7 @@ module qsel2 ( // *** eventually just change to 4 bits
|
||||
// Produce quotient = +1, 0, or -1
|
||||
assign #1 qp = magnitude & ~sign;
|
||||
assign #1 qz = ~magnitude;
|
||||
assign #1 qm = magnitude & sign;
|
||||
assign #1 qn = magnitude & sign;
|
||||
endmodule
|
||||
|
||||
////////////////////////////////////
|
||||
@ -191,45 +209,36 @@ module fsel2 (
|
||||
input logic [`DIVLEN+3:0] C, S, SM,
|
||||
output logic [`DIVLEN+3:0] F
|
||||
);
|
||||
logic [`DIVLEN+3:0] FP, FN;
|
||||
logic [`DIVLEN+3:0] FP, FN, FZ;
|
||||
|
||||
// Generate for both positive and negative bits
|
||||
assign FP = ~S & C;
|
||||
assign FN = SM | (C & (~C << 2));
|
||||
assign FZ = {(`DIVLEN+4){1'b0}};
|
||||
|
||||
// Choose which adder input will be used
|
||||
|
||||
assign F = sp ? FP : (sn ? FN : (`DIVLEN+4){1'b0});
|
||||
assign F = sp ? FP : (sn ? FN : FZ);
|
||||
|
||||
endmodule
|
||||
|
||||
///////////////////////////////////
|
||||
// On-The-Fly Converter, Radix 2 //
|
||||
///////////////////////////////////
|
||||
module otfc2 #(parameter N=64) (
|
||||
module otfc2 #(parameter N=66) (
|
||||
input logic clk,
|
||||
input logic Start,
|
||||
input logic qp, qz, qm,
|
||||
output logic [N-1:0] r
|
||||
input logic qp, qz, qn,
|
||||
output logic [N-3:0] r
|
||||
);
|
||||
|
||||
// The on-the-fly converter transfers the quotient
|
||||
// bits to the quotient as they come.
|
||||
//
|
||||
// This code follows the psuedocode presented in the
|
||||
// floating point chapter of the book. Right now,
|
||||
// it is written for Radix-2 division.
|
||||
//
|
||||
// QM is Q-1. It allows us to write negative bits
|
||||
// without using a costly CPA.
|
||||
// bits to the quotient as they come.
|
||||
// Use this otfc for division only.
|
||||
logic [N+2:0] Q, QM, QNext, QMNext, QMMux;
|
||||
// QR and QMR are the shifted versions of Q and QM.
|
||||
// They are treated as [N-1:r] size signals, and
|
||||
// discard the r most significant bits of Q and QM.
|
||||
logic [N+1:0] QR, QMR;
|
||||
|
||||
flopr #(N+3) Qreg(clk, Start, QNext, Q);
|
||||
mux2 #(`DIVLEN+3) QMmux(QMNext, {`DIVLEN+3{1'b1}}, Start, QMMux);
|
||||
mux2 #(`DIVLEN+3) Qmux(QMNext, {(`DIVLEN+3){1'b1}}, Start, QMMux);
|
||||
flop #(`DIVLEN+3) QMreg(clk, QMMux, QM);
|
||||
|
||||
always_comb begin
|
||||
@ -241,35 +250,76 @@ module otfc2 #(parameter N=64) (
|
||||
end else if (qz) begin
|
||||
QNext = {QR, 1'b0};
|
||||
QMNext = {QMR, 1'b1};
|
||||
end else begin // If qp and qz are not true, then qm is
|
||||
end else begin // If qp and qz are not true, then qn is
|
||||
QNext = {QMR, 1'b1};
|
||||
QMNext = {QMR, 1'b0};
|
||||
end
|
||||
end
|
||||
assign r = Q[N+2] ? Q[N+1:2] : Q[N:1];
|
||||
assign r = Q[N] ? Q[N-1:2] : Q[N-2:1];
|
||||
|
||||
endmodule
|
||||
|
||||
///////////////////////////////
|
||||
// Square Root OTFC, Radix 2 //
|
||||
///////////////////////////////
|
||||
module softc2(
|
||||
input logic clk,
|
||||
input logic Start,
|
||||
input logic sp, sn,
|
||||
output logic S,
|
||||
module sotfc2(
|
||||
input logic clk,
|
||||
input logic Start,
|
||||
input logic sp, sn,
|
||||
input logic [`DIVLEN+3:0] C,
|
||||
output logic [`DIVLEN-2:0] Sq,
|
||||
output logic [`DIVLEN+3:0] F
|
||||
);
|
||||
// The on-the-fly converter transfers the square root
|
||||
// bits to the quotient as they come.
|
||||
// Use this otfc for division and square root.
|
||||
logic [`DIVLEN+3:0] S, SM, SNext, SMNext, SMux;
|
||||
|
||||
flopr #(`DIVLEN+4) SMreg(clk, Start, SMNext, SM);
|
||||
mux2 #(`DIVLEN+4) Smux(SNext, {4'b0001, {(`DIVLEN){1'b0}}}, Start, SMux);
|
||||
flop #(`DIVLEN+4) Sreg(clk, SMux, S);
|
||||
|
||||
always_comb begin
|
||||
if (sp) begin
|
||||
SNext = S | ((C << 1) & ~(C << 2));
|
||||
SMNext = S;
|
||||
end else if (sn) begin
|
||||
SNext = SM | ((C << 1) & ~(C << 2));
|
||||
SMNext = SM;
|
||||
end else begin // If sp and sn are not true, then sz is
|
||||
SNext = S;
|
||||
SMNext = SM | ((C << 1) & ~(C << 2));
|
||||
end
|
||||
end
|
||||
assign Sq = S[`DIVLEN] ? S[`DIVLEN-1:1] : S[`DIVLEN-2:0];
|
||||
|
||||
fsel2 fsel(sp, sn, C, S, SM, F);
|
||||
|
||||
endmodule
|
||||
|
||||
//////////////////////////
|
||||
// C Register for SOTFC //
|
||||
//////////////////////////
|
||||
module creg(input logic clk,
|
||||
input logic Start,
|
||||
output logic [`DIVLEN+3:0] C
|
||||
);
|
||||
logic [`DIVLEN+3:0] CMux;
|
||||
|
||||
mux2 #(`DIVLEN+4) Cmux({1'b1, C[`DIVLEN+3:1]}, {6'b111111, {(`DIVLEN-2){1'b0}}}, Start, CMux);
|
||||
flop #(`DIVLEN+4) cflop(clk, CMux, C);
|
||||
endmodule
|
||||
|
||||
/////////////
|
||||
// counter //
|
||||
/////////////
|
||||
module counter(input logic clk,
|
||||
input logic req,
|
||||
input logic [$clog2(`XLEN+1)-1:0] dur,
|
||||
output logic done);
|
||||
module srtcounter(input logic clk,
|
||||
input logic req,
|
||||
input logic [$clog2(`XLEN+1)-1:0] dur,
|
||||
output logic done
|
||||
);
|
||||
|
||||
logic [$clog2(`XLEN+1)-1:0] count;
|
||||
logic [$clog2(`XLEN+1)-1:0] count;
|
||||
|
||||
// This block of control logic sequences the divider
|
||||
// through its iterations. You may modify it if you
|
||||
|
@ -1,4 +1,4 @@
|
||||
`define DIVLEN 64
|
||||
`include "wally-config.vh"
|
||||
|
||||
/////////////
|
||||
// counter //
|
||||
@ -39,37 +39,27 @@ endmodule
|
||||
// testbench //
|
||||
//////////
|
||||
module testbench;
|
||||
logic clk;
|
||||
logic req;
|
||||
logic done;
|
||||
logic Int;
|
||||
logic [63:0] a, b;
|
||||
logic [51:0] afrac, bfrac;
|
||||
logic [10:0] aExp, bExp;
|
||||
logic asign, bsign;
|
||||
logic [51:0] r;
|
||||
logic [63:0] rInt;
|
||||
logic [`DIVLEN-1:0] Quot;
|
||||
logic clk;
|
||||
logic req;
|
||||
logic done;
|
||||
logic Int;
|
||||
logic [`XLEN-1:0] a, b;
|
||||
logic [`NF-1:0] afrac, bfrac;
|
||||
logic [`NE-1:0] aExp, bExp;
|
||||
logic asign, bsign;
|
||||
logic [`NF-1:0] r;
|
||||
logic [`XLEN-1:0] rInt;
|
||||
logic [`DIVLEN-2:0] Quot;
|
||||
|
||||
// Test parameters
|
||||
parameter MEM_SIZE = 40000;
|
||||
parameter MEM_WIDTH = 64+64+64+64;
|
||||
|
||||
// INT TEST SIZES
|
||||
// `define memrem 63:0
|
||||
// `define memr 127:64
|
||||
// `define memb 191:128
|
||||
// `define mema 255:192
|
||||
|
||||
// FLOAT TEST SIZES
|
||||
// `define memr 63:0
|
||||
// `define memb 127:64
|
||||
// `define mema 191:128
|
||||
|
||||
// SQRT TEST SIZES
|
||||
`define memr 63:0
|
||||
`define mema 127:64
|
||||
// Test sizes
|
||||
`define memrem 63:0
|
||||
`define memr 127:64
|
||||
`define memb 191:128
|
||||
`define mema 255:192
|
||||
|
||||
// Test registers
|
||||
logic [MEM_WIDTH-1:0] Tests [0:MEM_SIZE]; // Space for input file
|
||||
@ -118,16 +108,16 @@ module testbench;
|
||||
b = Vec[`memb];
|
||||
{bsign, bExp, bfrac} = b;
|
||||
nextr = Vec[`memr];
|
||||
r = Quot[(`DIVLEN - 1):(`DIVLEN - 52)];
|
||||
rInt = Quot;
|
||||
r = Quot[(`DIVLEN - 2):(`DIVLEN - `NF - 1)];
|
||||
rInt = {1'b1, Quot};
|
||||
req <= #5 1;
|
||||
end
|
||||
|
||||
// Apply directed test vectors read from file.
|
||||
|
||||
always @(posedge clk) begin
|
||||
r = Quot[(`DIVLEN - 1):(`DIVLEN - 52)];
|
||||
rInt = Quot;
|
||||
r = Quot[(`DIVLEN - 2):(`DIVLEN - `NF - 1)];
|
||||
rInt = {1'b1, Quot};
|
||||
if (done) begin
|
||||
if (~Int & ~Sqrt) begin
|
||||
req <= #5 1;
|
||||
@ -165,15 +155,14 @@ module testbench;
|
||||
req <= #5 1;
|
||||
diffp = correctr[51:0] - r;
|
||||
diffn = r - correctr[51:0];
|
||||
if (rExp !== correctr[62:52]) // check if accurate to 1 ulp
|
||||
if ((rExp !== correctr[62:52]) | ($signed(diffn) > 1) | ($signed(diffp) > 1) | (diffn === 64'bx) | (diffp === 64'bx)) // check if accurate to 1 ulp
|
||||
begin
|
||||
errors = errors + 1;
|
||||
$display("result was %h, should be %h %h %h\n", r, correctr, diffn, diffp);
|
||||
$display("failed\n");
|
||||
$stop;
|
||||
end
|
||||
if (afrac === 52'hxxxxxxxxxxxxx) begin
|
||||
$display("%d Tests completed successfully", testnum);
|
||||
$display("%d Tests completed successfully", testnum-errors);
|
||||
$stop; end
|
||||
end
|
||||
end
|
||||
|
@ -80,17 +80,17 @@ module testbenchfp;
|
||||
logic CvtResSgnE;
|
||||
logic [`NE:0] CvtCalcExpE; // the calculated expoent
|
||||
logic [`LOGCVTLEN-1:0] CvtShiftAmtE; // how much to shift by
|
||||
logic [`DIVLEN+2:0] Quot;
|
||||
logic [`QLEN-1-(`RADIX/4):0] Quot;
|
||||
logic CvtResDenormUfE;
|
||||
logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2;
|
||||
logic [`DURLEN-1:0] EarlyTermShift;
|
||||
logic DivStart, DivBusy;
|
||||
logic reset = 1'b0;
|
||||
logic [`DIVLEN-1:0] DivX;
|
||||
logic [`DIVLEN-1:0] Dpreproc;
|
||||
logic [`DIVLEN+3:0] WSN, WS;
|
||||
logic [`DIVLEN+3:0] WCN, WC;
|
||||
logic [`DIVLEN+3:0] NextWSN, WS;
|
||||
logic [`DIVLEN+3:0] NextWCN, WC;
|
||||
logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt;
|
||||
logic [$clog2(`DIVLEN/2+3)-1:0] Dur;
|
||||
logic [`DURLEN-1:0] Dur;
|
||||
|
||||
// in-between FMA signals
|
||||
logic Mult;
|
||||
@ -679,15 +679,15 @@ module testbenchfp;
|
||||
.Pe, .ZmSticky, .KillProd);
|
||||
|
||||
postprocess postprocess(.Xs(XSgn), .Ys(YSgn), .PostProcSel(UnitVal[1:0]),
|
||||
.Ze(ZExp), .ZDenorm(ZDenorm), .FOpCtrl(OpCtrlVal), .Quot, .DivCalcExp(DivCalcExp),
|
||||
.Xm(XMan), .Ym(YMan), .Zm(ZMan), .CvtCe(CvtCalcExpE), .DivSticky(DivSticky),
|
||||
.XNaN(XNaN), .YNaN(YNaN), .ZNaN(ZNaN), .CvtResDenormUf(CvtResDenormUfE), .DivNegSticky,
|
||||
.Ze(ZExp), .ZDenorm(ZDenorm), .FOpCtrl(OpCtrlVal), .DivQm(Quot), .DivQe(DivCalcExp),
|
||||
.Xm(XMan), .Ym(YMan), .Zm(ZMan), .CvtCe(CvtCalcExpE), .DivS(DivSticky),
|
||||
.XNaN(XNaN), .YNaN(YNaN), .ZNaN(ZNaN), .CvtResDenormUf(CvtResDenormUfE),
|
||||
.XZero(XZero), .YZero(YZero), .ZZero(ZZero), .CvtShiftAmt(CvtShiftAmtE),
|
||||
.XInf(XInf), .YInf(YInf), .ZInf(ZInf), .CvtCs(CvtResSgnE), .ToInt(WriteIntVal),
|
||||
.XSNaN(XSNaN), .YSNaN(YSNaN), .ZSNaN(ZSNaN), .CvtLzcIn(CvtLzcInE), .IntZero,
|
||||
.FmaKillProd(KillProd), .FmaZmSticky(ZmSticky), .FmaPe(Pe), .DivDone,
|
||||
.FmaSm(Sm), .FmaNegSum(NegSum), .FmaInvA(InvA), .FmaNCnt(NCnt), .DivEarlyTermShiftDiv2(EarlyTermShiftDiv2), .FmaAs(As), .FmaPs(Ps), .Fmt(ModFmt), .Frm(FrmVal),
|
||||
.PostProcFlg(Flg), .W(FpRes), .FCvtIntRes(IntRes));
|
||||
.FmaKillProd(KillProd), .FmaZmS(ZmSticky), .FmaPe(Pe), .DivDone,
|
||||
.FmaSm(Sm), .FmaNegSum(NegSum), .FmaInvA(InvA), .FmaNCnt(NCnt), .DivEarlyTermShift(EarlyTermShift), .FmaAs(As), .FmaPs(Ps), .Fmt(ModFmt), .Frm(FrmVal),
|
||||
.PostProcFlg(Flg), .PostProcRes(FpRes), .FCvtIntRes(IntRes));
|
||||
|
||||
fcvt fcvt (.Xs(XSgn), .Xe(XExp), .Xm(XMan), .Int(SrcA), .ToInt(WriteIntVal),
|
||||
.XZero(XZero), .XDenorm(XDenorm), .FOpCtrl(OpCtrlVal), .IntZero,
|
||||
@ -695,11 +695,10 @@ module testbenchfp;
|
||||
fcmp fcmp (.FmtE(ModFmt), .FOpCtrlE(OpCtrlVal), .XSgnE(XSgn), .YSgnE(YSgn), .XExpE(XExp), .YExpE(YExp),
|
||||
.XManE(XMan), .YManE(YMan), .XZeroE(XZero), .YZeroE(YZero), .CmpIntResE(CmpRes),
|
||||
.XNaNE(XNaN), .YNaNE(YNaN), .XSNaNE(XSNaN), .YSNaNE(YSNaN), .FSrcXE(X), .FSrcYE(Y), .CmpNVE(CmpFlg[4]), .CmpFpResE(FpCmpRes));
|
||||
srtpreproc srtpreproc(.XManE(XMan), .Dur, .YManE(YMan),.X(DivX),.Dpreproc, .XZeroCnt, .YZeroCnt);
|
||||
srtfsm srtfsm(.reset, .WSN, .WCN, .WS, .WC, .Dur, .DivBusy, .DivDone, .clk, .DivStart, .StallM(1'b0), .StallE(1'b0), .XZeroE(XZero), .YZeroE(YZero), .DivStickyE(DivSticky), .XNaNE(XNaN), .YNaNE(YNaN),
|
||||
.XInfE(XInf), .YInfE(YInf), .DivNegStickyE(DivNegSticky), .EarlyTermShiftDiv2E(EarlyTermShiftDiv2));
|
||||
srtradix4 srtradix4(.clk, .FmtE(ModFmt), .X(DivX),.Dpreproc, .DivBusy, .XZeroCnt, .YZeroCnt, .WS, .WC, .WSN, .WCN, .DivStart, .XExpE(XExp), .YExpE(YExp), .XZeroE(XZero), .YZeroE(YZero),
|
||||
.Quot, .Rem(), .DivCalcExpM(DivCalcExp));
|
||||
divsqrt divsqrt(.clk, .reset, .FmtE(ModFmt), .XManE(XMan), .YManE(YMan), .XExpE(XExp), .YExpE(YExp),
|
||||
.XInfE(XInf), .YInfE(YInf), .XZeroE(XZero), .YZeroE(YZero), .XNaNE(XNaN), .YNaNE(YNaN), .DivStartE(DivStart),
|
||||
.StallE(1'b0), .StallM(1'b0), .DivStickyM(DivSticky), .DivBusy, .DivCalcExpM(DivCalcExp),
|
||||
.EarlyTermShiftM(EarlyTermShift), .QuotM(Quot), .DivDone);
|
||||
|
||||
assign CmpFlg[3:0] = 0;
|
||||
|
||||
@ -854,7 +853,7 @@ end
|
||||
|
||||
// check if result is correct
|
||||
// - wait till the division result is done or one extra cycle for early termination (to simulate the EM pipeline stage)
|
||||
if(~((Res === Ans | NaNGood | NaNGood === 1'bx) & (ResFlg === AnsFlg | AnsFlg === 5'bx))&~(DivBusy|DivStart)&(UnitVal !== `CVTINTUNIT)&(UnitVal !== `CMPUNIT)) begin
|
||||
if(~((Res === Ans | NaNGood | NaNGood === 1'bx) & (ResFlg === AnsFlg | AnsFlg === 5'bx))&~((DivBusy===1'b1)|DivStart)&(UnitVal !== `CVTINTUNIT)&(UnitVal !== `CMPUNIT)) begin
|
||||
errors += 1;
|
||||
$display("There is an error in %s", Tests[TestNum]);
|
||||
$display("inputs: %h %h %h\nSrcA: %h\n Res: %h %h\n Ans: %h %h", X, Y, Z, SrcA, Res, ResFlg, Ans, AnsFlg);
|
||||
|
@ -114,6 +114,7 @@ logic [3:0] dummy;
|
||||
"arch32f": if (`F_SUPPORTED) tests = arch32f;
|
||||
"imperas32i": tests = imperas32i;
|
||||
"imperas32f": if (`F_SUPPORTED) tests = imperas32f;
|
||||
// "wally32d": if (`D_SUPPORTED) tests = wally32d;
|
||||
"imperas32m": if (`M_SUPPORTED) tests = imperas32m;
|
||||
"wally32a": if (`A_SUPPORTED) tests = wally32a;
|
||||
"imperas32c": if (`C_SUPPORTED) tests = imperas32c;
|
||||
|
@ -34,7 +34,7 @@
|
||||
string tvpaths[] = '{
|
||||
"../../addins/imperas-riscv-tests/work/",
|
||||
"../../tests/riscof/work/riscv-arch-test/",
|
||||
"../../tests/wally-riscv-arch-test/work/", //"../../tests/riscof/work/wally-riscv-arch-test/",
|
||||
"../../tests/wally-riscv-arch-test/work/", //"../../tests/riscof/work/wally-riscv-arch-test/", //
|
||||
"../../tests/imperas-riscv-tests/work/",
|
||||
"../../benchmarks/coremark/work/",
|
||||
"../../addins/embench-iot/"
|
||||
|
@ -107,7 +107,7 @@ ifeq ($(SAIFPOWER), 1)
|
||||
cp -f ../pipelined/regression/power.saif .
|
||||
endif
|
||||
dc_shell-xg-t -64bit -f scripts/$(NAME).tcl | tee $(OUTPUTDIR)/$(NAME).out
|
||||
rm -rf $(OUTPUTDIR)/hdl
|
||||
# rm -rf $(OUTPUTDIR)/hdl
|
||||
rm -rf $(OUTPUTDIR)/WORK
|
||||
rm -rf $(OUTPUTDIR)/alib-52
|
||||
|
||||
|
@ -11,6 +11,7 @@ import numpy as np
|
||||
from ppa.ppaAnalyze import noOutliers
|
||||
from matplotlib import ticker
|
||||
import argparse
|
||||
import os
|
||||
|
||||
|
||||
def synthsintocsv():
|
||||
@ -59,6 +60,7 @@ def synthsintocsv():
|
||||
writer.writerow([width, config, special, tech, freq, delay, area])
|
||||
file.close()
|
||||
|
||||
|
||||
def synthsfromcsv(filename):
|
||||
Synth = namedtuple("Synth", "width config special tech freq delay area")
|
||||
with open(filename, newline='') as csvfile:
|
||||
@ -74,10 +76,16 @@ def synthsfromcsv(filename):
|
||||
allSynths[i] = Synth(*allSynths[i])
|
||||
return allSynths
|
||||
|
||||
|
||||
def freqPlot(tech, width, config):
|
||||
''' plots delay, area for syntheses with specified tech, module, width
|
||||
'''
|
||||
|
||||
current_directory = os.getcwd()
|
||||
final_directory = os.path.join(current_directory, 'plots/wally')
|
||||
if not os.path.exists(final_directory):
|
||||
os.makedirs(final_directory)
|
||||
|
||||
freqsL, delaysL, areasL = ([[], []] for i in range(3))
|
||||
for oneSynth in allSynths:
|
||||
if (width == oneSynth.width) & (config == oneSynth.config) & (tech == oneSynth.tech) & ('' == oneSynth.special):
|
||||
@ -151,6 +159,7 @@ def areaDelay(tech, delays, areas, labels, fig, ax, norm=False):
|
||||
|
||||
return fig
|
||||
|
||||
|
||||
def plotFeatures(tech, width, config):
|
||||
delays, areas, labels = ([] for i in range(3))
|
||||
freq = techdict[tech].targfreq
|
||||
@ -168,7 +177,8 @@ def plotFeatures(tech, width, config):
|
||||
titlestr = tech+'_'+width+config
|
||||
plt.title(titlestr)
|
||||
plt.savefig('./plots/wally/features_'+titlestr+'.png')
|
||||
|
||||
|
||||
|
||||
def plotConfigs(tech, special=''):
|
||||
delays, areas, labels = ([] for i in range(3))
|
||||
freq = techdict[tech].targfreq
|
||||
@ -207,7 +217,8 @@ def normAreaDelay(special=''):
|
||||
ax.set_ylabel('Area (add32)')
|
||||
ax.legend(handles = fullLeg, loc='upper left')
|
||||
plt.savefig('./plots/wally/normAreaDelay.png')
|
||||
|
||||
|
||||
|
||||
def addFO4axis(fig, ax, tech):
|
||||
fo4 = techdict[tech].fo4
|
||||
|
||||
|
@ -56,7 +56,7 @@ set vhdlout_show_unconnected_pins "true"
|
||||
# Due to parameterized Verilog must use analyze/elaborate and not
|
||||
# read_verilog/vhdl (change to pull in Verilog and/or VHDL)
|
||||
#
|
||||
set alib_library_analysis_path ./$outputDir
|
||||
#set alib_library_analysis_path ./$outputDir
|
||||
define_design_lib WORK -path ./$outputDir/WORK
|
||||
analyze -f sverilog -lib WORK $my_verilog_files
|
||||
elaborate $my_toplevel -lib WORK
|
||||
@ -347,7 +347,7 @@ redirect -append $filename { report_timing -capacitance -transition_time -nets -
|
||||
redirect -append $filename { echo "\n\n\n//// Critical paths through fma2 ////\n\n\n" }
|
||||
redirect -append $filename { report_timing -capacitance -transition_time -nets -through {postprocess/*} -nworst 1 }
|
||||
redirect -append $filename { echo "\n\n\n//// Critical paths through fpdiv ////\n\n\n" }
|
||||
redirect -append $filename { report_timing -capacitance -transition_time -nets -through {fdivsqrt/*} -nworst 1 }
|
||||
redirect -append $filename { report_timing -capacitance -transition_time -nets -through {divsqrt/*} -nworst 1 }
|
||||
redirect -append $filename { echo "\n\n\n//// Critical paths through fcvt ////\n\n\n" }
|
||||
redirect -append $filename { report_timing -capacitance -transition_time -nets -through {fcvt/*} -nworst 1 }
|
||||
|
||||
|
@ -8,7 +8,7 @@ wally_workdir = $(work)/wally-riscv-arch-test
|
||||
current_dir = $(shell pwd)
|
||||
XLEN ?= 64
|
||||
|
||||
all: root build_arch # build_wally memfile
|
||||
all: root build_arch #build_wally memfile
|
||||
|
||||
root:
|
||||
mkdir -p $(work_dir)
|
||||
|
@ -108,7 +108,7 @@ class spike(pluginTemplate):
|
||||
|
||||
#TODO: The following assumes you are using the riscv-gcc toolchain. If
|
||||
# not please change appropriately
|
||||
self.compile_cmd = self.compile_cmd+' -mabi='+('lp64 ' if 64 in ispec['supported_xlen'] else 'ilp32 ')
|
||||
self.compile_cmd = self.compile_cmd+' -mabi='+('lp64 ' if 64 in ispec['supported_xlen'] else ('ilp32e ' if "E" in ispec["ISA"] else 'ilp32 '))
|
||||
|
||||
def runTests(self, testList):
|
||||
|
||||
@ -158,7 +158,12 @@ class spike(pluginTemplate):
|
||||
# echo statement.
|
||||
if self.target_run:
|
||||
# set up the simulation command. Template is for spike. Please change.
|
||||
simcmd = self.dut_exe + ' --isa={0} +signature={1} +signature-granularity=4 {2}'.format(self.isa, sig_file, elf)
|
||||
if ('NO_SAIL=True' in testentry['macros']):
|
||||
# if the tests can't run on SAIL we copy the reference output to the src directory
|
||||
reference_output = re.sub("/src/","/references/", re.sub(".S",".reference_output", test))
|
||||
simcmd = 'cut -c-{0:g} {1} > {2}'.format(8, reference_output, sig_file) #use cut to remove comments when copying
|
||||
else:
|
||||
simcmd = self.dut_exe + ' --isa={0} +signature={1} +signature-granularity=4 {2}'.format(self.isa, sig_file, elf)
|
||||
else:
|
||||
simcmd = 'echo "NO RUN"'
|
||||
|
||||
|
@ -1,11 +1,11 @@
|
||||
hart_ids: [0]
|
||||
hart0:
|
||||
ISA: RV32IMAFCZicsr_Zifencei
|
||||
ISA: RV32IMAFDCZicsr_Zifencei
|
||||
physical_addr_sz: 32
|
||||
User_Spec_Version: '2.3'
|
||||
supported_xlen: [32]
|
||||
misa:
|
||||
reset-val: 0x40001125
|
||||
reset-val: 0x4000112D
|
||||
rv32:
|
||||
accessible: true
|
||||
mxl:
|
||||
@ -23,6 +23,6 @@ hart0:
|
||||
warl:
|
||||
dependency_fields: []
|
||||
legal:
|
||||
- extensions[25:0] bitmask [0x0001125, 0x0000000]
|
||||
- extensions[25:0] bitmask [0x000112D, 0x0000000]
|
||||
wr_illegal:
|
||||
- Unchanged
|
Loading…
Reference in New Issue
Block a user