mirror of
https://github.com/openhwgroup/cvw
synced 2025-02-11 06:05:49 +00:00
Merge branch 'main' of https://github.com/davidharrishmc/riscv-wally
This commit is contained in:
commit
bde1c5eb1b
@ -94,12 +94,12 @@
|
||||
`define BIAS2 ((`F_SUPPORTED & (`LEN1 != `S_LEN)) ? `S_BIAS : `H_BIAS)
|
||||
|
||||
// largest length in IEU/FPU
|
||||
`define CVTLEN ((`NF<`XLEN) ? `XLEN : `NF)
|
||||
`define LLEN ((`FLEN<`XLEN) ? `XLEN : `FLEN)
|
||||
`define CVTLEN ((`NF<`XLEN) ? (`XLEN) : (`NF))
|
||||
`define DIVLEN ((`NF < `XLEN) ? (`XLEN) : (`NF))
|
||||
`define LLEN ((`FLEN<`XLEN) ? (`XLEN) : (`FLEN))
|
||||
`define LOGCVTLEN $unsigned($clog2(`CVTLEN+1))
|
||||
`define NORMSHIFTSZ ((`CVTLEN+`NF) > (3*`NF+8) ? (`CVTLEN+`NF+1) : (3*`NF+9))
|
||||
`define CORRSHIFTSZ ((`CVTLEN+`NF) > (3*`NF+8) ? (`CVTLEN+`NF+1) : (3*`NF+6))
|
||||
`define DIVLEN ((`NF < `XLEN) ? `XLEN : `NF)
|
||||
`define NORMSHIFTSZ ((`DIVLEN+`NF+3) > (3*`NF+8) ? (`DIVLEN+`NF+3) : (3*`NF+9))
|
||||
`define CORRSHIFTSZ ((`DIVLEN+`NF+3) > (3*`NF+8) ? (`DIVLEN+`NF+3) : (3*`NF+6))
|
||||
|
||||
// Disable spurious Verilator warnings
|
||||
|
||||
|
@ -44,16 +44,18 @@ configs = [
|
||||
grepstr="All lints run with no errors or warnings"
|
||||
)
|
||||
]
|
||||
def getBuildrootTC(short):
|
||||
def getBuildrootTC(boot):
|
||||
INSTR_LIMIT = 4000000 # multiple of 100000; 4M is interesting because it gets into the kernel and enabling VM
|
||||
MAX_EXPECTED = 246000000 # *** TODO: replace this with a search for the login prompt.
|
||||
if short:
|
||||
if boot:
|
||||
name="buildrootboot"
|
||||
BRcmd="vsim > {} -c <<!\ndo wally-pipelined.do buildroot buildroot-no-trace $RISCV 0 1 0\n!"
|
||||
BRgrepstr="WallyHostname login:"
|
||||
else:
|
||||
name="buildroot"
|
||||
BRcmd="vsim > {} -c <<!\ndo wally-pipelined-batch.do buildroot buildroot $RISCV "+str(INSTR_LIMIT)+" 1 0\n!"
|
||||
BRgrepstr=str(INSTR_LIMIT)+" instructions"
|
||||
else:
|
||||
BRcmd="vsim > {} -c <<!\ndo wally-pipelined-batch.do buildroot buildroot $RISCV 0 1 0\n!"
|
||||
BRgrepstr=str(MAX_EXPECTED)+" instructions"
|
||||
return TestCase(name="buildroot",variant="rv64gc",cmd=BRcmd,grepstr=BRgrepstr)
|
||||
return TestCase(name,variant="rv64gc",cmd=BRcmd,grepstr=BRgrepstr)
|
||||
|
||||
tc = TestCase(
|
||||
name="buildroot-checkpoint",
|
||||
@ -136,14 +138,14 @@ def main():
|
||||
os.system('./make-tests.sh | tee ./logs/make-tests.log')
|
||||
|
||||
if '-all' in sys.argv:
|
||||
TIMEOUT_DUR = 30*3600 # seconds
|
||||
configs.append(getBuildrootTC(short=False))
|
||||
TIMEOUT_DUR = 30*7200 # seconds
|
||||
configs.append(getBuildrootTC(boot=True))
|
||||
elif '-buildroot' in sys.argv:
|
||||
TIMEOUT_DUR = 30*3600 # seconds
|
||||
configs=[getBuildrootTC(short=False)]
|
||||
TIMEOUT_DUR = 30*7200 # seconds
|
||||
configs=[getBuildrootTC(boot=True)]
|
||||
else:
|
||||
TIMEOUT_DUR = 10*60 # seconds
|
||||
configs.append(getBuildrootTC(short=True))
|
||||
configs.append(getBuildrootTC(boot=False))
|
||||
|
||||
# Scale the number of concurrent processes to the number of test cases, but
|
||||
# max out at a limited number of concurrent processes to not overwhelm the system
|
||||
|
@ -7,4 +7,4 @@
|
||||
# sqrt - test square root
|
||||
# all - test everything
|
||||
|
||||
vsim -c -do "do testfloat.do rv64fpquad all"
|
||||
vsim -c -do "do testfloat.do rv64fpquad $1"
|
@ -22,7 +22,7 @@ add wave -group {Divide} -noupdate /testbenchfp/srtradix4/*
|
||||
add wave -group {Divide} -noupdate /testbenchfp/srtradix4/qsel4/*
|
||||
add wave -group {Divide} -noupdate /testbenchfp/srtradix4/otfc4/*
|
||||
add wave -group {Divide} -noupdate /testbenchfp/srtradix4/preproc/*
|
||||
add wave -group {Divide} -noupdate /testbenchfp/srtradix4/divcounter/*
|
||||
add wave -group {Divide} -noupdate /testbenchfp/srtradix4/earlytermination/*
|
||||
add wave -group {Divide} -noupdate /testbenchfp/srtradix4/expcalc/*
|
||||
add wave -group {Testbench} -noupdate /testbenchfp/*
|
||||
add wave -group {Testbench} -noupdate /testbenchfp/readvectors/*
|
||||
|
9
pipelined/src/cache/cache.sv
vendored
9
pipelined/src/cache/cache.sv
vendored
@ -43,6 +43,9 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGWPL, WORDLEN, MUXINTER
|
||||
input logic [`PA_BITS-1:0] PAdr, // physical address
|
||||
input logic [(`XLEN-1)/8:0] ByteMask,
|
||||
input logic [`XLEN-1:0] FinalWriteData,
|
||||
input logic [`FLEN-1:0] FWriteDataM,
|
||||
input logic FLoad2,
|
||||
input logic FpLoadStoreM,
|
||||
output logic CacheCommitted,
|
||||
output logic CacheStall,
|
||||
// to performance counters to cpu
|
||||
@ -120,7 +123,7 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGWPL, WORDLEN, MUXINTER
|
||||
|
||||
// Array of cache ways, along with victim, hit, dirty, and read merging logic
|
||||
cacheway #(NUMLINES, LINELEN, TAGLEN, OFFSETLEN, SETLEN)
|
||||
CacheWays[NUMWAYS-1:0](.clk, .reset, .RAdr, .PAdr, .CacheWriteData, .ByteMask,
|
||||
CacheWays[NUMWAYS-1:0](.clk, .reset, .RAdr, .PAdr, .CacheWriteData, .ByteMask, .FLoad2,
|
||||
.SetValidWay, .ClearValidWay, .SetDirtyWay, .ClearDirtyWay, .SelEvict, .VictimWay,
|
||||
.FlushWay, .SelFlush, .ReadDataLineWay, .HitWay, .VictimDirtyWay, .VictimTagWay,
|
||||
.Invalidate(InvalidateCacheM));
|
||||
@ -159,6 +162,10 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGWPL, WORDLEN, MUXINTER
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Write Path: Write data and address. Muxes between writes from bus and writes from CPU.
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////
|
||||
if (`LLEN>`XLEN)
|
||||
mux3 #(LINELEN) WriteDataMux(.d0({WORDSPERLINE{FinalWriteData}}),
|
||||
.d1({WORDSPERLINE/2{FWriteDataM}}), .d2(CacheBusWriteData), .s({SetValid,FpLoadStoreM&~SetValid}), .y(CacheWriteData));
|
||||
else
|
||||
mux2 #(LINELEN) WriteDataMux(.d0({WORDSPERLINE{FinalWriteData}}),
|
||||
.d1(CacheBusWriteData), .s(SetValid), .y(CacheWriteData));
|
||||
mux3 #(`PA_BITS) CacheBusAdrMux(.d0({PAdr[`PA_BITS-1:OFFSETLEN], {{OFFSETLEN}{1'b0}}}),
|
||||
|
7
pipelined/src/cache/cacheway.sv
vendored
7
pipelined/src/cache/cacheway.sv
vendored
@ -38,6 +38,7 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26,
|
||||
input logic [$clog2(NUMLINES)-1:0] RAdr,
|
||||
input logic [`PA_BITS-1:0] PAdr,
|
||||
input logic [LINELEN-1:0] CacheWriteData,
|
||||
input logic FLoad2,
|
||||
input logic SetValidWay,
|
||||
input logic ClearValidWay,
|
||||
input logic SetDirtyWay,
|
||||
@ -74,6 +75,12 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26,
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Write Enable demux
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////
|
||||
if(`LLEN>`XLEN)begin
|
||||
logic [2**LOGWPL-1:0] MemPAdrDecodedtmp;
|
||||
onehotdecoder #(LOGWPL) adrdec(
|
||||
.bin(PAdr[LOGWPL+LOGXLENBYTES-1:LOGXLENBYTES]), .decoded(MemPAdrDecodedtmp));
|
||||
assign MemPAdrDecoded = MemPAdrDecodedtmp|{MemPAdrDecodedtmp[2**LOGWPL-2:0]&{2**LOGWPL-1{FLoad2}}, 1'b0};
|
||||
end else
|
||||
onehotdecoder #(LOGWPL) adrdec(
|
||||
.bin(PAdr[LOGWPL+LOGXLENBYTES-1:LOGXLENBYTES]), .decoded(MemPAdrDecoded));
|
||||
// If writing the whole line set all write enables to 1, else only set the correct word.
|
||||
|
@ -2,14 +2,89 @@
|
||||
|
||||
module divshiftcalc(
|
||||
input logic [`DIVLEN+2:0] Quot,
|
||||
input logic [`NE:0] DivCalcExpM,
|
||||
input logic [`NE+1:0] DivCalcExpM,
|
||||
input logic [`FMTBITS-1:0] FmtM,
|
||||
input logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2M,
|
||||
output logic [$clog2(`NORMSHIFTSZ)-1:0] DivShiftAmt,
|
||||
output logic [`NE:0] CorrDivExp
|
||||
output logic [`NORMSHIFTSZ-1:0] DivShiftIn,
|
||||
output logic DivResDenorm,
|
||||
output logic [`NE+1:0] DivDenormShift
|
||||
);
|
||||
logic [`NE+1:0] NormShift;
|
||||
logic [`NE+1:0] Nf, NfPlus1;
|
||||
|
||||
assign DivShiftAmt = {{$clog2(`NORMSHIFTSZ)-1{1'b0}}, ~Quot[`DIVLEN+2]};
|
||||
// the quotient is in the range [.5,2)
|
||||
// if the quotient < 1 and not denormal then subtract 1 to account for the normalization shift
|
||||
assign CorrDivExp = DivCalcExpM - {(`NE)'(0), ~Quot[`DIVLEN+2]};
|
||||
// is the result denormalized
|
||||
// if the exponent is 1 then the result needs to be normalized, then the result is denormalized
|
||||
assign DivResDenorm = DivCalcExpM[`NE+1]|(~|DivCalcExpM[`NE+1:0]);
|
||||
// select the proper fraction length
|
||||
if (`FPSIZES == 1) begin
|
||||
assign Nf = (`NE+2)'(`NF);
|
||||
assign NfPlus1 = (`NE+2)'(`NF+1);
|
||||
|
||||
end else if (`FPSIZES == 2) begin
|
||||
assign Nf = FmtM ? (`NE+2)'(`NF) : (`NE+2)'(`NF1);
|
||||
assign NfPlus1 = FmtM ? (`NE+2)'(`NF+1) : (`NE+2)'(`NF1+1);
|
||||
|
||||
end else if (`FPSIZES == 3) begin
|
||||
always_comb
|
||||
case (FmtM)
|
||||
`FMT: begin
|
||||
Nf = (`NE+2)'(`NF);
|
||||
NfPlus1 = (`NE+2)'(`NF+1);
|
||||
end
|
||||
`FMT1: begin
|
||||
Nf = (`NE+2)'(`NF1);
|
||||
NfPlus1 = (`NE+2)'(`NF1+1);
|
||||
end
|
||||
`FMT2: begin
|
||||
Nf = (`NE+2)'(`NF2);
|
||||
NfPlus1 = (`NE+2)'(`NF2+1);
|
||||
end
|
||||
default: begin
|
||||
Nf = 1'bx;
|
||||
NfPlus1 = 1'bx;
|
||||
end
|
||||
endcase
|
||||
end else if (`FPSIZES == 4) begin
|
||||
always_comb
|
||||
case (FmtM)
|
||||
2'h3: begin
|
||||
Nf = (`NE+2)'(`Q_NF);
|
||||
NfPlus1 = (`NE+2)'(`Q_NF+1);
|
||||
end
|
||||
2'h1: begin
|
||||
Nf = (`NE+2)'(`D_NF);
|
||||
NfPlus1 = (`NE+2)'(`D_NF+1);
|
||||
end
|
||||
2'h0: begin
|
||||
Nf = (`NE+2)'(`S_NF);
|
||||
NfPlus1 = (`NE+2)'(`S_NF+1);
|
||||
end
|
||||
2'h2: begin
|
||||
Nf = (`NE+2)'(`H_NF);
|
||||
NfPlus1 = (`NE+2)'(`H_NF+1);
|
||||
end
|
||||
endcase
|
||||
end
|
||||
// if the result is denormalized
|
||||
// 00000000x.xxxxxx... Exp = DivCalcExp
|
||||
// .00000000xxxxxxx... >> NF+1 Exp = DivCalcExp+NF+1
|
||||
// .00xxxxxxxxxxxxx... << DivCalcExp+NF+1 Exp = +1
|
||||
// .0000xxxxxxxxxxx... >> 1 Exp = 1
|
||||
// Left shift amount = DivCalcExp+NF+1-1
|
||||
assign DivDenormShift = Nf+DivCalcExpM;
|
||||
// if the result is normalized
|
||||
// 00000000x.xxxxxx... Exp = DivCalcExp
|
||||
// .00000000xxxxxxx... >> NF+1 Exp = DivCalcExp+NF+1
|
||||
// 00000000.xxxxxxx... << NF Exp = DivCalcExp+1
|
||||
// 00000000x.xxxxxx... << NF Exp = DivCalcExp (extra shift done afterwards)
|
||||
// 00000000xx.xxxxx... << 1? Exp = DivCalcExp-1 (determined after)
|
||||
// initial Left shift amount = NF
|
||||
assign NormShift = Nf;
|
||||
// if the shift amount is negative then don't shift (keep sticky bit)
|
||||
assign DivShiftAmt = (DivResDenorm ? DivDenormShift[$clog2(`NORMSHIFTSZ)-1:0]&{$clog2(`NORMSHIFTSZ){~DivDenormShift[`NE+1]}} : NormShift[$clog2(`NORMSHIFTSZ)-1:0])+{{$clog2(`NORMSHIFTSZ)-$clog2(`DIVLEN/2+3)-1{1'b0}}, EarlyTermShiftDiv2M&{$clog2(`DIVLEN/2+3){~DivDenormShift[`NE+1]}}, 1'b0};
|
||||
|
||||
// *** may be able to reduce shifter size
|
||||
assign DivShiftIn = {{`NF{1'b0}}, Quot[`DIVLEN+2:0], {`NORMSHIFTSZ-`DIVLEN-3-`NF{1'b0}}};
|
||||
|
||||
endmodule
|
||||
|
@ -33,8 +33,8 @@ module fctrl (
|
||||
default: ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_1; // non-implemented instruction
|
||||
endcase
|
||||
7'b0100111: case(Funct3D)
|
||||
3'b010: ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_0; // fsw
|
||||
3'b011: ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_0; // fsd
|
||||
3'b010: ControlsD = `FCTRLW'b0_0_10_xx_0xx_0_0; // fsw
|
||||
3'b011: ControlsD = `FCTRLW'b0_0_10_xx_0xx_0_0; // fsd
|
||||
default: ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_1; // non-implemented instruction
|
||||
endcase
|
||||
7'b1000011: ControlsD = `FCTRLW'b1_0_01_10_000_0_0; // fmadd
|
||||
@ -121,7 +121,7 @@ module fctrl (
|
||||
assign FmtD = 0;
|
||||
else if (`FPSIZES == 2)begin
|
||||
logic [1:0] FmtTmp;
|
||||
assign FmtTmp = ((Funct7D[6:3] == 4'b0100)&OpD[4]) ? Rs2D[1:0] : Funct7D[1:0];
|
||||
assign FmtTmp = ((Funct7D[6:3] == 4'b0100)&OpD[4]) ? Rs2D[1:0] : (~OpD[6]&(&OpD[2:0])) ? {~Funct3D[1], ~(Funct3D[1]^Funct3D[0])} : Funct7D[1:0];
|
||||
assign FmtD = (`FMT == FmtTmp);
|
||||
end
|
||||
else if (`FPSIZES == 3|`FPSIZES == 4)
|
||||
|
@ -25,7 +25,7 @@ module flags(
|
||||
input logic ZSgnEffM, PSgnM, // the product and modified Z signs
|
||||
input logic Round, UfLSBRes, Sticky, UfPlus1, // bits used to determine rounding
|
||||
output logic DivByZero,
|
||||
output logic IntInvalid, Invalid, Overflow, Underflow, // flags used to select the res
|
||||
output logic IntInvalid, Invalid, Overflow, // flags used to select the res
|
||||
output logic [4:0] PostProcFlgM // flags
|
||||
);
|
||||
logic SigNaN; // is an input a signaling NaN
|
||||
@ -34,6 +34,7 @@ module flags(
|
||||
logic IntInexact; // integer inexact flag
|
||||
logic FmaInvalid; // integer invalid flag
|
||||
logic DivInvalid; // integer invalid flag
|
||||
logic Underflow; // Underflow flag
|
||||
logic ResExpGteMax; // is the result greater than or equal to the maximum floating point expoent
|
||||
logic ShiftGtIntSz; // is the shift greater than the the integer size (use ResExp to account for possible roundning "shift")
|
||||
|
||||
@ -88,7 +89,7 @@ module flags(
|
||||
// | and the exponent isn't negative
|
||||
// | | if the input isnt infinity or NaN
|
||||
// | | |
|
||||
assign Overflow = ResExpGteMax & ~FullResExp[`NE+1]&~(InfIn|NaNIn);
|
||||
assign Overflow = ResExpGteMax & ~FullResExp[`NE+1]&~(InfIn|NaNIn|DivByZero);
|
||||
|
||||
// detecting tininess after rounding
|
||||
// the exponent is negative
|
||||
@ -98,11 +99,11 @@ module flags(
|
||||
// | | | | and if the result is not exact
|
||||
// | | | | | and if the input isnt infinity or NaN
|
||||
// | | | | | |
|
||||
assign Underflow = ((FullResExp[`NE+1] | (FullResExp == 0) | ((FullResExp == 1) & (RoundExp == 0) & ~(UfPlus1&UfLSBRes)))&(Round|Sticky))&~(InfIn|NaNIn);
|
||||
assign Underflow = ((FullResExp[`NE+1] | (FullResExp == 0) | ((FullResExp == 1) & (RoundExp == 0) & ~(UfPlus1&UfLSBRes)))&(Round|Sticky))&~(InfIn|NaNIn|DivByZero);
|
||||
|
||||
// Set Inexact flag if the res is different from what would be output given infinite precision
|
||||
// - Don't set the underflow flag if an underflowed res isn't output
|
||||
assign FpInexact = (Sticky|Overflow|Round|Underflow)&~(InfIn|NaNIn);
|
||||
assign FpInexact = (Sticky|Overflow|Round|Underflow)&~(InfIn|NaNIn|DivByZero);
|
||||
|
||||
// if the res is too small to be represented and not 0
|
||||
// | and if the res is not invalid (outside the integer bounds)
|
||||
@ -133,8 +134,9 @@ module flags(
|
||||
|
||||
assign Invalid = SigNaN | (FmaInvalid&FmaOp) | (DivInvalid&DivOp);
|
||||
|
||||
|
||||
assign DivByZero = YZeroM&DivOp;
|
||||
// if dividing by zero and not 0/0
|
||||
// - don't set flag if an input is NaN or Inf(IEEE says has to be a finite numerator)
|
||||
assign DivByZero = YZeroM&DivOp&~(XZeroM|NaNIn|InfIn);
|
||||
|
||||
// Combine flags
|
||||
// - to integer results do not set the underflow or overflow flags
|
||||
|
@ -120,8 +120,9 @@ module fmashiftcalc(
|
||||
|
||||
// Determine the shift needed for denormal results
|
||||
// - if not denorm add 1 to shift out the leading 1
|
||||
assign DenormShift = PreResultDenorm ? ConvNormSumExp[$clog2(3*`NF+7)-1:0] : 1;
|
||||
assign DenormShift = PreResultDenorm&~KillProdM ? ConvNormSumExp[$clog2(3*`NF+7)-1:0] : 1;
|
||||
// set and calculate the shift input and amount
|
||||
// - shift once if killing a product and the result is denormalized
|
||||
assign FmaShiftIn = {3'b0, SumM};
|
||||
assign FmaShiftAmt = FmaNormCntM+DenormShift;
|
||||
assign FmaShiftAmt = (FmaNormCntM&{$clog2(3*`NF+7){~KillProdM}})+DenormShift;
|
||||
endmodule
|
||||
|
@ -41,10 +41,12 @@ module fpu (
|
||||
input logic [4:0] RdM, RdW, // which FP register to write to (from IEU)
|
||||
input logic [1:0] STATUS_FS, // Is floating-point enabled?
|
||||
output logic FRegWriteM, // FP register write enable
|
||||
output logic FpLoadM, // Fp load instruction?
|
||||
output logic FpLoadStoreM, // Fp load instruction?
|
||||
output logic FLoad2,
|
||||
output logic FStallD, // Stall the decode stage
|
||||
output logic FWriteIntE, // integer register write enables
|
||||
output logic [`XLEN-1:0] FWriteDataE, // Data to be written to memory
|
||||
output logic [`FLEN-1:0] FWriteDataM, // Data to be written to memory
|
||||
output logic [`XLEN-1:0] FIntResM, // data to be written to integer register
|
||||
output logic [`XLEN-1:0] FCvtIntResW, // data to be written to integer register
|
||||
output logic [1:0] FResSelW,
|
||||
@ -124,7 +126,10 @@ module fpu (
|
||||
|
||||
//divide signals
|
||||
logic [`DIVLEN+2:0] Quot;
|
||||
logic [`NE:0] DivCalcExpM;
|
||||
logic [`NE+1:0] DivCalcExpM;
|
||||
logic DivNegStickyM;
|
||||
logic DivStickyM;
|
||||
logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2M;
|
||||
|
||||
// result and flag signals
|
||||
logic [63:0] FDivResM, FDivResW; // divide/squareroot result
|
||||
@ -289,8 +294,19 @@ module fpu (
|
||||
// data to be stored in memory - to IEU
|
||||
// - FP uses NaN-boxing format
|
||||
// - if there are any unused bits the most significant bits are filled with 1s
|
||||
if (`FLEN>`XLEN) assign FWriteDataE = FSrcYE[`XLEN-1:0];
|
||||
else assign FWriteDataE = {{`XLEN-`FLEN{FSrcYE[`FLEN-1]}}, FSrcYE};
|
||||
if (`LLEN==`XLEN) begin
|
||||
assign FWriteDataE = FSrcYE[`XLEN-1:0];
|
||||
end else begin
|
||||
logic [`FLEN-1:0] FWriteDataE;
|
||||
if(`FMTBITS == 2) assign FLoad2 = FmtM == `FMT;
|
||||
else assign FLoad2 = FmtM;
|
||||
|
||||
if (`FPSIZES==1) assign FWriteDataE = FSrcYE;
|
||||
else if (`FPSIZES==2) assign FWriteDataE = FmtE ? FSrcYE : {2{FSrcYE[`LEN1-1:0]}};
|
||||
else assign FWriteDataE = FmtE == `FMT ? FSrcYE : {2{FSrcYE[`LEN1-1:0]}};
|
||||
|
||||
flopenrc #(`FLEN) EMWriteDataReg (clk, reset, FlushM, ~StallM, FWriteDataE, FWriteDataM);
|
||||
end
|
||||
|
||||
// NaN Block SrcA
|
||||
generate
|
||||
@ -353,13 +369,13 @@ module fpu (
|
||||
// ||| |||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
assign FpLoadM = FResSelM[1];
|
||||
assign FpLoadStoreM = FResSelM[1];
|
||||
|
||||
postprocess postprocess(.XSgnM, .YSgnM, .ZExpM, .XManM, .YManM, .ZManM, .FrmM, .FmtM, .ProdExpM,
|
||||
postprocess postprocess(.XSgnM, .YSgnM, .ZExpM, .XManM, .YManM, .ZManM, .FrmM, .FmtM, .ProdExpM, .EarlyTermShiftDiv2M,
|
||||
.AddendStickyM, .KillProdM, .XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, .Quot,
|
||||
.ZInfM, .XNaNM, .YNaNM, .ZNaNM, .XSNaNM, .YSNaNM, .ZSNaNM, .SumM, .DivCalcExpM,
|
||||
.NegSumM, .InvZM, .ZDenormM, .ZSgnEffM, .PSgnM, .FOpCtrlM, .FmaNormCntM,
|
||||
.CvtCalcExpM, .CvtResDenormUfM,.CvtShiftAmtM, .CvtResSgnM, .FWriteIntM,
|
||||
.NegSumM, .InvZM, .ZDenormM, .ZSgnEffM, .PSgnM, .FOpCtrlM, .FmaNormCntM, .DivNegStickyM,
|
||||
.CvtCalcExpM, .CvtResDenormUfM,.CvtShiftAmtM, .CvtResSgnM, .FWriteIntM, .DivStickyM,
|
||||
.CvtLzcInM, .IntZeroM, .PostProcSelM, .PostProcResM, .PostProcFlgM, .FCvtIntResM);
|
||||
|
||||
// FPU flag selection - to privileged
|
||||
|
@ -3,14 +3,20 @@
|
||||
module lzacorrection(
|
||||
input logic [`NORMSHIFTSZ-1:0] Shifted, // the shifted sum before LZA correction
|
||||
input logic FmaOp,
|
||||
input logic DivOp,
|
||||
input logic DivResDenorm,
|
||||
input logic [`NE+1:0] DivCalcExpM,
|
||||
input logic [`NE+1:0] DivDenormShift,
|
||||
input logic [`NE+1:0] ConvNormSumExp, // exponent of the normalized sum not taking into account denormal or zero results
|
||||
input logic PreResultDenorm, // is the result denormalized - calculated before LZA corection
|
||||
input logic KillProdM, // is the product set to zero
|
||||
input logic SumZero,
|
||||
output logic [`CORRSHIFTSZ-1:0] CorrShifted, // the shifted sum before LZA correction
|
||||
output logic [`NE+1:0] CorrDivExp,
|
||||
output logic [`NE+1:0] SumExp // exponent of the normalized sum
|
||||
);
|
||||
logic [3*`NF+5:0] CorrSumShifted; // the shifted sum after LZA correction
|
||||
logic [`CORRSHIFTSZ:0] CorrQuotShifted;
|
||||
logic ResDenorm; // is the result denormalized
|
||||
logic LZAPlus1, LZAPlus2; // add one or two to the sum's exponent due to LZA correction
|
||||
|
||||
@ -19,11 +25,17 @@ module lzacorrection(
|
||||
assign LZAPlus2 = Shifted[`NORMSHIFTSZ-1];
|
||||
// the only possible mantissa for a plus two is all zeroes - a one has to propagate all the way through a sum, so we can leave the bottom statement alone
|
||||
assign CorrSumShifted = LZAPlus1 ? Shifted[`NORMSHIFTSZ-3:1] : Shifted[`NORMSHIFTSZ-4:0];
|
||||
assign CorrShifted = FmaOp ? {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+6){1'b0}}} : Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ];
|
||||
// if the msb is 1 or the exponent was one, but the shifted quotient was < 1 (Denorm)
|
||||
assign CorrQuotShifted = {LZAPlus2|(DivCalcExpM==1&~LZAPlus2) ? Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ] : {Shifted[`NORMSHIFTSZ-2:`NORMSHIFTSZ-`CORRSHIFTSZ], 1'b0}, 1'b0};
|
||||
// if the result of the divider was calculated to be denormalized, then the result was correctly normalized, so select the top shifted bits
|
||||
assign CorrShifted = FmaOp ? {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+6){1'b0}}} : DivOp&~DivResDenorm ? CorrQuotShifted[`CORRSHIFTSZ-1:0] : Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ];
|
||||
// Determine sum's exponent
|
||||
// if plus1 If plus2 if said denorm but norm plus 1 if said denorm but norm plus 2
|
||||
assign SumExp = (ConvNormSumExp+{{`NE+1{1'b0}}, LZAPlus1&~KillProdM}+{{`NE{1'b0}}, LZAPlus2&~KillProdM, 1'b0}+{{`NE+1{1'b0}}, ~ResDenorm&PreResultDenorm&~KillProdM}+{{`NE+1{1'b0}}, &ConvNormSumExp&Shifted[3*`NF+6]&~KillProdM}) & {`NE+2{~(SumZero|ResDenorm)}};
|
||||
// recalculate if the result is denormalized
|
||||
assign ResDenorm = PreResultDenorm&~Shifted[`NORMSHIFTSZ-3]&~Shifted[`NORMSHIFTSZ-2];
|
||||
|
||||
// the quotient is in the range [.5,2) if there is no early termination
|
||||
// if the quotient < 1 and not denormal then subtract 1 to account for the normalization shift
|
||||
assign CorrDivExp = ((DivResDenorm)&~DivDenormShift[`NE+1]) ? (`NE+2)'(0) : DivCalcExpM - {(`NE+1)'(0), ~LZAPlus2};
|
||||
endmodule
|
@ -30,84 +30,98 @@
|
||||
`include "wally-config.vh"
|
||||
|
||||
module postprocess(
|
||||
// general signals
|
||||
input logic XSgnM, YSgnM, // input signs
|
||||
input logic [`NE-1:0] ZExpM, // input exponents
|
||||
input logic [`NF:0] XManM, YManM, ZManM, // input mantissas
|
||||
input logic [2:0] FrmM, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
|
||||
input logic [`FMTBITS-1:0] FmtM, // precision 1 = double 0 = single
|
||||
input logic [`NE+1:0] ProdExpM, // X exponent + Y exponent - bias
|
||||
input logic AddendStickyM, // sticky bit that is calculated during alignment
|
||||
input logic KillProdM, // set the product to zero before addition if the product is too small to matter
|
||||
input logic [2:0] FOpCtrlM, // choose which opperation (look below for values)
|
||||
input logic XZeroM, YZeroM, ZZeroM, // inputs are zero
|
||||
input logic XInfM, YInfM, ZInfM, // inputs are infinity
|
||||
input logic XNaNM, YNaNM, ZNaNM, // inputs are NaN
|
||||
input logic XSNaNM, YSNaNM, ZSNaNM, // inputs are signaling NaNs
|
||||
input logic ZDenormM, // is the original precision denormalized
|
||||
input logic [1:0] PostProcSelM, // select result to be written to fp register
|
||||
//fma signals
|
||||
input logic [`NE+1:0] ProdExpM, // X exponent + Y exponent - bias
|
||||
input logic AddendStickyM, // sticky bit that is calculated during alignment
|
||||
input logic KillProdM, // set the product to zero before addition if the product is too small to matter
|
||||
input logic [3*`NF+5:0] SumM, // the positive sum
|
||||
input logic NegSumM, // was the sum negitive
|
||||
input logic InvZM, // do you invert Z
|
||||
input logic ZDenormM, // is the original precision denormalized
|
||||
input logic ZSgnEffM, // the modified Z sign - depends on instruction
|
||||
input logic PSgnM, // the product's sign
|
||||
input logic [2:0] FOpCtrlM, // choose which opperation (look below for values)
|
||||
input logic [$clog2(3*`NF+7)-1:0] FmaNormCntM, // the normalization shift count
|
||||
//divide signals
|
||||
input logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2M,
|
||||
input logic [`NE+1:0] DivCalcExpM, // the calculated expoent
|
||||
input logic DivStickyM,
|
||||
input logic DivNegStickyM,
|
||||
input logic [`DIVLEN+2:0] Quot,
|
||||
// conversion signals
|
||||
input logic [`NE:0] CvtCalcExpM, // the calculated expoent
|
||||
input logic [`NE:0] DivCalcExpM, // the calculated expoent
|
||||
input logic CvtResDenormUfM,
|
||||
input logic [`LOGCVTLEN-1:0] CvtShiftAmtM, // how much to shift by
|
||||
input logic CvtResSgnM, // the result's sign
|
||||
input logic FWriteIntM, // is fp->int (since it's writting to the integer register)
|
||||
input logic [`CVTLEN-1:0] CvtLzcInM, // input to the Leading Zero Counter (priority encoder)
|
||||
input logic IntZeroM, // is the input zero
|
||||
input logic [1:0] PostProcSelM, // select result to be written to fp register
|
||||
input logic [`DIVLEN+2:0] Quot,
|
||||
// final results
|
||||
output logic [`FLEN-1:0] PostProcResM, // FMA final result
|
||||
output logic [4:0] PostProcFlgM,
|
||||
output logic [`XLEN-1:0] FCvtIntResM // the int conversion result
|
||||
);
|
||||
|
||||
|
||||
|
||||
// general signals
|
||||
logic [`NF-1:0] ResFrac; // Result fraction
|
||||
logic [`NE-1:0] ResExp; // Result exponent
|
||||
logic [`CORRSHIFTSZ-1:0] CorrShifted; // the shifted sum before LZA correction
|
||||
logic [`NE+1:0] SumExp; // exponent of the normalized sum
|
||||
logic [`CORRSHIFTSZ-1:0] CorrShifted; // corectly shifted fraction
|
||||
logic [`NE+1:0] FullResExp; // ResExp with bits to determine sign and overflow
|
||||
logic SumZero; // is the sum zero
|
||||
logic Sticky; // Sticky bit
|
||||
logic [3*`NF+8:0] FmaShiftIn; // is the sum zero
|
||||
logic UfPlus1; // do you add one (for determining underflow flag)
|
||||
logic Round; // bits needed to determine rounding
|
||||
logic [`CVTLEN+`NF:0] CvtShiftIn; // number to be shifted
|
||||
logic Mult; // multiply opperation
|
||||
logic [`FLEN:0] RoundAdd; // how much to add to the result
|
||||
logic [`NE+1:0] ConvNormSumExp; // exponent of the normalized sum not taking into account denormal or zero results
|
||||
logic PreResultDenorm; // is the result denormalized - calculated before LZA corection
|
||||
logic [$clog2(3*`NF+7)-1:0] FmaShiftAmt; // normalization shift count
|
||||
logic [$clog2(`NORMSHIFTSZ)-1:0] ShiftAmt; // normalization shift count
|
||||
logic [$clog2(`NORMSHIFTSZ)-1:0] DivShiftAmt;
|
||||
logic [`NORMSHIFTSZ-1:0] ShiftIn; // is the sum zero
|
||||
logic [`NORMSHIFTSZ-1:0] Shifted; // the shifted result
|
||||
logic Plus1; // add one to the final result?
|
||||
logic IntInvalid, Overflow, Underflow, Invalid; // flags
|
||||
logic Signed; // is the opperation with a signed integer?
|
||||
logic Int64; // is the integer 64 bits?
|
||||
logic IntToFp; // is the opperation an int->fp conversion?
|
||||
logic ToInt; // is the opperation an fp->int conversion?
|
||||
logic IntInvalid, Overflow, Invalid; // flags
|
||||
logic [`NE+1:0] RoundExp;
|
||||
logic [`NE:0] CorrDivExp;
|
||||
logic [1:0] NegResMSBS;
|
||||
logic CvtOp;
|
||||
logic FmaOp;
|
||||
logic CvtResUf;
|
||||
logic DivOp;
|
||||
logic InfIn;
|
||||
logic ResSgn;
|
||||
logic RoundSgn;
|
||||
logic NaNIn;
|
||||
logic DivByZero;
|
||||
logic UfLSBRes;
|
||||
logic Sqrt;
|
||||
logic [`FMTBITS-1:0] OutFmt;
|
||||
// fma signals
|
||||
logic [`NE+1:0] SumExp; // exponent of the normalized sum
|
||||
logic SumZero; // is the sum zero
|
||||
logic [3*`NF+8:0] FmaShiftIn; // is the sum zero
|
||||
logic [`NE+1:0] ConvNormSumExp; // exponent of the normalized sum not taking into account denormal or zero results
|
||||
logic PreResultDenorm; // is the result denormalized - calculated before LZA corection
|
||||
logic [$clog2(3*`NF+7)-1:0] FmaShiftAmt; // normalization shift count
|
||||
// division signals
|
||||
logic [$clog2(`NORMSHIFTSZ)-1:0] DivShiftAmt;
|
||||
logic [`NORMSHIFTSZ-1:0] DivShiftIn;
|
||||
logic [`NE+1:0] CorrDivExp;
|
||||
logic DivByZero;
|
||||
logic DivResDenorm;
|
||||
logic [`NE+1:0] DivDenormShift;
|
||||
// conversion signals
|
||||
logic [`CVTLEN+`NF:0] CvtShiftIn; // number to be shifted
|
||||
logic [1:0] NegResMSBS;
|
||||
logic CvtResUf;
|
||||
// readability signals
|
||||
logic Mult; // multiply opperation
|
||||
logic Int64; // is the integer 64 bits?
|
||||
logic Signed; // is the opperation with a signed integer?
|
||||
logic IntToFp; // is the opperation an int->fp conversion?
|
||||
logic ToInt; // is the opperation an fp->int conversion?
|
||||
logic CvtOp;
|
||||
logic FmaOp;
|
||||
logic DivOp;
|
||||
logic InfIn;
|
||||
logic NaNIn;
|
||||
logic Sqrt;
|
||||
|
||||
// signals to help readability
|
||||
assign Signed = FOpCtrlM[0];
|
||||
@ -140,7 +154,7 @@ module postprocess(
|
||||
.XZeroM, .IntToFp, .OutFmt, .CvtResUf, .CvtShiftIn);
|
||||
fmashiftcalc fmashiftcalc(.SumM, .ZExpM, .ProdExpM, .FmaNormCntM, .FmtM, .KillProdM, .ConvNormSumExp,
|
||||
.ZDenormM, .SumZero, .PreResultDenorm, .FmaShiftAmt, .FmaShiftIn);
|
||||
divshiftcalc divshiftcalc(.Quot, .DivCalcExpM, .CorrDivExp, .DivShiftAmt);
|
||||
divshiftcalc divshiftcalc(.FmtM, .Quot, .DivCalcExpM, .EarlyTermShiftDiv2M, .DivResDenorm, .DivDenormShift, .DivShiftAmt, .DivShiftIn);
|
||||
|
||||
always_comb
|
||||
case(PostProcSelM)
|
||||
@ -154,7 +168,7 @@ module postprocess(
|
||||
end
|
||||
2'b01: begin //div ***prob can take out
|
||||
ShiftAmt = DivShiftAmt;
|
||||
ShiftIn = {Quot[`DIVLEN+1:0], {`NORMSHIFTSZ-`DIVLEN-2{1'b0}}};
|
||||
ShiftIn = DivShiftIn;
|
||||
end
|
||||
default: begin
|
||||
ShiftAmt = {$clog2(`NORMSHIFTSZ){1'bx}};
|
||||
@ -165,7 +179,8 @@ module postprocess(
|
||||
normshift normshift (.ShiftIn, .ShiftAmt, .Shifted);
|
||||
|
||||
lzacorrection lzacorrection(.FmaOp, .KillProdM, .PreResultDenorm, .ConvNormSumExp,
|
||||
.SumZero, .Shifted, .SumExp, .CorrShifted);
|
||||
.DivResDenorm, .DivDenormShift, .DivOp, .DivCalcExpM,
|
||||
.CorrDivExp, .SumZero, .Shifted, .SumExp, .CorrShifted);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Rounding
|
||||
@ -179,6 +194,7 @@ module postprocess(
|
||||
|
||||
round round(.OutFmt, .FrmM, .Sticky, .AddendStickyM, .ZZeroM, .Plus1, .PostProcSelM, .CvtCalcExpM, .CorrDivExp,
|
||||
.InvZM, .RoundSgn, .SumExp, .FmaOp, .CvtOp, .CvtResDenormUfM, .CorrShifted, .ToInt, .CvtResUf,
|
||||
.DivStickyM, .DivNegStickyM,
|
||||
.DivOp, .UfPlus1, .FullResExp, .ResFrac, .ResExp, .Round, .RoundAdd, .UfLSBRes, .RoundExp);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
@ -197,7 +213,7 @@ module postprocess(
|
||||
.XSgnM, .Sqrt, .ToInt, .IntToFp, .Int64, .Signed, .OutFmt, .CvtCalcExpM,
|
||||
.XNaNM, .YNaNM, .NaNIn, .ZSgnEffM, .PSgnM, .Round, .IntInvalid, .DivByZero,
|
||||
.UfLSBRes, .Sticky, .UfPlus1, .CvtOp, .DivOp, .FmaOp, .FullResExp, .Plus1,
|
||||
.RoundExp, .NegResMSBS, .Invalid, .Overflow, .Underflow, .PostProcFlgM);
|
||||
.RoundExp, .NegResMSBS, .Invalid, .Overflow, .PostProcFlgM);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Select the result
|
||||
@ -206,6 +222,7 @@ module postprocess(
|
||||
resultselect resultselect(.XSgnM, .ZExpM, .XManM, .YManM, .ZManM, .ZDenormM, .ZZeroM, .XZeroM, .IntInvalid,
|
||||
.IntZeroM, .FrmM, .OutFmt, .AddendStickyM, .KillProdM, .XNaNM, .YNaNM, .ZNaNM, .RoundAdd, .CvtResUf,
|
||||
.NaNIn, .IntToFp, .Int64, .Signed, .CvtOp, .FmaOp, .Plus1, .Invalid, .Overflow, .InfIn, .NegResMSBS,
|
||||
.XInfM, .YInfM, .DivOp,
|
||||
.DivByZero, .FullResExp, .Shifted, .CvtCalcExpM, .ResSgn, .ResExp, .ResFrac, .PostProcResM, .FCvtIntResM);
|
||||
|
||||
endmodule
|
||||
|
@ -4,26 +4,27 @@ module resultselect(
|
||||
input logic XSgnM, // input signs
|
||||
input logic [`NE-1:0] ZExpM, // input exponents
|
||||
input logic [`NF:0] XManM, YManM, ZManM, // input mantissas
|
||||
input logic XNaNM, YNaNM, ZNaNM, // inputs are NaN
|
||||
input logic [2:0] FrmM, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
|
||||
input logic [`FMTBITS-1:0] OutFmt, // output format
|
||||
input logic InfIn,
|
||||
input logic XZeroM,
|
||||
input logic XInfM, YInfM,
|
||||
input logic XZeroM, ZZeroM,
|
||||
input logic IntZeroM,
|
||||
input logic NaNIn,
|
||||
input logic IntToFp,
|
||||
input logic Int64,
|
||||
input logic Signed,
|
||||
input logic CvtOp,
|
||||
input logic [`NORMSHIFTSZ-1:0] Shifted, // is the sum zero
|
||||
input logic DivOp,
|
||||
input logic FmaOp,
|
||||
input logic [`NORMSHIFTSZ-1:0] Shifted, // is the sum zero
|
||||
input logic Plus1,
|
||||
input logic DivByZero,
|
||||
input logic [`NE:0] CvtCalcExpM, // the calculated expoent
|
||||
input logic AddendStickyM, // sticky bit that is calculated during alignment
|
||||
input logic KillProdM, // set the product to zero before addition if the product is too small to matter
|
||||
input logic XNaNM, YNaNM, ZNaNM, // inputs are NaN
|
||||
input logic ZDenormM, // is the original precision denormalized
|
||||
input logic ZZeroM,
|
||||
input logic ResSgn, // the res's sign
|
||||
input logic [`FLEN:0] RoundAdd, // how much to add to the res
|
||||
input logic IntInvalid, Invalid, Overflow, // flags
|
||||
@ -35,16 +36,17 @@ module resultselect(
|
||||
output logic [1:0] NegResMSBS,
|
||||
output logic [`XLEN-1:0] FCvtIntResM // final res
|
||||
);
|
||||
logic [`FLEN-1:0] XNaNRes, YNaNRes, ZNaNRes, InvalidRes, OfRes, KillProdRes, UfRes, NormRes; // possible results
|
||||
logic [`FLEN-1:0] XNaNRes, YNaNRes, ZNaNRes, InvalidRes, OfRes, UfRes, NormRes; // possible results
|
||||
logic OfResMax;
|
||||
logic [`XLEN-1:0] OfIntRes; // the overflow result for integer output
|
||||
logic [`XLEN+1:0] NegRes; // the negation of the result
|
||||
logic KillRes;
|
||||
logic SelOfRes;
|
||||
|
||||
|
||||
// does the overflow result output the maximum normalized floating point number
|
||||
// output infinity if the input is infinity
|
||||
assign OfResMax = (~InfIn|(IntToFp&CvtOp))&((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResSgn) | (FrmM[1:0]==2'b11&ResSgn));
|
||||
assign OfResMax = (~InfIn|(IntToFp&CvtOp))&~DivByZero&((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResSgn) | (FrmM[1:0]==2'b11&ResSgn));
|
||||
|
||||
if (`FPSIZES == 1) begin
|
||||
|
||||
@ -59,8 +61,7 @@ module resultselect(
|
||||
end
|
||||
|
||||
assign OfRes = OfResMax ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResSgn, {`NE{1'b1}}, {`NF{1'b0}}};
|
||||
assign KillProdRes = {ResSgn, {ZExpM[`NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
|
||||
assign UfRes = {ResSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),Plus1&FrmM[1]};
|
||||
assign UfRes = {ResSgn, {`FLEN-1{1'b0}}, Plus1&FrmM[1]&~(DivOp&YInfM)};
|
||||
assign NormRes = {ResSgn, ResExp, ResFrac};
|
||||
|
||||
end else if (`FPSIZES == 2) begin //will the format conversion in killprod work in other conversions?
|
||||
@ -75,8 +76,7 @@ module resultselect(
|
||||
|
||||
assign OfRes = OutFmt ? OfResMax ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResSgn, {`NE{1'b1}}, {`NF{1'b0}}} :
|
||||
OfResMax ? {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} : {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1{1'b1}}, (`NF1)'(0)};
|
||||
assign KillProdRes = OutFmt ? {ResSgn, {ZExpM[`NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})} : {{`FLEN-`LEN1{1'b1}}, ResSgn, {ZExpM[`NE-1], ZExpM[`NE1-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`NF1]} + (RoundAdd[`NF-`NF1+`LEN1-2:`NF-`NF1]&{`LEN1-1{AddendStickyM}})};
|
||||
assign UfRes = OutFmt ? {ResSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),Plus1&FrmM[1]} : {{`FLEN-`LEN1{1'b1}}, {ResSgn, (`LEN1-1)'(0)} + {(`LEN1-1)'(0), Plus1&FrmM[1]}};
|
||||
assign UfRes = OutFmt ? {ResSgn, (`FLEN-2)'(0), Plus1&FrmM[1]&~(DivOp&YInfM)} : {{`FLEN-`LEN1{1'b1}}, ResSgn, (`LEN1-2)'(0), Plus1&FrmM[1]&~(DivOp&YInfM)};
|
||||
assign NormRes = OutFmt ? {ResSgn, ResExp, ResFrac} : {{`FLEN-`LEN1{1'b1}}, ResSgn, ResExp[`NE1-1:0], ResFrac[`NF-1:`NF-`NF1]};
|
||||
|
||||
end else if (`FPSIZES == 3) begin
|
||||
@ -93,8 +93,7 @@ module resultselect(
|
||||
end
|
||||
|
||||
OfRes = OfResMax ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResSgn, {`NE{1'b1}}, {`NF{1'b0}}};
|
||||
KillProdRes = {ResSgn, {ZExpM[`NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
|
||||
UfRes = {ResSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),Plus1&FrmM[1]};
|
||||
UfRes = {ResSgn, (`FLEN-2)'(0), Plus1&FrmM[1]&~(DivOp&YInfM)};
|
||||
NormRes = {ResSgn, ResExp, ResFrac};
|
||||
end
|
||||
`FMT1: begin
|
||||
@ -107,8 +106,7 @@ module resultselect(
|
||||
InvalidRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
|
||||
end
|
||||
OfRes = OfResMax ? {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} : {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1{1'b1}}, (`NF1)'(0)};
|
||||
KillProdRes = {{`FLEN-`LEN1{1'b1}}, ResSgn, {ZExpM[`NE-1], ZExpM[`NE1-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`NF1]} + (RoundAdd[`NF-`NF1+`LEN1-2:`NF-`NF1]&{`LEN1-1{AddendStickyM}})};
|
||||
UfRes = {{`FLEN-`LEN1{1'b1}}, {ResSgn, (`LEN1-1)'(0)} + {(`LEN1-1)'(0), Plus1&FrmM[1]}};
|
||||
UfRes = {{`FLEN-`LEN1{1'b1}}, ResSgn, (`LEN1-2)'(0), Plus1&FrmM[1]&~(DivOp&YInfM)};
|
||||
NormRes = {{`FLEN-`LEN1{1'b1}}, ResSgn, ResExp[`NE1-1:0], ResFrac[`NF-1:`NF-`NF1]};
|
||||
end
|
||||
`FMT2: begin
|
||||
@ -122,8 +120,7 @@ module resultselect(
|
||||
end
|
||||
|
||||
OfRes = OfResMax ? {{`FLEN-`LEN2{1'b1}}, ResSgn, {`NE2-1{1'b1}}, 1'b0, {`NF2{1'b1}}} : {{`FLEN-`LEN2{1'b1}}, ResSgn, {`NE2{1'b1}}, (`NF2)'(0)};
|
||||
KillProdRes = {{`FLEN-`LEN2{1'b1}}, ResSgn, {ZExpM[`NE-1], ZExpM[`NE2-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`NF2]} + (RoundAdd[`NF-`NF2+`LEN2-2:`NF-`NF2]&{`LEN2-1{AddendStickyM}})};
|
||||
UfRes = {{`FLEN-`LEN2{1'b1}}, {ResSgn, (`LEN2-1)'(0)} + {(`LEN2-1)'(0), Plus1&FrmM[1]}};
|
||||
UfRes = {{`FLEN-`LEN2{1'b1}}, ResSgn, (`LEN2-2)'(0), Plus1&FrmM[1]&~(DivOp&YInfM)};
|
||||
NormRes = {{`FLEN-`LEN2{1'b1}}, ResSgn, ResExp[`NE2-1:0], ResFrac[`NF-1:`NF-`NF2]};
|
||||
end
|
||||
default: begin
|
||||
@ -136,7 +133,6 @@ module resultselect(
|
||||
InvalidRes = (`FLEN)'(0);
|
||||
end
|
||||
OfRes = (`FLEN)'(0);
|
||||
KillProdRes = (`FLEN)'(0);
|
||||
UfRes = (`FLEN)'(0);
|
||||
NormRes = (`FLEN)'(0);
|
||||
end
|
||||
@ -156,8 +152,7 @@ module resultselect(
|
||||
end
|
||||
|
||||
OfRes = OfResMax ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResSgn, {`NE{1'b1}}, {`NF{1'b0}}};
|
||||
KillProdRes = {ResSgn, {ZExpM[`Q_NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
|
||||
UfRes = {ResSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),Plus1&FrmM[1]};
|
||||
UfRes = {ResSgn, (`FLEN-2)'(0), Plus1&FrmM[1]&~(DivOp&YInfM)};
|
||||
NormRes = {ResSgn, ResExp, ResFrac};
|
||||
end
|
||||
2'h1: begin
|
||||
@ -170,8 +165,7 @@ module resultselect(
|
||||
InvalidRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)};
|
||||
end
|
||||
OfRes = OfResMax ? {{`FLEN-`D_LEN{1'b1}}, ResSgn, {`D_NE-1{1'b1}}, 1'b0, {`D_NF{1'b1}}} : {{`FLEN-`D_LEN{1'b1}}, ResSgn, {`D_NE{1'b1}}, (`D_NF)'(0)};
|
||||
KillProdRes = {{`FLEN-`D_LEN{1'b1}}, ResSgn, {ZExpM[`NE-1], ZExpM[`D_NE-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`D_NF]} + (RoundAdd[`NF-`D_NF+`D_LEN-2:`NF-`D_NF]&{`D_LEN-1{AddendStickyM}})};
|
||||
UfRes = {{`FLEN-`D_LEN{1'b1}}, {ResSgn, (`D_LEN-1)'(0)} + {(`D_LEN-1)'(0), Plus1&FrmM[1]}};
|
||||
UfRes = {{`FLEN-`D_LEN{1'b1}}, ResSgn, (`D_LEN-2)'(0), Plus1&FrmM[1]&~(DivOp&YInfM)};
|
||||
NormRes = {{`FLEN-`D_LEN{1'b1}}, ResSgn, ResExp[`D_NE-1:0], ResFrac[`NF-1:`NF-`D_NF]};
|
||||
end
|
||||
2'h0: begin
|
||||
@ -185,8 +179,7 @@ module resultselect(
|
||||
end
|
||||
|
||||
OfRes = OfResMax ? {{`FLEN-`S_LEN{1'b1}}, ResSgn, {`S_NE-1{1'b1}}, 1'b0, {`S_NF{1'b1}}} : {{`FLEN-`S_LEN{1'b1}}, ResSgn, {`S_NE{1'b1}}, (`S_NF)'(0)};
|
||||
KillProdRes = {{`FLEN-`S_LEN{1'b1}}, ResSgn, {ZExpM[`NE-1], ZExpM[`S_NE-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`S_NF]} + (RoundAdd[`NF-`S_NF+`S_LEN-2:`NF-`S_NF]&{`S_LEN-1{AddendStickyM}})};
|
||||
UfRes = {{`FLEN-`S_LEN{1'b1}}, {ResSgn, (`S_LEN-1)'(0)} + {(`S_LEN-1)'(0), Plus1&FrmM[1]}};
|
||||
UfRes = {{`FLEN-`S_LEN{1'b1}}, ResSgn, (`S_LEN-2)'(0), Plus1&FrmM[1]&~(DivOp&YInfM)};
|
||||
NormRes = {{`FLEN-`S_LEN{1'b1}}, ResSgn, ResExp[`S_NE-1:0], ResFrac[`NF-1:`NF-`S_NF]};
|
||||
end
|
||||
2'h2: begin
|
||||
@ -200,9 +193,8 @@ module resultselect(
|
||||
end
|
||||
|
||||
OfRes = OfResMax ? {{`FLEN-`H_LEN{1'b1}}, ResSgn, {`H_NE-1{1'b1}}, 1'b0, {`H_NF{1'b1}}} : {{`FLEN-`H_LEN{1'b1}}, ResSgn, {`H_NE{1'b1}}, (`H_NF)'(0)};
|
||||
|
||||
KillProdRes = {{`FLEN-`H_LEN{1'b1}}, ResSgn, {ZExpM[`NE-1], ZExpM[`H_NE-2:1],ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`H_NF]} + (RoundAdd[`NF-`H_NF+`H_LEN-2:`NF-`H_NF]&{`H_LEN-1{AddendStickyM}})};
|
||||
UfRes = {{`FLEN-`H_LEN{1'b1}}, {ResSgn, (`H_LEN-1)'(0)} + {(`H_LEN-1)'(0), Plus1&FrmM[1]}};
|
||||
// zero is exact if dividing by infinity, so don't add 1
|
||||
UfRes = {{`FLEN-`H_LEN{1'b1}}, ResSgn, (`H_LEN-2)'(0), Plus1&FrmM[1]&~(DivOp&YInfM)};
|
||||
NormRes = {{`FLEN-`H_LEN{1'b1}}, ResSgn, ResExp[`H_NE-1:0], ResFrac[`NF-1:`NF-`H_NF]};
|
||||
end
|
||||
endcase
|
||||
@ -217,22 +209,20 @@ module resultselect(
|
||||
// - do so if the res underflows, is zero (the exp doesn't calculate correctly), or the integer input is 0
|
||||
// - don't set to zero if fp input is zero but not using the fp input
|
||||
// - don't set to zero if int input is zero but not using the int input
|
||||
assign KillRes = CvtOp ? (CvtResUf|(XZeroM&~IntToFp)|(IntZeroM&IntToFp)) : FullResExp[`NE+1];//Underflow & ~ResDenorm & (ResExp!=1);
|
||||
|
||||
assign KillRes = CvtOp ? (CvtResUf|(XZeroM&~IntToFp)|(IntZeroM&IntToFp)) : FullResExp[`NE+1] | (((YInfM&~XInfM)|XZeroM)&DivOp);//Underflow & ~ResDenorm & (ResExp!=1);
|
||||
assign SelOfRes = Overflow|DivByZero|(InfIn&~(YInfM&DivOp));
|
||||
// output infinity with result sign if divide by zero
|
||||
if(`IEEE754) begin
|
||||
assign PostProcResM = XNaNM&~(IntToFp&CvtOp) ? XNaNRes :
|
||||
YNaNM&~CvtOp ? YNaNRes :
|
||||
ZNaNM&FmaOp ? ZNaNRes :
|
||||
Invalid ? InvalidRes :
|
||||
Overflow|DivByZero|InfIn ? OfRes :
|
||||
KillProdM&FmaOp ? KillProdRes :
|
||||
SelOfRes ? OfRes :
|
||||
KillRes ? UfRes :
|
||||
NormRes;
|
||||
end else begin
|
||||
assign PostProcResM = NaNIn|Invalid ? InvalidRes :
|
||||
Overflow|DivByZero|InfIn ? OfRes :
|
||||
KillProdM&FmaOp ? KillProdRes :
|
||||
SelOfRes ? OfRes :
|
||||
KillRes ? UfRes :
|
||||
NormRes;
|
||||
end
|
||||
|
@ -12,10 +12,10 @@ module round(
|
||||
input logic [2:0] FrmM, // rounding mode
|
||||
input logic FmaOp,
|
||||
input logic DivOp,
|
||||
input logic CvtOp,
|
||||
input logic ToInt,
|
||||
input logic [1:0] PostProcSelM,
|
||||
input logic CvtResDenormUfM,
|
||||
input logic ToInt,
|
||||
input logic CvtOp,
|
||||
input logic CvtResUf,
|
||||
input logic [`CORRSHIFTSZ-1:0] CorrShifted,
|
||||
input logic AddendStickyM, // addend's sticky bit
|
||||
@ -24,7 +24,9 @@ module round(
|
||||
input logic [`NE+1:0] SumExp, // exponent of the normalized sum
|
||||
input logic RoundSgn, // the result's sign
|
||||
input logic [`NE:0] CvtCalcExpM, // the calculated expoent
|
||||
input logic [`NE:0] CorrDivExp, // the calculated expoent
|
||||
input logic [`NE+1:0] CorrDivExp, // the calculated expoent
|
||||
input logic DivStickyM, // sticky bit
|
||||
input logic DivNegStickyM,
|
||||
output logic UfPlus1, // do you add or subtract on from the result
|
||||
output logic [`NE+1:0] FullResExp, // ResExp with bits to determine sign and overflow
|
||||
output logic [`NF-1:0] ResFrac, // Result fraction
|
||||
@ -149,7 +151,7 @@ module round(
|
||||
|
||||
// only add the Addend sticky if doing an FMA operation
|
||||
// - the shifter shifts too far left when there's an underflow (shifting out all possible sticky bits)
|
||||
assign UfSticky = AddendStickyM&FmaOp | NormSumSticky | CvtResUf&CvtOp | SumExp[`NE+1]&FmaOp;
|
||||
assign UfSticky = AddendStickyM&FmaOp | NormSumSticky | CvtResUf&CvtOp | SumExp[`NE+1]&FmaOp | DivStickyM&DivOp;
|
||||
|
||||
// determine round and LSB of the rounded value
|
||||
// - underflow round bit is used to determine the underflow flag
|
||||
@ -223,9 +225,11 @@ module round(
|
||||
assign Sticky = UfSticky | UfRound;
|
||||
|
||||
|
||||
// Determine if a small number was supposed to be subtracted - for FMA calculation only
|
||||
assign SubBySmallNum = AddendStickyM & InvZM & ~(NormSumSticky|UfRound) & ~ZZeroM & FmaOp;
|
||||
assign UfSubBySmallNum = AddendStickyM & InvZM & ~(NormSumSticky) & ~ZZeroM & FmaOp;
|
||||
// Determine if a small number was supposed to be subtracted
|
||||
// - for FMA or if division has a negative sticky bit
|
||||
assign SubBySmallNum = ((AddendStickyM&FmaOp&~ZZeroM&InvZM) | (DivNegStickyM&DivOp)) & ~(NormSumSticky|UfRound);
|
||||
assign UfSubBySmallNum = ((AddendStickyM&FmaOp&~ZZeroM&InvZM) | (DivNegStickyM&DivOp)) & ~NormSumSticky;
|
||||
|
||||
|
||||
always_comb begin
|
||||
// Determine if you add 1
|
||||
@ -305,7 +309,7 @@ module round(
|
||||
case(PostProcSelM)
|
||||
2'b10: RoundExp = SumExp; // fma
|
||||
2'b00: RoundExp = {CvtCalcExpM[`NE], CvtCalcExpM}&{`NE+2{~CvtResDenormUfM|CvtResUf}}; // cvt
|
||||
2'b01: RoundExp = {CorrDivExp[`NE], CorrDivExp[`NE:0]}; // divide
|
||||
2'b01: RoundExp = CorrDivExp; // divide
|
||||
default: RoundExp = 0;
|
||||
endcase
|
||||
|
||||
|
@ -124,7 +124,13 @@ module datapath (
|
||||
flopenrc #(5) RdWReg(clk, reset, FlushW, ~StallW, RdM, RdW);
|
||||
|
||||
// floating point interactions: fcvt, fp stores
|
||||
if (`F_SUPPORTED) begin:fpmux
|
||||
if (`F_SUPPORTED&(`LLEN>`XLEN)) begin:fpmux
|
||||
logic [`XLEN-1:0] IFCvtResultW;
|
||||
mux2 #(`XLEN) resultmuxM(IEUResultM, FIntResM, FWriteIntM, IFResultM);
|
||||
assign WriteDataE = ForwardedSrcBE;
|
||||
mux2 #(`XLEN) cvtresultmuxW(IFResultW, FCvtIntResW, ~FResSelW[1]&FResSelW[0], IFCvtResultW);
|
||||
mux5 #(`XLEN) resultmuxW(IFCvtResultW, ReadDataW, CSRReadValW, MDUResultW, SCResultW, ResultSrcW, ResultW);
|
||||
end else if (`F_SUPPORTED) begin:fpmux
|
||||
logic [`XLEN-1:0] IFCvtResultW;
|
||||
mux2 #(`XLEN) resultmuxM(IEUResultM, FIntResM, FWriteIntM, IFResultM);
|
||||
mux2 #(`XLEN) writedatamux(ForwardedSrcBE, FWriteDataE, ~IllegalFPUInstrE, WriteDataE);
|
||||
|
@ -227,7 +227,7 @@ module ifu (
|
||||
icache(.clk, .reset, .CPUBusy, .IgnoreRequestTLB(ITLBMissF), .TrapM(TrapM), .IgnoreRequestTrapM('0),
|
||||
.CacheBusWriteData(ICacheBusWriteData), .CacheBusAck(ICacheBusAck),
|
||||
.CacheBusAdr(ICacheBusAdr), .CacheStall(ICacheStallF),
|
||||
.CacheFetchLine(ICacheFetchLine),
|
||||
.CacheFetchLine(ICacheFetchLine), .FWriteDataM(), .FpLoadStoreM(), .FLoad2(),
|
||||
.CacheWriteLine(), .ReadDataWord(FinalInstrRawF),
|
||||
.Cacheable(CacheableF),
|
||||
.CacheMiss(ICacheMiss), .CacheAccess(ICacheAccess),
|
||||
|
@ -57,7 +57,9 @@ module lsu (
|
||||
input logic BigEndianM,
|
||||
input logic sfencevmaM,
|
||||
// fpu
|
||||
input logic FpLoadM,
|
||||
input logic [`FLEN-1:0] FWriteDataM,
|
||||
input logic FLoad2,
|
||||
input logic FpLoadStoreM,
|
||||
// faults
|
||||
output logic LoadPageFaultM, StoreAmoPageFaultM,
|
||||
output logic LoadMisalignedFaultM, LoadAccessFaultM,
|
||||
@ -235,7 +237,7 @@ module lsu (
|
||||
.NUMWAYS(`DCACHE_NUMWAYS), .LOGWPL(LOGWPL), .WORDLEN(`LLEN), .MUXINTERVAL(`XLEN), .DCACHE(1)) dcache(
|
||||
.clk, .reset, .CPUBusy, .LSUBusWriteCrit, .RW(LSURWM), .Atomic(LSUAtomicM),
|
||||
.FlushCache(FlushDCacheM), .NextAdr(LSUAdrE), .PAdr(LSUPAdrM),
|
||||
.ByteMask(ByteMaskM), .WordCount,
|
||||
.ByteMask(ByteMaskM), .WordCount, .FpLoadStoreM, .FWriteDataM, .FLoad2,
|
||||
.FinalWriteData(FinalWriteDataM), .Cacheable(CacheableM),
|
||||
.CacheStall(DCacheStallM), .CacheMiss(DCacheMiss), .CacheAccess(DCacheAccess),
|
||||
.IgnoreRequestTLB, .IgnoreRequestTrapM, .TrapM(1'b0), .CacheCommitted(DCacheCommittedM),
|
||||
@ -269,7 +271,7 @@ module lsu (
|
||||
subwordwrite subwordwrite(.LSUPAdrM(LSUPAdrM[2:0]),
|
||||
.LSUFunct3M, .AMOWriteDataM, .LittleEndianWriteDataM, .ByteMaskM);
|
||||
subwordread subwordread(.ReadDataWordMuxM, .LSUPAdrM(LSUPAdrM[2:0]),
|
||||
.FpLoadM, .Funct3M(LSUFunct3M), .ReadDataM);
|
||||
.FpLoadStoreM, .Funct3M(LSUFunct3M), .ReadDataM);
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// MW Pipeline Register
|
||||
|
@ -35,7 +35,7 @@ module subwordread
|
||||
input logic [`LLEN-1:0] ReadDataWordMuxM,
|
||||
input logic [2:0] LSUPAdrM,
|
||||
input logic [2:0] Funct3M,
|
||||
input logic FpLoadM,
|
||||
input logic FpLoadStoreM,
|
||||
output logic [`LLEN-1:0] ReadDataM
|
||||
);
|
||||
|
||||
@ -83,16 +83,16 @@ module subwordread
|
||||
case(Funct3M)
|
||||
3'b000: ReadDataM = {{`LLEN-8{ByteM[7]}}, ByteM}; // lb
|
||||
3'b001: if(`ZFH_SUPPORTED)
|
||||
ReadDataM = {{`LLEN-16{HalfwordM[15]|FpLoadM}}, HalfwordM[15:0]}; // lh/flh
|
||||
ReadDataM = {{`LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh
|
||||
else ReadDataM = {{`LLEN-16{HalfwordM[15]}}, HalfwordM[15:0]}; // lh
|
||||
3'b010: if(`F_SUPPORTED)
|
||||
ReadDataM = {{`LLEN-32{WordM[31]|FpLoadM}}, WordM[31:0]}; // lw/flw
|
||||
ReadDataM = {{`LLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw
|
||||
else ReadDataM = {{`LLEN-32{WordM[31]}}, WordM[31:0]}; // lw
|
||||
3'b011: if(`D_SUPPORTED)
|
||||
ReadDataM = {{`LLEN-64{DblWordM[63]|FpLoadM}}, DblWordM[63:0]}; // ld/fld
|
||||
ReadDataM = {{`LLEN-64{DblWordM[63]|FpLoadStoreM}}, DblWordM[63:0]}; // ld/fld
|
||||
else ReadDataM = {{`LLEN-64{DblWordM[63]}}, DblWordM[63:0]}; // ld/fld
|
||||
3'b100: if(`Q_SUPPORTED)
|
||||
ReadDataM = FpLoadM ? ReadDataWordMuxM : {{`LLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq
|
||||
ReadDataM = FpLoadStoreM ? ReadDataWordMuxM : {{`LLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq
|
||||
else
|
||||
ReadDataM = {{`LLEN-8{1'b0}}, ByteM[7:0]}; // lbu
|
||||
3'b101: ReadDataM = {{`LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu
|
||||
@ -122,10 +122,10 @@ module subwordread
|
||||
case(Funct3M)
|
||||
3'b000: ReadDataM = {{`LLEN-8{ByteM[7]}}, ByteM}; // lb
|
||||
3'b001: if(`ZFH_SUPPORTED)
|
||||
ReadDataM = {{`LLEN-16{HalfwordM[15]|FpLoadM}}, HalfwordM[15:0]}; // lh/flh
|
||||
ReadDataM = {{`LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh
|
||||
else ReadDataM = {{`LLEN-16{HalfwordM[15]}}, HalfwordM[15:0]}; // lh
|
||||
3'b010: if(`F_SUPPORTED)
|
||||
ReadDataM = {{`LLEN-32{ReadDataWordMuxM[31]|FpLoadM}}, ReadDataWordMuxM[31:0]}; // lw/flw
|
||||
ReadDataM = {{`LLEN-32{ReadDataWordMuxM[31]|FpLoadStoreM}}, ReadDataWordMuxM[31:0]}; // lw/flw
|
||||
else ReadDataM = {{`LLEN-32{ReadDataWordMuxM[31]}}, ReadDataWordMuxM[31:0]}; // lw
|
||||
3'b011: ReadDataM = ReadDataWordMuxM; // fld
|
||||
3'b100: ReadDataM = {{`LLEN-8{1'b0}}, ByteM[7:0]}; // lbu
|
||||
|
@ -92,13 +92,15 @@ module wallypipelinedcore (
|
||||
logic FStallD;
|
||||
logic FWriteIntE;
|
||||
logic [`XLEN-1:0] FWriteDataE;
|
||||
logic FLoad2;
|
||||
logic [`FLEN-1:0] FWriteDataM;
|
||||
logic [`XLEN-1:0] FIntResM;
|
||||
logic [`XLEN-1:0] FCvtIntResW;
|
||||
logic FDivBusyE;
|
||||
logic IllegalFPUInstrD, IllegalFPUInstrE;
|
||||
logic FRegWriteM;
|
||||
logic FPUStallD;
|
||||
logic FpLoadM;
|
||||
logic FpLoadStoreM;
|
||||
logic [1:0] FResSelW;
|
||||
logic [4:0] SetFflagsM;
|
||||
|
||||
@ -253,7 +255,8 @@ module wallypipelinedcore (
|
||||
.AtomicM, .TrapM,
|
||||
.CommittedM, .DCacheMiss, .DCacheAccess,
|
||||
.SquashSCW,
|
||||
.FpLoadM,
|
||||
.FpLoadStoreM,
|
||||
.FWriteDataM, .FLoad2,
|
||||
//.DataMisalignedM(DataMisalignedM),
|
||||
.IEUAdrE, .IEUAdrM, .WriteDataE,
|
||||
.ReadDataW, .FlushDCacheM,
|
||||
@ -391,10 +394,12 @@ module wallypipelinedcore (
|
||||
.RdM, .RdW, // which FP register to write to (from IEU)
|
||||
.STATUS_FS, // is floating-point enabled?
|
||||
.FRegWriteM, // FP register write enable
|
||||
.FpLoadM,
|
||||
.FpLoadStoreM,
|
||||
.FLoad2,
|
||||
.FStallD, // Stall the decode stage
|
||||
.FWriteIntE, // integer register write enable
|
||||
.FWriteDataE, // Data to be written to memory
|
||||
.FWriteDataM, // Data to be written to memory
|
||||
.FIntResM, // data to be written to integer register
|
||||
.FCvtIntResW, // fp -> int conversion result to be stored in int register
|
||||
.FResSelW, // fpu result selection
|
||||
|
@ -23,5 +23,10 @@ qslc_sqrt_r4a2: qslc_sqrt_r4a2.c
|
||||
gcc qslc_sqrt_r4a2.c -o qslc_sqrt_r4a2 -lm
|
||||
./qslc_sqrt_r4a2 > qslc_sqrt_r4a2.sv
|
||||
|
||||
inttestgen: inttestgen.c
|
||||
gcc -lm -o inttestgen inttestgen.c
|
||||
./inttestgen
|
||||
|
||||
clean:
|
||||
rm -f testgen exptestgen qslc_r4a2 qslc_r4a2b qslc_sqrt_r4a2
|
||||
|
||||
|
@ -36,41 +36,45 @@ module srtradix4 (
|
||||
input logic [`NE-1:0] XExpE, YExpE,
|
||||
input logic [`NF:0] XManE, YManE,
|
||||
input logic [`XLEN-1:0] SrcA, SrcB,
|
||||
input logic XZeroE,
|
||||
input logic XInfE, YInfE,
|
||||
input logic XZeroE, YZeroE,
|
||||
input logic XNaNE, YNaNE,
|
||||
input logic W64, // 32-bit ints on XLEN=64
|
||||
input logic Signed, // Interpret integers as signed 2's complement
|
||||
input logic Int, // Choose integer inputs
|
||||
input logic Sqrt, // perform square root, not divide
|
||||
output logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2E,
|
||||
output logic DivDone,
|
||||
output logic DivStickyE,
|
||||
output logic DivNegStickyE,
|
||||
output logic [`DIVLEN+2:0] Quot,
|
||||
output logic [`XLEN-1:0] Rem, // *** later handle integers
|
||||
output logic [`NE:0] DivCalcExpE
|
||||
output logic [`NE+1:0] DivCalcExpE
|
||||
);
|
||||
|
||||
// logic qp, qz, qm; // quotient is +1, 0, or -1
|
||||
logic [3:0] q;
|
||||
logic [`NE:0] DivCalcExp;
|
||||
logic [`DIVLEN:0] X;
|
||||
logic [`NE+1:0] DivCalcExp;
|
||||
logic [`DIVLEN-1:0] X;
|
||||
logic [`DIVLEN-1:0] Dpreproc;
|
||||
logic [`DIVLEN+3:0] WS, WSA, WSN;
|
||||
logic [`DIVLEN+3:0] WC, WCA, WCN;
|
||||
logic [`DIVLEN+3:0] D, DBar, D2, DBar2, Dsel;
|
||||
logic [$clog2(`XLEN+1)-1:0] intExp;
|
||||
logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt;
|
||||
logic intSign;
|
||||
|
||||
srtpreproc preproc(SrcA, SrcB, XManE, YManE, W64, Signed, Int, Sqrt, X, Dpreproc, intExp, intSign);
|
||||
srtpreproc preproc(.SrcA, .SrcB, .XManE, .YManE, .W64, .Signed, .Int, .Sqrt, .X,
|
||||
.XZeroCnt, .YZeroCnt, .Dpreproc, .intExp, .intSign);
|
||||
|
||||
// Top Muxes and Registers
|
||||
// When start is asserted, the inputs are loaded into the divider.
|
||||
// Otherwise, the divisor is retained and the partial remainder
|
||||
// is fed back for the next iteration.
|
||||
// - assumed one is added here since all numbers are normalized
|
||||
// *** wait what about zero? is that a special case? can the divider handle it?
|
||||
// - when the start signal is asserted X and 0 are loaded into WS and WC
|
||||
// - otherwise load WSA into the flipflop
|
||||
// *** what does N and A stand for?
|
||||
// *** change shift amount for radix4
|
||||
mux2 #(`DIVLEN+4) wsmux({WSA[`DIVLEN+1:0], 2'b0}, {3'b000, X}, DivStart, WSN);
|
||||
// - the assumed one is added to D since it's always normalized (and X/0 is a special case handled by result selection)
|
||||
// - XZeroE is used as the assumed one to avoid creating a sticky bit - all other numbers are normalized
|
||||
mux2 #(`DIVLEN+4) wsmux({WSA[`DIVLEN+1:0], 2'b0}, {3'b000, ~XZeroE, X}, DivStart, WSN);
|
||||
flop #(`DIVLEN+4) wsflop(clk, WSN, WS);
|
||||
mux2 #(`DIVLEN+4) wcmux({WCA[`DIVLEN+1:0], 2'b0}, {`DIVLEN+4{1'b0}}, DivStart, WCN);
|
||||
flop #(`DIVLEN+4) wcflop(clk, WCN, WC);
|
||||
@ -88,7 +92,7 @@ module srtradix4 (
|
||||
qsel4 qsel4(.D, .WS, .WC, .q);
|
||||
|
||||
// Store the exponent and sign until division is done
|
||||
flopen #(`NE+1) expflop(clk, DivStart, DivCalcExp, DivCalcExpE);
|
||||
flopen #(`NE+2) expflop(clk, DivStart, DivCalcExp, DivCalcExpE);
|
||||
|
||||
// Divisor Selection logic
|
||||
// *** radix 4 change to choose -2 to 2
|
||||
@ -114,9 +118,10 @@ module srtradix4 (
|
||||
//*** change for radix 4
|
||||
otfc4 otfc4(.clk, .DivStart, .q, .Quot);
|
||||
|
||||
expcalc expcalc(.XExpE, .YExpE, .XZeroE, .DivCalcExp);
|
||||
expcalc expcalc(.XExpE, .YExpE, .XZeroE, .XZeroCnt, .YZeroCnt, .DivCalcExp);
|
||||
|
||||
divcounter divcounter(clk, DivStart, DivDone);
|
||||
earlytermination earlytermination(.clk, .WC, .WS, .XZeroE, .YZeroE, .XInfE, .EarlyTermShiftDiv2E,
|
||||
.YInfE, .XNaNE, .YNaNE, .DivStickyE, .DivNegStickyE, .DivStart, .DivDone);
|
||||
|
||||
endmodule
|
||||
|
||||
@ -124,28 +129,36 @@ endmodule
|
||||
// Submodules //
|
||||
////////////////
|
||||
|
||||
/////////////
|
||||
// counter //
|
||||
/////////////
|
||||
module divcounter(input logic clk,
|
||||
module earlytermination(
|
||||
input logic clk,
|
||||
input logic [`DIVLEN+3:0] WS, WC,
|
||||
input logic XInfE, YInfE,
|
||||
input logic XZeroE, YZeroE,
|
||||
input logic XNaNE, YNaNE,
|
||||
input logic DivStart,
|
||||
output logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2E,
|
||||
output logic DivStickyE,
|
||||
output logic DivNegStickyE,
|
||||
output logic DivDone);
|
||||
|
||||
logic [5:0] count;
|
||||
|
||||
// This block of control logic sequences the divider
|
||||
// through its iterations. You may modify it if you
|
||||
// build a divider which completes in fewer iterations.
|
||||
// You are not responsible for the (trivial) circuit
|
||||
// design of the block.
|
||||
logic [$clog2(`DIVLEN/2+3)-1:0] Count;
|
||||
logic WZero;
|
||||
logic [`DIVLEN+3:0] W;
|
||||
|
||||
assign WZero = ((WS^WC)=={WS[`DIVLEN+2:0]|WC[`DIVLEN+2:0], 1'b0})|XZeroE|YZeroE|XInfE|YInfE|XNaNE|YNaNE;
|
||||
assign DivDone = (DivStickyE | WZero);
|
||||
assign DivStickyE = ~|Count;
|
||||
assign W = WC+WS;
|
||||
assign DivNegStickyE = W[`DIVLEN+3]; //*** is there a better way to do this???
|
||||
assign EarlyTermShiftDiv2E = Count;
|
||||
// +1 for setup
|
||||
// `DIVLEN/2 to get required number of bits
|
||||
// +1 for possible .5 and round bit
|
||||
// Count down Counter
|
||||
always @(posedge clk)
|
||||
begin
|
||||
DivDone = 0;
|
||||
if (count == `DIVLEN/2+1) DivDone <= #1 1;
|
||||
else if (DivDone | DivStart) DivDone <= #1 0;
|
||||
if (DivStart) count <= #1 0;
|
||||
else count <= #1 count+1;
|
||||
if (DivStart) Count <= #1 `DIVLEN/2+2;
|
||||
else Count <= #1 Count-1;
|
||||
end
|
||||
endmodule
|
||||
|
||||
@ -231,31 +244,34 @@ module srtpreproc (
|
||||
input logic Signed, // Interpret integers as signed 2's complement
|
||||
input logic Int, // Choose integer inputs
|
||||
input logic Sqrt, // perform square root, not divide
|
||||
output logic [`DIVLEN:0] X,
|
||||
output logic [`DIVLEN-1:0] X,
|
||||
output logic [`DIVLEN-1:0] Dpreproc,
|
||||
output logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt,
|
||||
output logic [$clog2(`XLEN+1)-1:0] intExp, // Quotient integer exponent
|
||||
output logic intSign // Quotient integer sign
|
||||
);
|
||||
|
||||
// logic [$clog2(`XLEN+1)-1:0] zeroCntA, zeroCntB;
|
||||
// logic [`XLEN-1:0] PosA, PosB;
|
||||
// logic [`DIVLEN-1:0] ExtraA, ExtraB, PreprocA, PreprocB, PreprocX, PreprocY;
|
||||
logic [`DIVLEN:0] PreprocA, PreprocX;
|
||||
logic [`DIVLEN-1:0] PreprocA, PreprocX;
|
||||
logic [`DIVLEN-1:0] PreprocB, PreprocY;
|
||||
|
||||
// assign PosA = (Signed & SrcA[`XLEN - 1]) ? -SrcA : SrcA;
|
||||
// assign PosB = (Signed & SrcB[`XLEN - 1]) ? -SrcB : SrcB;
|
||||
|
||||
// lzc #(`XLEN) lzcA (PosA, zeroCntA);
|
||||
// lzc #(`XLEN) lzcB (PosB, zeroCntB);
|
||||
|
||||
// ***can probably merge X LZC with conversion
|
||||
// count the number of leading zeros
|
||||
lzc #(`NF+1) lzcA (XManE, XZeroCnt);
|
||||
lzc #(`NF+1) lzcB (YManE, YZeroCnt);
|
||||
|
||||
// assign ExtraA = {PosA, {`DIVLEN-`XLEN{1'b0}}};
|
||||
// assign ExtraB = {PosB, {`DIVLEN-`XLEN{1'b0}}};
|
||||
|
||||
// assign PreprocA = ExtraA << zeroCntA;
|
||||
// assign PreprocB = ExtraB << (zeroCntB + 1);
|
||||
assign PreprocX = {XManE, {`DIVLEN-`NF{1'b0}}};
|
||||
assign PreprocY = {YManE[`NF-1:0], {`DIVLEN-`NF{1'b0}}};
|
||||
assign PreprocX = {XManE[`NF-1:0]<<XZeroCnt, {`DIVLEN-`NF{1'b0}}};
|
||||
assign PreprocY = {YManE[`NF-1:0]<<YZeroCnt, {`DIVLEN-`NF{1'b0}}};
|
||||
|
||||
|
||||
assign X = Int ? PreprocA : PreprocX;
|
||||
@ -291,7 +307,7 @@ module otfc4 (
|
||||
// if starting a new division set Q to 0 and QM to -1
|
||||
mux2 #(`DIVLEN+3) Qmux(QNext, {`DIVLEN+3{1'b0}}, DivStart, QMux);
|
||||
mux2 #(`DIVLEN+3) QMmux(QMNext, {`DIVLEN+3{1'b1}}, DivStart, QMMux);
|
||||
flop #(`DIVLEN+3) Qreg(clk, QMux, Quot);
|
||||
flop #(`DIVLEN+3) Qreg(clk, QMux, Quot); // *** have to connect Quot directly to M stage
|
||||
flop #(`DIVLEN+3) QMreg(clk, QMMux, QM);
|
||||
|
||||
// shift Q (quotient) and QM (quotient-1)
|
||||
@ -322,8 +338,7 @@ module otfc4 (
|
||||
QMNext = {QMR, 2'b11};
|
||||
end
|
||||
end
|
||||
// Quot is in the range [.5, 2) so normalize the result if necessary
|
||||
// assign Quot = Q[`DIVLEN+2] ? Q[`DIVLEN+1:2] : Q[`DIVLEN:1];
|
||||
// Final Quotient is in the range [.5, 2)
|
||||
|
||||
endmodule
|
||||
|
||||
@ -358,9 +373,11 @@ endmodule
|
||||
module expcalc(
|
||||
input logic [`NE-1:0] XExpE, YExpE,
|
||||
input logic XZeroE,
|
||||
output logic [`NE:0] DivCalcExp
|
||||
input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt,
|
||||
output logic [`NE+1:0] DivCalcExp
|
||||
);
|
||||
|
||||
assign DivCalcExp = (XExpE - YExpE + (`NE)'(`BIAS))&{`NE+1{~XZeroE}};
|
||||
// correct exponent for denormalized input's normalization shifts
|
||||
assign DivCalcExp = (XExpE - XZeroCnt - YExpE + YZeroCnt + (`NE)'(`BIAS))&{`NE+2{~XZeroE}};
|
||||
|
||||
endmodule
|
||||
|
@ -2,7 +2,7 @@
|
||||
// srt.sv
|
||||
//
|
||||
// Written: David_Harris@hmc.edu 13 January 2022
|
||||
// Modified:
|
||||
// Modified: cturek@hmc.edu June 2022
|
||||
//
|
||||
// Purpose: Combined Divide and Square Root Floating Point and Integer Unit
|
||||
//
|
||||
@ -29,10 +29,8 @@
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
`define DIVLEN ((`NF<(`XLEN+1)) ? (`XLEN + 1) : `NF)
|
||||
`define EXTRAFRACBITS ((`NF<(`XLEN+1)) ? (`XLEN - `NF + 1) : 0)
|
||||
`define EXTRAINTBITS ((`NF<(`XLEN+1)) ? 0 : (`NF - `XLEN))
|
||||
`define EXTRAFRACBITS ((`NF<(`XLEN)) ? (`XLEN - `NF) : 0)
|
||||
`define EXTRAINTBITS ((`NF<(`XLEN)) ? 0 : (`NF - `XLEN))
|
||||
|
||||
module srt (
|
||||
input logic clk,
|
||||
@ -131,11 +129,11 @@ module srtpreproc (
|
||||
lzc #(`XLEN) lzcA (PosA, zeroCntA);
|
||||
lzc #(`XLEN) lzcB (PosB, zeroCntB);
|
||||
|
||||
assign ExtraA = {1'b0, PosA, {`EXTRAINTBITS{1'b0}}};
|
||||
assign ExtraB = {1'b0, PosB, {`EXTRAINTBITS{1'b0}}};
|
||||
assign ExtraA = {PosA, {`EXTRAINTBITS{1'b0}}};
|
||||
assign ExtraB = {PosB, {`EXTRAINTBITS{1'b0}}};
|
||||
|
||||
assign PreprocA = ExtraA << zeroCntA;
|
||||
assign PreprocB = ExtraB << (zeroCntB + 1);
|
||||
assign PreprocB = ExtraB << zeroCntB;
|
||||
assign PreprocX = {SrcXFrac, {`EXTRAFRACBITS{1'b0}}};
|
||||
assign PreprocY = {SrcYFrac, {`EXTRAFRACBITS{1'b0}}};
|
||||
|
||||
@ -228,14 +226,15 @@ module otfc2 #(parameter N=65) (
|
||||
//
|
||||
// QM is Q-1. It allows us to write negative bits
|
||||
// without using a costly CPA.
|
||||
logic [N+2:0] Q, QM, QNext, QMNext;
|
||||
logic [N+2:0] Q, QM, QNext, QMNext, QMMux;
|
||||
// QR and QMR are the shifted versions of Q and QM.
|
||||
// They are treated as [N-1:r] size signals, and
|
||||
// discard the r most significant bits of Q and QM.
|
||||
logic [N+1:0] QR, QMR;
|
||||
|
||||
flopr #(N+3) Qreg(clk, Start, QNext, Q);
|
||||
flopr #(N+3) QMreg(clk, Start, QMNext, QM);
|
||||
mux2 #(`DIVLEN+3) QMmux(QMNext, {`DIVLEN+3{1'b1}}, Start, QMMux);
|
||||
flop #(`DIVLEN+3) QMreg(clk, QMMux, QM);
|
||||
|
||||
always_comb begin
|
||||
QR = Q[N+1:0];
|
||||
|
@ -1,4 +1,4 @@
|
||||
`define DIVLEN 65
|
||||
`define DIVLEN 64
|
||||
|
||||
/////////////
|
||||
// counter //
|
||||
@ -17,7 +17,7 @@ module counter(input logic clk,
|
||||
|
||||
always @(posedge clk)
|
||||
begin
|
||||
if (count == `DIVLEN+1) done <= #1 1;
|
||||
if (count == `DIVLEN + 2) done <= #1 1;
|
||||
else if (done | req) done <= #1 0;
|
||||
if (req) count <= #1 0;
|
||||
else count <= #1 count+1;
|
||||
@ -101,8 +101,8 @@ module testbench;
|
||||
b = Vec[`memb];
|
||||
{bsign, bExp, bfrac} = b;
|
||||
nextr = Vec[`memr];
|
||||
r = Quot[`DIVLEN:`DIVLEN - 52];
|
||||
rOTFC = QuotOTFC[`DIVLEN:`DIVLEN - 52];
|
||||
r = Quot[(`DIVLEN - 1):(`DIVLEN - 52)];
|
||||
rOTFC = QuotOTFC[(`DIVLEN - 1):(`DIVLEN - 52)];
|
||||
req <= #5 1;
|
||||
end
|
||||
|
||||
@ -110,8 +110,8 @@ module testbench;
|
||||
|
||||
always @(posedge clk)
|
||||
begin
|
||||
r = Quot[`DIVLEN:`DIVLEN - 52];
|
||||
rOTFC = QuotOTFC[`DIVLEN:`DIVLEN - 52];
|
||||
r = Quot[(`DIVLEN - 1):(`DIVLEN - 52)];
|
||||
rOTFC = QuotOTFC[(`DIVLEN - 1):(`DIVLEN - 52)];
|
||||
if (done)
|
||||
begin
|
||||
req <= #5 1;
|
||||
|
@ -55,6 +55,7 @@ module testbenchfp;
|
||||
logic [`LOGCVTLEN-1:0] CvtShiftAmtE; // how much to shift by
|
||||
logic [`DIVLEN+2:0] Quot;
|
||||
logic CvtResDenormUfE;
|
||||
logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2;
|
||||
logic DivStart, DivDone;
|
||||
|
||||
|
||||
@ -69,8 +70,9 @@ module testbenchfp;
|
||||
logic NegSumE;
|
||||
logic ZSgnEffE;
|
||||
logic PSgnE;
|
||||
logic DivSgn;
|
||||
logic [`NE:0] DivCalcExp;
|
||||
logic DivSticky;
|
||||
logic DivNegSticky;
|
||||
logic [`NE+1:0] DivCalcExp;
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////
|
||||
@ -644,13 +646,13 @@ module testbenchfp;
|
||||
|
||||
postprocess postprocess(.XSgnM(XSgn), .YSgnM(YSgn), .PostProcSelM(UnitVal[1:0]),
|
||||
.ZExpM(ZExp), .ZDenormM(ZDenorm), .FOpCtrlM(OpCtrlVal), .Quot, .DivCalcExpM(DivCalcExp),
|
||||
.XManM(XMan), .YManM(YMan), .ZManM(ZMan), .CvtCalcExpM(CvtCalcExpE),
|
||||
.XNaNM(XNaN), .YNaNM(YNaN), .ZNaNM(ZNaN), .CvtResDenormUfM(CvtResDenormUfE),
|
||||
.XManM(XMan), .YManM(YMan), .ZManM(ZMan), .CvtCalcExpM(CvtCalcExpE), .DivStickyM(DivSticky),
|
||||
.XNaNM(XNaN), .YNaNM(YNaN), .ZNaNM(ZNaN), .CvtResDenormUfM(CvtResDenormUfE), .DivNegStickyM(DivNegSticky),
|
||||
.XZeroM(XZero), .YZeroM(YZero), .ZZeroM(ZZero), .CvtShiftAmtM(CvtShiftAmtE),
|
||||
.XInfM(XInf), .YInfM(YInf), .ZInfM(ZInf), .CvtResSgnM(CvtResSgnE), .FWriteIntM(WriteIntVal),
|
||||
.XSNaNM(XSNaN), .YSNaNM(YSNaN), .ZSNaNM(ZSNaN), .CvtLzcInM(CvtLzcInE), .IntZeroM(IntZeroE),
|
||||
.KillProdM(KillProdE), .AddendStickyM(AddendStickyE), .ProdExpM(ProdExpE),
|
||||
.SumM(SumE), .NegSumM(NegSumE), .InvZM(InvZE), .FmaNormCntM(FmaNormCntE), .ZSgnEffM(ZSgnEffE), .PSgnM(PSgnE), .FmtM(ModFmt), .FrmM(FrmVal),
|
||||
.SumM(SumE), .NegSumM(NegSumE), .InvZM(InvZE), .FmaNormCntM(FmaNormCntE), .EarlyTermShiftDiv2M(EarlyTermShiftDiv2), .ZSgnEffM(ZSgnEffE), .PSgnM(PSgnE), .FmtM(ModFmt), .FrmM(FrmVal),
|
||||
.PostProcFlgM(Flg), .PostProcResM(FpRes), .FCvtIntResM(IntRes));
|
||||
|
||||
fcvt fcvt (.XSgnE(XSgn), .XExpE(XExp), .XManE(XMan), .ForwardedSrcAE(SrcA), .FWriteIntE(WriteIntVal),
|
||||
@ -659,9 +661,9 @@ module testbenchfp;
|
||||
fcmp fcmp (.FmtE(ModFmt), .FOpCtrlE(OpCtrlVal), .XSgnE(XSgn), .YSgnE(YSgn), .XExpE(XExp), .YExpE(YExp),
|
||||
.XManE(XMan), .YManE(YMan), .XZeroE(XZero), .YZeroE(YZero), .CmpIntResE(CmpRes),
|
||||
.XNaNE(XNaN), .YNaNE(YNaN), .XSNaNE(XSNaN), .YSNaNE(YSNaN), .FSrcXE(X), .FSrcYE(Y), .CmpNVE(CmpFlg[4]), .CmpFpResE(FpCmpRes));
|
||||
srtradix4 srtradix4(.clk, .DivStart, .XExpE(XExp), .YExpE(YExp), .DivCalcExpE(DivCalcExp), .XZeroE(XZero),
|
||||
.XManE(XMan), .YManE(YMan), .SrcA('0), .SrcB('0), .W64(1'b0), .Signed(1'b0), .Int(1'b0), .Sqrt(OpCtrlVal[0]),
|
||||
.DivDone, .Quot, .Rem());
|
||||
srtradix4 srtradix4(.clk, .DivStart, .XExpE(XExp), .YExpE(YExp), .DivCalcExpE(DivCalcExp), .XZeroE(XZero), .YZeroE(YZero), .DivStickyE(DivSticky),
|
||||
.XManE(XMan), .YManE(YMan), .SrcA('0), .SrcB('0), .W64(1'b0), .Signed(1'b0), .Int(1'b0), .Sqrt(OpCtrlVal[0]), .XNaNE(XNaN), .YNaNE(YNaN),
|
||||
.XInfE(XInf), .YInfE(YInf), .DivNegStickyE(DivNegSticky), .EarlyTermShiftDiv2E(EarlyTermShiftDiv2), .DivDone, .Quot, .Rem());
|
||||
|
||||
assign CmpFlg[3:0] = 0;
|
||||
|
||||
@ -814,8 +816,9 @@ end
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// check if the non-fma test is correct
|
||||
if(~((Res === Ans | NaNGood | NaNGood === 1'bx) & (ResFlg === AnsFlg | AnsFlg === 5'bx))&(DivDone&(UnitVal == `DIVUNIT))&(UnitVal !== `CVTINTUNIT)&(UnitVal !== `CMPUNIT)) begin
|
||||
// check if result is correct
|
||||
// - wait until the division result is done, or one extra cycle for early termination (to simulate the E/M pipeline stage)
|
||||
if(~((Res === Ans | NaNGood | NaNGood === 1'bx) & (ResFlg === AnsFlg | AnsFlg === 5'bx))&((~DivStart&DivDone)^~(UnitVal == `DIVUNIT))&(UnitVal !== `CVTINTUNIT)&(UnitVal !== `CMPUNIT)) begin
|
||||
errors += 1;
|
||||
$display("There is an error in %s", Tests[TestNum]);
|
||||
$display("inputs: %h %h %h\nSrcA: %h\n Res: %h %h\n Ans: %h %h", X, Y, Z, SrcA, Res, ResFlg, Ans, AnsFlg);
|
||||
@ -838,7 +841,7 @@ end
|
||||
$stop;
|
||||
end
|
||||
|
||||
if(DivDone|(UnitVal != `DIVUNIT)) VectorNum += 1; // increment the vector
|
||||
if((~DivStart&DivDone)|(UnitVal != `DIVUNIT)) VectorNum += 1; // increment the vector
|
||||
|
||||
if (TestVectors[VectorNum][0] === 1'bx & Tests[TestNum] !== "") begin // if reached the end of file
|
||||
|
||||
|
@ -68,6 +68,7 @@ logic [3:0] dummy;
|
||||
integer ProgramAddrLabelArray [string] = '{ "begin_signature" : 0, "tohost" : 0 };
|
||||
|
||||
logic DCacheFlushDone, DCacheFlushStart;
|
||||
logic riscofTest;
|
||||
|
||||
flopenr #(`XLEN) PCWReg(clk, reset, ~dut.core.ieu.dp.StallW, dut.core.ifu.PCM, PCW);
|
||||
flopenr #(32) InstrWReg(clk, reset, ~dut.core.ieu.dp.StallW, dut.core.ifu.InstrM, InstrW);
|
||||
@ -174,6 +175,8 @@ logic [3:0] dummy;
|
||||
totalerrors = 0;
|
||||
testadr = 0;
|
||||
testadrNoBase = 0;
|
||||
// riscof tests have a different signature, tests[0] == "1" refers to RiscvArchTests and tests[0] == "2" refers to WallyRiscvArchTests
|
||||
riscofTest = tests[0] == "1"; // | tests[0] == "2";
|
||||
// fill memory with defined values to reduce Xs in simulation
|
||||
// Quick note the memory will need to be initialized. The C library does not
|
||||
// guarantee the initialized reads. For example a strcmp can read 6 byte
|
||||
@ -250,8 +253,7 @@ logic [3:0] dummy;
|
||||
for(i=0; i<SIGNATURESIZE; i=i+1) begin
|
||||
sig32[i] = 'bx;
|
||||
end
|
||||
// riscof tests have a different signature, tests[0] == "1" refers to RISCVARCHTESTs
|
||||
if (tests[0] == "1") signame = {pathname, tests[test], "erence-sail_c_simulator.signature"};
|
||||
if (riscofTest) signame = {pathname, tests[test], "erence-sail_c_simulator.signature"};
|
||||
else signame = {pathname, tests[test], ".signature.output"};
|
||||
// read signature, reformat in 64 bits if necessary
|
||||
$readmemh(signame, sig32);
|
||||
|
@ -33,8 +33,8 @@
|
||||
|
||||
string tvpaths[] = '{
|
||||
"../../addins/imperas-riscv-tests/work/",
|
||||
"../../tests/riscof/work/",
|
||||
"../../tests/wally-riscv-arch-test/work/",
|
||||
"../../tests/riscof/work/riscv-arch-test/",
|
||||
"../../tests/wally-riscv-arch-test/work/", //"../../tests/riscof/work/wally-riscv-arch-test/",
|
||||
"../../tests/imperas-riscv-tests/work/",
|
||||
"../../benchmarks/riscv-coremark/work/",
|
||||
"../../addins/embench-iot/"
|
||||
@ -1601,6 +1601,95 @@ string wally32i[] = '{
|
||||
|
||||
string wally32periph[] = '{
|
||||
`WALLYTEST,
|
||||
"rv32i_m/privilege/WALLY-gpio-01"
|
||||
"rv32i_m/privilege/WALLY-gpio-01",
|
||||
"rv32i_m/privilege/WALLY-clint-01"
|
||||
// "rv32i_m/privilege/WALLY-plic-01"
|
||||
// "rv32i_m/privilege/WALLY-uart-01"
|
||||
};
|
||||
|
||||
|
||||
// riscof test paths, to replace existing paths once riscof flow is working
|
||||
// string wally64a[] = '{
|
||||
// `WALLYTEST,
|
||||
// "rv64i_m/privilege/src/WALLY-amo.S/ref/Ref",
|
||||
// "rv64i_m/privilege/src/WALLY-lrsc.S/ref/Ref",
|
||||
// "rv64i_m/privilege/src/WALLY-status-fp-enabled-01.S/ref/Ref"
|
||||
// };
|
||||
|
||||
// string wally32a[] = '{
|
||||
// `WALLYTEST,
|
||||
// "rv32i_m/privilege/src/WALLY-amo.S/ref/Ref",
|
||||
// "rv32i_m/privilege/src/WALLY-lrsc.S/ref/Ref",
|
||||
// "rv32i_m/privilege/src/WALLY-status-fp-enabled-01.S/ref/Ref"
|
||||
|
||||
// };
|
||||
|
||||
// string wally64i[] = '{
|
||||
// `WALLYTEST,
|
||||
// "rv64i_m/I/src/WALLY-ADD.S/ref/Ref",
|
||||
// "rv64i_m/I/src/WALLY-SLT.S/ref/Ref",
|
||||
// "rv64i_m/I/src/WALLY-SLTU.S/ref/Ref",
|
||||
// "rv64i_m/I/src/WALLY-SUB.S/ref/Ref",
|
||||
// "rv64i_m/I/src/WALLY-XOR.S/ref/Ref"
|
||||
// };
|
||||
|
||||
// string wally64priv[] = '{
|
||||
// `WALLYTEST,
|
||||
// "rv64i_m/privilege/src/WALLY-csr-permission-s-01.S/ref/Ref",
|
||||
// "rv64i_m/privilege/src/WALLY-csr-permission-u-01.S/ref/Ref",
|
||||
// "rv64i_m/privilege/src/WALLY-mie-01.S/ref/Ref",
|
||||
// "rv64i_m/privilege/src/WALLY-minfo-01.S/ref/Ref",
|
||||
// "rv64i_m/privilege/src/WALLY-misa-01.S/ref/Ref",
|
||||
// "rv64i_m/privilege/src/WALLY-mmu-sv39.S/ref/Ref",
|
||||
// "rv64i_m/privilege/src/WALLY-mmu-sv48.S/ref/Ref",
|
||||
// "rv64i_m/privilege/src/WALLY-mtvec-01.S/ref/Ref",
|
||||
// "rv64i_m/privilege/src/WALLY-pma.S/ref/Ref",
|
||||
// "rv64i_m/privilege/src/WALLY-pmp.S/ref/Ref",
|
||||
// "rv64i_m/privilege/src/WALLY-sie-01.S/ref/Ref",
|
||||
// "rv64i_m/privilege/src/WALLY-status-mie-01.S/ref/Ref",
|
||||
// "rv64i_m/privilege/src/WALLY-status-sie-01.S/ref/Ref",
|
||||
// "rv64i_m/privilege/src/WALLY-status-tw-01.S/ref/Ref",
|
||||
// "rv64i_m/privilege/src/WALLY-stvec-01.S/ref/Ref",
|
||||
// "rv64i_m/privilege/src/WALLY-trap-01.S/ref/Ref",
|
||||
// "rv64i_m/privilege/src/WALLY-trap-s-01.S/ref/Ref",
|
||||
// "rv64i_m/privilege/src/WALLY-trap-sret-01.S/ref/Ref",
|
||||
// "rv64i_m/privilege/src/WALLY-trap-u-01.S/ref/Ref",
|
||||
// "rv64i_m/privilege/src/WALLY-wfi-01.S/ref/Ref"
|
||||
// };
|
||||
|
||||
// string wally64periph[] = '{
|
||||
// `WALLYTEST,
|
||||
// "rv64i_m/privilege/src/WALLY-periph.S/ref/Ref"
|
||||
// };
|
||||
|
||||
// string wally32i[] = '{
|
||||
// `WALLYTEST,
|
||||
// "rv32i_m/I/src/WALLY-ADD.S/ref/Ref",
|
||||
// "rv32i_m/I/src/WALLY-SLT.S/ref/Ref",
|
||||
// "rv32i_m/I/src/WALLY-SLTU.S/ref/Ref",
|
||||
// "rv32i_m/I/src/WALLY-SUB.S/ref/Ref",
|
||||
// "rv32i_m/I/src/WALLY-XOR.S/ref/Ref"
|
||||
// };
|
||||
|
||||
// string wally32priv[] = '{
|
||||
// `WALLYTEST,
|
||||
// "rv32i_m/privilege/src/WALLY-csr-permission-s-01.S/ref/Ref",
|
||||
// "rv32i_m/privilege/src/WALLY-csr-permission-u-01.S/ref/Ref",
|
||||
// "rv32i_m/privilege/src/WALLY-mie-01.S/ref/Ref",
|
||||
// "rv32i_m/privilege/src/WALLY-minfo-01.S/ref/Ref",
|
||||
// "rv32i_m/privilege/src/WALLY-misa-01.S/ref/Ref",
|
||||
// "rv32i_m/privilege/src/WALLY-mmu-sv32.S/ref/Ref",
|
||||
// "rv32i_m/privilege/src/WALLY-mtvec-01.S/ref/Ref",
|
||||
// "rv32i_m/privilege/src/WALLY-pma.S/ref/Ref",
|
||||
// "rv32i_m/privilege/src/WALLY-pmp.S/ref/Ref",
|
||||
// "rv32i_m/privilege/src/WALLY-sie-01.S/ref/Ref",
|
||||
// "rv32i_m/privilege/src/WALLY-status-mie-01.S/ref/Ref",
|
||||
// "rv32i_m/privilege/src/WALLY-status-sie-01.S/ref/Ref",
|
||||
// "rv32i_m/privilege/src/WALLY-status-tw-01.S/ref/Ref",
|
||||
// "rv32i_m/privilege/src/WALLY-stvec-01.S/ref/Ref",
|
||||
// "rv32i_m/privilege/src/WALLY-trap-01.S/ref/Ref",
|
||||
// "rv32i_m/privilege/src/WALLY-trap-s-01.S/ref/Ref",
|
||||
// "rv32i_m/privilege/src/WALLY-trap-sret-01.S/ref/Ref",
|
||||
// "rv32i_m/privilege/src/WALLY-trap-u-01.S/ref/Ref",
|
||||
// "rv32i_m/privilege/src/WALLY-wfi-01.S/ref/Ref"
|
||||
// };
|
||||
|
@ -5,8 +5,8 @@ NAME := synth
|
||||
|
||||
# defaults
|
||||
export DESIGN ?= wallypipelinedcore
|
||||
export FREQ ?= 4000
|
||||
export CONFIG ?= rv64gc
|
||||
export FREQ ?= 3402
|
||||
export CONFIG ?= rv32e
|
||||
# sky130 and sky90 presently supported
|
||||
export TECH ?= tsmc28
|
||||
# MAXCORES allows parallel compilation, which is faster but less CPU-efficient
|
||||
@ -126,6 +126,8 @@ clean:
|
||||
rm -f command.log
|
||||
rm -f filenames*.log
|
||||
rm -f power.saif
|
||||
rm -f Synopsys_stack_trace_*.txt
|
||||
rm -f crte_*.txt
|
||||
|
||||
|
||||
|
||||
|
@ -7,6 +7,7 @@ import subprocess
|
||||
from matplotlib.cbook import flatten
|
||||
import matplotlib.pyplot as plt
|
||||
import matplotlib.lines as lines
|
||||
from wallySynth import testFreq
|
||||
|
||||
|
||||
def synthsintocsv():
|
||||
@ -26,7 +27,7 @@ def synthsintocsv():
|
||||
writer.writerow(['Width', 'Config', 'Special', 'Tech', 'Target Freq', 'Delay', 'Area'])
|
||||
|
||||
for oneSynth in allSynths:
|
||||
descrip = specReg.findall(oneSynth)
|
||||
descrip = specReg.findall(oneSynth) #[30:]
|
||||
width = descrip[2][:4]
|
||||
config = descrip[2][4:]
|
||||
if descrip[3][-2:] == 'nm':
|
||||
@ -46,7 +47,7 @@ def synthsintocsv():
|
||||
nums = [float(m) for m in nums]
|
||||
metrics += nums
|
||||
except:
|
||||
print(config + tech + freq + " doesn't have reports")
|
||||
print(width + config + tech + '_' + freq + " doesn't have reports")
|
||||
if metrics == []:
|
||||
pass
|
||||
else:
|
||||
@ -56,7 +57,7 @@ def synthsintocsv():
|
||||
file.close()
|
||||
|
||||
def synthsfromcsv(filename):
|
||||
Synth = namedtuple("Synth", " width config special tech freq delay area")
|
||||
Synth = namedtuple("Synth", "width config special tech freq delay area")
|
||||
with open(filename, newline='') as csvfile:
|
||||
csvreader = csv.reader(csvfile)
|
||||
global allSynths
|
||||
@ -110,23 +111,26 @@ def freqPlot(tech, width, config):
|
||||
plt.savefig('./plots/wally/freqSweep_' + tech + '_' + width + config + '.png')
|
||||
# plt.show()
|
||||
|
||||
def areaDelay(width, tech, freq, config=None, special=None):
|
||||
def areaDelay(tech, freq, width=None, config=None, special=None):
|
||||
delays, areas, labels = ([] for i in range(3))
|
||||
|
||||
for oneSynth in allSynths:
|
||||
if (width == oneSynth.width) & (tech == oneSynth.tech) & (freq == oneSynth.freq):
|
||||
if (width==None) or (width == oneSynth.width):
|
||||
if (tech == oneSynth.tech) & (freq == oneSynth.freq):
|
||||
if (special != None) & (oneSynth.special == special):
|
||||
delays += [oneSynth.delay]
|
||||
areas += [oneSynth.area]
|
||||
labels += [oneSynth.config]
|
||||
labels += [oneSynth.width + oneSynth.config]
|
||||
elif (config != None) & (oneSynth.config == config):
|
||||
delays += [oneSynth.delay]
|
||||
areas += [oneSynth.area]
|
||||
labels += [oneSynth.special]
|
||||
else:
|
||||
delays += [oneSynth.delay]
|
||||
areas += [oneSynth.area]
|
||||
labels += [oneSynth.config + '_' + oneSynth.special]
|
||||
# else:
|
||||
# delays += [oneSynth.delay]
|
||||
# areas += [oneSynth.area]
|
||||
# labels += [oneSynth.config + '_' + oneSynth.special]
|
||||
if width == None:
|
||||
width = ''
|
||||
|
||||
f, (ax1) = plt.subplots(1, 1)
|
||||
plt.scatter(delays, areas)
|
||||
@ -154,8 +158,11 @@ def areaDelay(width, tech, freq, config=None, special=None):
|
||||
# ending freq in 42 means fpu was turned off manually
|
||||
|
||||
if __name__ == '__main__':
|
||||
synthsintocsv()
|
||||
# synthsintocsv()
|
||||
synthsfromcsv('Summary.csv')
|
||||
freqPlot('tsmc28', 'rv64', 'gc')
|
||||
areaDelay('rv32', 'tsmc28', 4200, config='gc')
|
||||
areaDelay('rv32', 'tsmc28', 3042, special='')
|
||||
freqPlot('tsmc28', 'rv32', 'e')
|
||||
freqPlot('sky90', 'rv32', 'e')
|
||||
areaDelay('tsmc28', testFreq[1], width= 'rv64', config='gc')
|
||||
areaDelay('tsmc28', testFreq[1], special='')
|
||||
areaDelay('sky90', testFreq[0], width='rv64', config='gc')
|
||||
areaDelay('sky90', testFreq[0], special='')
|
@ -1,5 +1,6 @@
|
||||
#!/usr/bin/bash
|
||||
|
||||
make clean
|
||||
mv runs runArchive/$(date +"%Y_%m_%d_%I_%M_%p")
|
||||
mv newRuns runs
|
||||
mkdir newRuns
|
||||
|
@ -8,20 +8,22 @@ def runCommand(config, tech, freq):
|
||||
command = "make synth DESIGN=wallypipelinedcore CONFIG={} TECH={} DRIVE=FLOP FREQ={} MAXOPT=0 MAXCORES=1".format(config, tech, freq)
|
||||
subprocess.Popen(command, shell=True)
|
||||
|
||||
testFreq = [3000, 10000]
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
techs = ['sky90', 'tsmc28']
|
||||
bestAchieved = [750, 3000]
|
||||
sweepCenter = [870, 3000]
|
||||
synthsToRun = []
|
||||
|
||||
|
||||
arr = [-8, -6, -4, -2, 0, 2, 4, 6, 8]
|
||||
for i in [0, 1]:
|
||||
tech = techs[i]
|
||||
f = bestAchieved[i]
|
||||
for freq in [round(f+f*x/100) for x in arr]: # rv32e freq sweep
|
||||
sc = sweepCenter[i]
|
||||
f = testFreq[i]
|
||||
for freq in [round(sc+sc*x/100) for x in arr]: # rv32e freq sweep
|
||||
synthsToRun += [['rv32e', tech, freq]]
|
||||
for config in ['rv32gc', 'rv32ic', 'rv64gc', 'rv64i', 'rv64ic']: # configs
|
||||
for config in ['rv32gc', 'rv32ic', 'rv64gc', 'rv64i', 'rv64ic', 'rv32e']: # configs
|
||||
synthsToRun += [[config, tech, f]]
|
||||
for mod in ['FPUoff', 'noMulDiv', 'noPriv', 'PMP0', 'PMP16']: # rv64gc path variations
|
||||
config = 'rv64gc_' + mod
|
||||
|
@ -1,20 +1,40 @@
|
||||
arch_dir = ../../addins/riscv-arch-test
|
||||
wally_dir = ../wally-riscv-arch-test
|
||||
work_dir = ./riscof_work
|
||||
work = ./work
|
||||
arch_workdir = $(work)/riscv-arch-test
|
||||
wally_workdir = $(work)/wally-riscv-arch-test
|
||||
|
||||
current_dir = $(shell pwd)
|
||||
XLEN ?= 64
|
||||
|
||||
all: build
|
||||
all: root build_arch # build_wally memfile
|
||||
|
||||
build:
|
||||
root:
|
||||
mkdir -p $(work_dir)
|
||||
mkdir -p work
|
||||
mkdir -p $(work)
|
||||
mkdir -p $(arch_workdir)
|
||||
mkdir -p $(wally_workdir)
|
||||
sed 's,{0},$(current_dir),g;s,{1},$(XLEN)$(if $(findstring 64,$(XLEN)),gc,imc),g' config.ini > config$(XLEN).ini
|
||||
|
||||
build_arch:
|
||||
riscof run --work-dir=$(work_dir) --config=config$(XLEN).ini --suite=$(arch_dir)/riscv-test-suite/ --env=$(arch_dir)/riscv-test-suite/env --no-browser
|
||||
rm -rf work/rv$(XLEN)i_m
|
||||
mv -f $(work_dir)/rv$(XLEN)i_m work/
|
||||
rm -rf $(arch_workdir)/rv$(XLEN)i_m
|
||||
mv -f $(work_dir)/rv$(XLEN)i_m $(arch_workdir)/
|
||||
|
||||
build_wally:
|
||||
riscof --verbose debug run --work-dir=$(work_dir) --config=config$(XLEN).ini --suite=$(wally_dir)/riscv-test-suite/ --env=$(wally_dir)/riscv-test-suite/env --no-browser --no-dut-run
|
||||
rm -rf $(wally_workdir)/rv$(XLEN)i_m
|
||||
mv -f $(work_dir)/rv$(XLEN)i_m $(wally_workdir)/
|
||||
|
||||
memfile:
|
||||
find $(work) -type f -name "*.elf" | grep "rv64i_m" | while read f; do riscv64-unknown-elf-elf2hex --bit-width 64 --input "$$f" --output "$$f.memfile"; done
|
||||
find $(work) -type f -name "*.elf" | grep "rv32i_m" | while read f; do riscv64-unknown-elf-elf2hex --bit-width 32 --input "$$f" --output "$$f.memfile"; done
|
||||
find $(work) -type f -name "*.elf.objdump" | while read f; do extractFunctionRadix.sh $$f; done
|
||||
|
||||
clean:
|
||||
rm -f config64.ini
|
||||
rm -f config32.ini
|
||||
rm -rf $(work_dir)
|
||||
rm -rf work
|
||||
rm -rf $(wally_workdir)
|
||||
rm -rf $(arch_workdir)
|
@ -101,7 +101,7 @@ class sail_cSim(pluginTemplate):
|
||||
execute += self.objdump_cmd.format(elf, self.xlen, 'Ref.elf.objdump')
|
||||
sig_file = os.path.join(test_dir, self.name[:-1] + ".signature")
|
||||
|
||||
execute += self.sail_exe[self.xlen] + ' --test-signature={0} {1} > {2}.log 2>&1;'.format(sig_file, elf, test_name)
|
||||
execute += self.sail_exe[self.xlen] + ' -z268435455 --test-signature={0} {1} > {2}.log 2>&1;'.format(sig_file, elf, test_name)
|
||||
|
||||
cov_str = ' '
|
||||
for label in testentry['coverage_labels']:
|
||||
|
@ -1,11 +1,11 @@
|
||||
hart_ids: [0]
|
||||
hart0:
|
||||
ISA: RV32IMFCZicsr_Zifencei
|
||||
ISA: RV32IMAFCZicsr_Zifencei
|
||||
physical_addr_sz: 32
|
||||
User_Spec_Version: '2.3'
|
||||
supported_xlen: [32]
|
||||
misa:
|
||||
reset-val: 0x40001124
|
||||
reset-val: 0x40001125
|
||||
rv32:
|
||||
accessible: true
|
||||
mxl:
|
||||
@ -23,7 +23,6 @@ hart0:
|
||||
warl:
|
||||
dependency_fields: []
|
||||
legal:
|
||||
- extensions[25:0] bitmask [0x0001124, 0x0000000]
|
||||
- extensions[25:0] bitmask [0x0001125, 0x0000000]
|
||||
wr_illegal:
|
||||
- Unchanged
|
||||
|
||||
|
@ -54,6 +54,7 @@ target_tests_nosim = \
|
||||
WALLY-status-sie-01 \
|
||||
WALLY-status-tw-01 \
|
||||
WALLY-gpio-01 \
|
||||
WALLY-clint-01 \
|
||||
|
||||
|
||||
rv32i_tests = $(addsuffix .elf, $(rv32i_sc_tests))
|
||||
|
@ -0,0 +1,9 @@
|
||||
00000000 # msip zero on reset
|
||||
00000000 # mip is zero
|
||||
00000008 # mip msip bit is set
|
||||
00000000 # mip msip bit is reset
|
||||
00000000 # mip mtip bit is reset
|
||||
FFFFFFFF # mtimecmp is same as written value
|
||||
A5A5A5A5 # mtimecmph is same as written value
|
||||
00000000 # mip mtip is zero
|
||||
00000080 # mip mtip is set
|
@ -1,5 +1,18 @@
|
||||
00000000 # test reset to zero
|
||||
00000000
|
||||
00000000 # output_en
|
||||
00000000 # output_val
|
||||
00000000 # rise_ie
|
||||
00000000 # rise_ip
|
||||
00000000 # fall_ie
|
||||
00000000 # fall_ip
|
||||
00000000 # high_ie
|
||||
00000000 # high_ip
|
||||
00000000 # low_ie
|
||||
ffffffff # low_ip
|
||||
00000000 # iof_en
|
||||
00000000 # iof_sel
|
||||
00000000 # out_xor
|
||||
A5A5A5A5 # test output pins
|
||||
5A5AFFFF
|
||||
00000000 # test input enables
|
||||
|
@ -0,0 +1,103 @@
|
||||
///////////////////////////////////////////
|
||||
//
|
||||
// WALLY-clint
|
||||
//
|
||||
// Author: David_Harris@hmc.edu and Nicholas Lucio <nlucio@hmc.edu>
|
||||
//
|
||||
// Created 2022-06-16
|
||||
//
|
||||
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
|
||||
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
|
||||
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
|
||||
// is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
|
||||
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
///////////////////////////////////////////
|
||||
|
||||
#include "WALLY-TEST-LIB-32.h"
|
||||
|
||||
INIT_TESTS
|
||||
|
||||
TRAP_HANDLER m
|
||||
|
||||
j run_test_loop // begin test loop/table tests instead of executing inline code.
|
||||
|
||||
INIT_TEST_TABLE
|
||||
|
||||
END_TESTS
|
||||
|
||||
TEST_STACK_AND_DATA
|
||||
|
||||
.align 2
|
||||
test_cases:
|
||||
# ---------------------------------------------------------------------------------------------
|
||||
# Test Contents
|
||||
#
|
||||
# Here is where the actual tests are held, or rather, what the actual tests do.
|
||||
# each entry consists of 3 values that will be read in as follows:
|
||||
#
|
||||
# '.4byte [x28 Value], [x29 Value], [x30 value]'
|
||||
# or
|
||||
# '.4byte [address], [value], [test type]'
|
||||
#
|
||||
# The encoding for x30 test type values can be found in the test handler in the framework file
|
||||
#
|
||||
# ---------------------------------------------------------------------------------------------
|
||||
|
||||
# =========== Define CLINT registers ===========
|
||||
|
||||
.equ CLINT, 0x02000000
|
||||
.equ msip, (CLINT+0x00)
|
||||
.equ mtimecmp, (CLINT+0x4000) # doesn't necessarily reset to zero
|
||||
.equ mtimecmph,(CLINT+0x4004)
|
||||
.equ mtime, (CLINT+0xBFF8) # resets to zero but cannot be easily tested
|
||||
.equ mtimeh, (CLINT+0xBFFC)
|
||||
|
||||
# =========== Verify verifiable registers reset to zero ===========
|
||||
|
||||
.4byte msip, 0x00000000, read32_test # msip reset to zero
|
||||
|
||||
# =========== msip tests ===========
|
||||
|
||||
.4byte msip, 0xFFFFFFFE, write32_test # write to invalid bits of msip
|
||||
.4byte 0x0, 0x00000000, readmip_test # msip bit should be zero
|
||||
.4byte msip, 0x00000001, write32_test # set msip to one
|
||||
.4byte 0x0, 0x00000008, readmip_test # msip bit is set
|
||||
.4byte msip, 0x00000000, write32_test # set msip to zero
|
||||
.4byte 0x0, 0x00000000, readmip_test # msip bit is released
|
||||
|
||||
# =========== mtime write tests ===========
|
||||
|
||||
.4byte mtime, 0x00000000, write32_test # test we can write to mtime
|
||||
.4byte mtimeh, 0x00000000, write32_test # test we can write to mtimeh
|
||||
.4byte 0x0,0x00000000, readmip_test # mtip bit should be zero
|
||||
|
||||
# =========== mtimecmp tests ===========
|
||||
|
||||
.4byte mtimecmp, 0xFFFFFFFF, write32_test # verify mtimecmp is writable
|
||||
.4byte mtimecmph, 0xA5A5A5A5, write32_test # verify mtimecmph is writable
|
||||
.4byte mtimecmp, 0xFFFFFFFF, read32_test # read back value written to mtimecmp
|
||||
.4byte mtimecmph, 0xA5A5A5A5, read32_test # read back value written to mtimecmph
|
||||
.4byte mtime, 0xFFFFFFFF, write32_test # write to mtime
|
||||
.4byte 0x0, 0x00000000, readmip_test # mtip should still be zero
|
||||
.4byte mtimeh, 0xA5A5A5A6, write32_test # cause mtip to go high by making mtime > mtimecmp
|
||||
.4byte 0x0, 0x00000080, readmip_test # mtip should be set
|
||||
|
||||
.4byte 0x0, 0x0, terminate_test # terminate tests
|
||||
|
||||
# =========== Experimental mtime counting test ===========
|
||||
|
||||
# .4byte mtimecmph, 0xFFFFFFFF, write32_test # make sure mtip isn't set until ready
|
||||
# .4byte mtimeh, 0x0FFFFFFF, write32_test # write near max value to mtimeh
|
||||
# .4byte mtime, 0x00000000, write32_test # write small value to mtime
|
||||
# .4byte 0x0, 0x000000000, readmip_test # mtip should be zero
|
||||
# .4byte mtimecmp, 0x00000001, write32_test # write slightly larger value than mtime to test mtime counting
|
||||
# .4byte mtimecmph, 0x0FFFFFFF, write32_test # write same value as mtimeh to test mtime counting
|
||||
# .4byte 0x0, 0x00000080, readmip_test # mtip should be set since it has been at least two cycles
|
@ -72,7 +72,19 @@ test_cases:
|
||||
|
||||
.4byte input_val, 0x00000000, read32_test # input_val reset to zero
|
||||
.4byte input_en, 0x00000000, read32_test # input_en reset to zero
|
||||
# *** add more
|
||||
.4byte output_en, 0x00000000, read32_test # output_en reset to zero
|
||||
.4byte output_val, 0x00000000, read32_test # output_val reset to zero
|
||||
.4byte rise_ie, 0x00000000, read32_test # rise_ie reset to zero
|
||||
.4byte rise_ip, 0x00000000, read32_test # rise_ip reset to zero
|
||||
.4byte fall_ie, 0x00000000, read32_test # fall_ie reset to zero
|
||||
.4byte fall_ip, 0xffffffff, read32_test # fall_ip reset to ones (input_val is zero)
|
||||
.4byte high_ie, 0x00000000, read32_test # high_ie reset to zero
|
||||
.4byte high_ip, 0x00000000, read32_test # high_ip reset to zero
|
||||
.4byte low_ie, 0x00000000, read32_test # low_ie reset to zero
|
||||
.4byte low_ip, 0x00000000, read32_test # low_ip reset to zero
|
||||
.4byte iof_en, 0x00000000, read32_test # iof_en reset to zero
|
||||
.4byte iof_sel, 0x00000000, read32_test # iof_sel reset to zero
|
||||
.4byte out_xor, 0x00000000, read32_test # out_xor reset to zero
|
||||
|
||||
# =========== Test output and input pins ===========
|
||||
|
||||
|
@ -857,6 +857,27 @@ trap_handler_end_\MODE\(): // place to jump to so we can skip the trap handler a
|
||||
addi a6, a6, 8
|
||||
.endm
|
||||
|
||||
.macro SETUP_PLIC
|
||||
# Setup PLIC with a series of register writes
|
||||
|
||||
.equ PLIC_INTPRI_GPIO, 0x0C00000C # GPIO is interrupt 3
|
||||
.equ PLIC_INTPRI_UART, 0x0C000028 # UART is interrupt 10
|
||||
.equ PLIC_INTPENDING0, 0x0C001000 # intPending0 register
|
||||
.equ PLIC_INTEN00, 0x0C002000 # interrupt enables for context 0 (machine mode) sources 31:1
|
||||
.equ PLIC_INTEN10, 0x0C002080 # interrupt enables for context 1 (supervisor mode) sources 31:1
|
||||
.equ PLIC_THRESH0, 0x0C200000 # Priority threshold for context 0 (machine mode)
|
||||
.equ PLIC_CLAIM0, 0x0C200004 # Claim/Complete register for context 0
|
||||
.equ PLIC_THRESH1, 0x0C201000 # Priority threshold for context 1 (supervisor mode)
|
||||
.equ PLIC_CLAIM1, 0x0C201004 # Claim/Complete register for context 1
|
||||
|
||||
.4byte PLIC_THRESH0, 0, write32_test # Set PLIC machine mode interrupt threshold to 0 to accept all interrupts
|
||||
.4byte PLIC_THRESH1, 7, write32_test # Set PLIC supervisor mode interrupt threshold to 7 to accept no interrupts
|
||||
.4byte PLIC_INTPRI_GPIO, 7, write32_test # Set GPIO to high priority
|
||||
.4byte PLIC_INTPRI_UART, 7, write32_test # Set UART to high priority
|
||||
.4byte PLIC_INTEN00, 0xFFFFFFFF, write32_test # Enable all interrupt sources for machine mode
|
||||
.4byte PLIC_INTEN10, 0x00000000, write32_test # Disable all interrupt sources for supervisor mode
|
||||
.endm
|
||||
|
||||
.macro END_TESTS
|
||||
// invokes one final ecall to return to machine mode then terminates this program, so the output is
|
||||
// 0x8: termination called from U mode
|
||||
@ -984,6 +1005,20 @@ read08_test:
|
||||
addi a6, a6, 8
|
||||
j test_loop // go to next test case
|
||||
|
||||
readmip_test: // read the MIP into the signature
|
||||
csrr t2, mip // t2 = current value of the mip CSR
|
||||
sw t2, 0(t1) // store that word at the address held in t1 (presumably the signature write pointer -- TODO confirm)
|
||||
addi t1, t1, 4 // advance t1 past the 4-byte word just written
|
||||
addi a6, a6, 4 // advance a6 by the same 4 bytes (NOTE(review): a6 looks like a second cursor kept in step with t1 -- confirm)
|
||||
j test_loop // go to next test case
|
||||
|
||||
readsip_test: // read the SIP into the signature
|
||||
csrr t2, sip // t2 = current value of the sip CSR
|
||||
sw t2, 0(t1) // store that word at the address held in t1 (presumably the signature write pointer -- TODO confirm)
|
||||
addi t1, t1, 4 // advance t1 past the 4-byte word just written
|
||||
addi a6, a6, 4 // advance a6 by the same 4 bytes (NOTE(review): a6 looks like a second cursor kept in step with t1 -- confirm)
|
||||
j test_loop // go to next test case
|
||||
|
||||
goto_s_mode:
|
||||
// return to address in t3,
|
||||
li a0, 3 // Trap handler behavior (go to supervisor mode)
|
||||
|
Loading…
Reference in New Issue
Block a user