This commit is contained in:
David Harris 2022-07-02 19:37:14 +00:00
commit bde1c5eb1b
41 changed files with 801 additions and 336 deletions

View File

@ -94,12 +94,12 @@
`define BIAS2 ((`F_SUPPORTED & (`LEN1 != `S_LEN)) ? `S_BIAS : `H_BIAS)
// largest length in IEU/FPU
`define CVTLEN ((`NF<`XLEN) ? `XLEN : `NF)
`define LLEN ((`FLEN<`XLEN) ? `XLEN : `FLEN)
`define CVTLEN ((`NF<`XLEN) ? (`XLEN) : (`NF))
`define DIVLEN ((`NF < `XLEN) ? (`XLEN) : (`NF))
`define LLEN ((`FLEN<`XLEN) ? (`XLEN) : (`FLEN))
`define LOGCVTLEN $unsigned($clog2(`CVTLEN+1))
`define NORMSHIFTSZ ((`CVTLEN+`NF) > (3*`NF+8) ? (`CVTLEN+`NF+1) : (3*`NF+9))
`define CORRSHIFTSZ ((`CVTLEN+`NF) > (3*`NF+8) ? (`CVTLEN+`NF+1) : (3*`NF+6))
`define DIVLEN ((`NF < `XLEN) ? `XLEN : `NF)
`define NORMSHIFTSZ ((`DIVLEN+`NF+3) > (3*`NF+8) ? (`DIVLEN+`NF+3) : (3*`NF+9))
`define CORRSHIFTSZ ((`DIVLEN+`NF+3) > (3*`NF+8) ? (`DIVLEN+`NF+3) : (3*`NF+6))
// Disable spurious Verilator warnings

View File

@ -44,16 +44,18 @@ configs = [
grepstr="All lints run with no errors or warnings"
)
]
def getBuildrootTC(short):
def getBuildrootTC(boot):
INSTR_LIMIT = 4000000 # multiple of 100000; 4M is interesting because it gets into the kernel and enabling VM
MAX_EXPECTED = 246000000 # *** TODO: replace this with a search for the login prompt.
if short:
if boot:
name="buildrootboot"
BRcmd="vsim > {} -c <<!\ndo wally-pipelined.do buildroot buildroot-no-trace $RISCV 0 1 0\n!"
BRgrepstr="WallyHostname login:"
else:
name="buildroot"
BRcmd="vsim > {} -c <<!\ndo wally-pipelined-batch.do buildroot buildroot $RISCV "+str(INSTR_LIMIT)+" 1 0\n!"
BRgrepstr=str(INSTR_LIMIT)+" instructions"
else:
BRcmd="vsim > {} -c <<!\ndo wally-pipelined-batch.do buildroot buildroot $RISCV 0 1 0\n!"
BRgrepstr=str(MAX_EXPECTED)+" instructions"
return TestCase(name="buildroot",variant="rv64gc",cmd=BRcmd,grepstr=BRgrepstr)
return TestCase(name,variant="rv64gc",cmd=BRcmd,grepstr=BRgrepstr)
tc = TestCase(
name="buildroot-checkpoint",
@ -136,14 +138,14 @@ def main():
os.system('./make-tests.sh | tee ./logs/make-tests.log')
if '-all' in sys.argv:
TIMEOUT_DUR = 30*3600 # seconds
configs.append(getBuildrootTC(short=False))
TIMEOUT_DUR = 30*7200 # seconds
configs.append(getBuildrootTC(boot=True))
elif '-buildroot' in sys.argv:
TIMEOUT_DUR = 30*3600 # seconds
configs=[getBuildrootTC(short=False)]
TIMEOUT_DUR = 30*7200 # seconds
configs=[getBuildrootTC(boot=True)]
else:
TIMEOUT_DUR = 10*60 # seconds
configs.append(getBuildrootTC(short=True))
configs.append(getBuildrootTC(boot=False))
# Scale the number of concurrent processes to the number of test cases, but
# max out at a limited number of concurrent processes to not overwhelm the system

View File

@ -7,4 +7,4 @@
# sqrt - test square root
# all - test everything
vsim -c -do "do testfloat.do rv64fpquad all"
vsim -c -do "do testfloat.do rv64fpquad $1"

View File

@ -22,7 +22,7 @@ add wave -group {Divide} -noupdate /testbenchfp/srtradix4/*
add wave -group {Divide} -noupdate /testbenchfp/srtradix4/qsel4/*
add wave -group {Divide} -noupdate /testbenchfp/srtradix4/otfc4/*
add wave -group {Divide} -noupdate /testbenchfp/srtradix4/preproc/*
add wave -group {Divide} -noupdate /testbenchfp/srtradix4/divcounter/*
add wave -group {Divide} -noupdate /testbenchfp/srtradix4/earlytermination/*
add wave -group {Divide} -noupdate /testbenchfp/srtradix4/expcalc/*
add wave -group {Testbench} -noupdate /testbenchfp/*
add wave -group {Testbench} -noupdate /testbenchfp/readvectors/*

View File

@ -43,6 +43,9 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGWPL, WORDLEN, MUXINTER
input logic [`PA_BITS-1:0] PAdr, // physical address
input logic [(`XLEN-1)/8:0] ByteMask,
input logic [`XLEN-1:0] FinalWriteData,
input logic [`FLEN-1:0] FWriteDataM,
input logic FLoad2,
input logic FpLoadStoreM,
output logic CacheCommitted,
output logic CacheStall,
// to performance counters to cpu
@ -120,7 +123,7 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGWPL, WORDLEN, MUXINTER
// Array of cache ways, along with victim, hit, dirty, and read merging logic
cacheway #(NUMLINES, LINELEN, TAGLEN, OFFSETLEN, SETLEN)
CacheWays[NUMWAYS-1:0](.clk, .reset, .RAdr, .PAdr, .CacheWriteData, .ByteMask,
CacheWays[NUMWAYS-1:0](.clk, .reset, .RAdr, .PAdr, .CacheWriteData, .ByteMask, .FLoad2,
.SetValidWay, .ClearValidWay, .SetDirtyWay, .ClearDirtyWay, .SelEvict, .VictimWay,
.FlushWay, .SelFlush, .ReadDataLineWay, .HitWay, .VictimDirtyWay, .VictimTagWay,
.Invalidate(InvalidateCacheM));
@ -159,8 +162,12 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGWPL, WORDLEN, MUXINTER
/////////////////////////////////////////////////////////////////////////////////////////////
// Write Path: Write data and address. Muxes between writes from bus and writes from CPU.
/////////////////////////////////////////////////////////////////////////////////////////////
mux2 #(LINELEN) WriteDataMux(.d0({WORDSPERLINE{FinalWriteData}}),
.d1(CacheBusWriteData), .s(SetValid), .y(CacheWriteData));
if (`LLEN>`XLEN)
mux3 #(LINELEN) WriteDataMux(.d0({WORDSPERLINE{FinalWriteData}}),
.d1({WORDSPERLINE/2{FWriteDataM}}), .d2(CacheBusWriteData), .s({SetValid,FpLoadStoreM&~SetValid}), .y(CacheWriteData));
else
mux2 #(LINELEN) WriteDataMux(.d0({WORDSPERLINE{FinalWriteData}}),
.d1(CacheBusWriteData), .s(SetValid), .y(CacheWriteData));
mux3 #(`PA_BITS) CacheBusAdrMux(.d0({PAdr[`PA_BITS-1:OFFSETLEN], {{OFFSETLEN}{1'b0}}}),
.d1({VictimTag, PAdr[SETTOP-1:OFFSETLEN], {{OFFSETLEN}{1'b0}}}),
.d2({VictimTag, FlushAdr, {{OFFSETLEN}{1'b0}}}),

View File

@ -38,6 +38,7 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26,
input logic [$clog2(NUMLINES)-1:0] RAdr,
input logic [`PA_BITS-1:0] PAdr,
input logic [LINELEN-1:0] CacheWriteData,
input logic FLoad2,
input logic SetValidWay,
input logic ClearValidWay,
input logic SetDirtyWay,
@ -74,8 +75,14 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26,
/////////////////////////////////////////////////////////////////////////////////////////////
// Write Enable demux
/////////////////////////////////////////////////////////////////////////////////////////////
onehotdecoder #(LOGWPL) adrdec(
.bin(PAdr[LOGWPL+LOGXLENBYTES-1:LOGXLENBYTES]), .decoded(MemPAdrDecoded));
if(`LLEN>`XLEN)begin
logic [2**LOGWPL-1:0] MemPAdrDecodedtmp;
onehotdecoder #(LOGWPL) adrdec(
.bin(PAdr[LOGWPL+LOGXLENBYTES-1:LOGXLENBYTES]), .decoded(MemPAdrDecodedtmp));
assign MemPAdrDecoded = MemPAdrDecodedtmp|{MemPAdrDecodedtmp[2**LOGWPL-2:0]&{2**LOGWPL-1{FLoad2}}, 1'b0};
end else
onehotdecoder #(LOGWPL) adrdec(
.bin(PAdr[LOGWPL+LOGXLENBYTES-1:LOGXLENBYTES]), .decoded(MemPAdrDecoded));
// If writing the whole line set all write enables to 1, else only set the correct word.
assign SelectedWriteWordEn = SetValidWay ? '1 : SetDirtyWay ? MemPAdrDecoded : '0; // OR-AND
assign FinalByteMask = SetValidWay ? '1 : ByteMask; // OR

View File

@ -2,14 +2,89 @@
module divshiftcalc(
input logic [`DIVLEN+2:0] Quot,
input logic [`NE:0] DivCalcExpM,
input logic [`NE+1:0] DivCalcExpM,
input logic [`FMTBITS-1:0] FmtM,
input logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2M,
output logic [$clog2(`NORMSHIFTSZ)-1:0] DivShiftAmt,
output logic [`NE:0] CorrDivExp
output logic [`NORMSHIFTSZ-1:0] DivShiftIn,
output logic DivResDenorm,
output logic [`NE+1:0] DivDenormShift
);
assign DivShiftAmt = {{$clog2(`NORMSHIFTSZ)-1{1'b0}}, ~Quot[`DIVLEN+2]};
// the quotent is in the range [.5,2)
// if the quotent < 1 and not denormal then subtract 1 to account for the normalization shift
assign CorrDivExp = DivCalcExpM - {(`NE)'(0), ~Quot[`DIVLEN+2]};
logic [`NE+1:0] NormShift;
logic [`NE+1:0] Nf, NfPlus1;
// is the result denromalized
// if the exponent is 1 then the result needs to be normalized then the result is denormalizes
assign DivResDenorm = DivCalcExpM[`NE+1]|(~|DivCalcExpM[`NE+1:0]);
// select the proper fraction lengnth
if (`FPSIZES == 1) begin
assign Nf = (`NE+2)'(`NF);
assign NfPlus1 = (`NE+2)'(`NF+1);
end else if (`FPSIZES == 2) begin
assign Nf = FmtM ? (`NE+2)'(`NF) : (`NE+2)'(`NF1);
assign NfPlus1 = FmtM ? (`NE+2)'(`NF+1) : (`NE+2)'(`NF1+1);
end else if (`FPSIZES == 3) begin
always_comb
case (FmtM)
`FMT: begin
Nf = (`NE+2)'(`NF);
NfPlus1 = (`NE+2)'(`NF+1);
end
`FMT1: begin
Nf = (`NE+2)'(`NF1);
NfPlus1 = (`NE+2)'(`NF1+1);
end
`FMT2: begin
Nf = (`NE+2)'(`NF2);
NfPlus1 = (`NE+2)'(`NF2+1);
end
default: begin
Nf = 1'bx;
NfPlus1 = 1'bx;
end
endcase
end else if (`FPSIZES == 4) begin
always_comb
case (FmtM)
2'h3: begin
Nf = (`NE+2)'(`Q_NF);
NfPlus1 = (`NE+2)'(`Q_NF+1);
end
2'h1: begin
Nf = (`NE+2)'(`D_NF);
NfPlus1 = (`NE+2)'(`D_NF+1);
end
2'h0: begin
Nf = (`NE+2)'(`S_NF);
NfPlus1 = (`NE+2)'(`S_NF+1);
end
2'h2: begin
Nf = (`NE+2)'(`H_NF);
NfPlus1 = (`NE+2)'(`H_NF+1);
end
endcase
end
// if the result is denormalized
// 00000000x.xxxxxx... Exp = DivCalcExp
// .00000000xxxxxxx... >> NF+1 Exp = DivCalcExp+NF+1
// .00xxxxxxxxxxxxx... << DivCalcExp+NF+1 Exp = +1
// .0000xxxxxxxxxxx... >> 1 Exp = 1
// Left shift amount = DivCalcExp+NF+1-1
assign DivDenormShift = Nf+DivCalcExpM;
// if the result is normalized
// 00000000x.xxxxxx... Exp = DivCalcExp
// .00000000xxxxxxx... >> NF+1 Exp = DivCalcExp+NF+1
// 00000000.xxxxxxx... << NF Exp = DivCalcExp+1
// 00000000x.xxxxxx... << NF Exp = DivCalcExp (extra shift done afterwards)
// 00000000xx.xxxxx... << 1? Exp = DivCalcExp-1 (determined after)
// inital Left shift amount = NF
assign NormShift = Nf;
// if the shift amount is negitive then dont shift (keep sticky bit)
assign DivShiftAmt = (DivResDenorm ? DivDenormShift[$clog2(`NORMSHIFTSZ)-1:0]&{$clog2(`NORMSHIFTSZ){~DivDenormShift[`NE+1]}} : NormShift[$clog2(`NORMSHIFTSZ)-1:0])+{{$clog2(`NORMSHIFTSZ)-$clog2(`DIVLEN/2+3)-1{1'b0}}, EarlyTermShiftDiv2M&{$clog2(`DIVLEN/2+3){~DivDenormShift[`NE+1]}}, 1'b0};
// *** may be able to reduce shifter size
assign DivShiftIn = {{`NF{1'b0}}, Quot[`DIVLEN+2:0], {`NORMSHIFTSZ-`DIVLEN-3-`NF{1'b0}}};
endmodule

View File

@ -33,8 +33,8 @@ module fctrl (
default: ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_1; // non-implemented instruction
endcase
7'b0100111: case(Funct3D)
3'b010: ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_0; // fsw
3'b011: ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_0; // fsd
3'b010: ControlsD = `FCTRLW'b0_0_10_xx_0xx_0_0; // fsw
3'b011: ControlsD = `FCTRLW'b0_0_10_xx_0xx_0_0; // fsd
default: ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_1; // non-implemented instruction
endcase
7'b1000011: ControlsD = `FCTRLW'b1_0_01_10_000_0_0; // fmadd
@ -121,7 +121,7 @@ module fctrl (
assign FmtD = 0;
else if (`FPSIZES == 2)begin
logic [1:0] FmtTmp;
assign FmtTmp = ((Funct7D[6:3] == 4'b0100)&OpD[4]) ? Rs2D[1:0] : Funct7D[1:0];
assign FmtTmp = ((Funct7D[6:3] == 4'b0100)&OpD[4]) ? Rs2D[1:0] : (~OpD[6]&(&OpD[2:0])) ? {~Funct3D[1], ~(Funct3D[1]^Funct3D[0])} : Funct7D[1:0];
assign FmtD = (`FMT == FmtTmp);
end
else if (`FPSIZES == 3|`FPSIZES == 4)

View File

@ -4,7 +4,7 @@ module flags(
input logic XSgnM,
input logic XSNaNM, YSNaNM, ZSNaNM, // inputs are signaling NaNs
input logic XInfM, YInfM, ZInfM, // inputs are infinity
input logic Plus1,
input logic Plus1,
input logic InfIn, // is a Inf input being used
input logic XZeroM, YZeroM, // inputs are zero
input logic XNaNM, YNaNM, // inputs are NaN
@ -25,7 +25,7 @@ module flags(
input logic ZSgnEffM, PSgnM, // the product and modified Z signs
input logic Round, UfLSBRes, Sticky, UfPlus1, // bits used to determine rounding
output logic DivByZero,
output logic IntInvalid, Invalid, Overflow, Underflow, // flags used to select the res
output logic IntInvalid, Invalid, Overflow, // flags used to select the res
output logic [4:0] PostProcFlgM // flags
);
logic SigNaN; // is an input a signaling NaN
@ -34,6 +34,7 @@ module flags(
logic IntInexact; // integer inexact flag
logic FmaInvalid; // integer invalid flag
logic DivInvalid; // integer invalid flag
logic Underflow; // Underflow flag
logic ResExpGteMax; // is the result greater than or equal to the maximum floating point expoent
logic ShiftGtIntSz; // is the shift greater than the the integer size (use ResExp to account for possible roundning "shift")
@ -88,7 +89,7 @@ module flags(
// | and the exponent isn't negitive
// | | if the input isnt infinity or NaN
// | | |
assign Overflow = ResExpGteMax & ~FullResExp[`NE+1]&~(InfIn|NaNIn);
assign Overflow = ResExpGteMax & ~FullResExp[`NE+1]&~(InfIn|NaNIn|DivByZero);
// detecting tininess after rounding
// the exponent is negitive
@ -98,11 +99,11 @@ module flags(
// | | | | and if the result is not exact
// | | | | | and if the input isnt infinity or NaN
// | | | | | |
assign Underflow = ((FullResExp[`NE+1] | (FullResExp == 0) | ((FullResExp == 1) & (RoundExp == 0) & ~(UfPlus1&UfLSBRes)))&(Round|Sticky))&~(InfIn|NaNIn);
assign Underflow = ((FullResExp[`NE+1] | (FullResExp == 0) | ((FullResExp == 1) & (RoundExp == 0) & ~(UfPlus1&UfLSBRes)))&(Round|Sticky))&~(InfIn|NaNIn|DivByZero);
// Set Inexact flag if the res is diffrent from what would be outputed given infinite precision
// - Don't set the underflow flag if an underflowed res isn't outputed
assign FpInexact = (Sticky|Overflow|Round|Underflow)&~(InfIn|NaNIn);
assign FpInexact = (Sticky|Overflow|Round|Underflow)&~(InfIn|NaNIn|DivByZero);
// if the res is too small to be represented and not 0
// | and if the res is not invalid (outside the integer bounds)
@ -133,8 +134,9 @@ module flags(
assign Invalid = SigNaN | (FmaInvalid&FmaOp) | (DivInvalid&DivOp);
assign DivByZero = YZeroM&DivOp;
// if dividing by zero and not 0/0
// - don't set flag if an input is NaN or Inf(IEEE says has to be a finite numerator)
assign DivByZero = YZeroM&DivOp&~(XZeroM|NaNIn|InfIn);
// Combine flags
// - to integer results do not set the underflow or overflow flags

View File

@ -120,8 +120,9 @@ module fmashiftcalc(
// Determine the shift needed for denormal results
// - if not denorm add 1 to shift out the leading 1
assign DenormShift = PreResultDenorm ? ConvNormSumExp[$clog2(3*`NF+7)-1:0] : 1;
assign DenormShift = PreResultDenorm&~KillProdM ? ConvNormSumExp[$clog2(3*`NF+7)-1:0] : 1;
// set and calculate the shift input and amount
// - shift once if killing a product and the result is denormalized
assign FmaShiftIn = {3'b0, SumM};
assign FmaShiftAmt = FmaNormCntM+DenormShift;
assign FmaShiftAmt = (FmaNormCntM&{$clog2(3*`NF+7){~KillProdM}})+DenormShift;
endmodule

View File

@ -41,10 +41,12 @@ module fpu (
input logic [4:0] RdM, RdW, // which FP register to write to (from IEU)
input logic [1:0] STATUS_FS, // Is floating-point enabled?
output logic FRegWriteM, // FP register write enable
output logic FpLoadM, // Fp load instruction?
output logic FpLoadStoreM, // Fp load instruction?
output logic FLoad2,
output logic FStallD, // Stall the decode stage
output logic FWriteIntE, // integer register write enables
output logic [`XLEN-1:0] FWriteDataE, // Data to be written to memory
output logic [`FLEN-1:0] FWriteDataM, // Data to be written to memory
output logic [`XLEN-1:0] FIntResM, // data to be written to integer register
output logic [`XLEN-1:0] FCvtIntResW, // data to be written to integer register
output logic [1:0] FResSelW,
@ -124,7 +126,10 @@ module fpu (
//divide signals
logic [`DIVLEN+2:0] Quot;
logic [`NE:0] DivCalcExpM;
logic [`NE+1:0] DivCalcExpM;
logic DivNegStickyM;
logic DivStickyM;
logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2M;
// result and flag signals
logic [63:0] FDivResM, FDivResW; // divide/squareroot result
@ -289,8 +294,19 @@ module fpu (
// data to be stored in memory - to IEU
// - FP uses NaN-blocking format
// - if there are any unsused bits the most significant bits are filled with 1s
if (`FLEN>`XLEN) assign FWriteDataE = FSrcYE[`XLEN-1:0];
else assign FWriteDataE = {{`XLEN-`FLEN{FSrcYE[`FLEN-1]}}, FSrcYE};
if (`LLEN==`XLEN) begin
assign FWriteDataE = FSrcYE[`XLEN-1:0];
end else begin
logic [`FLEN-1:0] FWriteDataE;
if(`FMTBITS == 2) assign FLoad2 = FmtM == `FMT;
else assign FLoad2 = FmtM;
if (`FPSIZES==1) assign FWriteDataE = FSrcYE;
else if (`FPSIZES==2) assign FWriteDataE = FmtE ? FSrcYE : {2{FSrcYE[`LEN1-1:0]}};
else assign FWriteDataE = FmtE == `FMT ? FSrcYE : {2{FSrcYE[`LEN1-1:0]}};
flopenrc #(`FLEN) EMWriteDataReg (clk, reset, FlushM, ~StallM, FWriteDataE, FWriteDataM);
end
// NaN Block SrcA
generate
@ -308,7 +324,7 @@ module fpu (
assign PreNVE = CmpNVE&(FOpCtrlE[2]|FWriteIntE);
// select the result that may be written to the integer register - to IEU
if (`FLEN>`XLEN)
if (`FLEN>`XLEN)
assign IntSrcXE = FSrcXE[`XLEN-1:0];
else
assign IntSrcXE = {{`XLEN-`FLEN{FSrcXE[`FLEN-1:0]}}, FSrcXE};
@ -353,13 +369,13 @@ module fpu (
// ||| |||
//////////////////////////////////////////////////////////////////////////////////////////
assign FpLoadM = FResSelM[1];
assign FpLoadStoreM = FResSelM[1];
postprocess postprocess(.XSgnM, .YSgnM, .ZExpM, .XManM, .YManM, .ZManM, .FrmM, .FmtM, .ProdExpM,
postprocess postprocess(.XSgnM, .YSgnM, .ZExpM, .XManM, .YManM, .ZManM, .FrmM, .FmtM, .ProdExpM, .EarlyTermShiftDiv2M,
.AddendStickyM, .KillProdM, .XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, .Quot,
.ZInfM, .XNaNM, .YNaNM, .ZNaNM, .XSNaNM, .YSNaNM, .ZSNaNM, .SumM, .DivCalcExpM,
.NegSumM, .InvZM, .ZDenormM, .ZSgnEffM, .PSgnM, .FOpCtrlM, .FmaNormCntM,
.CvtCalcExpM, .CvtResDenormUfM,.CvtShiftAmtM, .CvtResSgnM, .FWriteIntM,
.NegSumM, .InvZM, .ZDenormM, .ZSgnEffM, .PSgnM, .FOpCtrlM, .FmaNormCntM, .DivNegStickyM,
.CvtCalcExpM, .CvtResDenormUfM,.CvtShiftAmtM, .CvtResSgnM, .FWriteIntM, .DivStickyM,
.CvtLzcInM, .IntZeroM, .PostProcSelM, .PostProcResM, .PostProcFlgM, .FCvtIntResM);
// FPU flag selection - to privileged

View File

@ -1,29 +1,41 @@
`include "wally-config.vh"
module lzacorrection(
input logic [`NORMSHIFTSZ-1:0] Shifted, // the shifted sum before LZA correction
input logic FmaOp,
input logic [`NE+1:0] ConvNormSumExp, // exponent of the normalized sum not taking into account denormal or zero results
input logic PreResultDenorm, // is the result denormalized - calculated before LZA corection
input logic KillProdM, // is the product set to zero
input logic SumZero,
output logic [`CORRSHIFTSZ-1:0] CorrShifted, // the shifted sum before LZA correction
output logic [`NE+1:0] SumExp // exponent of the normalized sum
input logic [`NORMSHIFTSZ-1:0] Shifted, // the shifted sum before LZA correction
input logic FmaOp,
input logic DivOp,
input logic DivResDenorm,
input logic [`NE+1:0] DivCalcExpM,
input logic [`NE+1:0] DivDenormShift,
input logic [`NE+1:0] ConvNormSumExp, // exponent of the normalized sum not taking into account denormal or zero results
input logic PreResultDenorm, // is the result denormalized - calculated before LZA corection
input logic KillProdM, // is the product set to zero
input logic SumZero,
output logic [`CORRSHIFTSZ-1:0] CorrShifted, // the shifted sum before LZA correction
output logic [`NE+1:0] CorrDivExp,
output logic [`NE+1:0] SumExp // exponent of the normalized sum
);
logic [3*`NF+5:0] CorrSumShifted; // the shifted sum after LZA correction
logic ResDenorm; // is the result denormalized
logic LZAPlus1, LZAPlus2; // add one or two to the sum's exponent due to LZA correction
logic [3*`NF+5:0] CorrSumShifted; // the shifted sum after LZA correction
logic [`CORRSHIFTSZ:0] CorrQuotShifted;
logic ResDenorm; // is the result denormalized
logic LZAPlus1, LZAPlus2; // add one or two to the sum's exponent due to LZA correction
// LZA correction
assign LZAPlus1 = Shifted[`NORMSHIFTSZ-2];
assign LZAPlus2 = Shifted[`NORMSHIFTSZ-1];
// the only possible mantissa for a plus two is all zeroes - a one has to propigate all the way through a sum. so we can leave the bottom statement alone
assign CorrSumShifted = LZAPlus1 ? Shifted[`NORMSHIFTSZ-3:1] : Shifted[`NORMSHIFTSZ-4:0];
assign CorrShifted = FmaOp ? {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+6){1'b0}}} : Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ];
// if the msb is 1 or the exponent was one, but the shifted quotent was < 1 (Denorm)
assign CorrQuotShifted = {LZAPlus2|(DivCalcExpM==1&~LZAPlus2) ? Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ] : {Shifted[`NORMSHIFTSZ-2:`NORMSHIFTSZ-`CORRSHIFTSZ], 1'b0}, 1'b0};
// if the result of the divider was calculated to be denormalized, then the result was correctly normalized, so select the top shifted bits
assign CorrShifted = FmaOp ? {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+6){1'b0}}} : DivOp&~DivResDenorm ? CorrQuotShifted[`CORRSHIFTSZ-1:0] : Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ];
// Determine sum's exponent
// if plus1 If plus2 if said denorm but norm plus 1 if said denorm but norm plus 2
assign SumExp = (ConvNormSumExp+{{`NE+1{1'b0}}, LZAPlus1&~KillProdM}+{{`NE{1'b0}}, LZAPlus2&~KillProdM, 1'b0}+{{`NE+1{1'b0}}, ~ResDenorm&PreResultDenorm&~KillProdM}+{{`NE+1{1'b0}}, &ConvNormSumExp&Shifted[3*`NF+6]&~KillProdM}) & {`NE+2{~(SumZero|ResDenorm)}};
// recalculate if the result is denormalized
assign ResDenorm = PreResultDenorm&~Shifted[`NORMSHIFTSZ-3]&~Shifted[`NORMSHIFTSZ-2];
// the quotent is in the range [.5,2) if there is no early termination
// if the quotent < 1 and not denormal then subtract 1 to account for the normalization shift
assign CorrDivExp = ((DivResDenorm)&~DivDenormShift[`NE+1]) ? (`NE+2)'(0) : DivCalcExpM - {(`NE+1)'(0), ~LZAPlus2};
endmodule

View File

@ -30,95 +30,109 @@
`include "wally-config.vh"
module postprocess(
// general signals
input logic XSgnM, YSgnM, // input signs
input logic [`NE-1:0] ZExpM, // input exponents
input logic [`NF:0] XManM, YManM, ZManM, // input mantissas
input logic [2:0] FrmM, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
input logic [`FMTBITS-1:0] FmtM, // precision 1 = double 0 = single
input logic [`NE+1:0] ProdExpM, // X exponent + Y exponent - bias
input logic AddendStickyM, // sticky bit that is calculated during alignment
input logic KillProdM, // set the product to zero before addition if the product is too small to matter
input logic [`NE-1:0] ZExpM, // input exponents
input logic [`NF:0] XManM, YManM, ZManM, // input mantissas
input logic [2:0] FrmM, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
input logic [`FMTBITS-1:0] FmtM, // precision 1 = double 0 = single
input logic [2:0] FOpCtrlM, // choose which opperation (look below for values)
input logic XZeroM, YZeroM, ZZeroM, // inputs are zero
input logic XInfM, YInfM, ZInfM, // inputs are infinity
input logic XNaNM, YNaNM, ZNaNM, // inputs are NaN
input logic XSNaNM, YSNaNM, ZSNaNM, // inputs are signaling NaNs
input logic [3*`NF+5:0] SumM, // the positive sum
input logic ZDenormM, // is the original precision denormalized
input logic [1:0] PostProcSelM, // select result to be written to fp register
//fma signals
input logic [`NE+1:0] ProdExpM, // X exponent + Y exponent - bias
input logic AddendStickyM, // sticky bit that is calculated during alignment
input logic KillProdM, // set the product to zero before addition if the product is too small to matter
input logic [3*`NF+5:0] SumM, // the positive sum
input logic NegSumM, // was the sum negitive
input logic InvZM, // do you invert Z
input logic ZDenormM, // is the original precision denormalized
input logic ZSgnEffM, // the modified Z sign - depends on instruction
input logic PSgnM, // the product's sign
input logic [2:0] FOpCtrlM, // choose which opperation (look below for values)
input logic [$clog2(3*`NF+7)-1:0] FmaNormCntM, // the normalization shift count
input logic [`NE:0] CvtCalcExpM, // the calculated expoent
input logic [`NE:0] DivCalcExpM, // the calculated expoent
input logic CvtResDenormUfM,
input logic [`LOGCVTLEN-1:0] CvtShiftAmtM, // how much to shift by
input logic CvtResSgnM, // the result's sign
input logic FWriteIntM, // is fp->int (since it's writting to the integer register)
input logic [`CVTLEN-1:0] CvtLzcInM, // input to the Leading Zero Counter (priority encoder)
input logic IntZeroM, // is the input zero
input logic [1:0] PostProcSelM, // select result to be written to fp register
input logic [`DIVLEN+2:0] Quot,
output logic [`FLEN-1:0] PostProcResM, // FMA final result
output logic [4:0] PostProcFlgM,
output logic [`XLEN-1:0] FCvtIntResM // the int conversion result
input logic [$clog2(3*`NF+7)-1:0] FmaNormCntM, // the normalization shift count
//divide signals
input logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2M,
input logic [`NE+1:0] DivCalcExpM, // the calculated expoent
input logic DivStickyM,
input logic DivNegStickyM,
input logic [`DIVLEN+2:0] Quot,
// conversion signals
input logic [`NE:0] CvtCalcExpM, // the calculated expoent
input logic CvtResDenormUfM,
input logic [`LOGCVTLEN-1:0] CvtShiftAmtM, // how much to shift by
input logic CvtResSgnM, // the result's sign
input logic FWriteIntM, // is fp->int (since it's writting to the integer register)
input logic [`CVTLEN-1:0] CvtLzcInM, // input to the Leading Zero Counter (priority encoder)
input logic IntZeroM, // is the input zero
// final results
output logic [`FLEN-1:0] PostProcResM, // FMA final result
output logic [4:0] PostProcFlgM,
output logic [`XLEN-1:0] FCvtIntResM // the int conversion result
);
logic [`NF-1:0] ResFrac; // Result fraction
logic [`NE-1:0] ResExp; // Result exponent
logic [`CORRSHIFTSZ-1:0] CorrShifted; // the shifted sum before LZA correction
logic [`NE+1:0] SumExp; // exponent of the normalized sum
logic [`NE+1:0] FullResExp; // ResExp with bits to determine sign and overflow
logic SumZero; // is the sum zero
logic Sticky; // Sticky bit
logic [3*`NF+8:0] FmaShiftIn; // is the sum zero
logic UfPlus1; // do you add one (for determining underflow flag)
logic Round; // bits needed to determine rounding
logic [`CVTLEN+`NF:0] CvtShiftIn; // number to be shifted
logic Mult; // multiply opperation
logic [`FLEN:0] RoundAdd; // how much to add to the result
logic [`NE+1:0] ConvNormSumExp; // exponent of the normalized sum not taking into account denormal or zero results
logic PreResultDenorm; // is the result denormalized - calculated before LZA corection
logic [$clog2(3*`NF+7)-1:0] FmaShiftAmt; // normalization shift count
logic [$clog2(`NORMSHIFTSZ)-1:0] ShiftAmt; // normalization shift count
logic [$clog2(`NORMSHIFTSZ)-1:0] DivShiftAmt;
logic [`NORMSHIFTSZ-1:0] ShiftIn; // is the sum zero
logic [`NORMSHIFTSZ-1:0] Shifted; // the shifted result
logic Plus1; // add one to the final result?
logic IntInvalid, Overflow, Underflow, Invalid; // flags
logic Signed; // is the opperation with a signed integer?
logic Int64; // is the integer 64 bits?
logic IntToFp; // is the opperation an int->fp conversion?
logic ToInt; // is the opperation an fp->int conversion?
// general signals
logic [`NF-1:0] ResFrac; // Result fraction
logic [`NE-1:0] ResExp; // Result exponent
logic [`CORRSHIFTSZ-1:0] CorrShifted; // corectly shifted fraction
logic [`NE+1:0] FullResExp; // ResExp with bits to determine sign and overflow
logic Sticky; // Sticky bit
logic UfPlus1; // do you add one (for determining underflow flag)
logic Round; // bits needed to determine rounding
logic [`FLEN:0] RoundAdd; // how much to add to the result
logic [$clog2(`NORMSHIFTSZ)-1:0] ShiftAmt; // normalization shift count
logic [`NORMSHIFTSZ-1:0] ShiftIn; // is the sum zero
logic [`NORMSHIFTSZ-1:0] Shifted; // the shifted result
logic Plus1; // add one to the final result?
logic IntInvalid, Overflow, Invalid; // flags
logic [`NE+1:0] RoundExp;
logic [`NE:0] CorrDivExp;
logic [1:0] NegResMSBS;
logic CvtOp;
logic FmaOp;
logic CvtResUf;
logic DivOp;
logic InfIn;
logic ResSgn;
logic RoundSgn;
logic NaNIn;
logic DivByZero;
logic UfLSBRes;
logic Sqrt;
logic [`FMTBITS-1:0] OutFmt;
// fma signals
logic [`NE+1:0] SumExp; // exponent of the normalized sum
logic SumZero; // is the sum zero
logic [3*`NF+8:0] FmaShiftIn; // is the sum zero
logic [`NE+1:0] ConvNormSumExp; // exponent of the normalized sum not taking into account denormal or zero results
logic PreResultDenorm; // is the result denormalized - calculated before LZA corection
logic [$clog2(3*`NF+7)-1:0] FmaShiftAmt; // normalization shift count
// division singals
logic [$clog2(`NORMSHIFTSZ)-1:0] DivShiftAmt;
logic [`NORMSHIFTSZ-1:0] DivShiftIn;
logic [`NE+1:0] CorrDivExp;
logic DivByZero;
logic DivResDenorm;
logic [`NE+1:0] DivDenormShift;
// conversion signals
logic [`CVTLEN+`NF:0] CvtShiftIn; // number to be shifted
logic [1:0] NegResMSBS;
logic CvtResUf;
// readability signals
logic Mult; // multiply opperation
logic Int64; // is the integer 64 bits?
logic Signed; // is the opperation with a signed integer?
logic IntToFp; // is the opperation an int->fp conversion?
logic ToInt; // is the opperation an fp->int conversion?
logic CvtOp;
logic FmaOp;
logic DivOp;
logic InfIn;
logic NaNIn;
logic Sqrt;
// signals to help readability
assign Signed = FOpCtrlM[0];
assign Int64 = FOpCtrlM[1];
assign IntToFp = FOpCtrlM[2];
assign ToInt = FWriteIntM;
assign Signed = FOpCtrlM[0];
assign Int64 = FOpCtrlM[1];
assign IntToFp = FOpCtrlM[2];
assign ToInt = FWriteIntM;
assign Mult = FOpCtrlM[2]&~FOpCtrlM[1]&~FOpCtrlM[0];
assign CvtOp = (PostProcSelM == 2'b00);
assign FmaOp = (PostProcSelM == 2'b10);
assign DivOp = (PostProcSelM == 2'b01);
assign Sqrt = FOpCtrlM[0];
assign Sqrt = FOpCtrlM[0];
// is there an input of infinity or NaN being used
assign InfIn = (XInfM&~(IntToFp&CvtOp))|(YInfM&~CvtOp)|(ZInfM&FmaOp);
@ -140,7 +154,7 @@ module postprocess(
.XZeroM, .IntToFp, .OutFmt, .CvtResUf, .CvtShiftIn);
fmashiftcalc fmashiftcalc(.SumM, .ZExpM, .ProdExpM, .FmaNormCntM, .FmtM, .KillProdM, .ConvNormSumExp,
.ZDenormM, .SumZero, .PreResultDenorm, .FmaShiftAmt, .FmaShiftIn);
divshiftcalc divshiftcalc(.Quot, .DivCalcExpM, .CorrDivExp, .DivShiftAmt);
divshiftcalc divshiftcalc(.FmtM, .Quot, .DivCalcExpM, .EarlyTermShiftDiv2M, .DivResDenorm, .DivDenormShift, .DivShiftAmt, .DivShiftIn);
always_comb
case(PostProcSelM)
@ -154,7 +168,7 @@ module postprocess(
end
2'b01: begin //div ***prob can take out
ShiftAmt = DivShiftAmt;
ShiftIn = {Quot[`DIVLEN+1:0], {`NORMSHIFTSZ-`DIVLEN-2{1'b0}}};
ShiftIn = DivShiftIn;
end
default: begin
ShiftAmt = {$clog2(`NORMSHIFTSZ){1'bx}};
@ -165,7 +179,8 @@ module postprocess(
normshift normshift (.ShiftIn, .ShiftAmt, .Shifted);
lzacorrection lzacorrection(.FmaOp, .KillProdM, .PreResultDenorm, .ConvNormSumExp,
.SumZero, .Shifted, .SumExp, .CorrShifted);
.DivResDenorm, .DivDenormShift, .DivOp, .DivCalcExpM,
.CorrDivExp, .SumZero, .Shifted, .SumExp, .CorrShifted);
///////////////////////////////////////////////////////////////////////////////
// Rounding
@ -179,6 +194,7 @@ module postprocess(
round round(.OutFmt, .FrmM, .Sticky, .AddendStickyM, .ZZeroM, .Plus1, .PostProcSelM, .CvtCalcExpM, .CorrDivExp,
.InvZM, .RoundSgn, .SumExp, .FmaOp, .CvtOp, .CvtResDenormUfM, .CorrShifted, .ToInt, .CvtResUf,
.DivStickyM, .DivNegStickyM,
.DivOp, .UfPlus1, .FullResExp, .ResFrac, .ResExp, .Round, .RoundAdd, .UfLSBRes, .RoundExp);
///////////////////////////////////////////////////////////////////////////////
@ -197,7 +213,7 @@ module postprocess(
.XSgnM, .Sqrt, .ToInt, .IntToFp, .Int64, .Signed, .OutFmt, .CvtCalcExpM,
.XNaNM, .YNaNM, .NaNIn, .ZSgnEffM, .PSgnM, .Round, .IntInvalid, .DivByZero,
.UfLSBRes, .Sticky, .UfPlus1, .CvtOp, .DivOp, .FmaOp, .FullResExp, .Plus1,
.RoundExp, .NegResMSBS, .Invalid, .Overflow, .Underflow, .PostProcFlgM);
.RoundExp, .NegResMSBS, .Invalid, .Overflow, .PostProcFlgM);
///////////////////////////////////////////////////////////////////////////////
// Select the result
@ -206,6 +222,7 @@ module postprocess(
resultselect resultselect(.XSgnM, .ZExpM, .XManM, .YManM, .ZManM, .ZDenormM, .ZZeroM, .XZeroM, .IntInvalid,
.IntZeroM, .FrmM, .OutFmt, .AddendStickyM, .KillProdM, .XNaNM, .YNaNM, .ZNaNM, .RoundAdd, .CvtResUf,
.NaNIn, .IntToFp, .Int64, .Signed, .CvtOp, .FmaOp, .Plus1, .Invalid, .Overflow, .InfIn, .NegResMSBS,
.XInfM, .YInfM, .DivOp,
.DivByZero, .FullResExp, .Shifted, .CvtCalcExpM, .ResSgn, .ResExp, .ResFrac, .PostProcResM, .FCvtIntResM);
endmodule

View File

@ -4,26 +4,27 @@ module resultselect(
input logic XSgnM, // input signs
input logic [`NE-1:0] ZExpM, // input exponents
input logic [`NF:0] XManM, YManM, ZManM, // input mantissas
input logic XNaNM, YNaNM, ZNaNM, // inputs are NaN
input logic [2:0] FrmM, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
input logic [`FMTBITS-1:0] OutFmt, // output format
input logic InfIn,
input logic XZeroM,
input logic XInfM, YInfM,
input logic XZeroM, ZZeroM,
input logic IntZeroM,
input logic NaNIn,
input logic IntToFp,
input logic Int64,
input logic Signed,
input logic CvtOp,
input logic [`NORMSHIFTSZ-1:0] Shifted, // is the sum zero
input logic DivOp,
input logic FmaOp,
input logic [`NORMSHIFTSZ-1:0] Shifted, // is the sum zero
input logic Plus1,
input logic DivByZero,
input logic [`NE:0] CvtCalcExpM, // the calculated expoent
input logic AddendStickyM, // sticky bit that is calculated during alignment
input logic KillProdM, // set the product to zero before addition if the product is too small to matter
input logic XNaNM, YNaNM, ZNaNM, // inputs are NaN
input logic ZDenormM, // is the original precision denormalized
input logic ZZeroM,
input logic ResSgn, // the res's sign
input logic [`FLEN:0] RoundAdd, // how much to add to the res
input logic IntInvalid, Invalid, Overflow, // flags
@ -35,16 +36,17 @@ module resultselect(
output logic [1:0] NegResMSBS,
output logic [`XLEN-1:0] FCvtIntResM // final res
);
logic [`FLEN-1:0] XNaNRes, YNaNRes, ZNaNRes, InvalidRes, OfRes, KillProdRes, UfRes, NormRes; // possible results
logic [`FLEN-1:0] XNaNRes, YNaNRes, ZNaNRes, InvalidRes, OfRes, UfRes, NormRes; // possible results
logic OfResMax;
logic [`XLEN-1:0] OfIntRes; // the overflow result for integer output
logic [`XLEN+1:0] NegRes; // the negation of the result
logic KillRes;
logic SelOfRes;
// does the overflow result output the maximum normalized floating point number
// output infinity if the input is infinity
assign OfResMax = (~InfIn|(IntToFp&CvtOp))&((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResSgn) | (FrmM[1:0]==2'b11&ResSgn));
assign OfResMax = (~InfIn|(IntToFp&CvtOp))&~DivByZero&((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResSgn) | (FrmM[1:0]==2'b11&ResSgn));
if (`FPSIZES == 1) begin
@ -59,8 +61,7 @@ module resultselect(
end
assign OfRes = OfResMax ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResSgn, {`NE{1'b1}}, {`NF{1'b0}}};
assign KillProdRes = {ResSgn, {ZExpM[`NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
assign UfRes = {ResSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),Plus1&FrmM[1]};
assign UfRes = {ResSgn, {`FLEN-1{1'b0}}, Plus1&FrmM[1]&~(DivOp&YInfM)};
assign NormRes = {ResSgn, ResExp, ResFrac};
end else if (`FPSIZES == 2) begin //will the format conversion in killprod work in other conversions?
@ -75,8 +76,7 @@ module resultselect(
assign OfRes = OutFmt ? OfResMax ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResSgn, {`NE{1'b1}}, {`NF{1'b0}}} :
OfResMax ? {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} : {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1{1'b1}}, (`NF1)'(0)};
assign KillProdRes = OutFmt ? {ResSgn, {ZExpM[`NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})} : {{`FLEN-`LEN1{1'b1}}, ResSgn, {ZExpM[`NE-1], ZExpM[`NE1-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`NF1]} + (RoundAdd[`NF-`NF1+`LEN1-2:`NF-`NF1]&{`LEN1-1{AddendStickyM}})};
assign UfRes = OutFmt ? {ResSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),Plus1&FrmM[1]} : {{`FLEN-`LEN1{1'b1}}, {ResSgn, (`LEN1-1)'(0)} + {(`LEN1-1)'(0), Plus1&FrmM[1]}};
assign UfRes = OutFmt ? {ResSgn, (`FLEN-2)'(0), Plus1&FrmM[1]&~(DivOp&YInfM)} : {{`FLEN-`LEN1{1'b1}}, ResSgn, (`LEN1-2)'(0), Plus1&FrmM[1]&~(DivOp&YInfM)};
assign NormRes = OutFmt ? {ResSgn, ResExp, ResFrac} : {{`FLEN-`LEN1{1'b1}}, ResSgn, ResExp[`NE1-1:0], ResFrac[`NF-1:`NF-`NF1]};
end else if (`FPSIZES == 3) begin
@ -93,8 +93,7 @@ module resultselect(
end
OfRes = OfResMax ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResSgn, {`NE{1'b1}}, {`NF{1'b0}}};
KillProdRes = {ResSgn, {ZExpM[`NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
UfRes = {ResSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),Plus1&FrmM[1]};
UfRes = {ResSgn, (`FLEN-2)'(0), Plus1&FrmM[1]&~(DivOp&YInfM)};
NormRes = {ResSgn, ResExp, ResFrac};
end
`FMT1: begin
@ -107,8 +106,7 @@ module resultselect(
InvalidRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
end
OfRes = OfResMax ? {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} : {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1{1'b1}}, (`NF1)'(0)};
KillProdRes = {{`FLEN-`LEN1{1'b1}}, ResSgn, {ZExpM[`NE-1], ZExpM[`NE1-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`NF1]} + (RoundAdd[`NF-`NF1+`LEN1-2:`NF-`NF1]&{`LEN1-1{AddendStickyM}})};
UfRes = {{`FLEN-`LEN1{1'b1}}, {ResSgn, (`LEN1-1)'(0)} + {(`LEN1-1)'(0), Plus1&FrmM[1]}};
UfRes = {{`FLEN-`LEN1{1'b1}}, ResSgn, (`LEN1-2)'(0), Plus1&FrmM[1]&~(DivOp&YInfM)};
NormRes = {{`FLEN-`LEN1{1'b1}}, ResSgn, ResExp[`NE1-1:0], ResFrac[`NF-1:`NF-`NF1]};
end
`FMT2: begin
@ -122,8 +120,7 @@ module resultselect(
end
OfRes = OfResMax ? {{`FLEN-`LEN2{1'b1}}, ResSgn, {`NE2-1{1'b1}}, 1'b0, {`NF2{1'b1}}} : {{`FLEN-`LEN2{1'b1}}, ResSgn, {`NE2{1'b1}}, (`NF2)'(0)};
KillProdRes = {{`FLEN-`LEN2{1'b1}}, ResSgn, {ZExpM[`NE-1], ZExpM[`NE2-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`NF2]} + (RoundAdd[`NF-`NF2+`LEN2-2:`NF-`NF2]&{`LEN2-1{AddendStickyM}})};
UfRes = {{`FLEN-`LEN2{1'b1}}, {ResSgn, (`LEN2-1)'(0)} + {(`LEN2-1)'(0), Plus1&FrmM[1]}};
UfRes = {{`FLEN-`LEN2{1'b1}}, ResSgn, (`LEN2-2)'(0), Plus1&FrmM[1]&~(DivOp&YInfM)};
NormRes = {{`FLEN-`LEN2{1'b1}}, ResSgn, ResExp[`NE2-1:0], ResFrac[`NF-1:`NF-`NF2]};
end
default: begin
@ -136,7 +133,6 @@ module resultselect(
InvalidRes = (`FLEN)'(0);
end
OfRes = (`FLEN)'(0);
KillProdRes = (`FLEN)'(0);
UfRes = (`FLEN)'(0);
NormRes = (`FLEN)'(0);
end
@ -156,8 +152,7 @@ module resultselect(
end
OfRes = OfResMax ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResSgn, {`NE{1'b1}}, {`NF{1'b0}}};
KillProdRes = {ResSgn, {ZExpM[`Q_NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
UfRes = {ResSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),Plus1&FrmM[1]};
UfRes = {ResSgn, (`FLEN-2)'(0), Plus1&FrmM[1]&~(DivOp&YInfM)};
NormRes = {ResSgn, ResExp, ResFrac};
end
2'h1: begin
@ -170,8 +165,7 @@ module resultselect(
InvalidRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)};
end
OfRes = OfResMax ? {{`FLEN-`D_LEN{1'b1}}, ResSgn, {`D_NE-1{1'b1}}, 1'b0, {`D_NF{1'b1}}} : {{`FLEN-`D_LEN{1'b1}}, ResSgn, {`D_NE{1'b1}}, (`D_NF)'(0)};
KillProdRes = {{`FLEN-`D_LEN{1'b1}}, ResSgn, {ZExpM[`NE-1], ZExpM[`D_NE-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`D_NF]} + (RoundAdd[`NF-`D_NF+`D_LEN-2:`NF-`D_NF]&{`D_LEN-1{AddendStickyM}})};
UfRes = {{`FLEN-`D_LEN{1'b1}}, {ResSgn, (`D_LEN-1)'(0)} + {(`D_LEN-1)'(0), Plus1&FrmM[1]}};
UfRes = {{`FLEN-`D_LEN{1'b1}}, ResSgn, (`D_LEN-2)'(0), Plus1&FrmM[1]&~(DivOp&YInfM)};
NormRes = {{`FLEN-`D_LEN{1'b1}}, ResSgn, ResExp[`D_NE-1:0], ResFrac[`NF-1:`NF-`D_NF]};
end
2'h0: begin
@ -185,8 +179,7 @@ module resultselect(
end
OfRes = OfResMax ? {{`FLEN-`S_LEN{1'b1}}, ResSgn, {`S_NE-1{1'b1}}, 1'b0, {`S_NF{1'b1}}} : {{`FLEN-`S_LEN{1'b1}}, ResSgn, {`S_NE{1'b1}}, (`S_NF)'(0)};
KillProdRes = {{`FLEN-`S_LEN{1'b1}}, ResSgn, {ZExpM[`NE-1], ZExpM[`S_NE-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`S_NF]} + (RoundAdd[`NF-`S_NF+`S_LEN-2:`NF-`S_NF]&{`S_LEN-1{AddendStickyM}})};
UfRes = {{`FLEN-`S_LEN{1'b1}}, {ResSgn, (`S_LEN-1)'(0)} + {(`S_LEN-1)'(0), Plus1&FrmM[1]}};
UfRes = {{`FLEN-`S_LEN{1'b1}}, ResSgn, (`S_LEN-2)'(0), Plus1&FrmM[1]&~(DivOp&YInfM)};
NormRes = {{`FLEN-`S_LEN{1'b1}}, ResSgn, ResExp[`S_NE-1:0], ResFrac[`NF-1:`NF-`S_NF]};
end
2'h2: begin
@ -200,9 +193,8 @@ module resultselect(
end
OfRes = OfResMax ? {{`FLEN-`H_LEN{1'b1}}, ResSgn, {`H_NE-1{1'b1}}, 1'b0, {`H_NF{1'b1}}} : {{`FLEN-`H_LEN{1'b1}}, ResSgn, {`H_NE{1'b1}}, (`H_NF)'(0)};
KillProdRes = {{`FLEN-`H_LEN{1'b1}}, ResSgn, {ZExpM[`NE-1], ZExpM[`H_NE-2:1],ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`H_NF]} + (RoundAdd[`NF-`H_NF+`H_LEN-2:`NF-`H_NF]&{`H_LEN-1{AddendStickyM}})};
UfRes = {{`FLEN-`H_LEN{1'b1}}, {ResSgn, (`H_LEN-1)'(0)} + {(`H_LEN-1)'(0), Plus1&FrmM[1]}};
// zero is exact fi dividing by infinity so don't add 1
UfRes = {{`FLEN-`H_LEN{1'b1}}, ResSgn, (`H_LEN-2)'(0), Plus1&FrmM[1]&~(DivOp&YInfM)};
NormRes = {{`FLEN-`H_LEN{1'b1}}, ResSgn, ResExp[`H_NE-1:0], ResFrac[`NF-1:`NF-`H_NF]};
end
endcase
@ -217,22 +209,20 @@ module resultselect(
// - do so if the res underflows, is zero (the exp doesnt calculate correctly). or the integer input is 0
// - dont set to zero if fp input is zero but not using the fp input
// - dont set to zero if int input is zero but not using the int input
assign KillRes = CvtOp ? (CvtResUf|(XZeroM&~IntToFp)|(IntZeroM&IntToFp)) : FullResExp[`NE+1];//Underflow & ~ResDenorm & (ResExp!=1);
assign KillRes = CvtOp ? (CvtResUf|(XZeroM&~IntToFp)|(IntZeroM&IntToFp)) : FullResExp[`NE+1] | (((YInfM&~XInfM)|XZeroM)&DivOp);//Underflow & ~ResDenorm & (ResExp!=1);
assign SelOfRes = Overflow|DivByZero|(InfIn&~(YInfM&DivOp));
// output infinity with result sign if divide by zero
if(`IEEE754) begin
assign PostProcResM = XNaNM&~(IntToFp&CvtOp) ? XNaNRes :
YNaNM&~CvtOp ? YNaNRes :
ZNaNM&FmaOp ? ZNaNRes :
Invalid ? InvalidRes :
Overflow|DivByZero|InfIn ? OfRes :
KillProdM&FmaOp ? KillProdRes :
SelOfRes ? OfRes :
KillRes ? UfRes :
NormRes;
end else begin
assign PostProcResM = NaNIn|Invalid ? InvalidRes :
Overflow|DivByZero|InfIn ? OfRes :
KillProdM&FmaOp ? KillProdRes :
SelOfRes ? OfRes :
KillRes ? UfRes :
NormRes;
end

View File

@ -8,40 +8,42 @@
`define XLENPOS ((`XLEN>`NF) ? 1 : (`XLEN>`NF1) ? 2 : 3)
module round(
input logic [`FMTBITS-1:0] OutFmt, // precision 1 = double 0 = single
input logic [2:0] FrmM, // rounding mode
input logic FmaOp,
input logic DivOp,
input logic [1:0] PostProcSelM,
input logic CvtResDenormUfM,
input logic ToInt,
input logic CvtOp,
input logic CvtResUf,
input logic [`CORRSHIFTSZ-1:0] CorrShifted,
input logic AddendStickyM, // addend's sticky bit
input logic ZZeroM, // is Z zero
input logic InvZM, // invert Z
input logic [`NE+1:0] SumExp, // exponent of the normalized sum
input logic RoundSgn, // the result's sign
input logic [`NE:0] CvtCalcExpM, // the calculated expoent
input logic [`NE:0] CorrDivExp, // the calculated expoent
output logic UfPlus1, // do you add or subtract on from the result
output logic [`NE+1:0] FullResExp, // ResExp with bits to determine sign and overflow
output logic [`NF-1:0] ResFrac, // Result fraction
output logic [`NE-1:0] ResExp, // Result exponent
output logic Sticky, // sticky bit
output logic [`NE+1:0] RoundExp,
output logic Plus1,
output logic [`FLEN:0] RoundAdd, // how much to add to the result
output logic Round, UfLSBRes // bits needed to calculate rounding
input logic [`FMTBITS-1:0] OutFmt, // precision 1 = double 0 = single
input logic [2:0] FrmM, // rounding mode
input logic FmaOp,
input logic DivOp,
input logic CvtOp,
input logic ToInt,
input logic [1:0] PostProcSelM,
input logic CvtResDenormUfM,
input logic CvtResUf,
input logic [`CORRSHIFTSZ-1:0] CorrShifted,
input logic AddendStickyM, // addend's sticky bit
input logic ZZeroM, // is Z zero
input logic InvZM, // invert Z
input logic [`NE+1:0] SumExp, // exponent of the normalized sum
input logic RoundSgn, // the result's sign
input logic [`NE:0] CvtCalcExpM, // the calculated expoent
input logic [`NE+1:0] CorrDivExp, // the calculated expoent
input logic DivStickyM, // sticky bit
input logic DivNegStickyM,
output logic UfPlus1, // do you add or subtract on from the result
output logic [`NE+1:0] FullResExp, // ResExp with bits to determine sign and overflow
output logic [`NF-1:0] ResFrac, // Result fraction
output logic [`NE-1:0] ResExp, // Result exponent
output logic Sticky, // sticky bit
output logic [`NE+1:0] RoundExp,
output logic Plus1,
output logic [`FLEN:0] RoundAdd, // how much to add to the result
output logic Round, UfLSBRes // bits needed to calculate rounding
);
logic LSBRes; // bit used for rounding - least significant bit of the normalized sum
logic SubBySmallNum, UfSubBySmallNum; // was there supposed to be a subtraction by a small number
logic UfCalcPlus1, CalcMinus1, Minus1; // do you add or subtract on from the result
logic NormSumSticky; // normalized sum's sticky bit
logic UfSticky; // sticky bit for underlow calculation
logic NormSumSticky; // normalized sum's sticky bit
logic UfSticky; // sticky bit for underlow calculation
logic [`NF-1:0] RoundFrac;
logic FpRes, IntRes;
logic FpRes, IntRes;
logic UfRound;
logic FpRound, FpLSBRes, FpUfRound;
logic CalcPlus1, FpPlus1;
@ -149,7 +151,7 @@ module round(
// only add the Addend sticky if doing an FMA opperation
// - the shifter shifts too far left when there's an underflow (shifting out all possible sticky bits)
assign UfSticky = AddendStickyM&FmaOp | NormSumSticky | CvtResUf&CvtOp | SumExp[`NE+1]&FmaOp;
assign UfSticky = AddendStickyM&FmaOp | NormSumSticky | CvtResUf&CvtOp | SumExp[`NE+1]&FmaOp | DivStickyM&DivOp;
// determine round and LSB of the rounded value
// - underflow round bit is used to determint the underflow flag
@ -223,9 +225,11 @@ module round(
assign Sticky = UfSticky | UfRound;
// Deterimine if a small number was supposed to be subtrated - For Fma calculation only
assign SubBySmallNum = AddendStickyM & InvZM & ~(NormSumSticky|UfRound) & ~ZZeroM & FmaOp;
assign UfSubBySmallNum = AddendStickyM & InvZM & ~(NormSumSticky) & ~ZZeroM & FmaOp;
// Deterimine if a small number was supposed to be subtrated
// - for FMA or if division has a negitive sticky bit
assign SubBySmallNum = ((AddendStickyM&FmaOp&~ZZeroM&InvZM) | (DivNegStickyM&DivOp)) & ~(NormSumSticky|UfRound);
assign UfSubBySmallNum = ((AddendStickyM&FmaOp&~ZZeroM&InvZM) | (DivNegStickyM&DivOp)) & ~NormSumSticky;
always_comb begin
// Determine if you add 1
@ -305,7 +309,7 @@ module round(
case(PostProcSelM)
2'b10: RoundExp = SumExp; // fma
2'b00: RoundExp = {CvtCalcExpM[`NE], CvtCalcExpM}&{`NE+2{~CvtResDenormUfM|CvtResUf}}; // cvt
2'b01: RoundExp = {CorrDivExp[`NE], CorrDivExp[`NE:0]}; // divide
2'b01: RoundExp = CorrDivExp; // divide
default: RoundExp = 0;
endcase

View File

@ -124,12 +124,18 @@ module datapath (
flopenrc #(5) RdWReg(clk, reset, FlushW, ~StallW, RdM, RdW);
// floating point interactions: fcvt, fp stores
if (`F_SUPPORTED) begin:fpmux
if (`F_SUPPORTED&(`LLEN>`XLEN)) begin:fpmux
logic [`XLEN-1:0] IFCvtResultW;
mux2 #(`XLEN) resultmuxM(IEUResultM, FIntResM, FWriteIntM, IFResultM);
assign WriteDataE = ForwardedSrcBE;
mux2 #(`XLEN) cvtresultmuxW(IFResultW, FCvtIntResW, ~FResSelW[1]&FResSelW[0], IFCvtResultW);
mux5 #(`XLEN) resultmuxW(IFCvtResultW, ReadDataW, CSRReadValW, MDUResultW, SCResultW, ResultSrcW, ResultW);
end else if (`F_SUPPORTED) begin:fpmux
logic [`XLEN-1:0] IFCvtResultW;
mux2 #(`XLEN) resultmuxM(IEUResultM, FIntResM, FWriteIntM, IFResultM);
mux2 #(`XLEN) writedatamux(ForwardedSrcBE, FWriteDataE, ~IllegalFPUInstrE, WriteDataE);
mux2 #(`XLEN) cvtresultmuxW(IFResultW, FCvtIntResW, ~FResSelW[1]&FResSelW[0], IFCvtResultW);
mux5 #(`XLEN) resultmuxW(IFCvtResultW, ReadDataW, CSRReadValW, MDUResultW, SCResultW, ResultSrcW, ResultW);
mux5 #(`XLEN) resultmuxW(IFCvtResultW, ReadDataW, CSRReadValW, MDUResultW, SCResultW, ResultSrcW, ResultW);
end else begin:fpmux
assign IFResultM = IEUResultM; assign WriteDataE = ForwardedSrcBE;
mux5 #(`XLEN) resultmuxW(IFResultW, ReadDataW, CSRReadValW, MDUResultW, SCResultW, ResultSrcW, ResultW);

View File

@ -227,7 +227,7 @@ module ifu (
icache(.clk, .reset, .CPUBusy, .IgnoreRequestTLB(ITLBMissF), .TrapM(TrapM), .IgnoreRequestTrapM('0),
.CacheBusWriteData(ICacheBusWriteData), .CacheBusAck(ICacheBusAck),
.CacheBusAdr(ICacheBusAdr), .CacheStall(ICacheStallF),
.CacheFetchLine(ICacheFetchLine),
.CacheFetchLine(ICacheFetchLine), .FWriteDataM(), .FpLoadStoreM(), .FLoad2(),
.CacheWriteLine(), .ReadDataWord(FinalInstrRawF),
.Cacheable(CacheableF),
.CacheMiss(ICacheMiss), .CacheAccess(ICacheAccess),

View File

@ -57,7 +57,9 @@ module lsu (
input logic BigEndianM,
input logic sfencevmaM,
// fpu
input logic FpLoadM,
input logic [`FLEN-1:0] FWriteDataM,
input logic FLoad2,
input logic FpLoadStoreM,
// faults
output logic LoadPageFaultM, StoreAmoPageFaultM,
output logic LoadMisalignedFaultM, LoadAccessFaultM,
@ -235,7 +237,7 @@ module lsu (
.NUMWAYS(`DCACHE_NUMWAYS), .LOGWPL(LOGWPL), .WORDLEN(`LLEN), .MUXINTERVAL(`XLEN), .DCACHE(1)) dcache(
.clk, .reset, .CPUBusy, .LSUBusWriteCrit, .RW(LSURWM), .Atomic(LSUAtomicM),
.FlushCache(FlushDCacheM), .NextAdr(LSUAdrE), .PAdr(LSUPAdrM),
.ByteMask(ByteMaskM), .WordCount,
.ByteMask(ByteMaskM), .WordCount, .FpLoadStoreM, .FWriteDataM, .FLoad2,
.FinalWriteData(FinalWriteDataM), .Cacheable(CacheableM),
.CacheStall(DCacheStallM), .CacheMiss(DCacheMiss), .CacheAccess(DCacheAccess),
.IgnoreRequestTLB, .IgnoreRequestTrapM, .TrapM(1'b0), .CacheCommitted(DCacheCommittedM),
@ -269,7 +271,7 @@ module lsu (
subwordwrite subwordwrite(.LSUPAdrM(LSUPAdrM[2:0]),
.LSUFunct3M, .AMOWriteDataM, .LittleEndianWriteDataM, .ByteMaskM);
subwordread subwordread(.ReadDataWordMuxM, .LSUPAdrM(LSUPAdrM[2:0]),
.FpLoadM, .Funct3M(LSUFunct3M), .ReadDataM);
.FpLoadStoreM, .Funct3M(LSUFunct3M), .ReadDataM);
/////////////////////////////////////////////////////////////////////////////////////////////
// MW Pipeline Register

View File

@ -35,7 +35,7 @@ module subwordread
input logic [`LLEN-1:0] ReadDataWordMuxM,
input logic [2:0] LSUPAdrM,
input logic [2:0] Funct3M,
input logic FpLoadM,
input logic FpLoadStoreM,
output logic [`LLEN-1:0] ReadDataM
);
@ -83,16 +83,16 @@ module subwordread
case(Funct3M)
3'b000: ReadDataM = {{`LLEN-8{ByteM[7]}}, ByteM}; // lb
3'b001: if(`ZFH_SUPPORTED)
ReadDataM = {{`LLEN-16{HalfwordM[15]|FpLoadM}}, HalfwordM[15:0]}; // lh/flh
ReadDataM = {{`LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh
else ReadDataM = {{`LLEN-16{HalfwordM[15]}}, HalfwordM[15:0]}; // lh
3'b010: if(`F_SUPPORTED)
ReadDataM = {{`LLEN-32{WordM[31]|FpLoadM}}, WordM[31:0]}; // lw/flw
ReadDataM = {{`LLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw
else ReadDataM = {{`LLEN-32{WordM[31]}}, WordM[31:0]}; // lw
3'b011: if(`D_SUPPORTED)
ReadDataM = {{`LLEN-64{DblWordM[63]|FpLoadM}}, DblWordM[63:0]}; // ld/fld
ReadDataM = {{`LLEN-64{DblWordM[63]|FpLoadStoreM}}, DblWordM[63:0]}; // ld/fld
else ReadDataM = {{`LLEN-64{DblWordM[63]}}, DblWordM[63:0]}; // ld/fld
3'b100: if(`Q_SUPPORTED)
ReadDataM = FpLoadM ? ReadDataWordMuxM : {{`LLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq
ReadDataM = FpLoadStoreM ? ReadDataWordMuxM : {{`LLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq
else
ReadDataM = {{`LLEN-8{1'b0}}, ByteM[7:0]}; // lbu
3'b101: ReadDataM = {{`LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu
@ -122,10 +122,10 @@ module subwordread
case(Funct3M)
3'b000: ReadDataM = {{`LLEN-8{ByteM[7]}}, ByteM}; // lb
3'b001: if(`ZFH_SUPPORTED)
ReadDataM = {{`LLEN-16{HalfwordM[15]|FpLoadM}}, HalfwordM[15:0]}; // lh/flh
ReadDataM = {{`LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh
else ReadDataM = {{`LLEN-16{HalfwordM[15]}}, HalfwordM[15:0]}; // lh
3'b010: if(`F_SUPPORTED)
ReadDataM = {{`LLEN-32{ReadDataWordMuxM[31]|FpLoadM}}, ReadDataWordMuxM[31:0]}; // lw/flw
ReadDataM = {{`LLEN-32{ReadDataWordMuxM[31]|FpLoadStoreM}}, ReadDataWordMuxM[31:0]}; // lw/flw
else ReadDataM = {{`LLEN-32{ReadDataWordMuxM[31]}}, ReadDataWordMuxM[31:0]}; // lw
3'b011: ReadDataM = ReadDataWordMuxM; // fld
3'b100: ReadDataM = {{`LLEN-8{1'b0}}, ByteM[7:0]}; // lbu

View File

@ -92,13 +92,15 @@ module wallypipelinedcore (
logic FStallD;
logic FWriteIntE;
logic [`XLEN-1:0] FWriteDataE;
logic FLoad2;
logic [`FLEN-1:0] FWriteDataM;
logic [`XLEN-1:0] FIntResM;
logic [`XLEN-1:0] FCvtIntResW;
logic FDivBusyE;
logic IllegalFPUInstrD, IllegalFPUInstrE;
logic FRegWriteM;
logic FPUStallD;
logic FpLoadM;
logic FpLoadStoreM;
logic [1:0] FResSelW;
logic [4:0] SetFflagsM;
@ -253,7 +255,8 @@ module wallypipelinedcore (
.AtomicM, .TrapM,
.CommittedM, .DCacheMiss, .DCacheAccess,
.SquashSCW,
.FpLoadM,
.FpLoadStoreM,
.FWriteDataM, .FLoad2,
//.DataMisalignedM(DataMisalignedM),
.IEUAdrE, .IEUAdrM, .WriteDataE,
.ReadDataW, .FlushDCacheM,
@ -391,10 +394,12 @@ module wallypipelinedcore (
.RdM, .RdW, // which FP register to write to (from IEU)
.STATUS_FS, // is floating-point enabled?
.FRegWriteM, // FP register write enable
.FpLoadM,
.FpLoadStoreM,
.FLoad2,
.FStallD, // Stall the decode stage
.FWriteIntE, // integer register write enable
.FWriteDataE, // Data to be written to memory
.FWriteDataM, // Data to be written to memory
.FIntResM, // data to be written to integer register
.FCvtIntResW, // fp -> int conversion result to be stored in int register
.FResSelW, // fpu result selection

View File

@ -23,5 +23,10 @@ qslc_sqrt_r4a2: qslc_sqrt_r4a2.c
gcc qslc_sqrt_r4a2.c -o qslc_sqrt_r4a2 -lm
./qslc_sqrt_r4a2 > qslc_sqrt_r4a2.sv
inttestgen: inttestgen.c
gcc -lm -o inttestgen inttestgen.c
./inttestgen
clean:
rm -f testgen exptestgen qslc_r4a2 qslc_r4a2b qslc_sqrt_r4a2

View File

@ -46,7 +46,7 @@ void main(void)
int i, j;
int bias = 1023;
if ((fptr = fopen("testvectors","w")) == NULL) {
if ((fptr = fopen("testvectors","w")) == NULL) {
fprintf(stderr, "Couldn't write testvectors file\n");
exit(1);
}

View File

@ -36,41 +36,45 @@ module srtradix4 (
input logic [`NE-1:0] XExpE, YExpE,
input logic [`NF:0] XManE, YManE,
input logic [`XLEN-1:0] SrcA, SrcB,
input logic XZeroE,
input logic XInfE, YInfE,
input logic XZeroE, YZeroE,
input logic XNaNE, YNaNE,
input logic W64, // 32-bit ints on XLEN=64
input logic Signed, // Interpret integers as signed 2's complement
input logic Int, // Choose integer inputs
input logic Sqrt, // perform square root, not divide
output logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2E,
output logic DivDone,
output logic DivStickyE,
output logic DivNegStickyE,
output logic [`DIVLEN+2:0] Quot,
output logic [`XLEN-1:0] Rem, // *** later handle integers
output logic [`NE:0] DivCalcExpE
output logic [`NE+1:0] DivCalcExpE
);
// logic qp, qz, qm; // quotient is +1, 0, or -1
logic [3:0] q;
logic [`NE:0] DivCalcExp;
logic [`DIVLEN:0] X;
logic [`NE+1:0] DivCalcExp;
logic [`DIVLEN-1:0] X;
logic [`DIVLEN-1:0] Dpreproc;
logic [`DIVLEN+3:0] WS, WSA, WSN;
logic [`DIVLEN+3:0] WC, WCA, WCN;
logic [`DIVLEN+3:0] D, DBar, D2, DBar2, Dsel;
logic [$clog2(`XLEN+1)-1:0] intExp;
logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt;
logic intSign;
srtpreproc preproc(SrcA, SrcB, XManE, YManE, W64, Signed, Int, Sqrt, X, Dpreproc, intExp, intSign);
srtpreproc preproc(.SrcA, .SrcB, .XManE, .YManE, .W64, .Signed, .Int, .Sqrt, .X,
.XZeroCnt, .YZeroCnt, .Dpreproc, .intExp, .intSign);
// Top Muxes and Registers
// When start is asserted, the inputs are loaded into the divider.
// Otherwise, the divisor is retained and the partial remainder
// is fed back for the next iteration.
// - assumed one is added here since all numbers are normlaized
// *** wait what about zero? is that specal case? can the divider handle it?
// - when the start signal is asserted X and 0 are loaded into WS and WC
// - otherwise load WSA into the flipflop
// *** what does N and A stand for?
// *** change shift amount for radix4
mux2 #(`DIVLEN+4) wsmux({WSA[`DIVLEN+1:0], 2'b0}, {3'b000, X}, DivStart, WSN);
// - the assumed one is added to D since it's always normalized (and X/0 is a special case handeled by result selection)
// - XZeroE is used as the assumed one to avoid creating a sticky bit - all other numbers are normalized
mux2 #(`DIVLEN+4) wsmux({WSA[`DIVLEN+1:0], 2'b0}, {3'b000, ~XZeroE, X}, DivStart, WSN);
flop #(`DIVLEN+4) wsflop(clk, WSN, WS);
mux2 #(`DIVLEN+4) wcmux({WCA[`DIVLEN+1:0], 2'b0}, {`DIVLEN+4{1'b0}}, DivStart, WCN);
flop #(`DIVLEN+4) wcflop(clk, WCN, WC);
@ -88,7 +92,7 @@ module srtradix4 (
qsel4 qsel4(.D, .WS, .WC, .q);
// Store the expoenent and sign until division is DivDone
flopen #(`NE+1) expflop(clk, DivStart, DivCalcExp, DivCalcExpE);
flopen #(`NE+2) expflop(clk, DivStart, DivCalcExp, DivCalcExpE);
// Divisor Selection logic
// *** radix 4 change to choose -2 to 2
@ -114,9 +118,10 @@ module srtradix4 (
//*** change for radix 4
otfc4 otfc4(.clk, .DivStart, .q, .Quot);
expcalc expcalc(.XExpE, .YExpE, .XZeroE, .DivCalcExp);
expcalc expcalc(.XExpE, .YExpE, .XZeroE, .XZeroCnt, .YZeroCnt, .DivCalcExp);
divcounter divcounter(clk, DivStart, DivDone);
earlytermination earlytermination(.clk, .WC, .WS, .XZeroE, .YZeroE, .XInfE, .EarlyTermShiftDiv2E,
.YInfE, .XNaNE, .YNaNE, .DivStickyE, .DivNegStickyE, .DivStart, .DivDone);
endmodule
@ -124,28 +129,36 @@ endmodule
// Submodules //
////////////////
/////////////
// counter //
/////////////
module divcounter(input logic clk,
input logic DivStart,
output logic DivDone);
module earlytermination(
input logic clk,
input logic [`DIVLEN+3:0] WS, WC,
input logic XInfE, YInfE,
input logic XZeroE, YZeroE,
input logic XNaNE, YNaNE,
input logic DivStart,
output logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2E,
output logic DivStickyE,
output logic DivNegStickyE,
output logic DivDone);
logic [5:0] count;
// This block of control logic sequences the divider
// through its iterations. You may modify it if you
// build a divider which completes in fewer iterations.
// You are not responsible for the (trivial) circuit
// design of the block.
logic [$clog2(`DIVLEN/2+3)-1:0] Count;
logic WZero;
logic [`DIVLEN+3:0] W;
assign WZero = ((WS^WC)=={WS[`DIVLEN+2:0]|WC[`DIVLEN+2:0], 1'b0})|XZeroE|YZeroE|XInfE|YInfE|XNaNE|YNaNE;
assign DivDone = (DivStickyE | WZero);
assign DivStickyE = ~|Count;
assign W = WC+WS;
assign DivNegStickyE = W[`DIVLEN+3]; //*** is there a better way to do this???
assign EarlyTermShiftDiv2E = Count;
// +1 for setup
// `DIVLEN/2 to get required number of bits
// +1 for possible .5 and round bit
// Count down Counter
always @(posedge clk)
begin
DivDone = 0;
if (count == `DIVLEN/2+1) DivDone <= #1 1;
else if (DivDone | DivStart) DivDone <= #1 0;
if (DivStart) count <= #1 0;
else count <= #1 count+1;
if (DivStart) Count <= #1 `DIVLEN/2+2;
else Count <= #1 Count-1;
end
endmodule
@ -231,31 +244,34 @@ module srtpreproc (
input logic Signed, // Interpret integers as signed 2's complement
input logic Int, // Choose integer inputs
input logic Sqrt, // perform square root, not divide
output logic [`DIVLEN:0] X,
output logic [`DIVLEN-1:0] X,
output logic [`DIVLEN-1:0] Dpreproc,
output logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt,
output logic [$clog2(`XLEN+1)-1:0] intExp, // Quotient integer exponent
output logic intSign // Quotient integer sign
);
// logic [$clog2(`XLEN+1)-1:0] zeroCntA, zeroCntB;
// logic [`XLEN-1:0] PosA, PosB;
// logic [`DIVLEN-1:0] ExtraA, ExtraB, PreprocA, PreprocB, PreprocX, PreprocY;
logic [`DIVLEN:0] PreprocA, PreprocX;
logic [`DIVLEN-1:0] PreprocA, PreprocX;
logic [`DIVLEN-1:0] PreprocB, PreprocY;
// assign PosA = (Signed & SrcA[`XLEN - 1]) ? -SrcA : SrcA;
// assign PosB = (Signed & SrcB[`XLEN - 1]) ? -SrcB : SrcB;
// lzc #(`XLEN) lzcA (PosA, zeroCntA);
// lzc #(`XLEN) lzcB (PosB, zeroCntB);
// ***can probably merge X LZC with conversion
// cout the number of leading zeros
lzc #(`NF+1) lzcA (XManE, XZeroCnt);
lzc #(`NF+1) lzcB (YManE, YZeroCnt);
// assign ExtraA = {PosA, {`DIVLEN-`XLEN{1'b0}}};
// assign ExtraB = {PosB, {`DIVLEN-`XLEN{1'b0}}};
// assign PreprocA = ExtraA << zeroCntA;
// assign PreprocB = ExtraB << (zeroCntB + 1);
assign PreprocX = {XManE, {`DIVLEN-`NF{1'b0}}};
assign PreprocY = {YManE[`NF-1:0], {`DIVLEN-`NF{1'b0}}};
assign PreprocX = {XManE[`NF-1:0]<<XZeroCnt, {`DIVLEN-`NF{1'b0}}};
assign PreprocY = {YManE[`NF-1:0]<<YZeroCnt, {`DIVLEN-`NF{1'b0}}};
assign X = Int ? PreprocA : PreprocX;
@ -291,7 +307,7 @@ module otfc4 (
// if starting a new divison set Q to 0 and QM to -1
mux2 #(`DIVLEN+3) Qmux(QNext, {`DIVLEN+3{1'b0}}, DivStart, QMux);
mux2 #(`DIVLEN+3) QMmux(QMNext, {`DIVLEN+3{1'b1}}, DivStart, QMMux);
flop #(`DIVLEN+3) Qreg(clk, QMux, Quot);
flop #(`DIVLEN+3) Qreg(clk, QMux, Quot); // *** have to connect Quot directly to M stage
flop #(`DIVLEN+3) QMreg(clk, QMMux, QM);
// shift Q (quotent) and QM (quotent-1)
@ -322,8 +338,7 @@ module otfc4 (
QMNext = {QMR, 2'b11};
end
end
// Quot is in the range [.5, 2) so normalize the result if nesissary
// assign Quot = Q[`DIVLEN+2] ? Q[`DIVLEN+1:2] : Q[`DIVLEN:1];
// Final Quoteint is in the range [.5, 2)
endmodule
@ -358,9 +373,11 @@ endmodule
module expcalc(
input logic [`NE-1:0] XExpE, YExpE,
input logic XZeroE,
output logic [`NE:0] DivCalcExp
input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt,
output logic [`NE+1:0] DivCalcExp
);
assign DivCalcExp = (XExpE - YExpE + (`NE)'(`BIAS))&{`NE+1{~XZeroE}};
// correct exponent for denormalized input's normalization shifts
assign DivCalcExp = (XExpE - XZeroCnt - YExpE + YZeroCnt + (`NE)'(`BIAS))&{`NE+2{~XZeroE}};
endmodule

View File

@ -2,7 +2,7 @@
// srt.sv
//
// Written: David_Harris@hmc.edu 13 January 2022
// Modified:
// Modified: cturek@hmc.edu June 2022
//
// Purpose: Combined Divide and Square Root Floating Point and Integer Unit
//
@ -29,10 +29,8 @@
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
`define DIVLEN ((`NF<(`XLEN+1)) ? (`XLEN + 1) : `NF)
`define EXTRAFRACBITS ((`NF<(`XLEN+1)) ? (`XLEN - `NF + 1) : 0)
`define EXTRAINTBITS ((`NF<(`XLEN+1)) ? 0 : (`NF - `XLEN))
`define EXTRAFRACBITS ((`NF<(`XLEN)) ? (`XLEN - `NF) : 0)
`define EXTRAINTBITS ((`NF<(`XLEN)) ? 0 : (`NF - `XLEN))
module srt (
input logic clk,
@ -131,11 +129,11 @@ module srtpreproc (
lzc #(`XLEN) lzcA (PosA, zeroCntA);
lzc #(`XLEN) lzcB (PosB, zeroCntB);
assign ExtraA = {1'b0, PosA, {`EXTRAINTBITS{1'b0}}};
assign ExtraB = {1'b0, PosB, {`EXTRAINTBITS{1'b0}}};
assign ExtraA = {PosA, {`EXTRAINTBITS{1'b0}}};
assign ExtraB = {PosB, {`EXTRAINTBITS{1'b0}}};
assign PreprocA = ExtraA << zeroCntA;
assign PreprocB = ExtraB << (zeroCntB + 1);
assign PreprocB = ExtraB << zeroCntB;
assign PreprocX = {SrcXFrac, {`EXTRAFRACBITS{1'b0}}};
assign PreprocY = {SrcYFrac, {`EXTRAFRACBITS{1'b0}}};
@ -228,14 +226,15 @@ module otfc2 #(parameter N=65) (
//
// QM is Q-1. It allows us to write negative bits
// without using a costly CPA.
logic [N+2:0] Q, QM, QNext, QMNext;
logic [N+2:0] Q, QM, QNext, QMNext, QMMux;
// QR and QMR are the shifted versions of Q and QM.
// They are treated as [N-1:r] size signals, and
// discard the r most significant bits of Q and QM.
logic [N+1:0] QR, QMR;
flopr #(N+3) Qreg(clk, Start, QNext, Q);
flopr #(N+3) QMreg(clk, Start, QMNext, QM);
mux2 #(`DIVLEN+3) QMmux(QMNext, {`DIVLEN+3{1'b1}}, Start, QMMux);
flop #(`DIVLEN+3) QMreg(clk, QMMux, QM);
always_comb begin
QR = Q[N+1:0];

View File

@ -1,4 +1,4 @@
`define DIVLEN 65
`define DIVLEN 64
/////////////
// counter //
@ -17,7 +17,7 @@ module counter(input logic clk,
always @(posedge clk)
begin
if (count == `DIVLEN+1) done <= #1 1;
if (count == `DIVLEN + 2) done <= #1 1;
else if (done | req) done <= #1 0;
if (req) count <= #1 0;
else count <= #1 count+1;
@ -101,8 +101,8 @@ module testbench;
b = Vec[`memb];
{bsign, bExp, bfrac} = b;
nextr = Vec[`memr];
r = Quot[`DIVLEN:`DIVLEN - 52];
rOTFC = QuotOTFC[`DIVLEN:`DIVLEN - 52];
r = Quot[(`DIVLEN - 1):(`DIVLEN - 52)];
rOTFC = QuotOTFC[(`DIVLEN - 1):(`DIVLEN - 52)];
req <= #5 1;
end
@ -110,8 +110,8 @@ module testbench;
always @(posedge clk)
begin
r = Quot[`DIVLEN:`DIVLEN - 52];
rOTFC = QuotOTFC[`DIVLEN:`DIVLEN - 52];
r = Quot[(`DIVLEN - 1):(`DIVLEN - 52)];
rOTFC = QuotOTFC[(`DIVLEN - 1):(`DIVLEN - 52)];
if (done)
begin
req <= #5 1;

View File

@ -55,6 +55,7 @@ module testbenchfp;
logic [`LOGCVTLEN-1:0] CvtShiftAmtE; // how much to shift by
logic [`DIVLEN+2:0] Quot;
logic CvtResDenormUfE;
logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2;
logic DivStart, DivDone;
@ -69,8 +70,9 @@ module testbenchfp;
logic NegSumE;
logic ZSgnEffE;
logic PSgnE;
logic DivSgn;
logic [`NE:0] DivCalcExp;
logic DivSticky;
logic DivNegSticky;
logic [`NE+1:0] DivCalcExp;
///////////////////////////////////////////////////////////////////////////////////////////////
@ -644,13 +646,13 @@ module testbenchfp;
postprocess postprocess(.XSgnM(XSgn), .YSgnM(YSgn), .PostProcSelM(UnitVal[1:0]),
.ZExpM(ZExp), .ZDenormM(ZDenorm), .FOpCtrlM(OpCtrlVal), .Quot, .DivCalcExpM(DivCalcExp),
.XManM(XMan), .YManM(YMan), .ZManM(ZMan), .CvtCalcExpM(CvtCalcExpE),
.XNaNM(XNaN), .YNaNM(YNaN), .ZNaNM(ZNaN), .CvtResDenormUfM(CvtResDenormUfE),
.XManM(XMan), .YManM(YMan), .ZManM(ZMan), .CvtCalcExpM(CvtCalcExpE), .DivStickyM(DivSticky),
.XNaNM(XNaN), .YNaNM(YNaN), .ZNaNM(ZNaN), .CvtResDenormUfM(CvtResDenormUfE), .DivNegStickyM(DivNegSticky),
.XZeroM(XZero), .YZeroM(YZero), .ZZeroM(ZZero), .CvtShiftAmtM(CvtShiftAmtE),
.XInfM(XInf), .YInfM(YInf), .ZInfM(ZInf), .CvtResSgnM(CvtResSgnE), .FWriteIntM(WriteIntVal),
.XSNaNM(XSNaN), .YSNaNM(YSNaN), .ZSNaNM(ZSNaN), .CvtLzcInM(CvtLzcInE), .IntZeroM(IntZeroE),
.KillProdM(KillProdE), .AddendStickyM(AddendStickyE), .ProdExpM(ProdExpE),
.SumM(SumE), .NegSumM(NegSumE), .InvZM(InvZE), .FmaNormCntM(FmaNormCntE), .ZSgnEffM(ZSgnEffE), .PSgnM(PSgnE), .FmtM(ModFmt), .FrmM(FrmVal),
.SumM(SumE), .NegSumM(NegSumE), .InvZM(InvZE), .FmaNormCntM(FmaNormCntE), .EarlyTermShiftDiv2M(EarlyTermShiftDiv2), .ZSgnEffM(ZSgnEffE), .PSgnM(PSgnE), .FmtM(ModFmt), .FrmM(FrmVal),
.PostProcFlgM(Flg), .PostProcResM(FpRes), .FCvtIntResM(IntRes));
fcvt fcvt (.XSgnE(XSgn), .XExpE(XExp), .XManE(XMan), .ForwardedSrcAE(SrcA), .FWriteIntE(WriteIntVal),
@ -659,9 +661,9 @@ module testbenchfp;
fcmp fcmp (.FmtE(ModFmt), .FOpCtrlE(OpCtrlVal), .XSgnE(XSgn), .YSgnE(YSgn), .XExpE(XExp), .YExpE(YExp),
.XManE(XMan), .YManE(YMan), .XZeroE(XZero), .YZeroE(YZero), .CmpIntResE(CmpRes),
.XNaNE(XNaN), .YNaNE(YNaN), .XSNaNE(XSNaN), .YSNaNE(YSNaN), .FSrcXE(X), .FSrcYE(Y), .CmpNVE(CmpFlg[4]), .CmpFpResE(FpCmpRes));
srtradix4 srtradix4(.clk, .DivStart, .XExpE(XExp), .YExpE(YExp), .DivCalcExpE(DivCalcExp), .XZeroE(XZero),
.XManE(XMan), .YManE(YMan), .SrcA('0), .SrcB('0), .W64(1'b0), .Signed(1'b0), .Int(1'b0), .Sqrt(OpCtrlVal[0]),
.DivDone, .Quot, .Rem());
srtradix4 srtradix4(.clk, .DivStart, .XExpE(XExp), .YExpE(YExp), .DivCalcExpE(DivCalcExp), .XZeroE(XZero), .YZeroE(YZero), .DivStickyE(DivSticky),
.XManE(XMan), .YManE(YMan), .SrcA('0), .SrcB('0), .W64(1'b0), .Signed(1'b0), .Int(1'b0), .Sqrt(OpCtrlVal[0]), .XNaNE(XNaN), .YNaNE(YNaN),
.XInfE(XInf), .YInfE(YInf), .DivNegStickyE(DivNegSticky), .EarlyTermShiftDiv2E(EarlyTermShiftDiv2), .DivDone, .Quot, .Rem());
assign CmpFlg[3:0] = 0;
@ -814,8 +816,9 @@ end
///////////////////////////////////////////////////////////////////////////////////////////////
// check if the non-fma test is correct
if(~((Res === Ans | NaNGood | NaNGood === 1'bx) & (ResFlg === AnsFlg | AnsFlg === 5'bx))&(DivDone&(UnitVal == `DIVUNIT))&(UnitVal !== `CVTINTUNIT)&(UnitVal !== `CMPUNIT)) begin
// check if result is correct
// - wait till the division result is done or one extra cylcle for early termination (to simulate the EM pipline stage)
if(~((Res === Ans | NaNGood | NaNGood === 1'bx) & (ResFlg === AnsFlg | AnsFlg === 5'bx))&((~DivStart&DivDone)^~(UnitVal == `DIVUNIT))&(UnitVal !== `CVTINTUNIT)&(UnitVal !== `CMPUNIT)) begin
errors += 1;
$display("There is an error in %s", Tests[TestNum]);
$display("inputs: %h %h %h\nSrcA: %h\n Res: %h %h\n Ans: %h %h", X, Y, Z, SrcA, Res, ResFlg, Ans, AnsFlg);
@ -838,7 +841,7 @@ end
$stop;
end
if(DivDone|(UnitVal != `DIVUNIT)) VectorNum += 1; // increment the vector
if((~DivStart&DivDone)|(UnitVal != `DIVUNIT)) VectorNum += 1; // increment the vector
if (TestVectors[VectorNum][0] === 1'bx & Tests[TestNum] !== "") begin // if reached the end of file

View File

@ -68,6 +68,7 @@ logic [3:0] dummy;
integer ProgramAddrLabelArray [string] = '{ "begin_signature" : 0, "tohost" : 0 };
logic DCacheFlushDone, DCacheFlushStart;
logic riscofTest;
flopenr #(`XLEN) PCWReg(clk, reset, ~dut.core.ieu.dp.StallW, dut.core.ifu.PCM, PCW);
flopenr #(32) InstrWReg(clk, reset, ~dut.core.ieu.dp.StallW, dut.core.ifu.InstrM, InstrW);
@ -174,6 +175,8 @@ logic [3:0] dummy;
totalerrors = 0;
testadr = 0;
testadrNoBase = 0;
// riscof tests have a different signature, tests[0] == "1" refers to RiscvArchTests and tests[0] == "2" refers to WallyRiscvArchTests
riscofTest = tests[0] == "1"; // | tests[0] == "2";
// fill memory with defined values to reduce Xs in simulation
// Quick note the memory will need to be initialized. The C library does not
// guarantee the initialized reads. For example a strcmp can read 6 byte
@ -250,8 +253,7 @@ logic [3:0] dummy;
for(i=0; i<SIGNATURESIZE; i=i+1) begin
sig32[i] = 'bx;
end
// riscof tests have a different signature, tests[0] == "1" refers to RISCVARCHTESTs
if (tests[0] == "1") signame = {pathname, tests[test], "erence-sail_c_simulator.signature"};
if (riscofTest) signame = {pathname, tests[test], "erence-sail_c_simulator.signature"};
else signame = {pathname, tests[test], ".signature.output"};
// read signature, reformat in 64 bits if necessary
$readmemh(signame, sig32);

View File

@ -33,8 +33,8 @@
string tvpaths[] = '{
"../../addins/imperas-riscv-tests/work/",
"../../tests/riscof/work/",
"../../tests/wally-riscv-arch-test/work/",
"../../tests/riscof/work/riscv-arch-test/",
"../../tests/wally-riscv-arch-test/work/", //"../../tests/riscof/work/wally-riscv-arch-test/",
"../../tests/imperas-riscv-tests/work/",
"../../benchmarks/riscv-coremark/work/",
"../../addins/embench-iot/"
@ -1601,6 +1601,95 @@ string wally32i[] = '{
string wally32periph[] = '{
`WALLYTEST,
"rv32i_m/privilege/WALLY-gpio-01"
"rv32i_m/privilege/WALLY-gpio-01",
"rv32i_m/privilege/WALLY-clint-01"
// "rv32i_m/privilege/WALLY-plic-01"
// "rv32i_m/privilege/WALLY-uart-01"
};
// riscof test paths, to replace existing paths once riscof flow is working
// string wally64a[] = '{
// `WALLYTEST,
// "rv64i_m/privilege/src/WALLY-amo.S/ref/Ref",
// "rv64i_m/privilege/src/WALLY-lrsc.S/ref/Ref",
// "rv64i_m/privilege/src/WALLY-status-fp-enabled-01.S/ref/Ref"
// };
// string wally32a[] = '{
// `WALLYTEST,
// "rv32i_m/privilege/src/WALLY-amo.S/ref/Ref",
// "rv32i_m/privilege/src/WALLY-lrsc.S/ref/Ref",
// "rv32i_m/privilege/src/WALLY-status-fp-enabled-01.S/ref/Ref"
// };
// string wally64i[] = '{
// `WALLYTEST,
// "rv64i_m/I/src/WALLY-ADD.S/ref/Ref",
// "rv64i_m/I/src/WALLY-SLT.S/ref/Ref",
// "rv64i_m/I/src/WALLY-SLTU.S/ref/Ref",
// "rv64i_m/I/src/WALLY-SUB.S/ref/Ref",
// "rv64i_m/I/src/WALLY-XOR.S/ref/Ref"
// };
// string wally64priv[] = '{
// `WALLYTEST,
// "rv64i_m/privilege/src/WALLY-csr-permission-s-01.S/ref/Ref",
// "rv64i_m/privilege/src/WALLY-csr-permission-u-01.S/ref/Ref",
// "rv64i_m/privilege/src/WALLY-mie-01.S/ref/Ref",
// "rv64i_m/privilege/src/WALLY-minfo-01.S/ref/Ref",
// "rv64i_m/privilege/src/WALLY-misa-01.S/ref/Ref",
// "rv64i_m/privilege/src/WALLY-mmu-sv39.S/ref/Ref",
// "rv64i_m/privilege/src/WALLY-mmu-sv48.S/ref/Ref",
// "rv64i_m/privilege/src/WALLY-mtvec-01.S/ref/Ref",
// "rv64i_m/privilege/src/WALLY-pma.S/ref/Ref",
// "rv64i_m/privilege/src/WALLY-pmp.S/ref/Ref",
// "rv64i_m/privilege/src/WALLY-sie-01.S/ref/Ref",
// "rv64i_m/privilege/src/WALLY-status-mie-01.S/ref/Ref",
// "rv64i_m/privilege/src/WALLY-status-sie-01.S/ref/Ref",
// "rv64i_m/privilege/src/WALLY-status-tw-01.S/ref/Ref",
// "rv64i_m/privilege/src/WALLY-stvec-01.S/ref/Ref",
// "rv64i_m/privilege/src/WALLY-trap-01.S/ref/Ref",
// "rv64i_m/privilege/src/WALLY-trap-s-01.S/ref/Ref",
// "rv64i_m/privilege/src/WALLY-trap-sret-01.S/ref/Ref",
// "rv64i_m/privilege/src/WALLY-trap-u-01.S/ref/Ref",
// "rv64i_m/privilege/src/WALLY-wfi-01.S/ref/Ref"
// };
// string wally64periph[] = '{
// `WALLYTEST,
// "rv64i_m/privilege/src/WALLY-periph.S/ref/Ref"
// };
// string wally32i[] = '{
// `WALLYTEST,
// "rv32i_m/I/src/WALLY-ADD.S/ref/Ref",
// "rv32i_m/I/src/WALLY-SLT.S/ref/Ref",
// "rv32i_m/I/src/WALLY-SLTU.S/ref/Ref",
// "rv32i_m/I/src/WALLY-SUB.S/ref/Ref",
// "rv32i_m/I/src/WALLY-XOR.S/ref/Ref"
// };
// string wally32priv[] = '{
// `WALLYTEST,
// "rv32i_m/privilege/src/WALLY-csr-permission-s-01.S/ref/Ref",
// "rv32i_m/privilege/src/WALLY-csr-permission-u-01.S/ref/Ref",
// "rv32i_m/privilege/src/WALLY-mie-01.S/ref/Ref",
// "rv32i_m/privilege/src/WALLY-minfo-01.S/ref/Ref",
// "rv32i_m/privilege/src/WALLY-misa-01.S/ref/Ref",
// "rv32i_m/privilege/src/WALLY-mmu-sv32.S/ref/Ref",
// "rv32i_m/privilege/src/WALLY-mtvec-01.S/ref/Ref",
// "rv32i_m/privilege/src/WALLY-pma.S/ref/Ref",
// "rv32i_m/privilege/src/WALLY-pmp.S/ref/Ref",
// "rv32i_m/privilege/src/WALLY-sie-01.S/ref/Ref",
// "rv32i_m/privilege/src/WALLY-status-mie-01.S/ref/Ref",
// "rv32i_m/privilege/src/WALLY-status-sie-01.S/ref/Ref",
// "rv32i_m/privilege/src/WALLY-status-tw-01.S/ref/Ref",
// "rv32i_m/privilege/src/WALLY-stvec-01.S/ref/Ref",
// "rv32i_m/privilege/src/WALLY-trap-01.S/ref/Ref",
// "rv32i_m/privilege/src/WALLY-trap-s-01.S/ref/Ref",
// "rv32i_m/privilege/src/WALLY-trap-sret-01.S/ref/Ref",
// "rv32i_m/privilege/src/WALLY-trap-u-01.S/ref/Ref",
// "rv32i_m/privilege/src/WALLY-wfi-01.S/ref/Ref"
// };

View File

@ -5,8 +5,8 @@ NAME := synth
# defaults
export DESIGN ?= wallypipelinedcore
export FREQ ?= 4000
export CONFIG ?= rv64gc
export FREQ ?= 3402
export CONFIG ?= rv32e
# sky130 and sky90 presently supported
export TECH ?= tsmc28
# MAXCORES allows parallel compilation, which is faster but less CPU-efficient
@ -126,6 +126,8 @@ clean:
rm -f command.log
rm -f filenames*.log
rm -f power.saif
rm -f Synopsys_stack_trace_*.txt
rm -f crte_*.txt

View File

@ -7,6 +7,7 @@ import subprocess
from matplotlib.cbook import flatten
import matplotlib.pyplot as plt
import matplotlib.lines as lines
from wallySynth import testFreq
def synthsintocsv():
@ -26,7 +27,7 @@ def synthsintocsv():
writer.writerow(['Width', 'Config', 'Special', 'Tech', 'Target Freq', 'Delay', 'Area'])
for oneSynth in allSynths:
descrip = specReg.findall(oneSynth)
descrip = specReg.findall(oneSynth) #[30:]
width = descrip[2][:4]
config = descrip[2][4:]
if descrip[3][-2:] == 'nm':
@ -46,7 +47,7 @@ def synthsintocsv():
nums = [float(m) for m in nums]
metrics += nums
except:
print(config + tech + freq + " doesn't have reports")
print(width + config + tech + '_' + freq + " doesn't have reports")
if metrics == []:
pass
else:
@ -56,7 +57,7 @@ def synthsintocsv():
file.close()
def synthsfromcsv(filename):
Synth = namedtuple("Synth", " width config special tech freq delay area")
Synth = namedtuple("Synth", "width config special tech freq delay area")
with open(filename, newline='') as csvfile:
csvreader = csv.reader(csvfile)
global allSynths
@ -110,23 +111,26 @@ def freqPlot(tech, width, config):
plt.savefig('./plots/wally/freqSweep_' + tech + '_' + width + config + '.png')
# plt.show()
def areaDelay(width, tech, freq, config=None, special=None):
def areaDelay(tech, freq, width=None, config=None, special=None):
delays, areas, labels = ([] for i in range(3))
for oneSynth in allSynths:
if (width == oneSynth.width) & (tech == oneSynth.tech) & (freq == oneSynth.freq):
if (special != None) & (oneSynth.special == special):
delays += [oneSynth.delay]
areas += [oneSynth.area]
labels += [oneSynth.config]
elif (config != None) & (oneSynth.config == config):
delays += [oneSynth.delay]
areas += [oneSynth.area]
labels += [oneSynth.special]
else:
delays += [oneSynth.delay]
areas += [oneSynth.area]
labels += [oneSynth.config + '_' + oneSynth.special]
if (width==None) or (width == oneSynth.width):
if (tech == oneSynth.tech) & (freq == oneSynth.freq):
if (special != None) & (oneSynth.special == special):
delays += [oneSynth.delay]
areas += [oneSynth.area]
labels += [oneSynth.width + oneSynth.config]
elif (config != None) & (oneSynth.config == config):
delays += [oneSynth.delay]
areas += [oneSynth.area]
labels += [oneSynth.special]
# else:
# delays += [oneSynth.delay]
# areas += [oneSynth.area]
# labels += [oneSynth.config + '_' + oneSynth.special]
if width == None:
width = ''
f, (ax1) = plt.subplots(1, 1)
plt.scatter(delays, areas)
@ -154,8 +158,11 @@ def areaDelay(width, tech, freq, config=None, special=None):
# ending freq in 42 means fpu was turned off manually
if __name__ == '__main__':
synthsintocsv()
# synthsintocsv()
synthsfromcsv('Summary.csv')
freqPlot('tsmc28', 'rv64', 'gc')
areaDelay('rv32', 'tsmc28', 4200, config='gc')
areaDelay('rv32', 'tsmc28', 3042, special='')
freqPlot('tsmc28', 'rv32', 'e')
freqPlot('sky90', 'rv32', 'e')
areaDelay('tsmc28', testFreq[1], width= 'rv64', config='gc')
areaDelay('tsmc28', testFreq[1], special='')
areaDelay('sky90', testFreq[0], width='rv64', config='gc')
areaDelay('sky90', testFreq[0], special='')

View File

@ -1,5 +1,6 @@
#!/usr/bin/bash
make clean
mv runs runArchive/$(date +"%Y_%m_%d_%I_%M_%p")
mv newRuns runs
mkdir newRuns

View File

@ -8,20 +8,22 @@ def runCommand(config, tech, freq):
command = "make synth DESIGN=wallypipelinedcore CONFIG={} TECH={} DRIVE=FLOP FREQ={} MAXOPT=0 MAXCORES=1".format(config, tech, freq)
subprocess.Popen(command, shell=True)
testFreq = [3000, 10000]
if __name__ == '__main__':
techs = ['sky90', 'tsmc28']
bestAchieved = [750, 3000]
sweepCenter = [870, 3000]
synthsToRun = []
arr = [-8, -6, -4, -2, 0, 2, 4, 6, 8]
for i in [0, 1]:
tech = techs[i]
f = bestAchieved[i]
for freq in [round(f+f*x/100) for x in arr]: # rv32e freq sweep
sc = sweepCenter[i]
f = testFreq[i]
for freq in [round(sc+sc*x/100) for x in arr]: # rv32e freq sweep
synthsToRun += [['rv32e', tech, freq]]
for config in ['rv32gc', 'rv32ic', 'rv64gc', 'rv64i', 'rv64ic']: # configs
for config in ['rv32gc', 'rv32ic', 'rv64gc', 'rv64i', 'rv64ic', 'rv32e']: # configs
synthsToRun += [[config, tech, f]]
for mod in ['FPUoff', 'noMulDiv', 'noPriv', 'PMP0', 'PMP16']: # rv64gc path variations
config = 'rv64gc_' + mod

View File

@ -1,20 +1,40 @@
arch_dir = ../../addins/riscv-arch-test
wally_dir = ../wally-riscv-arch-test
work_dir = ./riscof_work
work = ./work
arch_workdir = $(work)/riscv-arch-test
wally_workdir = $(work)/wally-riscv-arch-test
current_dir = $(shell pwd)
XLEN ?= 64
all: build
all: root build_arch # build_wally memfile
build:
root:
mkdir -p $(work_dir)
mkdir -p work
mkdir -p $(work)
mkdir -p $(arch_workdir)
mkdir -p $(wally_workdir)
sed 's,{0},$(current_dir),g;s,{1},$(XLEN)$(if $(findstring 64,$(XLEN)),gc,imc),g' config.ini > config$(XLEN).ini
build_arch:
riscof run --work-dir=$(work_dir) --config=config$(XLEN).ini --suite=$(arch_dir)/riscv-test-suite/ --env=$(arch_dir)/riscv-test-suite/env --no-browser
rm -rf work/rv$(XLEN)i_m
mv -f $(work_dir)/rv$(XLEN)i_m work/
rm -rf $(arch_workdir)/rv$(XLEN)i_m
mv -f $(work_dir)/rv$(XLEN)i_m $(arch_workdir)/
build_wally:
riscof --verbose debug run --work-dir=$(work_dir) --config=config$(XLEN).ini --suite=$(wally_dir)/riscv-test-suite/ --env=$(wally_dir)/riscv-test-suite/env --no-browser --no-dut-run
rm -rf $(wally_workdir)/rv$(XLEN)i_m
mv -f $(work_dir)/rv$(XLEN)i_m $(wally_workdir)/
memfile:
find $(work) -type f -name "*.elf" | grep "rv64i_m" | while read f; do riscv64-unknown-elf-elf2hex --bit-width 64 --input "$$f" --output "$$f.memfile"; done
find $(work) -type f -name "*.elf" | grep "rv32i_m" | while read f; do riscv64-unknown-elf-elf2hex --bit-width 32 --input "$$f" --output "$$f.memfile"; done
find $(work) -type f -name "*.elf.objdump" | while read f; do extractFunctionRadix.sh $$f; done
clean:
rm -f config64.ini
rm -f config32.ini
rm -rf $(work_dir)
rm -rf work
rm -rf $(wally_workdir)
rm -rf $(arch_workdir)

View File

@ -101,7 +101,7 @@ class sail_cSim(pluginTemplate):
execute += self.objdump_cmd.format(elf, self.xlen, 'Ref.elf.objdump')
sig_file = os.path.join(test_dir, self.name[:-1] + ".signature")
execute += self.sail_exe[self.xlen] + ' --test-signature={0} {1} > {2}.log 2>&1;'.format(sig_file, elf, test_name)
execute += self.sail_exe[self.xlen] + ' -z268435455 --test-signature={0} {1} > {2}.log 2>&1;'.format(sig_file, elf, test_name)
cov_str = ' '
for label in testentry['coverage_labels']:

View File

@ -1,11 +1,11 @@
hart_ids: [0]
hart0:
ISA: RV32IMFCZicsr_Zifencei
ISA: RV32IMAFCZicsr_Zifencei
physical_addr_sz: 32
User_Spec_Version: '2.3'
supported_xlen: [32]
misa:
reset-val: 0x40001124
reset-val: 0x40001125
rv32:
accessible: true
mxl:
@ -23,7 +23,6 @@ hart0:
warl:
dependency_fields: []
legal:
- extensions[25:0] bitmask [0x0001124, 0x0000000]
- extensions[25:0] bitmask [0x0001125, 0x0000000]
wr_illegal:
- Unchanged
- Unchanged

View File

@ -54,6 +54,7 @@ target_tests_nosim = \
WALLY-status-sie-01 \
WALLY-status-tw-01 \
WALLY-gpio-01 \
WALLY-clint-01 \
rv32i_tests = $(addsuffix .elf, $(rv32i_sc_tests))

View File

@ -0,0 +1,9 @@
00000000 # msip zero on reset
00000000 # mip is zero
00000008 # mip msip bit is set
00000000 # mip msip bit is reset
00000000 # mip mtip bit is reset
FFFFFFFF # mtimecmp is same as written value
A5A5A5A5 # mtimecmph is same as written value
00000000 # mip mtip is zero
00000080 # mip mtip is set

View File

@ -1,5 +1,18 @@
00000000 # test reset to zero
00000000
00000000 # output_en
00000000 # output_val
00000000 # rise_ie
00000000 # rise_ip
00000000 # fall_ie
00000000 # fall_ip
00000000 # high_ie
00000000 # high_ip
00000000 # fall_ie
ffffffff # fall_ip
00000000 # iof_en
00000000 # iof_sel
00000000 # out_xor
A5A5A5A5 # test output pins
5A5AFFFF
00000000 # test input enables

View File

@ -0,0 +1,103 @@
///////////////////////////////////////////
//
// WALLY-gpio
//
// Author: David_Harris@hmc.edu and Nicholas Lucio <nlucio@hmc.edu>
//
// Created 2022-06-16
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
#include "WALLY-TEST-LIB-32.h"
INIT_TESTS
TRAP_HANDLER m
j run_test_loop // begin test loop/table tests instead of executing inline code.
INIT_TEST_TABLE
END_TESTS
TEST_STACK_AND_DATA
.align 2
test_cases:
# ---------------------------------------------------------------------------------------------
# Test Contents
#
# Here is where the actual tests are held, or rather, what the actual tests do.
# each entry consists of 3 values that will be read in as follows:
#
# '.4byte [x28 Value], [x29 Value], [x30 value]'
# or
# '.4byte [address], [value], [test type]'
#
# The encoding for x30 test type values can be found in the test handler in the framework file
#
# ---------------------------------------------------------------------------------------------
# =========== Define CLINT registers ===========
.equ CLINT, 0x02000000
.equ msip, (CLINT+0x00)
.equ mtimecmp, (CLINT+0x4000) # doesn't necessarily reset to zero
.equ mtimecmph,(CLINT+0x4004)
.equ mtime, (CLINT+0xBFF8) # resets to zero but cannot be easily tested
.equ mtimeh, (CLINT+0xBFFC)
# =========== Verify verifiable registers reset to zero ===========
.4byte msip, 0x00000000, read32_test # msip reset to zero
# =========== msip tests ===========
.4byte msip, 0xFFFFFFFE, write32_test # write to invalid bits of msip
.4byte 0x0, 0x00000000, readmip_test # msip bit should be zero
.4byte msip, 0x00000001, write32_test # set msip to one
.4byte 0x0, 0x00000008, readmip_test # msip bit is set
.4byte msip, 0x00000000, write32_test # set msip to zero
.4byte 0x0, 0x00000000, readmip_test # msip bit is released
# =========== mtime write tests ===========
.4byte mtime, 0x00000000, write32_test # test we can write to mtime
.4byte mtimeh, 0x00000000, write32_test # test we can write to mtimeh
.4byte 0x0,0x00000000, readmip_test # mtip bit should be zero
# =========== mtimecmp tests ===========
.4byte mtimecmp, 0xFFFFFFFF, write32_test # verify mtimecmp is writable
.4byte mtimecmph, 0xA5A5A5A5, write32_test # verify mtimecmph is writable
.4byte mtimecmp, 0xFFFFFFFF, read32_test # read back value written to mtimecmp
.4byte mtimecmph, 0xA5A5A5A5, read32_test # read back value written to mtimecmph
.4byte mtime, 0xFFFFFFFF, write32_test # write to mtime
.4byte 0x0, 0x00000000, readmip_test # mtip should still be zero
.4byte mtimeh, 0xA5A5A5A6, write32_test # cause mtip to go high by making mtime > mtimecmp
.4byte 0x0, 0x00000080, readmip_test # mtip should be set
.4byte 0x0, 0x0, terminate_test # terminate tests
# =========== Experimental mtime counting test ===========
# .4byte mtimecmph, 0xFFFFFFFF, write32_test # make sure mtip isn't set until ready
# .4byte mtimeh, 0x0FFFFFFF, write32_test # write near max value to mtimeh
# .4byte mtime, 0x00000000, write32_test # write small value to mtime
# .4byte 0x0, 0x000000000, readmip_test # mtip should be zero
# .4byte mtimecmp, 0x00000001, write32_test # write slightly larger value than mtime to test mtime counting
# .4byte mtimecmph, 0x0FFFFFFF, write32_test # write same value as mtimeh to test mtime counting
# .4byte 0x0, 0x00000080, readmip_test # mtip should be set since it has been at least two cycles

View File

@ -70,9 +70,21 @@ test_cases:
# =========== Verify all registers reset to zero ===========
.4byte input_val, 0x00000000, read32_test # input_val reset to zero
.4byte input_en, 0x00000000, read32_test # input_en reset to zero
# *** add more
.4byte input_val, 0x00000000, read32_test # input_val reset to zero
.4byte input_en, 0x00000000, read32_test # input_en reset to zero
.4byte output_en, 0x00000000, read32_test # output_en reset to zero
.4byte output_val, 0x00000000, read32_test # output_val reset to zero
.4byte rise_ie, 0x00000000, read32_test # rise_ie reset to zero
.4byte rise_ip, 0x00000000, read32_test # rise_ip reset to zero
.4byte fall_ie, 0x00000000, read32_test # fall_ie reset to zero
.4byte fall_ip, 0xffffffff, read32_test # fall_ip reset to ones (input_val is zero)
.4byte high_ie, 0x00000000, read32_test # high_ie reset to zero
.4byte high_ip, 0x00000000, read32_test # high_ip reset to zero
.4byte low_ie, 0x00000000, read32_test # low_ie reset to zero
.4byte low_ip, 0x00000000, read32_test # low_ip reset to zero
.4byte iof_en, 0x00000000, read32_test # iof_en reset to zero
.4byte iof_sel, 0x00000000, read32_test # iof_sel reset to zero
.4byte out_xor, 0x00000000, read32_test # out_xor reset to zero
# =========== Test output and input pins ===========

View File

@ -857,6 +857,27 @@ trap_handler_end_\MODE\(): // place to jump to so we can skip the trap handler a
addi a6, a6, 8
.endm
.macro SETUP_PLIC
# Setup PLIC with a series of register writes
.equ PLIC_INTPRI_GPIO, 0x0C00000C # GPIO is interrupt 3
.equ PLIC_INTPRI_UART, 0x0C000028 # UART is interrupt 10
.equ PLIC_INTPENDING0, 0x0C001000 # intPending0 register
.equ PLIC_INTEN00, 0x0C002000 # interrupt enables for context 0 (machine mode) sources 31:1
.equ PLIC_INTEN10, 0x0C002080 # interrupt enables for context 1 (supervisor mode) sources 31:1
.equ PLIC_THRESH0, 0x0C200000 # Priority threshold for context 0 (machine mode)
.equ PLIC_CLAIM0, 0x0C200004 # Claim/Complete register for context 0
.equ PLIC_THRESH1, 0x0C201000 # Priority threshold for context 1 (supervisor mode)
.equ PLIC_CLAIM1, 0x0C201004 # Claim/Complete register for context 1
.4byte PLIC_THRESH0, 0, write32_test # Set PLIC machine mode interrupt threshold to 0 to accept all interrupts
.4byte PLIC_THRESH1, 7, write32_test # Set PLIC supervisor mode interrupt threshold to 7 to accept no interrupts
.4byte PLIC_INTPRI_GPIO, 7, write32_test # Set GPIO to high priority
.4byte PLIC_INTPRI_UART, 7, write32_test # Set UART to high priority
.4byte PLIC_INTEN00, 0xFFFFFFFF, write32_test # Enable all interrupt sources for machine mode
.4byte PLIC_INTEN10, 0x00000000, write32_test # Disable all interrupt sources for supervisor mode
.endm
.macro END_TESTS
// invokes one final ecall to return to machine mode then terminates this program, so the output is
// 0x8: termination called from U mode
@ -984,6 +1005,20 @@ read08_test:
addi a6, a6, 8
j test_loop // go to next test case
readmip_test: // read the MIP into the signature
csrr t2, mip
sw t2, 0(t1)
addi t1, t1, 4
addi a6, a6, 4
j test_loop // go to next test case
readsip_test: // read the MIP into the signature
csrr t2, sip
sw t2, 0(t1)
addi t1, t1, 4
addi a6, a6, 4
j test_loop // go to next test case
goto_s_mode:
// return to address in t3,
li a0, 3 // Trap handler behavior (go to supervisor mode)