Reverted tests.vh to work with the existing flow; added commented-out paths for the new riscof tests, to be enabled once that build has finished

This commit is contained in:
Daniel Torres 2022-06-29 12:32:30 -07:00
commit d1eebac73f
28 changed files with 485 additions and 158 deletions

View File

@ -94,9 +94,9 @@
`define BIAS2 ((`F_SUPPORTED & (`LEN1 != `S_LEN)) ? `S_BIAS : `H_BIAS)
// largest length in IEU/FPU
`define CVTLEN ((`NF<`XLEN) ? `XLEN : `NF)
`define DIVLEN ((`NF < `XLEN) ? `XLEN : `NF)
`define LLEN ((`FLEN<`XLEN) ? `XLEN : `FLEN)
`define CVTLEN ((`NF<`XLEN) ? (`XLEN) : (`NF))
`define DIVLEN ((`NF < `XLEN) ? (`XLEN) : (`NF))
`define LLEN ((`FLEN<`XLEN) ? (`XLEN) : (`FLEN))
`define LOGCVTLEN $unsigned($clog2(`CVTLEN+1))
`define NORMSHIFTSZ ((`DIVLEN+`NF+3) > (3*`NF+8) ? (`DIVLEN+`NF+3) : (3*`NF+9))
`define CORRSHIFTSZ ((`DIVLEN+`NF+3) > (3*`NF+8) ? (`DIVLEN+`NF+3) : (3*`NF+6))

View File

@ -43,6 +43,9 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGWPL, WORDLEN, MUXINTER
input logic [`PA_BITS-1:0] PAdr, // physical address
input logic [(`XLEN-1)/8:0] ByteMask,
input logic [`XLEN-1:0] FinalWriteData,
input logic [`FLEN-1:0] FWriteDataM,
input logic FLoad2,
input logic FpLoadStoreM,
output logic CacheCommitted,
output logic CacheStall,
// to performance counters to cpu
@ -120,7 +123,7 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGWPL, WORDLEN, MUXINTER
// Array of cache ways, along with victim, hit, dirty, and read merging logic
cacheway #(NUMLINES, LINELEN, TAGLEN, OFFSETLEN, SETLEN)
CacheWays[NUMWAYS-1:0](.clk, .reset, .RAdr, .PAdr, .CacheWriteData, .ByteMask,
CacheWays[NUMWAYS-1:0](.clk, .reset, .RAdr, .PAdr, .CacheWriteData, .ByteMask, .FLoad2,
.SetValidWay, .ClearValidWay, .SetDirtyWay, .ClearDirtyWay, .SelEvict, .VictimWay,
.FlushWay, .SelFlush, .ReadDataLineWay, .HitWay, .VictimDirtyWay, .VictimTagWay,
.Invalidate(InvalidateCacheM));
@ -159,8 +162,12 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGWPL, WORDLEN, MUXINTER
/////////////////////////////////////////////////////////////////////////////////////////////
// Write Path: Write data and address. Muxes between writes from bus and writes from CPU.
/////////////////////////////////////////////////////////////////////////////////////////////
mux2 #(LINELEN) WriteDataMux(.d0({WORDSPERLINE{FinalWriteData}}),
.d1(CacheBusWriteData), .s(SetValid), .y(CacheWriteData));
if (`LLEN>`XLEN)
mux3 #(LINELEN) WriteDataMux(.d0({WORDSPERLINE{FinalWriteData}}),
.d1({WORDSPERLINE/2{FWriteDataM}}), .d2(CacheBusWriteData), .s({SetValid,FpLoadStoreM&~SetValid}), .y(CacheWriteData));
else
mux2 #(LINELEN) WriteDataMux(.d0({WORDSPERLINE{FinalWriteData}}),
.d1(CacheBusWriteData), .s(SetValid), .y(CacheWriteData));
mux3 #(`PA_BITS) CacheBusAdrMux(.d0({PAdr[`PA_BITS-1:OFFSETLEN], {{OFFSETLEN}{1'b0}}}),
.d1({VictimTag, PAdr[SETTOP-1:OFFSETLEN], {{OFFSETLEN}{1'b0}}}),
.d2({VictimTag, FlushAdr, {{OFFSETLEN}{1'b0}}}),

View File

@ -38,6 +38,7 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26,
input logic [$clog2(NUMLINES)-1:0] RAdr,
input logic [`PA_BITS-1:0] PAdr,
input logic [LINELEN-1:0] CacheWriteData,
input logic FLoad2,
input logic SetValidWay,
input logic ClearValidWay,
input logic SetDirtyWay,
@ -74,8 +75,14 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26,
/////////////////////////////////////////////////////////////////////////////////////////////
// Write Enable demux
/////////////////////////////////////////////////////////////////////////////////////////////
onehotdecoder #(LOGWPL) adrdec(
.bin(PAdr[LOGWPL+LOGXLENBYTES-1:LOGXLENBYTES]), .decoded(MemPAdrDecoded));
if(`LLEN>`XLEN)begin
logic [2**LOGWPL-1:0] MemPAdrDecodedtmp;
onehotdecoder #(LOGWPL) adrdec(
.bin(PAdr[LOGWPL+LOGXLENBYTES-1:LOGXLENBYTES]), .decoded(MemPAdrDecodedtmp));
assign MemPAdrDecoded = MemPAdrDecodedtmp|{MemPAdrDecodedtmp[2**LOGWPL-2:0]&{2**LOGWPL-1{FLoad2}}, 1'b0};
end else
onehotdecoder #(LOGWPL) adrdec(
.bin(PAdr[LOGWPL+LOGXLENBYTES-1:LOGXLENBYTES]), .decoded(MemPAdrDecoded));
// If writing the whole line set all write enables to 1, else only set the correct word.
assign SelectedWriteWordEn = SetValidWay ? '1 : SetDirtyWay ? MemPAdrDecoded : '0; // OR-AND
assign FinalByteMask = SetValidWay ? '1 : ByteMask; // OR

View File

@ -7,16 +7,15 @@ module divshiftcalc(
input logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2M,
output logic [$clog2(`NORMSHIFTSZ)-1:0] DivShiftAmt,
output logic [`NORMSHIFTSZ-1:0] DivShiftIn,
output logic [`NE+1:0] CorrDivExp
output logic DivResDenorm,
output logic [`NE+1:0] DivDenormShift
);
logic ResDenorm;
logic [`NE+1:0] DenormShift;
logic [`NE+1:0] NormShift;
logic [`NE+1:0] Nf, NfPlus1;
// is the result denormalized
// if the exponent is 1 then the result needs to be normalized then the result is denormalized
assign ResDenorm = DivCalcExpM[`NE+1]|(~|DivCalcExpM[`NE+1:1]&~(DivCalcExpM[0]&Quot[`DIVLEN+2]));
assign DivResDenorm = DivCalcExpM[`NE+1]|(~|DivCalcExpM[`NE+1:0]);
// select the proper fraction length
if (`FPSIZES == 1) begin
assign Nf = (`NE+2)'(`NF);
@ -70,24 +69,22 @@ module divshiftcalc(
// if the result is denormalized
// 00000000x.xxxxxx... Exp = DivCalcExp
// .00000000xxxxxxx... >> NF+1 Exp = DivCalcExp+NF+1
// .000xxxxxxxxxxxx... << DivCalcExp+NF+1 Exp = 0
// .00xxxxxxxxxxxxx... << DivCalcExp+NF+1 Exp = +1
// .0000xxxxxxxxxxx... >> 1 Exp = 1
// Left shift amount = DivCalcExp+NF+1-1
assign DenormShift = Nf+DivCalcExpM;
assign DivDenormShift = Nf+DivCalcExpM;
// if the result is normalized
// 00000000x.xxxxxx... Exp = DivCalcExp
// .00000000xxxxxxx... >> NF+1 Exp = DivCalcExp+NF+1
// 00000000x.xxxxxx... << NF+1 Exp = DivCalcExp
// 00000000xx.xxxxx... << 1? Exp = DivCalcExp-1
// Left shift amount = NF+1 plus 1 if normalization required
assign NormShift = NfPlus1 + {(`NE+1)'(0), ~Quot[`DIVLEN+2]};
// 00000000.xxxxxxx... << NF Exp = DivCalcExp+1
// 00000000x.xxxxxx... << NF Exp = DivCalcExp (extra shift done afterwards)
// 00000000xx.xxxxx... << 1? Exp = DivCalcExp-1 (determined after)
// initial left shift amount = NF
assign NormShift = Nf;
// if the shift amount is negative then don't shift (keep sticky bit)
assign DivShiftAmt = (ResDenorm ? DenormShift[$clog2(`NORMSHIFTSZ)-1:0]&{$clog2(`NORMSHIFTSZ){~DenormShift[`NE+1]}} : NormShift[$clog2(`NORMSHIFTSZ)-1:0])+{{$clog2(`NORMSHIFTSZ)-$clog2(`DIVLEN/2+3)-1{1'b0}}, EarlyTermShiftDiv2M, 1'b0};
assign DivShiftAmt = (DivResDenorm ? DivDenormShift[$clog2(`NORMSHIFTSZ)-1:0]&{$clog2(`NORMSHIFTSZ){~DivDenormShift[`NE+1]}} : NormShift[$clog2(`NORMSHIFTSZ)-1:0])+{{$clog2(`NORMSHIFTSZ)-$clog2(`DIVLEN/2+3)-1{1'b0}}, EarlyTermShiftDiv2M&{$clog2(`DIVLEN/2+3){~DivDenormShift[`NE+1]}}, 1'b0};
// *** may be able to reduce shifter size
assign DivShiftIn = {{`NF{1'b0}}, Quot[`DIVLEN+2:0], {`NORMSHIFTSZ-`DIVLEN-3-`NF{1'b0}}};
// the quotient is in the range [.5,2) if there is no early termination
// if the quotient < 1 and not denormal then subtract 1 to account for the normalization shift
assign CorrDivExp = (ResDenorm&~DenormShift[`NE+1]) ? (`NE+2)'(0) : DivCalcExpM - {(`NE+1)'(0), ~Quot[`DIVLEN+2]};
endmodule

View File

@ -33,8 +33,8 @@ module fctrl (
default: ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_1; // non-implemented instruction
endcase
7'b0100111: case(Funct3D)
3'b010: ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_0; // fsw
3'b011: ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_0; // fsd
3'b010: ControlsD = `FCTRLW'b0_0_10_xx_0xx_0_0; // fsw
3'b011: ControlsD = `FCTRLW'b0_0_10_xx_0xx_0_0; // fsd
default: ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_1; // non-implemented instruction
endcase
7'b1000011: ControlsD = `FCTRLW'b1_0_01_10_000_0_0; // fmadd
@ -121,7 +121,7 @@ module fctrl (
assign FmtD = 0;
else if (`FPSIZES == 2)begin
logic [1:0] FmtTmp;
assign FmtTmp = ((Funct7D[6:3] == 4'b0100)&OpD[4]) ? Rs2D[1:0] : Funct7D[1:0];
assign FmtTmp = ((Funct7D[6:3] == 4'b0100)&OpD[4]) ? Rs2D[1:0] : (~OpD[6]&(&OpD[2:0])) ? {~Funct3D[1], ~(Funct3D[1]^Funct3D[0])} : Funct7D[1:0];
assign FmtD = (`FMT == FmtTmp);
end
else if (`FPSIZES == 3|`FPSIZES == 4)

View File

@ -41,10 +41,12 @@ module fpu (
input logic [4:0] RdM, RdW, // which FP register to write to (from IEU)
input logic [1:0] STATUS_FS, // Is floating-point enabled?
output logic FRegWriteM, // FP register write enable
output logic FpLoadM, // Fp load instruction?
output logic FpLoadStoreM, // Fp load instruction?
output logic FLoad2,
output logic FStallD, // Stall the decode stage
output logic FWriteIntE, // integer register write enables
output logic [`XLEN-1:0] FWriteDataE, // Data to be written to memory
output logic [`FLEN-1:0] FWriteDataM, // Data to be written to memory
output logic [`XLEN-1:0] FIntResM, // data to be written to integer register
output logic [`XLEN-1:0] FCvtIntResW, // data to be written to integer register
output logic [1:0] FResSelW,
@ -292,8 +294,19 @@ module fpu (
// data to be stored in memory - to IEU
// - FP uses NaN-blocking format
// - if there are any unused bits the most significant bits are filled with 1s
if (`FLEN>`XLEN) assign FWriteDataE = FSrcYE[`XLEN-1:0];
else assign FWriteDataE = {{`XLEN-`FLEN{FSrcYE[`FLEN-1]}}, FSrcYE};
if (`LLEN==`XLEN) begin
assign FWriteDataE = FSrcYE[`XLEN-1:0];
end else begin
logic [`FLEN-1:0] FWriteDataE;
if(`FMTBITS == 2) assign FLoad2 = FmtM == `FMT;
else assign FLoad2 = FmtM;
if (`FPSIZES==1) assign FWriteDataE = FSrcYE;
else if (`FPSIZES==2) assign FWriteDataE = FmtE ? FSrcYE : {2{FSrcYE[`LEN1-1:0]}};
else assign FWriteDataE = FmtE == `FMT ? FSrcYE : {2{FSrcYE[`LEN1-1:0]}};
flopenrc #(`FLEN) EMWriteDataReg (clk, reset, FlushM, ~StallM, FWriteDataE, FWriteDataM);
end
// NaN Block SrcA
generate
@ -311,7 +324,7 @@ module fpu (
assign PreNVE = CmpNVE&(FOpCtrlE[2]|FWriteIntE);
// select the result that may be written to the integer register - to IEU
if (`FLEN>`XLEN)
if (`FLEN>`XLEN)
assign IntSrcXE = FSrcXE[`XLEN-1:0];
else
assign IntSrcXE = {{`XLEN-`FLEN{FSrcXE[`FLEN-1:0]}}, FSrcXE};
@ -356,7 +369,7 @@ module fpu (
// ||| |||
//////////////////////////////////////////////////////////////////////////////////////////
assign FpLoadM = FResSelM[1];
assign FpLoadStoreM = FResSelM[1];
postprocess postprocess(.XSgnM, .YSgnM, .ZExpM, .XManM, .YManM, .ZManM, .FrmM, .FmtM, .ProdExpM, .EarlyTermShiftDiv2M,
.AddendStickyM, .KillProdM, .XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, .Quot,

View File

@ -3,14 +3,20 @@
module lzacorrection(
input logic [`NORMSHIFTSZ-1:0] Shifted, // the shifted sum before LZA correction
input logic FmaOp,
input logic DivOp,
input logic DivResDenorm,
input logic [`NE+1:0] DivCalcExpM,
input logic [`NE+1:0] DivDenormShift,
input logic [`NE+1:0] ConvNormSumExp, // exponent of the normalized sum not taking into account denormal or zero results
input logic PreResultDenorm, // is the result denormalized - calculated before LZA corection
input logic KillProdM, // is the product set to zero
input logic SumZero,
output logic [`CORRSHIFTSZ-1:0] CorrShifted, // the shifted sum before LZA correction
output logic [`NE+1:0] CorrDivExp,
output logic [`NE+1:0] SumExp // exponent of the normalized sum
);
logic [3*`NF+5:0] CorrSumShifted; // the shifted sum after LZA correction
logic [`CORRSHIFTSZ:0] CorrQuotShifted;
logic ResDenorm; // is the result denormalized
logic LZAPlus1, LZAPlus2; // add one or two to the sum's exponent due to LZA correction
@ -19,11 +25,17 @@ module lzacorrection(
assign LZAPlus2 = Shifted[`NORMSHIFTSZ-1];
// the only possible mantissa for a plus two is all zeroes - a one has to propagate all the way through a sum, so we can leave the bottom statement alone
assign CorrSumShifted = LZAPlus1 ? Shifted[`NORMSHIFTSZ-3:1] : Shifted[`NORMSHIFTSZ-4:0];
assign CorrShifted = FmaOp ? {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+6){1'b0}}} : Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ];
// if the msb is 1 or the exponent was one, but the shifted quotient was < 1 (Denorm)
assign CorrQuotShifted = {LZAPlus2|(DivCalcExpM==1&~LZAPlus2) ? Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ] : {Shifted[`NORMSHIFTSZ-2:`NORMSHIFTSZ-`CORRSHIFTSZ], 1'b0}, 1'b0};
// if the result of the divider was calculated to be denormalized, then the result was correctly normalized, so select the top shifted bits
assign CorrShifted = FmaOp ? {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+6){1'b0}}} : DivOp&~DivResDenorm ? CorrQuotShifted[`CORRSHIFTSZ-1:0] : Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ];
// Determine sum's exponent
// if plus1 If plus2 if said denorm but norm plus 1 if said denorm but norm plus 2
assign SumExp = (ConvNormSumExp+{{`NE+1{1'b0}}, LZAPlus1&~KillProdM}+{{`NE{1'b0}}, LZAPlus2&~KillProdM, 1'b0}+{{`NE+1{1'b0}}, ~ResDenorm&PreResultDenorm&~KillProdM}+{{`NE+1{1'b0}}, &ConvNormSumExp&Shifted[3*`NF+6]&~KillProdM}) & {`NE+2{~(SumZero|ResDenorm)}};
// recalculate if the result is denormalized
assign ResDenorm = PreResultDenorm&~Shifted[`NORMSHIFTSZ-3]&~Shifted[`NORMSHIFTSZ-2];
// the quotient is in the range [.5,2) if there is no early termination
// if the quotient < 1 and not denormal then subtract 1 to account for the normalization shift
assign CorrDivExp = ((DivResDenorm)&~DivDenormShift[`NE+1]) ? (`NE+2)'(0) : DivCalcExpM - {(`NE+1)'(0), ~LZAPlus2};
endmodule

View File

@ -112,6 +112,8 @@ module postprocess(
logic UfLSBRes;
logic Sqrt;
logic [`FMTBITS-1:0] OutFmt;
logic DivResDenorm;
logic [`NE+1:0] DivDenormShift;
// signals to help readability
assign Signed = FOpCtrlM[0];
@ -144,7 +146,7 @@ module postprocess(
.XZeroM, .IntToFp, .OutFmt, .CvtResUf, .CvtShiftIn);
fmashiftcalc fmashiftcalc(.SumM, .ZExpM, .ProdExpM, .FmaNormCntM, .FmtM, .KillProdM, .ConvNormSumExp,
.ZDenormM, .SumZero, .PreResultDenorm, .FmaShiftAmt, .FmaShiftIn);
divshiftcalc divshiftcalc(.FmtM, .Quot, .DivCalcExpM, .EarlyTermShiftDiv2M, .CorrDivExp, .DivShiftAmt, .DivShiftIn);
divshiftcalc divshiftcalc(.FmtM, .Quot, .DivCalcExpM, .EarlyTermShiftDiv2M, .DivResDenorm, .DivDenormShift, .DivShiftAmt, .DivShiftIn);
always_comb
case(PostProcSelM)
@ -169,7 +171,8 @@ module postprocess(
normshift normshift (.ShiftIn, .ShiftAmt, .Shifted);
lzacorrection lzacorrection(.FmaOp, .KillProdM, .PreResultDenorm, .ConvNormSumExp,
.SumZero, .Shifted, .SumExp, .CorrShifted);
.DivResDenorm, .DivDenormShift, .DivOp, .DivCalcExpM,
.CorrDivExp, .SumZero, .Shifted, .SumExp, .CorrShifted);
///////////////////////////////////////////////////////////////////////////////
// Rounding

View File

@ -124,12 +124,18 @@ module datapath (
flopenrc #(5) RdWReg(clk, reset, FlushW, ~StallW, RdM, RdW);
// floating point interactions: fcvt, fp stores
if (`F_SUPPORTED) begin:fpmux
if (`F_SUPPORTED&(`LLEN>`XLEN)) begin:fpmux
logic [`XLEN-1:0] IFCvtResultW;
mux2 #(`XLEN) resultmuxM(IEUResultM, FIntResM, FWriteIntM, IFResultM);
assign WriteDataE = ForwardedSrcBE;
mux2 #(`XLEN) cvtresultmuxW(IFResultW, FCvtIntResW, ~FResSelW[1]&FResSelW[0], IFCvtResultW);
mux5 #(`XLEN) resultmuxW(IFCvtResultW, ReadDataW, CSRReadValW, MDUResultW, SCResultW, ResultSrcW, ResultW);
end else if (`F_SUPPORTED) begin:fpmux
logic [`XLEN-1:0] IFCvtResultW;
mux2 #(`XLEN) resultmuxM(IEUResultM, FIntResM, FWriteIntM, IFResultM);
mux2 #(`XLEN) writedatamux(ForwardedSrcBE, FWriteDataE, ~IllegalFPUInstrE, WriteDataE);
mux2 #(`XLEN) cvtresultmuxW(IFResultW, FCvtIntResW, ~FResSelW[1]&FResSelW[0], IFCvtResultW);
mux5 #(`XLEN) resultmuxW(IFCvtResultW, ReadDataW, CSRReadValW, MDUResultW, SCResultW, ResultSrcW, ResultW);
mux5 #(`XLEN) resultmuxW(IFCvtResultW, ReadDataW, CSRReadValW, MDUResultW, SCResultW, ResultSrcW, ResultW);
end else begin:fpmux
assign IFResultM = IEUResultM; assign WriteDataE = ForwardedSrcBE;
mux5 #(`XLEN) resultmuxW(IFResultW, ReadDataW, CSRReadValW, MDUResultW, SCResultW, ResultSrcW, ResultW);

View File

@ -227,7 +227,7 @@ module ifu (
icache(.clk, .reset, .CPUBusy, .IgnoreRequestTLB(ITLBMissF), .TrapM(TrapM), .IgnoreRequestTrapM('0),
.CacheBusWriteData(ICacheBusWriteData), .CacheBusAck(ICacheBusAck),
.CacheBusAdr(ICacheBusAdr), .CacheStall(ICacheStallF),
.CacheFetchLine(ICacheFetchLine),
.CacheFetchLine(ICacheFetchLine), .FWriteDataM(), .FpLoadStoreM(), .FLoad2(),
.CacheWriteLine(), .ReadDataWord(FinalInstrRawF),
.Cacheable(CacheableF),
.CacheMiss(ICacheMiss), .CacheAccess(ICacheAccess),

View File

@ -57,7 +57,9 @@ module lsu (
input logic BigEndianM,
input logic sfencevmaM,
// fpu
input logic FpLoadM,
input logic [`FLEN-1:0] FWriteDataM,
input logic FLoad2,
input logic FpLoadStoreM,
// faults
output logic LoadPageFaultM, StoreAmoPageFaultM,
output logic LoadMisalignedFaultM, LoadAccessFaultM,
@ -235,7 +237,7 @@ module lsu (
.NUMWAYS(`DCACHE_NUMWAYS), .LOGWPL(LOGWPL), .WORDLEN(`LLEN), .MUXINTERVAL(`XLEN), .DCACHE(1)) dcache(
.clk, .reset, .CPUBusy, .LSUBusWriteCrit, .RW(LSURWM), .Atomic(LSUAtomicM),
.FlushCache(FlushDCacheM), .NextAdr(LSUAdrE), .PAdr(LSUPAdrM),
.ByteMask(ByteMaskM), .WordCount,
.ByteMask(ByteMaskM), .WordCount, .FpLoadStoreM, .FWriteDataM, .FLoad2,
.FinalWriteData(FinalWriteDataM), .Cacheable(CacheableM),
.CacheStall(DCacheStallM), .CacheMiss(DCacheMiss), .CacheAccess(DCacheAccess),
.IgnoreRequestTLB, .IgnoreRequestTrapM, .TrapM(1'b0), .CacheCommitted(DCacheCommittedM),
@ -269,7 +271,7 @@ module lsu (
subwordwrite subwordwrite(.LSUPAdrM(LSUPAdrM[2:0]),
.LSUFunct3M, .AMOWriteDataM, .LittleEndianWriteDataM, .ByteMaskM);
subwordread subwordread(.ReadDataWordMuxM, .LSUPAdrM(LSUPAdrM[2:0]),
.FpLoadM, .Funct3M(LSUFunct3M), .ReadDataM);
.FpLoadStoreM, .Funct3M(LSUFunct3M), .ReadDataM);
/////////////////////////////////////////////////////////////////////////////////////////////
// MW Pipeline Register

View File

@ -35,7 +35,7 @@ module subwordread
input logic [`LLEN-1:0] ReadDataWordMuxM,
input logic [2:0] LSUPAdrM,
input logic [2:0] Funct3M,
input logic FpLoadM,
input logic FpLoadStoreM,
output logic [`LLEN-1:0] ReadDataM
);
@ -83,16 +83,16 @@ module subwordread
case(Funct3M)
3'b000: ReadDataM = {{`LLEN-8{ByteM[7]}}, ByteM}; // lb
3'b001: if(`ZFH_SUPPORTED)
ReadDataM = {{`LLEN-16{HalfwordM[15]|FpLoadM}}, HalfwordM[15:0]}; // lh/flh
ReadDataM = {{`LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh
else ReadDataM = {{`LLEN-16{HalfwordM[15]}}, HalfwordM[15:0]}; // lh
3'b010: if(`F_SUPPORTED)
ReadDataM = {{`LLEN-32{WordM[31]|FpLoadM}}, WordM[31:0]}; // lw/flw
ReadDataM = {{`LLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw
else ReadDataM = {{`LLEN-32{WordM[31]}}, WordM[31:0]}; // lw
3'b011: if(`D_SUPPORTED)
ReadDataM = {{`LLEN-64{DblWordM[63]|FpLoadM}}, DblWordM[63:0]}; // ld/fld
ReadDataM = {{`LLEN-64{DblWordM[63]|FpLoadStoreM}}, DblWordM[63:0]}; // ld/fld
else ReadDataM = {{`LLEN-64{DblWordM[63]}}, DblWordM[63:0]}; // ld/fld
3'b100: if(`Q_SUPPORTED)
ReadDataM = FpLoadM ? ReadDataWordMuxM : {{`LLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq
ReadDataM = FpLoadStoreM ? ReadDataWordMuxM : {{`LLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq
else
ReadDataM = {{`LLEN-8{1'b0}}, ByteM[7:0]}; // lbu
3'b101: ReadDataM = {{`LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu
@ -122,10 +122,10 @@ module subwordread
case(Funct3M)
3'b000: ReadDataM = {{`LLEN-8{ByteM[7]}}, ByteM}; // lb
3'b001: if(`ZFH_SUPPORTED)
ReadDataM = {{`LLEN-16{HalfwordM[15]|FpLoadM}}, HalfwordM[15:0]}; // lh/flh
ReadDataM = {{`LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh
else ReadDataM = {{`LLEN-16{HalfwordM[15]}}, HalfwordM[15:0]}; // lh
3'b010: if(`F_SUPPORTED)
ReadDataM = {{`LLEN-32{ReadDataWordMuxM[31]|FpLoadM}}, ReadDataWordMuxM[31:0]}; // lw/flw
ReadDataM = {{`LLEN-32{ReadDataWordMuxM[31]|FpLoadStoreM}}, ReadDataWordMuxM[31:0]}; // lw/flw
else ReadDataM = {{`LLEN-32{ReadDataWordMuxM[31]}}, ReadDataWordMuxM[31:0]}; // lw
3'b011: ReadDataM = ReadDataWordMuxM; // fld
3'b100: ReadDataM = {{`LLEN-8{1'b0}}, ByteM[7:0]}; // lbu

View File

@ -92,13 +92,15 @@ module wallypipelinedcore (
logic FStallD;
logic FWriteIntE;
logic [`XLEN-1:0] FWriteDataE;
logic FLoad2;
logic [`FLEN-1:0] FWriteDataM;
logic [`XLEN-1:0] FIntResM;
logic [`XLEN-1:0] FCvtIntResW;
logic FDivBusyE;
logic IllegalFPUInstrD, IllegalFPUInstrE;
logic FRegWriteM;
logic FPUStallD;
logic FpLoadM;
logic FpLoadStoreM;
logic [1:0] FResSelW;
logic [4:0] SetFflagsM;
@ -253,7 +255,8 @@ module wallypipelinedcore (
.AtomicM, .TrapM,
.CommittedM, .DCacheMiss, .DCacheAccess,
.SquashSCW,
.FpLoadM,
.FpLoadStoreM,
.FWriteDataM, .FLoad2,
//.DataMisalignedM(DataMisalignedM),
.IEUAdrE, .IEUAdrM, .WriteDataE,
.ReadDataW, .FlushDCacheM,
@ -391,10 +394,12 @@ module wallypipelinedcore (
.RdM, .RdW, // which FP register to write to (from IEU)
.STATUS_FS, // is floating-point enabled?
.FRegWriteM, // FP register write enable
.FpLoadM,
.FpLoadStoreM,
.FLoad2,
.FStallD, // Stall the decode stage
.FWriteIntE, // integer register write enable
.FWriteDataE, // Data to be written to memory
.FWriteDataM, // Data to be written to memory
.FIntResM, // data to be written to integer register
.FCvtIntResW, // fp -> int conversion result to be stored in int register
.FResSelW, // fpu result selection

View File

@ -46,7 +46,7 @@ void main(void)
int i, j;
int bias = 1023;
if ((fptr = fopen("testvectors","w")) == NULL) {
if ((fptr = fopen("testvectors","w")) == NULL) {
fprintf(stderr, "Couldn't write testvectors file\n");
exit(1);
}

View File

@ -143,12 +143,13 @@ module earlytermination(
logic [$clog2(`DIVLEN/2+3)-1:0] Count;
logic WZero;
logic [`DIVLEN+3:0] W;
assign WZero = (WS+WC == 0)|XZeroE|YZeroE|XInfE|YInfE|XNaNE|YNaNE; //*** temporary
// *** rather than counting, should just be able to check if one of the two msbs of the quotient is 1 and then stop???
assign WZero = ((WS^WC)=={WS[`DIVLEN+2:0]|WC[`DIVLEN+2:0], 1'b0})|XZeroE|YZeroE|XInfE|YInfE|XNaNE|YNaNE;
assign DivDone = (DivStickyE | WZero);
assign DivStickyE = ~|Count;
assign DivNegStickyE = $signed(WS+WC) < 0;
assign W = WC+WS;
assign DivNegStickyE = W[`DIVLEN+3]; //*** is there a better way to do this???
assign EarlyTermShiftDiv2E = Count;
// +1 for setup
// `DIVLEN/2 to get required number of bits

View File

@ -2,7 +2,7 @@
// srt.sv
//
// Written: David_Harris@hmc.edu 13 January 2022
// Modified:
// Modified: cturek@hmc.edu June 2022
//
// Purpose: Combined Divide and Square Root Floating Point and Integer Unit
//
@ -29,10 +29,8 @@
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
`define DIVLEN ((`NF<(`XLEN+1)) ? (`XLEN + 1) : `NF)
`define EXTRAFRACBITS ((`NF<(`XLEN+1)) ? (`XLEN - `NF + 1) : 0)
`define EXTRAINTBITS ((`NF<(`XLEN+1)) ? 0 : (`NF - `XLEN))
`define EXTRAFRACBITS ((`NF<(`XLEN)) ? (`XLEN - `NF) : 0)
`define EXTRAINTBITS ((`NF<(`XLEN)) ? 0 : (`NF - `XLEN))
module srt (
input logic clk,
@ -131,11 +129,11 @@ module srtpreproc (
lzc #(`XLEN) lzcA (PosA, zeroCntA);
lzc #(`XLEN) lzcB (PosB, zeroCntB);
assign ExtraA = {1'b0, PosA, {`EXTRAINTBITS{1'b0}}};
assign ExtraB = {1'b0, PosB, {`EXTRAINTBITS{1'b0}}};
assign ExtraA = {PosA, {`EXTRAINTBITS{1'b0}}};
assign ExtraB = {PosB, {`EXTRAINTBITS{1'b0}}};
assign PreprocA = ExtraA << zeroCntA;
assign PreprocB = ExtraB << (zeroCntB + 1);
assign PreprocB = ExtraB << zeroCntB;
assign PreprocX = {SrcXFrac, {`EXTRAFRACBITS{1'b0}}};
assign PreprocY = {SrcYFrac, {`EXTRAFRACBITS{1'b0}}};
@ -228,14 +226,15 @@ module otfc2 #(parameter N=65) (
//
// QM is Q-1. It allows us to write negative bits
// without using a costly CPA.
logic [N+2:0] Q, QM, QNext, QMNext;
logic [N+2:0] Q, QM, QNext, QMNext, QMMux;
// QR and QMR are the shifted versions of Q and QM.
// They are treated as [N-1:r] size signals, and
// discard the r most significant bits of Q and QM.
logic [N+1:0] QR, QMR;
flopr #(N+3) Qreg(clk, Start, QNext, Q);
flopr #(N+3) QMreg(clk, Start, QMNext, QM);
mux2 #(`DIVLEN+3) QMmux(QMNext, {`DIVLEN+3{1'b1}}, Start, QMMux);
flop #(`DIVLEN+3) QMreg(clk, QMMux, QM);
always_comb begin
QR = Q[N+1:0];

View File

@ -1,4 +1,4 @@
`define DIVLEN 65
`define DIVLEN 64
/////////////
// counter //
@ -17,7 +17,7 @@ module counter(input logic clk,
always @(posedge clk)
begin
if (count == `DIVLEN+1) done <= #1 1;
if (count == `DIVLEN + 2) done <= #1 1;
else if (done | req) done <= #1 0;
if (req) count <= #1 0;
else count <= #1 count+1;
@ -101,8 +101,8 @@ module testbench;
b = Vec[`memb];
{bsign, bExp, bfrac} = b;
nextr = Vec[`memr];
r = Quot[`DIVLEN:`DIVLEN - 52];
rOTFC = QuotOTFC[`DIVLEN:`DIVLEN - 52];
r = Quot[(`DIVLEN - 1):(`DIVLEN - 52)];
rOTFC = QuotOTFC[(`DIVLEN - 1):(`DIVLEN - 52)];
req <= #5 1;
end
@ -110,8 +110,8 @@ module testbench;
always @(posedge clk)
begin
r = Quot[`DIVLEN:`DIVLEN - 52];
rOTFC = QuotOTFC[`DIVLEN:`DIVLEN - 52];
r = Quot[(`DIVLEN - 1):(`DIVLEN - 52)];
rOTFC = QuotOTFC[(`DIVLEN - 1):(`DIVLEN - 52)];
if (done)
begin
req <= #5 1;

View File

@ -34,7 +34,7 @@
string tvpaths[] = '{
"../../addins/imperas-riscv-tests/work/",
"../../tests/riscof/work/riscv-arch-test/",
"../../tests/riscof/work/wally-riscv-arch-test/",
"../../tests/wally-riscv-arch-test/work/", //"../../tests/riscof/work/wally-riscv-arch-test/",
"../../tests/imperas-riscv-tests/work/",
"../../benchmarks/riscv-coremark/work/",
"../../addins/embench-iot/"
@ -95,16 +95,16 @@ string tvpaths[] = '{
string wally64a[] = '{
`WALLYTEST,
"rv64i_m/privilege/src/WALLY-amo.S/ref/Ref",
"rv64i_m/privilege/src/WALLY-lrsc.S/ref/Ref",
"rv64i_m/privilege/src/WALLY-status-fp-enabled-01.S/ref/Ref"
"rv64i_m/privilege/WALLY-amo",
"rv64i_m/privilege/WALLY-lrsc",
"rv64i_m/privilege/WALLY-status-fp-enabled-01"
};
string wally32a[] = '{
`WALLYTEST,
"rv32i_m/privilege/src/WALLY-amo.S/ref/Ref",
"rv32i_m/privilege/src/WALLY-lrsc.S/ref/Ref",
"rv32i_m/privilege/src/WALLY-status-fp-enabled-01.S/ref/Ref"
"rv32i_m/privilege/WALLY-amo",
"rv32i_m/privilege/WALLY-lrsc",
"rv32i_m/privilege/WALLY-status-fp-enabled-01"
};
@ -1490,40 +1490,41 @@ string imperas32f[] = '{
string wally64i[] = '{
`WALLYTEST,
"rv64i_m/I/src/WALLY-ADD.S/ref/Ref",
"rv64i_m/I/src/WALLY-SLT.S/ref/Ref",
"rv64i_m/I/src/WALLY-SLTU.S/ref/Ref",
"rv64i_m/I/src/WALLY-SUB.S/ref/Ref",
"rv64i_m/I/src/WALLY-XOR.S/ref/Ref"
"rv64i_m/I/WALLY-ADD",
"rv64i_m/I/WALLY-SLT",
"rv64i_m/I/WALLY-SLTU",
"rv64i_m/I/WALLY-SUB",
"rv64i_m/I/WALLY-XOR"
};
string wally64priv[] = '{
`WALLYTEST,
"rv64i_m/privilege/src/WALLY-csr-permission-s-01.S/ref/Ref",
"rv64i_m/privilege/src/WALLY-csr-permission-u-01.S/ref/Ref",
"rv64i_m/privilege/src/WALLY-mie-01.S/ref/Ref",
"rv64i_m/privilege/src/WALLY-minfo-01.S/ref/Ref",
"rv64i_m/privilege/src/WALLY-misa-01.S/ref/Ref",
"rv64i_m/privilege/src/WALLY-mmu-sv39.S/ref/Ref",
"rv64i_m/privilege/src/WALLY-mmu-sv48.S/ref/Ref",
"rv64i_m/privilege/src/WALLY-mtvec-01.S/ref/Ref",
"rv64i_m/privilege/src/WALLY-pma.S/ref/Ref",
"rv64i_m/privilege/src/WALLY-pmp.S/ref/Ref",
"rv64i_m/privilege/src/WALLY-sie-01.S/ref/Ref",
"rv64i_m/privilege/src/WALLY-status-mie-01.S/ref/Ref",
"rv64i_m/privilege/src/WALLY-status-sie-01.S/ref/Ref",
"rv64i_m/privilege/src/WALLY-status-tw-01.S/ref/Ref",
"rv64i_m/privilege/src/WALLY-stvec-01.S/ref/Ref",
"rv64i_m/privilege/src/WALLY-trap-01.S/ref/Ref",
"rv64i_m/privilege/src/WALLY-trap-s-01.S/ref/Ref",
"rv64i_m/privilege/src/WALLY-trap-sret-01.S/ref/Ref",
"rv64i_m/privilege/src/WALLY-trap-u-01.S/ref/Ref",
"rv64i_m/privilege/src/WALLY-wfi-01.S/ref/Ref"
"rv64i_m/privilege/WALLY-status-tw-01",
"rv64i_m/privilege/WALLY-csr-permission-s-01",
"rv64i_m/privilege/WALLY-csr-permission-u-01",
"rv64i_m/privilege/WALLY-minfo-01",
"rv64i_m/privilege/WALLY-misa-01",
"rv64i_m/privilege/WALLY-mmu-sv39",
"rv64i_m/privilege/WALLY-mmu-sv48",
"rv64i_m/privilege/WALLY-pma",
"rv64i_m/privilege/WALLY-pmp",
"rv64i_m/privilege/WALLY-trap-01",
"rv64i_m/privilege/WALLY-trap-s-01",
"rv64i_m/privilege/WALLY-trap-u-01",
"rv64i_m/privilege/WALLY-mie-01",
"rv64i_m/privilege/WALLY-sie-01",
"rv64i_m/privilege/WALLY-mtvec-01",
"rv64i_m/privilege/WALLY-stvec-01",
"rv64i_m/privilege/WALLY-status-mie-01",
"rv64i_m/privilege/WALLY-status-sie-01",
"rv64i_m/privilege/WALLY-trap-sret-01",
"rv64i_m/privilege/WALLY-status-tw-01",
"rv64i_m/privilege/WALLY-wfi-01"
};
string wally64periph[] = '{
`WALLYTEST,
"rv64i_m/privilege/src/WALLY-periph.S/ref/Ref"
"rv64i_m/privilege/WALLY-periph"
};
string wally32e[] = '{
@ -1568,38 +1569,127 @@ string imperas32f[] = '{
string wally32i[] = '{
`WALLYTEST,
"rv32i_m/I/src/WALLY-ADD.S/ref/Ref",
"rv32i_m/I/src/WALLY-SLT.S/ref/Ref",
"rv32i_m/I/src/WALLY-SLTU.S/ref/Ref",
"rv32i_m/I/src/WALLY-SUB.S/ref/Ref",
"rv32i_m/I/src/WALLY-XOR.S/ref/Ref"
"rv32i_m/I/WALLY-ADD",
"rv32i_m/I/WALLY-SLT",
"rv32i_m/I/WALLY-SLTU",
"rv32i_m/I/WALLY-SUB",
"rv32i_m/I/WALLY-XOR"
};
string wally32priv[] = '{
`WALLYTEST,
"rv32i_m/privilege/src/WALLY-csr-permission-s-01.S/ref/Ref",
"rv32i_m/privilege/src/WALLY-csr-permission-u-01.S/ref/Ref",
"rv32i_m/privilege/src/WALLY-mie-01.S/ref/Ref",
"rv32i_m/privilege/src/WALLY-minfo-01.S/ref/Ref",
"rv32i_m/privilege/src/WALLY-misa-01.S/ref/Ref",
"rv32i_m/privilege/src/WALLY-mmu-sv32.S/ref/Ref",
"rv32i_m/privilege/src/WALLY-mtvec-01.S/ref/Ref",
"rv32i_m/privilege/src/WALLY-pma.S/ref/Ref",
"rv32i_m/privilege/src/WALLY-pmp.S/ref/Ref",
"rv32i_m/privilege/src/WALLY-sie-01.S/ref/Ref",
"rv32i_m/privilege/src/WALLY-status-mie-01.S/ref/Ref",
"rv32i_m/privilege/src/WALLY-status-sie-01.S/ref/Ref",
"rv32i_m/privilege/src/WALLY-status-tw-01.S/ref/Ref",
"rv32i_m/privilege/src/WALLY-stvec-01.S/ref/Ref",
"rv32i_m/privilege/src/WALLY-trap-01.S/ref/Ref",
"rv32i_m/privilege/src/WALLY-trap-s-01.S/ref/Ref",
"rv32i_m/privilege/src/WALLY-trap-sret-01.S/ref/Ref",
"rv32i_m/privilege/src/WALLY-trap-u-01.S/ref/Ref",
"rv32i_m/privilege/src/WALLY-wfi-01.S/ref/Ref"
"rv32i_m/privilege/WALLY-csr-permission-s-01",
"rv32i_m/privilege/WALLY-csr-permission-u-01",
"rv32i_m/privilege/WALLY-minfo-01",
"rv32i_m/privilege/WALLY-misa-01",
"rv32i_m/privilege/WALLY-mmu-sv32",
"rv32i_m/privilege/WALLY-pma",
"rv32i_m/privilege/WALLY-pmp",
"rv32i_m/privilege/WALLY-trap-01",
"rv32i_m/privilege/WALLY-trap-s-01",
"rv32i_m/privilege/WALLY-trap-u-01",
"rv32i_m/privilege/WALLY-mie-01",
"rv32i_m/privilege/WALLY-sie-01",
"rv32i_m/privilege/WALLY-mtvec-01",
"rv32i_m/privilege/WALLY-stvec-01",
"rv32i_m/privilege/WALLY-status-mie-01",
"rv32i_m/privilege/WALLY-status-sie-01",
"rv32i_m/privilege/WALLY-trap-sret-01",
"rv32i_m/privilege/WALLY-status-tw-01",
"rv32i_m/privilege/WALLY-wfi-01"
};
string wally32periph[] = '{
`WALLYTEST,
"rv32i_m/privilege/src/WALLY-gpio-01.S/ref/Ref"
"rv32i_m/privilege/WALLY-gpio-01",
"rv32i_m/privilege/WALLY-clint-01"
// "rv32i_m/privilege/WALLY-plic-01"
// "rv32i_m/privilege/WALLY-uart-01"
};
// riscof test paths, to replace existing paths once riscof flow is working
// string wally64a[] = '{
// `WALLYTEST,
// "rv64i_m/privilege/src/WALLY-amo.S/ref/Ref",
// "rv64i_m/privilege/src/WALLY-lrsc.S/ref/Ref",
// "rv64i_m/privilege/src/WALLY-status-fp-enabled-01.S/ref/Ref"
// };
// string wally32a[] = '{
// `WALLYTEST,
// "rv32i_m/privilege/src/WALLY-amo.S/ref/Ref",
// "rv32i_m/privilege/src/WALLY-lrsc.S/ref/Ref",
// "rv32i_m/privilege/src/WALLY-status-fp-enabled-01.S/ref/Ref"
// };
// string wally64i[] = '{
// `WALLYTEST,
// "rv64i_m/I/src/WALLY-ADD.S/ref/Ref",
// "rv64i_m/I/src/WALLY-SLT.S/ref/Ref",
// "rv64i_m/I/src/WALLY-SLTU.S/ref/Ref",
// "rv64i_m/I/src/WALLY-SUB.S/ref/Ref",
// "rv64i_m/I/src/WALLY-XOR.S/ref/Ref"
// };
// string wally64priv[] = '{
// `WALLYTEST,
// "rv64i_m/privilege/src/WALLY-csr-permission-s-01.S/ref/Ref",
// "rv64i_m/privilege/src/WALLY-csr-permission-u-01.S/ref/Ref",
// "rv64i_m/privilege/src/WALLY-mie-01.S/ref/Ref",
// "rv64i_m/privilege/src/WALLY-minfo-01.S/ref/Ref",
// "rv64i_m/privilege/src/WALLY-misa-01.S/ref/Ref",
// "rv64i_m/privilege/src/WALLY-mmu-sv39.S/ref/Ref",
// "rv64i_m/privilege/src/WALLY-mmu-sv48.S/ref/Ref",
// "rv64i_m/privilege/src/WALLY-mtvec-01.S/ref/Ref",
// "rv64i_m/privilege/src/WALLY-pma.S/ref/Ref",
// "rv64i_m/privilege/src/WALLY-pmp.S/ref/Ref",
// "rv64i_m/privilege/src/WALLY-sie-01.S/ref/Ref",
// "rv64i_m/privilege/src/WALLY-status-mie-01.S/ref/Ref",
// "rv64i_m/privilege/src/WALLY-status-sie-01.S/ref/Ref",
// "rv64i_m/privilege/src/WALLY-status-tw-01.S/ref/Ref",
// "rv64i_m/privilege/src/WALLY-stvec-01.S/ref/Ref",
// "rv64i_m/privilege/src/WALLY-trap-01.S/ref/Ref",
// "rv64i_m/privilege/src/WALLY-trap-s-01.S/ref/Ref",
// "rv64i_m/privilege/src/WALLY-trap-sret-01.S/ref/Ref",
// "rv64i_m/privilege/src/WALLY-trap-u-01.S/ref/Ref",
// "rv64i_m/privilege/src/WALLY-wfi-01.S/ref/Ref"
// };
// string wally64periph[] = '{
// `WALLYTEST,
// "rv64i_m/privilege/src/WALLY-periph.S/ref/Ref"
// };
// string wally32i[] = '{
// `WALLYTEST,
// "rv32i_m/I/src/WALLY-ADD.S/ref/Ref",
// "rv32i_m/I/src/WALLY-SLT.S/ref/Ref",
// "rv32i_m/I/src/WALLY-SLTU.S/ref/Ref",
// "rv32i_m/I/src/WALLY-SUB.S/ref/Ref",
// "rv32i_m/I/src/WALLY-XOR.S/ref/Ref"
// };
// string wally32priv[] = '{
// `WALLYTEST,
// "rv32i_m/privilege/src/WALLY-csr-permission-s-01.S/ref/Ref",
// "rv32i_m/privilege/src/WALLY-csr-permission-u-01.S/ref/Ref",
// "rv32i_m/privilege/src/WALLY-mie-01.S/ref/Ref",
// "rv32i_m/privilege/src/WALLY-minfo-01.S/ref/Ref",
// "rv32i_m/privilege/src/WALLY-misa-01.S/ref/Ref",
// "rv32i_m/privilege/src/WALLY-mmu-sv32.S/ref/Ref",
// "rv32i_m/privilege/src/WALLY-mtvec-01.S/ref/Ref",
// "rv32i_m/privilege/src/WALLY-pma.S/ref/Ref",
// "rv32i_m/privilege/src/WALLY-pmp.S/ref/Ref",
// "rv32i_m/privilege/src/WALLY-sie-01.S/ref/Ref",
// "rv32i_m/privilege/src/WALLY-status-mie-01.S/ref/Ref",
// "rv32i_m/privilege/src/WALLY-status-sie-01.S/ref/Ref",
// "rv32i_m/privilege/src/WALLY-status-tw-01.S/ref/Ref",
// "rv32i_m/privilege/src/WALLY-stvec-01.S/ref/Ref",
// "rv32i_m/privilege/src/WALLY-trap-01.S/ref/Ref",
// "rv32i_m/privilege/src/WALLY-trap-s-01.S/ref/Ref",
// "rv32i_m/privilege/src/WALLY-trap-sret-01.S/ref/Ref",
// "rv32i_m/privilege/src/WALLY-trap-u-01.S/ref/Ref",
// "rv32i_m/privilege/src/WALLY-wfi-01.S/ref/Ref"
// };

View File

@ -5,8 +5,8 @@ NAME := synth
# defaults
export DESIGN ?= wallypipelinedcore
export FREQ ?= 4000
export CONFIG ?= rv64gc
export FREQ ?= 3402
export CONFIG ?= rv32e
# sky130 and sky90 presently supported
export TECH ?= tsmc28
# MAXCORES allows parallel compilation, which is faster but less CPU-efficient
@ -126,6 +126,8 @@ clean:
rm -f command.log
rm -f filenames*.log
rm -f power.saif
rm -f Synopsys_stack_trace_*.txt
rm -f crte_*.txt

View File

@ -7,6 +7,7 @@ import subprocess
from matplotlib.cbook import flatten
import matplotlib.pyplot as plt
import matplotlib.lines as lines
from wallySynth import testFreq
def synthsintocsv():
@ -26,7 +27,7 @@ def synthsintocsv():
writer.writerow(['Width', 'Config', 'Special', 'Tech', 'Target Freq', 'Delay', 'Area'])
for oneSynth in allSynths:
descrip = specReg.findall(oneSynth)
descrip = specReg.findall(oneSynth) #[30:]
width = descrip[2][:4]
config = descrip[2][4:]
if descrip[3][-2:] == 'nm':
@ -46,7 +47,7 @@ def synthsintocsv():
nums = [float(m) for m in nums]
metrics += nums
except:
print(config + tech + freq + " doesn't have reports")
print(width + config + tech + '_' + freq + " doesn't have reports")
if metrics == []:
pass
else:
@ -56,7 +57,7 @@ def synthsintocsv():
file.close()
def synthsfromcsv(filename):
Synth = namedtuple("Synth", " width config special tech freq delay area")
Synth = namedtuple("Synth", "width config special tech freq delay area")
with open(filename, newline='') as csvfile:
csvreader = csv.reader(csvfile)
global allSynths
@ -110,23 +111,26 @@ def freqPlot(tech, width, config):
plt.savefig('./plots/wally/freqSweep_' + tech + '_' + width + config + '.png')
# plt.show()
def areaDelay(width, tech, freq, config=None, special=None):
def areaDelay(tech, freq, width=None, config=None, special=None):
delays, areas, labels = ([] for i in range(3))
for oneSynth in allSynths:
if (width == oneSynth.width) & (tech == oneSynth.tech) & (freq == oneSynth.freq):
if (special != None) & (oneSynth.special == special):
delays += [oneSynth.delay]
areas += [oneSynth.area]
labels += [oneSynth.config]
elif (config != None) & (oneSynth.config == config):
delays += [oneSynth.delay]
areas += [oneSynth.area]
labels += [oneSynth.special]
else:
delays += [oneSynth.delay]
areas += [oneSynth.area]
labels += [oneSynth.config + '_' + oneSynth.special]
if (width==None) or (width == oneSynth.width):
if (tech == oneSynth.tech) & (freq == oneSynth.freq):
if (special != None) & (oneSynth.special == special):
delays += [oneSynth.delay]
areas += [oneSynth.area]
labels += [oneSynth.width + oneSynth.config]
elif (config != None) & (oneSynth.config == config):
delays += [oneSynth.delay]
areas += [oneSynth.area]
labels += [oneSynth.special]
# else:
# delays += [oneSynth.delay]
# areas += [oneSynth.area]
# labels += [oneSynth.config + '_' + oneSynth.special]
if width == None:
width = ''
f, (ax1) = plt.subplots(1, 1)
plt.scatter(delays, areas)
@ -154,8 +158,11 @@ def areaDelay(width, tech, freq, config=None, special=None):
# ending freq in 42 means fpu was turned off manually
if __name__ == '__main__':
synthsintocsv()
# synthsintocsv()
synthsfromcsv('Summary.csv')
freqPlot('tsmc28', 'rv64', 'gc')
areaDelay('rv32', 'tsmc28', 4200, config='gc')
areaDelay('rv32', 'tsmc28', 3042, special='')
freqPlot('tsmc28', 'rv32', 'e')
freqPlot('sky90', 'rv32', 'e')
areaDelay('tsmc28', testFreq[1], width= 'rv64', config='gc')
areaDelay('tsmc28', testFreq[1], special='')
areaDelay('sky90', testFreq[0], width='rv64', config='gc')
areaDelay('sky90', testFreq[0], special='')

View File

@ -1,5 +1,6 @@
#!/usr/bin/bash
# Rotate synthesis run directories:
#   1. clean build by-products via the Makefile's clean target,
#   2. move the current `runs` directory into runArchive/ under a timestamped name,
#   3. promote `newRuns` to `runs`,
#   4. create a fresh, empty `newRuns`.
make clean

# Timestamp format: YYYY_MM_DD_hh_mm_AM/PM (12-hour clock).
archive_stamp=$(date +"%Y_%m_%d_%I_%M_%p")
mv runs "runArchive/${archive_stamp}"

mv newRuns runs
mkdir newRuns

View File

@ -8,20 +8,22 @@ def runCommand(config, tech, freq):
command = "make synth DESIGN=wallypipelinedcore CONFIG={} TECH={} DRIVE=FLOP FREQ={} MAXOPT=0 MAXCORES=1".format(config, tech, freq)
subprocess.Popen(command, shell=True)
testFreq = [3000, 10000]
if __name__ == '__main__':
techs = ['sky90', 'tsmc28']
bestAchieved = [750, 3000]
sweepCenter = [870, 3000]
synthsToRun = []
arr = [-8, -6, -4, -2, 0, 2, 4, 6, 8]
for i in [0, 1]:
tech = techs[i]
f = bestAchieved[i]
for freq in [round(f+f*x/100) for x in arr]: # rv32e freq sweep
sc = sweepCenter[i]
f = testFreq[i]
for freq in [round(sc+sc*x/100) for x in arr]: # rv32e freq sweep
synthsToRun += [['rv32e', tech, freq]]
for config in ['rv32gc', 'rv32ic', 'rv64gc', 'rv64i', 'rv64ic']: # configs
for config in ['rv32gc', 'rv32ic', 'rv64gc', 'rv64i', 'rv64ic', 'rv32e']: # configs
synthsToRun += [[config, tech, f]]
for mod in ['FPUoff', 'noMulDiv', 'noPriv', 'PMP0', 'PMP16']: # rv64gc path variations
config = 'rv64gc_' + mod

View File

@ -54,6 +54,7 @@ target_tests_nosim = \
WALLY-status-sie-01 \
WALLY-status-tw-01 \
WALLY-gpio-01 \
WALLY-clint-01 \
rv32i_tests = $(addsuffix .elf, $(rv32i_sc_tests))

View File

@ -0,0 +1,9 @@
00000000 # msip zero on reset
00000000 # mip is zero
00000008 # mip msip bit is set
00000000 # mip msip bit is reset
00000000 # mip mtip bit is reset
FFFFFFFF # mtimecmp is same as written value
A5A5A5A5 # mtimecmph is same as written value
00000000 # mip mtip is zero
00000080 # mip mtip is set

View File

@ -1,5 +1,18 @@
00000000 # test reset to zero
00000000
00000000 # output_en
00000000 # output_val
00000000 # rise_ie
00000000 # rise_ip
00000000 # fall_ie
00000000 # fall_ip
00000000 # high_ie
00000000 # high_ip
00000000 # low_ie
ffffffff # low_ip
00000000 # iof_en
00000000 # iof_sel
00000000 # out_xor
A5A5A5A5 # test output pins
5A5AFFFF
00000000 # test input enables

View File

@ -0,0 +1,103 @@
///////////////////////////////////////////
//
// WALLY-gpio
//
// Author: David_Harris@hmc.edu and Nicholas Lucio <nlucio@hmc.edu>
//
// Created 2022-06-16
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
#include "WALLY-TEST-LIB-32.h"
INIT_TESTS
TRAP_HANDLER m
j run_test_loop // begin test loop/table tests instead of executing inline code.
INIT_TEST_TABLE
END_TESTS
TEST_STACK_AND_DATA
.align 2
test_cases:
# ---------------------------------------------------------------------------------------------
# Test Contents
#
# Here is where the actual tests are held, or rather, what the actual tests do.
# each entry consists of 3 values that will be read in as follows:
#
# '.4byte [x28 Value], [x29 Value], [x30 value]'
# or
# '.4byte [address], [value], [test type]'
#
# The encoding for x30 test type values can be found in the test handler in the framework file
#
# ---------------------------------------------------------------------------------------------
# =========== Define CLINT registers ===========
.equ CLINT, 0x02000000
.equ msip, (CLINT+0x00)
.equ mtimecmp, (CLINT+0x4000) # doesn't necessarily reset to zero
.equ mtimecmph,(CLINT+0x4004)
.equ mtime, (CLINT+0xBFF8) # resets to zero but cannot be easily tested
.equ mtimeh, (CLINT+0xBFFC)
# =========== Verify verifiable registers reset to zero ===========
.4byte msip, 0x00000000, read32_test # msip reset to zero
# =========== msip tests ===========
.4byte msip, 0xFFFFFFFE, write32_test # write to invalid bits of msip
.4byte 0x0, 0x00000000, readmip_test # msip bit should be zero
.4byte msip, 0x00000001, write32_test # set msip to one
.4byte 0x0, 0x00000008, readmip_test # msip bit is set
.4byte msip, 0x00000000, write32_test # set msip to zero
.4byte 0x0, 0x00000000, readmip_test # msip bit is released
# =========== mtime write tests ===========
.4byte mtime, 0x00000000, write32_test # test we can write to mtime
.4byte mtimeh, 0x00000000, write32_test # test we can write to mtimeh
.4byte 0x0,0x00000000, readmip_test # mtip bit should be zero
# =========== mtimecmp tests ===========
.4byte mtimecmp, 0xFFFFFFFF, write32_test # verify mtimecmp is writable
.4byte mtimecmph, 0xA5A5A5A5, write32_test # verify mtimecmph is writable
.4byte mtimecmp, 0xFFFFFFFF, read32_test # read back value written to mtimecmp
.4byte mtimecmph, 0xA5A5A5A5, read32_test # read back value written to mtimecmph
.4byte mtime, 0xFFFFFFFF, write32_test # write to mtime
.4byte 0x0, 0x00000000, readmip_test # mtip should still be zero
.4byte mtimeh, 0xA5A5A5A6, write32_test # cause mtip to go high by making mtime > mtimecmp
.4byte 0x0, 0x00000080, readmip_test # mtip should be set
.4byte 0x0, 0x0, terminate_test # terminate tests
# =========== Experimental mtime counting test ===========
# .4byte mtimecmph, 0xFFFFFFFF, write32_test # make sure mtip isn't set until ready
# .4byte mtimeh, 0x0FFFFFFF, write32_test # write near max value to mtimeh
# .4byte mtime, 0x00000000, write32_test # write small value to mtime
# .4byte 0x0, 0x000000000, readmip_test # mtip should be zero
# .4byte mtimecmp, 0x00000001, write32_test # write slightly larger value than mtime to test mtime counting
# .4byte mtimecmph, 0x0FFFFFFF, write32_test # write same value as mtimeh to test mtime counting
# .4byte 0x0, 0x00000080, readmip_test # mtip should be set since it has been at least two cycles

View File

@ -70,9 +70,21 @@ test_cases:
# =========== Verify all registers reset to zero ===========
.4byte input_val, 0x00000000, read32_test # input_val reset to zero
.4byte input_en, 0x00000000, read32_test # input_en reset to zero
# *** add more
.4byte input_val, 0x00000000, read32_test # input_val reset to zero
.4byte input_en, 0x00000000, read32_test # input_en reset to zero
.4byte output_en, 0x00000000, read32_test # output_en reset to zero
.4byte output_val, 0x00000000, read32_test # output_val reset to zero
.4byte rise_ie, 0x00000000, read32_test # rise_ie reset to zero
.4byte rise_ip, 0x00000000, read32_test # rise_ip reset to zero
.4byte fall_ie, 0x00000000, read32_test # fall_ie reset to zero
.4byte fall_ip, 0xffffffff, read32_test # fall_ip reset to ones (input_val is zero)
.4byte high_ie, 0x00000000, read32_test # high_ie reset to zero
.4byte high_ip, 0x00000000, read32_test # high_ip reset to zero
.4byte low_ie, 0x00000000, read32_test # low_ie reset to zero
.4byte low_ip, 0x00000000, read32_test # low_ip reset to zero
.4byte iof_en, 0x00000000, read32_test # iof_en reset to zero
.4byte iof_sel, 0x00000000, read32_test # iof_sel reset to zero
.4byte out_xor, 0x00000000, read32_test # out_xor reset to zero
# =========== Test output and input pins ===========

View File

@ -857,6 +857,27 @@ trap_handler_end_\MODE\(): // place to jump to so we can skip the trap handler a
addi a6, a6, 8
.endm
// SETUP_PLIC
// Takes no arguments. Expanding this macro does two things:
//   1. defines the PLIC_* register-address symbols listed below (PLIC_INTPENDING0, PLIC_CLAIM0 and
//      PLIC_CLAIM1 are defined here but not used by the macro itself), and
//   2. emits six 3-word entries of the form (address, value, write32_test).
// The emitted words are data, not instructions.
// NOTE(review): presumably meant to be expanded inside a test_cases table so the test loop performs the writes - confirm.
.macro SETUP_PLIC
# Setup PLIC with a series of register writes
.equ PLIC_INTPRI_GPIO, 0x0C00000C # GPIO is interrupt 3
.equ PLIC_INTPRI_UART, 0x0C000028 # UART is interrupt 10
.equ PLIC_INTPENDING0, 0x0C001000 # intPending0 register
.equ PLIC_INTEN00, 0x0C002000 # interrupt enables for context 0 (machine mode) sources 31:1
.equ PLIC_INTEN10, 0x0C002080 # interrupt enables for context 1 (supervisor mode) sources 31:1
.equ PLIC_THRESH0, 0x0C200000 # Priority threshold for context 0 (machine mode)
.equ PLIC_CLAIM0, 0x0C200004 # Claim/Complete register for context 0
.equ PLIC_THRESH1, 0x0C201000 # Priority threshold for context 1 (supervisor mode)
.equ PLIC_CLAIM1, 0x0C201004 # Claim/Complete register for context 1
.4byte PLIC_THRESH0, 0, write32_test # Set PLIC machine mode interrupt threshold to 0 to accept all interrupts
.4byte PLIC_THRESH1, 7, write32_test # Set PLIC supervisor mode interrupt threshold to 7 to accept no interrupts
.4byte PLIC_INTPRI_GPIO, 7, write32_test # Set GPIO to high priority
.4byte PLIC_INTPRI_UART, 7, write32_test # Set UART to high priority
.4byte PLIC_INTEN00, 0xFFFFFFFF, write32_test # Enable all interrupt sources for machine mode
.4byte PLIC_INTEN10, 0x00000000, write32_test # Disable all interrupt sources for supervisor mode
.endm
.macro END_TESTS
// invokes one final ecall to return to machine mode then terminates this program, so the output is
// 0x8: termination called from U mode
@ -984,6 +1005,20 @@ read08_test:
addi a6, a6, 8
j test_loop // go to next test case
// readmip_test: stores the current value of the mip CSR to the word addressed by t1,
// then advances t1 and a6 by 4 and jumps back to test_loop.
// Uses t2 as scratch. No other registers are read by this handler.
// NOTE(review): t1 looks like the signature write pointer and a6 a parallel offset/counter kept by the
// framework - confirm against the framework's register conventions.
readmip_test: // read the MIP into the signature
csrr t2, mip // t2 = mip
sw t2, 0(t1) // write the 32-bit value at the address in t1
addi t1, t1, 4 // advance t1 past the word just written
addi a6, a6, 4 // keep a6 in step with t1 (one 4-byte entry)
j test_loop // go to next test case
// readsip_test: stores the current value of the sip CSR to the word addressed by t1,
// then advances t1 and a6 by 4 and jumps back to test_loop.
// Uses t2 as scratch. No other registers are read by this handler.
// NOTE(review): t1 looks like the signature write pointer and a6 a parallel offset/counter kept by the
// framework - confirm against the framework's register conventions.
readsip_test: // read the SIP into the signature
csrr t2, sip // t2 = sip
sw t2, 0(t1) // write the 32-bit value at the address in t1
addi t1, t1, 4 // advance t1 past the word just written
addi a6, a6, 4 // keep a6 in step with t1 (one 4-byte entry)
j test_loop // go to next test case
goto_s_mode:
// return to address in t3,
li a0, 3 // Trap handler behavior (go to supervisor mode)