Merge branch 'main' of github.com:davidharrishmc/riscv-wally into main

This commit is contained in:
Ross Thompson 2022-11-06 17:22:25 -06:00
commit e7d24609cd
13 changed files with 179 additions and 128 deletions

View File

@ -110,7 +110,7 @@
// division constants // division constants
`define RADIX 32'h4 `define RADIX 32'h4
`define DIVCOPIES 32'h3 `define DIVCOPIES 32'h2
`define DIVLEN ((`NF < `XLEN) ? (`XLEN) : `NF+3) `define DIVLEN ((`NF < `XLEN) ? (`XLEN) : `NF+3)
// `define DIVN (`NF < `XLEN ? `XLEN : `NF+1) // length of input // `define DIVN (`NF < `XLEN ? `XLEN : `NF+1) // length of input
`define DIVN (`NF<`XLEN ? `XLEN : (`NF + 3)) // length of input `define DIVN (`NF<`XLEN ? `XLEN : (`NF + 3)) // length of input
@ -118,12 +118,17 @@
`define EXTRAINTBITS ((`NF < `XLEN) ? 0 : (`NF - `XLEN + 3)) `define EXTRAINTBITS ((`NF < `XLEN) ? 0 : (`NF - `XLEN + 3))
`define DIVRESLEN ((`NF>`XLEN) ? (`NF + 4) : `XLEN) `define DIVRESLEN ((`NF>`XLEN) ? (`NF + 4) : `XLEN)
`define LOGR ((`RADIX==2) ? 32'h1 : 32'h2) `define LOGR ((`RADIX==2) ? 32'h1 : 32'h2)
`define RK (`DIVCOPIES*`LOGR) // r*k used for intdiv preproc
`define LOGK ($clog2(`DIVCOPIES))
`define LOGRK ($clog2(`RK))
// FPDUR = ceil(DIVRESLEN/(LOGR*DIVCOPIES)) // FPDUR = ceil(DIVRESLEN/(LOGR*DIVCOPIES))
// one iteration is required for the integer bit for minimally redundant radix-4 // one iteration is required for the integer bit for minimally redundant radix-4
`define FPDUR ((`DIVN+2+(`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES)+(`RADIX/4)) `define FPDUR ((`DIVN+2+(`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES)+(`RADIX/4))
`define DURLEN ($clog2(`FPDUR+1)) `define DURLEN ($clog2(`FPDUR+1))
`define QLEN (`FPDUR*`LOGR*`DIVCOPIES) `define QLEN (`FPDUR*`LOGR*`DIVCOPIES)
`define DIVb (`QLEN-1) `define DIVb (`QLEN-1)
`define DIVa (`DIVb+4-`XLEN)
`define DIVBLEN ($clog2(`DIVb+1)-1)
`define USE_SRAM 0 `define USE_SRAM 0

View File

@ -64,10 +64,12 @@ module fdivsqrt(
logic Firstun; logic Firstun;
logic WZero; logic WZero;
logic SpecialCaseM; logic SpecialCaseM;
logic [`DIVBLEN:0] n, p, m;
logic OTFCSwap;
fdivsqrtpreproc fdivsqrtpreproc( fdivsqrtpreproc fdivsqrtpreproc(
.clk, .DivStartE, .Xm(XmE), .QeM, .Xe(XeE), .Fmt(FmtE), .Ye(YeE), .clk, .DivStartE, .Xm(XmE), .QeM, .Xe(XeE), .Fmt(FmtE), .Ye(YeE),
.Sqrt(SqrtE), .Ym(YmE), .XZero(XZeroE), .X, .Dpreproc, .Sqrt(SqrtE), .Ym(YmE), .XZero(XZeroE), .X, .Dpreproc, .n, .p, .m, .OTFCSwap,
.ForwardedSrcAE, .ForwardedSrcBE, .Funct3E, .Funct3M, .MDUE, .W64E); .ForwardedSrcAE, .ForwardedSrcBE, .Funct3E, .Funct3M, .MDUE, .W64E);
fdivsqrtfsm fdivsqrtfsm( fdivsqrtfsm fdivsqrtfsm(
.clk, .reset, .FmtE, .XsE, .SqrtE, .clk, .reset, .FmtE, .XsE, .SqrtE,
@ -77,10 +79,11 @@ module fdivsqrt(
fdivsqrtiter fdivsqrtiter( fdivsqrtiter fdivsqrtiter(
.clk, .Firstun, .D, .FirstU, .FirstUM, .FirstC, .SqrtE, .SqrtM, .clk, .Firstun, .D, .FirstU, .FirstUM, .FirstC, .SqrtE, .SqrtM,
.X,.Dpreproc, .FirstWS(WS), .FirstWC(WC), .X,.Dpreproc, .FirstWS(WS), .FirstWC(WC),
.DivStartE, .Xe(XeE), .Ye(YeE), .XZeroE, .YZeroE, .DivStartE, .Xe(XeE), .Ye(YeE), .XZeroE, .YZeroE, .OTFCSwap,
.DivBusy); .DivBusy);
fdivsqrtpostproc fdivsqrtpostproc( fdivsqrtpostproc fdivsqrtpostproc(
.WS, .WC, .D, .FirstU, .FirstUM, .FirstC, .Firstun, .WS, .WC, .D, .FirstU, .FirstUM, .FirstC, .Firstun,
.SqrtM, .SpecialCaseM, .RemOp(Funct3E[1]), .SqrtM, .SpecialCaseM, .RemOp(Funct3E[1]),
.n, .p, .m,
.QmM, .WZero, .DivSM); .QmM, .WZero, .DivSM);
endmodule endmodule

View File

@ -38,6 +38,7 @@ module fdivsqrtiter(
input logic XZeroE, YZeroE, input logic XZeroE, YZeroE,
input logic SqrtE, input logic SqrtE,
input logic SqrtM, input logic SqrtM,
input logic OTFCSwap,
input logic [`DIVb+3:0] X, input logic [`DIVb+3:0] X,
input logic [`DIVN-2:0] Dpreproc, input logic [`DIVN-2:0] Dpreproc,
output logic [`DIVN-2:0] D, // U0.N-1 output logic [`DIVN-2:0] D, // U0.N-1

View File

@ -39,6 +39,7 @@ module fdivsqrtpostproc(
input logic SqrtM, input logic SqrtM,
input logic SpecialCaseM, input logic SpecialCaseM,
input logic RemOp, input logic RemOp,
input logic [`DIVBLEN:0] n, p, m,
output logic [`DIVb:0] QmM, output logic [`DIVb:0] QmM,
output logic WZero, output logic WZero,
output logic DivSM output logic DivSM

View File

@ -41,7 +41,9 @@ module fdivsqrtpreproc (
input logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // *** these are the src outputs before the mux choosing between them and PCE to put in srcA/B input logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // *** these are the src outputs before the mux choosing between them and PCE to put in srcA/B
input logic [2:0] Funct3E, Funct3M, input logic [2:0] Funct3E, Funct3M,
input logic MDUE, W64E, input logic MDUE, W64E,
output logic [`NE+1:0] QeM, output logic [`DIVBLEN:0] n, p, m,
output logic OTFCSwap,
output logic [`NE+1:0] QeM,
output logic [`DIVb+3:0] X, output logic [`DIVb+3:0] X,
output logic [`DIVN-2:0] Dpreproc output logic [`DIVN-2:0] Dpreproc
); );
@ -50,36 +52,56 @@ module fdivsqrtpreproc (
logic [`NF-1:0] PreprocB, PreprocY; logic [`NF-1:0] PreprocB, PreprocY;
logic [`NF+1:0] SqrtX; logic [`NF+1:0] SqrtX;
logic [`DIVb+3:0] DivX; logic [`DIVb+3:0] DivX;
logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt; logic [`DIVBLEN:0] L;
logic [`NE+1:0] Qe; logic [`NE+1:0] Qe;
// Intdiv signals // Intdiv signals
logic [`DIVN-1:0] ZeroBufX, ZeroBufY; logic [`DIVb-1:0] ZeroBufX, ZeroBufY;
logic [`XLEN-1:0] PosA, PosB; logic [`XLEN-1:0] PosA, PosB;
logic Signed, Aneg, Bneg; logic As, Bs, OTFCSwapTemp;
logic [`XLEN-1:0] A64, B64;
logic [`DIVBLEN:0] ZeroDiff, IntBits, RightShiftX;
logic [`DIVBLEN:0] pPlusr, pPrCeil;
logic [`LOGRK-1:0] pPrTrunc;
logic [`DIVb+3:0] PreShiftX;
// ***can probably merge X LZC with conversion // ***can probably merge X LZC with conversion
// count the number of leading zeros // count the number of leading zeros
// Muxes needed for Int; add after Cedar Commit
assign ZeroBufX = MDUE ? {ForwardedSrcAE, {`DIVN-`XLEN{1'b0}}} : {Xm, {`DIVN-`NF-1{1'b0}}};
assign ZeroBufY = MDUE ? {ForwardedSrcBE, {`DIVN-`XLEN{1'b0}}} : {Ym, {`DIVN-`NF-1{1'b0}}};
lzc #(`NF+1) lzcX (Xm, XZeroCnt);
lzc #(`NF+1) lzcY (Ym, YZeroCnt);
assign Signed = Funct3E[0]; assign As = ForwardedSrcAE[`XLEN-1] & Funct3E[0];
assign Aneg = ForwardedSrcAE[`XLEN-1] & Signed; assign Bs = ForwardedSrcBE[`XLEN-1] & Funct3E[0];
assign Bneg = ForwardedSrcBE[`XLEN-1] & Signed; assign A64 = W64E ? {{(`XLEN-32){As}}, ForwardedSrcAE[31:0]} : ForwardedSrcAE;
assign PosA = Aneg ? -ForwardedSrcAE : ForwardedSrcAE; assign B64 = W64E ? {{(`XLEN-32){Bs}}, ForwardedSrcBE[31:0]} : ForwardedSrcBE;
assign PosB = Bneg ? -ForwardedSrcBE : ForwardedSrcBE;
assign PreprocX = Xm[`NF-1:0]<<XZeroCnt; assign OTFCSwapTemp = (As ^ Bs) & MDUE;
assign PreprocY = Ym[`NF-1:0]<<YZeroCnt;
assign SqrtX = Xe[0]^XZeroCnt[0] ? {1'b0, ~XZero, PreprocX} : {~XZero, PreprocX, 1'b0}; assign PosA = As ? -A64 : A64;
assign PosB = Bs ? -B64 : B64;
assign ZeroBufX = MDUE ? {PosA, {`DIVb-`XLEN{1'b0}}} : {Xm, {`DIVb-`NF-1{1'b0}}};
assign ZeroBufY = MDUE ? {PosB, {`DIVb-`XLEN{1'b0}}} : {Ym, {`DIVb-`NF-1{1'b0}}};
lzc #(`DIVb) lzcX (ZeroBufX, L);
lzc #(`DIVb) lzcY (ZeroBufY, m);
assign PreprocX = Xm[`NF-1:0]<<L;
assign PreprocY = Ym[`NF-1:0]<<m;
assign ZeroDiff = m - L;
assign p = ZeroDiff[`DIVBLEN] ? '0 : ZeroDiff;
assign pPlusr = (`DIVBLEN)'(`LOGR) + p;
assign pPrTrunc = pPlusr[`LOGRK-1:0];
assign pPrCeil = (pPlusr >> `LOGRK) + {{`DIVBLEN-1{1'b0}}, |(pPrTrunc)};
assign n = (pPrCeil << `LOGK) - 1;
assign IntBits = (`DIVBLEN)'(`RK) + p;
assign RightShiftX = (`DIVBLEN)'(`RK) - {{(`DIVBLEN-`RK){1'b0}}, IntBits[`RK-1:0]};
assign SqrtX = Xe[0]^L[0] ? {1'b0, ~XZero, PreprocX} : {~XZero, PreprocX, 1'b0};
assign DivX = {3'b000, ~XZero, PreprocX, {`DIVb-`NF{1'b0}}}; assign DivX = {3'b000, ~XZero, PreprocX, {`DIVb-`NF{1'b0}}};
// *** explain why X is shifted between radices (initial assignment of WS=RX) // *** explain why X is shifted between radices (initial assignment of WS=RX)
if (`RADIX == 2) assign X = Sqrt ? {3'b111, SqrtX, {`DIVb-1-`NF{1'b0}}} : DivX; if (`RADIX == 2) assign PreShiftX = Sqrt ? {3'b111, SqrtX, {`DIVb-1-`NF{1'b0}}} : DivX;
else assign X = Sqrt ? {2'b11, SqrtX, {`DIVb-1-`NF{1'b0}}, 1'b0} : DivX; else assign PreShiftX = Sqrt ? {2'b11, SqrtX, {`DIVb-1-`NF{1'b0}}, 1'b0} : DivX;
assign X = MDUE ? PreShiftX >> RightShiftX : PreShiftX;
assign Dpreproc = {PreprocY, {`DIVN-1-`NF{1'b0}}}; assign Dpreproc = {PreprocY, {`DIVN-1-`NF{1'b0}}};
// radix 2 radix 4 // radix 2 radix 4
@ -92,17 +114,18 @@ module fdivsqrtpreproc (
// r = 1 or 2 // r = 1 or 2
// DIVRESLEN/(r*`DIVCOPIES) // DIVRESLEN/(r*`DIVCOPIES)
flopen #(`NE+2) expflop(clk, DivStartE, Qe, QeM); flopen #(`NE+2) expflop(clk, DivStartE, Qe, QeM);
expcalc expcalc(.Fmt, .Xe, .Ye, .Sqrt, .XZero, .XZeroCnt, .YZeroCnt, .Qe); flopen #(1) swapflop(clk, DivStartE, OTFCSwapTemp, OTFCSwap);
expcalc expcalc(.Fmt, .Xe, .Ye, .Sqrt, .XZero, .L, .m, .Qe);
endmodule endmodule
module expcalc( module expcalc(
input logic [`FMTBITS-1:0] Fmt, input logic [`FMTBITS-1:0] Fmt,
input logic [`NE-1:0] Xe, Ye, input logic [`NE-1:0] Xe, Ye,
input logic Sqrt, input logic Sqrt,
input logic XZero, input logic XZero,
input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt, input logic [`DIVBLEN:0] L, m,
output logic [`NE+1:0] Qe output logic [`NE+1:0] Qe
); );
logic [`NE-2:0] Bias; logic [`NE-2:0] Bias;
logic [`NE+1:0] SXExp; logic [`NE+1:0] SXExp;
@ -133,10 +156,10 @@ module expcalc(
2'h2: Bias = (`NE-1)'(`H_BIAS); 2'h2: Bias = (`NE-1)'(`H_BIAS);
endcase endcase
end end
assign SXExp = {2'b0, Xe} - {{`NE+1-$unsigned($clog2(`NF+2)){1'b0}}, XZeroCnt} - (`NE+1)'(`BIAS); assign SXExp = {2'b0, Xe} - {{(`NE+1-`DIVBLEN){1'b0}}, L} - (`NE+2)'(`BIAS);
assign SExp = {SXExp[`NE+1], SXExp[`NE+1:1]} + {2'b0, Bias}; assign SExp = {SXExp[`NE+1], SXExp[`NE+1:1]} + {2'b0, Bias};
// correct exponent for denormalized input's normalization shifts // correct exponent for denormalized input's normalization shifts
assign DExp = ({2'b0, Xe} - {{`NE+1-$unsigned($clog2(`NF+2)){1'b0}}, XZeroCnt} - {2'b0, Ye} + {{`NE+1-$unsigned($clog2(`NF+2)){1'b0}}, YZeroCnt} + {3'b0, Bias})&{`NE+2{~XZero}}; assign DExp = ({2'b0, Xe} - {{(`NE+1-`DIVBLEN){1'b0}}, L} - {2'b0, Ye} + {{(`NE+1-`DIVBLEN){1'b0}}, m} + {3'b0, Bias}) & {`NE+2{~XZero}};
assign Qe = Sqrt ? SExp : DExp; assign Qe = Sqrt ? SExp : DExp;
endmodule endmodule

View File

@ -61,7 +61,7 @@ module fdivsqrtstage2 (
// 0001 = -2 // 0001 = -2
fdivsqrtqsel2 qsel2(WS[`DIVb+3:`DIVb], WC[`DIVb+3:`DIVb], up, uz, un); fdivsqrtqsel2 qsel2(WS[`DIVb+3:`DIVb], WC[`DIVb+3:`DIVb], up, uz, un);
// Sqrt F generatin // Sqrt F generation
fdivsqrtfgen2 fgen2(.up, .uz, .C(CNext), .U, .UM, .F); fdivsqrtfgen2 fgen2(.up, .uz, .C(CNext), .U, .UM, .F);
// Divisor multiple // Divisor multiple

View File

@ -42,7 +42,7 @@ module hptw
input logic [1:0] STATUS_MPP, input logic [1:0] STATUS_MPP,
input logic [1:0] PrivilegeModeW, input logic [1:0] PrivilegeModeW,
(* mark_debug = "true" *) input logic ITLBMissOrDAFaultNoTrapF, DTLBMissOrDAFaultNoTrapM, // TLB Miss (* mark_debug = "true" *) input logic ITLBMissOrDAFaultNoTrapF, DTLBMissOrDAFaultNoTrapM, // TLB Miss
input logic [`XLEN-1:0] HPTWReadPTE, // page table entry from LSU input logic [`XLEN-1:0] HPTWReadPTE, // page table entry from LSU *** change to ReadDataM
input logic DCacheStallM, // stall from LSU input logic DCacheStallM, // stall from LSU
output logic [`XLEN-1:0] PTE, // page table entry to TLBs output logic [`XLEN-1:0] PTE, // page table entry to TLBs
output logic [1:0] PageType, // page type to TLBs output logic [1:0] PageType, // page type to TLBs
@ -106,7 +106,6 @@ module hptw
if(`HPTW_WRITES_SUPPORTED) begin : hptwwrites if(`HPTW_WRITES_SUPPORTED) begin : hptwwrites
logic SV39Mode;
logic ReadAccess, WriteAccess; logic ReadAccess, WriteAccess;
logic InvalidRead, InvalidWrite; logic InvalidRead, InvalidWrite;
logic UpperBitsUnequalPageFault; logic UpperBitsUnequalPageFault;
@ -136,19 +135,9 @@ module hptw
assign ImproperPrivilege = ((EffectivePrivilegeMode == `U_MODE) & ~PTE_U) | assign ImproperPrivilege = ((EffectivePrivilegeMode == `U_MODE) & ~PTE_U) |
((EffectivePrivilegeMode == `S_MODE) & PTE_U & (~STATUS_SUM & DTLBWalk)); ((EffectivePrivilegeMode == `S_MODE) & PTE_U & (~STATUS_SUM & DTLBWalk));
// *** turn into module common with code in tlbcontrol. // Check for page faults
if (`XLEN==64) begin:rv64 vm64check vm64check(.SATP_MODE(SATP_REGW[`XLEN-1:`XLEN-`SVMODE_BITS]), .VAdr(TranslationVAdr),
assign SV39Mode = (SATP_REGW[`XLEN-1:`XLEN-`SVMODE_BITS] == `SV39); .SV39Mode(), .UpperBitsUnequalPageFault);
// page fault if upper bits aren't all the same
logic UpperEqual39, UpperEqual48;
assign UpperEqual39 = &(TranslationVAdr[63:38]) | ~|(TranslationVAdr[63:38]);
assign UpperEqual48 = &(TranslationVAdr[63:47]) | ~|(TranslationVAdr[63:47]);
assign UpperBitsUnequalPageFault = SV39Mode ? ~UpperEqual39 : ~UpperEqual48;
end else begin
assign SV39Mode = 0;
assign UpperBitsUnequalPageFault = 0;
end
assign InvalidRead = ReadAccess & ~Readable & (~STATUS_MXR | ~Executable); assign InvalidRead = ReadAccess & ~Readable & (~STATUS_MXR | ~Executable);
assign InvalidWrite = WriteAccess & ~Writable; assign InvalidWrite = WriteAccess & ~Writable;
assign OtherPageFault = DTLBWalk? ImproperPrivilege | InvalidRead | InvalidWrite | UpperBitsUnequalPageFault | Misaligned | ~Valid : assign OtherPageFault = DTLBWalk? ImproperPrivilege | InvalidRead | InvalidWrite | UpperBitsUnequalPageFault | Misaligned | ~Valid :
@ -190,26 +179,26 @@ module hptw
// HPTWAdr muxing // HPTWAdr muxing
if (`XLEN==32) begin // RV32 if (`XLEN==32) begin // RV32
logic [9:0] VPN; logic [9:0] VPN;
logic [`PPN_BITS-1:0] PPN; logic [`PPN_BITS-1:0] PPN;
assign VPN = ((WalkerState == L1_ADR) | (WalkerState == L1_RD)) ? TranslationVAdr[31:22] : TranslationVAdr[21:12]; // select VPN field based on HPTW state assign VPN = ((WalkerState == L1_ADR) | (WalkerState == L1_RD)) ? TranslationVAdr[31:22] : TranslationVAdr[21:12]; // select VPN field based on HPTW state
assign PPN = ((WalkerState == L1_ADR) | (WalkerState == L1_RD)) ? BasePageTablePPN : CurrentPPN; assign PPN = ((WalkerState == L1_ADR) | (WalkerState == L1_RD)) ? BasePageTablePPN : CurrentPPN;
assign HPTWReadAdr = {PPN, VPN, 2'b00}; assign HPTWReadAdr = {PPN, VPN, 2'b00};
assign HPTWSize = 3'b010; assign HPTWSize = 3'b010;
end else begin // RV64 end else begin // RV64
logic [8:0] VPN; logic [8:0] VPN;
logic [`PPN_BITS-1:0] PPN; logic [`PPN_BITS-1:0] PPN;
always_comb always_comb
case (WalkerState) // select VPN field based on HPTW state case (WalkerState) // select VPN field based on HPTW state
L3_ADR, L3_RD: VPN = TranslationVAdr[47:39]; L3_ADR, L3_RD: VPN = TranslationVAdr[47:39];
L2_ADR, L2_RD: VPN = TranslationVAdr[38:30]; L2_ADR, L2_RD: VPN = TranslationVAdr[38:30];
L1_ADR, L1_RD: VPN = TranslationVAdr[29:21]; L1_ADR, L1_RD: VPN = TranslationVAdr[29:21];
default: VPN = TranslationVAdr[20:12]; default: VPN = TranslationVAdr[20:12];
endcase endcase
assign PPN = ((WalkerState == L3_ADR) | (WalkerState == L3_RD) | assign PPN = ((WalkerState == L3_ADR) | (WalkerState == L3_RD) |
(SvMode != `SV48 & ((WalkerState == L2_ADR) | (WalkerState == L2_RD)))) ? BasePageTablePPN : CurrentPPN; (SvMode != `SV48 & ((WalkerState == L2_ADR) | (WalkerState == L2_RD)))) ? BasePageTablePPN : CurrentPPN;
assign HPTWReadAdr = {PPN, VPN, 3'b000}; assign HPTWReadAdr = {PPN, VPN, 3'b000};
assign HPTWSize = 3'b011; assign HPTWSize = 3'b011;
end end
// Initial state and misalignment for RV32/64 // Initial state and misalignment for RV32/64
@ -228,44 +217,33 @@ module hptw
end end
// Page Table Walker FSM // Page Table Walker FSM
// If the setup time on the D$ RAM is short, it should be possible to merge the LEVELx_READ and LEVELx states
// to decrease the latency of the HPTW. However, if the D$ is a cycle limiter, it's better to leave the
// HPTW as shown below to keep the D$ setup time out of the critical path.
// *** Is this really true. Talk with Ross. Seems like it's the next state logic on critical path instead.
// *** address TYPE(statetype)
flopenl #(.TYPE(statetype)) WalkerStateReg(clk, reset, 1'b1, NextWalkerState, IDLE, WalkerState); flopenl #(.TYPE(statetype)) WalkerStateReg(clk, reset, 1'b1, NextWalkerState, IDLE, WalkerState);
always_comb always_comb
case (WalkerState) case (WalkerState)
IDLE: if (TLBMiss) NextWalkerState = InitialWalkerState; IDLE: if (TLBMiss) NextWalkerState = InitialWalkerState;
else NextWalkerState = IDLE; else NextWalkerState = IDLE;
L3_ADR: NextWalkerState = L3_RD; // first access in SV48 L3_ADR: NextWalkerState = L3_RD; // first access in SV48
L3_RD: if (DCacheStallM) NextWalkerState = L3_RD; L3_RD: if (DCacheStallM) NextWalkerState = L3_RD;
else NextWalkerState = L2_ADR; else NextWalkerState = L2_ADR;
L2_ADR: if (InitialWalkerState == L2_ADR) NextWalkerState = L2_RD; // first access in SV39 L2_ADR: if (InitialWalkerState == L2_ADR | ValidNonLeafPTE) NextWalkerState = L2_RD; // first access in SV39
else if (ValidLeafPTE & ~Misaligned) NextWalkerState = LEAF; // could shortcut this by a cyle for all Lx_ADR superpages else NextWalkerState = LEAF;
else if (ValidNonLeafPTE) NextWalkerState = L2_RD; L2_RD: if (DCacheStallM) NextWalkerState = L2_RD;
else NextWalkerState = LEAF; else NextWalkerState = L1_ADR;
L2_RD: if (DCacheStallM) NextWalkerState = L2_RD; L1_ADR: if (InitialWalkerState == L1_ADR | ValidNonLeafPTE) NextWalkerState = L1_RD; // first access in SV32
else NextWalkerState = L1_ADR; else if (ValidNonLeafPTE) NextWalkerState = L1_RD;
L1_ADR: if (InitialWalkerState == L1_ADR) NextWalkerState = L1_RD; // first access in SV32 else NextWalkerState = LEAF;
else if (ValidLeafPTE & ~Misaligned) NextWalkerState = LEAF; // could shortcut this by a cyle for all Lx_ADR superpages L1_RD: if (DCacheStallM) NextWalkerState = L1_RD;
else if (ValidNonLeafPTE) NextWalkerState = L1_RD; else NextWalkerState = L0_ADR;
else NextWalkerState = LEAF; L0_ADR: if (ValidNonLeafPTE) NextWalkerState = L0_RD;
L1_RD: if (DCacheStallM) NextWalkerState = L1_RD; else NextWalkerState = LEAF;
else NextWalkerState = L0_ADR; L0_RD: if (DCacheStallM) NextWalkerState = L0_RD;
L0_ADR: if (ValidLeafPTE & ~Misaligned) NextWalkerState = LEAF; // could shortcut this by a cyle for all Lx_ADR superpages else NextWalkerState = LEAF;
else if (ValidNonLeafPTE) NextWalkerState = L0_RD; LEAF: if (DAPageFault) NextWalkerState = UPDATE_PTE;
else NextWalkerState = LEAF; else NextWalkerState = IDLE;
L0_RD: if (DCacheStallM) NextWalkerState = L0_RD; UPDATE_PTE: if(`HPTW_WRITES_SUPPORTED & DCacheStallM) NextWalkerState = UPDATE_PTE;
else NextWalkerState = LEAF; else NextWalkerState = LEAF;
LEAF: if (DAPageFault) NextWalkerState = UPDATE_PTE; default: NextWalkerState = IDLE; // should never be reached
else NextWalkerState = IDLE; endcase // case (WalkerState)
UPDATE_PTE: if(`HPTW_WRITES_SUPPORTED & DCacheStallM) NextWalkerState = UPDATE_PTE;
else NextWalkerState = LEAF;
default: begin
NextWalkerState = IDLE; // should never be reached
end
endcase // case (WalkerState)
assign IgnoreRequestTLB = WalkerState == IDLE & TLBMiss; assign IgnoreRequestTLB = WalkerState == IDLE & TLBMiss;
assign SelHPTW = WalkerState != IDLE; assign SelHPTW = WalkerState != IDLE;

View File

@ -116,16 +116,16 @@ module tlb #(parameter TLB_ENTRIES = 8,
// we cache Misaligned along with the PTE? This only has to be computed once // we cache Misaligned along with the PTE? This only has to be computed once
// in the hptw as it is always the same regardless of the VPN. // in the hptw as it is always the same regardless of the VPN.
if(`XLEN == 32) begin if(`XLEN == 32) begin
assign MegapageMisaligned = |(PPN[9:0]); // must have zero PPN0 assign MegapageMisaligned = |(PPN[9:0]); // must have zero PPN0
assign Misaligned = (HitPageType == 2'b01) & MegapageMisaligned; assign Misaligned = (HitPageType == 2'b01) & MegapageMisaligned;
end else begin end else begin
logic GigapageMisaligned, TerapageMisaligned; logic GigapageMisaligned, TerapageMisaligned;
assign TerapageMisaligned = |(PPN[26:0]); // must have zero PPN2, PPN1, PPN0 assign TerapageMisaligned = |(PPN[26:0]); // must have zero PPN2, PPN1, PPN0
assign GigapageMisaligned = |(PPN[17:0]); // must have zero PPN1 and PPN0 assign GigapageMisaligned = |(PPN[17:0]); // must have zero PPN1 and PPN0
assign MegapageMisaligned = |(PPN[8:0]); // must have zero PPN0 assign MegapageMisaligned = |(PPN[8:0]); // must have zero PPN0
assign Misaligned = ((HitPageType == 2'b11) & TerapageMisaligned) | assign Misaligned = ((HitPageType == 2'b11) & TerapageMisaligned) |
((HitPageType == 2'b10) & GigapageMisaligned) | ((HitPageType == 2'b10) & GigapageMisaligned) |
((HitPageType == 2'b01) & MegapageMisaligned); ((HitPageType == 2'b01) & MegapageMisaligned);
end end
assign VPN = VAdr[`VPN_BITS+11:12]; assign VPN = VAdr[`VPN_BITS+11:12];
@ -137,7 +137,7 @@ module tlb #(parameter TLB_ENTRIES = 8,
tlblru #(TLB_ENTRIES) lru(.clk, .reset, .TLBWrite, .TLBFlush, .Matches, .CAMHit, .WriteEnables); tlblru #(TLB_ENTRIES) lru(.clk, .reset, .TLBWrite, .TLBFlush, .Matches, .CAMHit, .WriteEnables);
tlbcam #(TLB_ENTRIES, `VPN_BITS + `ASID_BITS, `VPN_SEGMENT_BITS) tlbcam #(TLB_ENTRIES, `VPN_BITS + `ASID_BITS, `VPN_SEGMENT_BITS)
tlbcam(.clk, .reset, .VPN, .PageTypeWriteVal, .SV39Mode, .TLBFlush, .WriteEnables, .PTE_Gs, tlbcam(.clk, .reset, .VPN, .PageTypeWriteVal, .SV39Mode, .TLBFlush, .WriteEnables, .PTE_Gs,
.SATP_ASID, .Matches, .HitPageType, .CAMHit); .SATP_ASID, .Matches, .HitPageType, .CAMHit);
tlbram #(TLB_ENTRIES) tlbram(.clk, .reset, .PTE, .Matches, .WriteEnables, .PPN, .PTEAccessBits, .PTE_Gs); tlbram #(TLB_ENTRIES) tlbram(.clk, .reset, .PTE, .Matches, .WriteEnables, .PPN, .PTEAccessBits, .PTE_Gs);

View File

@ -68,22 +68,12 @@ module tlbcontrol #(parameter ITLB = 0) (
// Grab the sv mode from SATP and determine whether translation should occur // Grab the sv mode from SATP and determine whether translation should occur
assign EffectivePrivilegeMode = (ITLB == 1) ? PrivilegeModeW : (STATUS_MPRV ? STATUS_MPP : PrivilegeModeW); // DTLB uses MPP mode when MPRV is 1 assign EffectivePrivilegeMode = (ITLB == 1) ? PrivilegeModeW : (STATUS_MPRV ? STATUS_MPP : PrivilegeModeW); // DTLB uses MPP mode when MPRV is 1
assign Translate = (SATP_MODE != `NO_TRANSLATE) & (EffectivePrivilegeMode != `M_MODE) & ~DisableTranslation; assign Translate = (SATP_MODE != `NO_TRANSLATE) & (EffectivePrivilegeMode != `M_MODE) & ~DisableTranslation;
if (`XLEN==64) begin:rv64
assign SV39Mode = (SATP_MODE == `SV39);
// page fault if upper bits aren't all the same
logic UpperEqual39, UpperEqual48;
assign UpperEqual39 = &(VAdr[63:38]) | ~|(VAdr[63:38]);
assign UpperEqual48 = &(VAdr[63:47]) | ~|(VAdr[63:47]);
assign UpperBitsUnequalPageFault = SV39Mode ? ~UpperEqual39 : ~UpperEqual48;
end else begin
assign SV39Mode = 0;
assign UpperBitsUnequalPageFault = 0;
end
// Determine whether TLB is being used // Determine whether TLB is being used
assign TLBAccess = ReadAccess | WriteAccess; assign TLBAccess = ReadAccess | WriteAccess;
// Check whether upper bits of virtual address are all equal // Check whether upper bits of virtual address are all equal
vm64check vm64check(.SATP_MODE, .VAdr, .SV39Mode, .UpperBitsUnequalPageFault);
// unswizzle useful PTE bits // unswizzle useful PTE bits
assign {PTE_D, PTE_A} = PTEAccessBits[7:6]; assign {PTE_D, PTE_A} = PTEAccessBits[7:6];
@ -99,7 +89,7 @@ module tlbcontrol #(parameter ITLB = 0) (
assign DAPageFault = Translate & TLBHit & ~PTE_A & ~TLBPageFault; assign DAPageFault = Translate & TLBHit & ~PTE_A & ~TLBPageFault;
assign TLBPageFault = (Translate & TLBHit & (ImproperPrivilege | ~PTE_X | UpperBitsUnequalPageFault | Misaligned | ~PTE_V)); assign TLBPageFault = (Translate & TLBHit & (ImproperPrivilege | ~PTE_X | UpperBitsUnequalPageFault | Misaligned | ~PTE_V));
end else begin end else begin
// fault for software handling if access bit is off // fault for software handling if access bit is off
assign DAPageFault = ~PTE_A; assign DAPageFault = ~PTE_A;
assign TLBPageFault = (Translate & TLBHit & (ImproperPrivilege | ~PTE_X | DAPageFault | UpperBitsUnequalPageFault | Misaligned | ~PTE_V)); assign TLBPageFault = (Translate & TLBHit & (ImproperPrivilege | ~PTE_X | DAPageFault | UpperBitsUnequalPageFault | Misaligned | ~PTE_V));
end end

View File

@ -0,0 +1,50 @@
///////////////////////////////////////////
// vm64check.sv
//
// Written: David_Harris@hmc.edu 4 November 2022
// Modified:
//
// Purpose: Check for good upper address bits in RV64 mode
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// MIT LICENSE
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
// OR OTHER DEALINGS IN THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
// vm64check: reports whether SATP_MODE selects SV39 and whether the upper
// bits of a virtual address fail the "all identical" requirement.
// Both outputs are tied low when XLEN is not 64.
module vm64check (
  input  logic [`SVMODE_BITS-1:0] SATP_MODE,                 // mode field compared against `SV39
  input  logic [`XLEN-1:0]        VAdr,                      // virtual address whose upper bits are checked
  output logic                    SV39Mode,                  // 1 when SATP_MODE == `SV39 (XLEN==64 only)
  output logic                    UpperBitsUnequalPageFault  // 1 when the checked upper bits are not all equal
);

  if (`XLEN == 64) begin : rv64
    logic UpperEqual39, UpperEqual48;

    assign SV39Mode = (SATP_MODE == `SV39);

    // A bit range counts as "equal" when it is all zeros or all ones.
    assign UpperEqual39 = ~|(VAdr[63:38]) | &(VAdr[63:38]);
    assign UpperEqual48 = ~|(VAdr[63:47]) | &(VAdr[63:47]);

    // Pick the range for the active mode (63:38 when SV39Mode, 63:47 otherwise)
    // and raise the fault when that range is not uniform.
    assign UpperBitsUnequalPageFault = ~(SV39Mode ? UpperEqual39 : UpperEqual48);
  end else begin
    // No upper-bit check outside of XLEN==64.
    assign SV39Mode                  = 1'b0;
    assign UpperBitsUnequalPageFault = 1'b0;
  end
endmodule

View File

@ -718,6 +718,7 @@ module testbenchfp;
if (TEST === "div" | TEST === "sqrt" | TEST === "all") begin: fdivsqrt if (TEST === "div" | TEST === "sqrt" | TEST === "all") begin: fdivsqrt
fdivsqrt fdivsqrt(.clk, .reset, .XsE(Xs), .FmtE(ModFmt), .XmE(Xm), .YmE(Ym), .XeE(Xe), .YeE(Ye), .SqrtE(OpCtrlVal[0]), .SqrtM(OpCtrlVal[0]), fdivsqrt fdivsqrt(.clk, .reset, .XsE(Xs), .FmtE(ModFmt), .XmE(Xm), .YmE(Ym), .XeE(Xe), .YeE(Ye), .SqrtE(OpCtrlVal[0]), .SqrtM(OpCtrlVal[0]),
.XInfE(XInf), .YInfE(YInf), .XZeroE(XZero), .YZeroE(YZero), .XNaNE(XNaN), .YNaNE(YNaN), .DivStartE(DivStart), .XInfE(XInf), .YInfE(YInf), .XZeroE(XZero), .YZeroE(YZero), .XNaNE(XNaN), .YNaNE(YNaN), .DivStartE(DivStart),
.MDUE(1'b0), .W64E(1'b0),
.StallE(1'b0), .StallM(1'b0), .DivSM(DivSticky), .DivBusy, .QeM(DivCalcExp), .StallE(1'b0), .StallM(1'b0), .DivSM(DivSticky), .DivBusy, .QeM(DivCalcExp),
.QmM(Quot), .DivDone); .QmM(Quot), .DivDone);
end end

View File

@ -55,9 +55,9 @@ string tvpaths[] = '{
"bd_speedopt_speed/src/matmult-int/matmult-int", "bd_speedopt_speed/src/matmult-int/matmult-int",
// "bd_speedopt_speed/src/md5sum/md5sum", //commenting out tests from embench 2.0. When embench 2.0 launches stabilty, add these tests back // "bd_speedopt_speed/src/md5sum/md5sum", //commenting out tests from embench 2.0. When embench 2.0 launches stabilty, add these tests back
"bd_speedopt_speed/src/minver/minver", "bd_speedopt_speed/src/minver/minver",
"bd_speedopt_speed/src/nbody/nbody",
"bd_speedopt_speed/src/nettle-aes/nettle-aes", "bd_speedopt_speed/src/nettle-aes/nettle-aes",
"bd_speedopt_speed/src/nettle-sha256/nettle-sha256", "bd_speedopt_speed/src/nettle-sha256/nettle-sha256",
"bd_speedopt_speed/src/nbody/nbody",
"bd_speedopt_speed/src/nsichneu/nsichneu", "bd_speedopt_speed/src/nsichneu/nsichneu",
"bd_speedopt_speed/src/picojpeg/picojpeg", "bd_speedopt_speed/src/picojpeg/picojpeg",
// "bd_speedopt_speed/src/primecount/primecount", // "bd_speedopt_speed/src/primecount/primecount",

View File

@ -1072,9 +1072,9 @@ uart_data_wait:
li t3, 0x10000002 // IIR li t3, 0x10000002 // IIR
li a4, 0x61 li a4, 0x61
uart_read_LSR_IIR: uart_read_LSR_IIR:
lb t4, 0(t3) // save IIR before reading LSR mgith clear it lbu t4, 0(t3) // save IIR before reading LSR might clear it
// check if IIR is the rxfifotimeout interrupt. if it is, then read the fifo then go back and repeat this. // check if IIR is the rxfifotimeout interrupt. if it is, then read the fifo then go back and repeat this.
li t5, 6 li t5, 0xCC // Value in IIR for Fifo Enabled, with timeout interrupt pending
beq t4, t5, uart_rxfifo_timout beq t4, t5, uart_rxfifo_timout
lb t5, 0(t2) // read LSR lb t5, 0(t2) // read LSR
andi t6, t5, 0x61 // wait until all transmissions are done and data is ready andi t6, t5, 0x61 // wait until all transmissions are done and data is ready
@ -1083,7 +1083,6 @@ uart_read_LSR_IIR:
uart_rxfifo_timout: uart_rxfifo_timout:
li t4, 0x10000000 // read from the fifo li t4, 0x10000000 // read from the fifo
lb t5, 0(t4) lb t5, 0(t4)
lb t5, 0(t4)
//read the fifo until empty //read the fifo until empty
j uart_read_LSR_IIR j uart_read_LSR_IIR