diff --git a/pipelined/config/shared/wally-shared.vh b/pipelined/config/shared/wally-shared.vh index ca93d7e7b..506cc7c50 100644 --- a/pipelined/config/shared/wally-shared.vh +++ b/pipelined/config/shared/wally-shared.vh @@ -110,7 +110,7 @@ // division constants `define RADIX 32'h4 -`define DIVCOPIES 32'h3 +`define DIVCOPIES 32'h2 `define DIVLEN ((`NF < `XLEN) ? (`XLEN) : `NF+3) // `define DIVN (`NF < `XLEN ? `XLEN : `NF+1) // length of input `define DIVN (`NF<`XLEN ? `XLEN : (`NF + 3)) // length of input @@ -118,12 +118,17 @@ `define EXTRAINTBITS ((`NF < `XLEN) ? 0 : (`NF - `XLEN + 3)) `define DIVRESLEN ((`NF>`XLEN) ? (`NF + 4) : `XLEN) `define LOGR ((`RADIX==2) ? 32'h1 : 32'h2) -// FPDUR = ceil(DIVRESLEN/(LOGR*DIVCOPIES)) +`define RK (`DIVCOPIES*`LOGR) // r*k used for intdiv preproc +`define LOGK ($clog2(`DIVCOPIES)) +`define LOGRK ($clog2(`RK)) +// FPDUR = ceil(DIVRESLEN/(LOGR*DIVCOPIES)) // one iteration is required for the integer bit for minimally redundent radix-4 `define FPDUR ((`DIVN+2+(`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES)+(`RADIX/4)) `define DURLEN ($clog2(`FPDUR+1)) `define QLEN (`FPDUR*`LOGR*`DIVCOPIES) `define DIVb (`QLEN-1) +`define DIVa (`DIVb+4-`XLEN) +`define DIVBLEN ($clog2(`DIVb+1)-1) `define USE_SRAM 0 diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv index 604a0711f..3f9c7e8a5 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv @@ -64,10 +64,12 @@ module fdivsqrt( logic Firstun; logic WZero; logic SpecialCaseM; + logic [`DIVBLEN:0] n, p, m; + logic OTFCSwap; fdivsqrtpreproc fdivsqrtpreproc( .clk, .DivStartE, .Xm(XmE), .QeM, .Xe(XeE), .Fmt(FmtE), .Ye(YeE), - .Sqrt(SqrtE), .Ym(YmE), .XZero(XZeroE), .X, .Dpreproc, + .Sqrt(SqrtE), .Ym(YmE), .XZero(XZeroE), .X, .Dpreproc, .n, .p, .m, .OTFCSwap, .ForwardedSrcAE, .ForwardedSrcBE, .Funct3E, .Funct3M, .MDUE, .W64E); fdivsqrtfsm fdivsqrtfsm( .clk, .reset, .FmtE, .XsE, .SqrtE, @@ -77,10 +79,11 @@ module fdivsqrt( fdivsqrtiter fdivsqrtiter( .clk, .Firstun, .D, .FirstU, .FirstUM, .FirstC, .SqrtE, .SqrtM, .X,.Dpreproc, .FirstWS(WS), .FirstWC(WC), - .DivStartE, .Xe(XeE), .Ye(YeE), .XZeroE, .YZeroE, + .DivStartE, .Xe(XeE), .Ye(YeE), .XZeroE, .YZeroE, .OTFCSwap, .DivBusy); fdivsqrtpostproc fdivsqrtpostproc( .WS, .WC, .D, .FirstU, .FirstUM, .FirstC, .Firstun, .SqrtM, .SpecialCaseM, .RemOp(Funct3E[1]), + .n, .p, .m, .QmM, .WZero, .DivSM); endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtiter.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtiter.sv index 17cc3f5c2..d234144c4 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtiter.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtiter.sv @@ -38,6 +38,7 @@ module fdivsqrtiter( input logic XZeroE, YZeroE, input logic SqrtE, input logic SqrtM, + input logic OTFCSwap, input logic [`DIVb+3:0] X, input logic [`DIVN-2:0] Dpreproc, output logic [`DIVN-2:0] D, // U0.N-1 diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv index 92bb1bd9b..9e9bdb10b 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv @@ -39,6 +39,7 @@ module fdivsqrtpostproc( input logic SqrtM, input logic SpecialCaseM, input logic RemOp, + input logic [`DIVBLEN:0] n, p, m, output logic [`DIVb:0] QmM, output logic WZero, output logic DivSM diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv index f1882ad6f..756c5cc9f 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv @@ -41,7 +41,9 @@ module fdivsqrtpreproc ( input logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // *** these are the src outputs before the mux choosing between them and PCE to put in srcA/B input logic [2:0] Funct3E, Funct3M, input logic MDUE, W64E, - output logic [`NE+1:0] QeM, + output logic [`DIVBLEN:0] n, p, m, + output logic OTFCSwap, + output logic [`NE+1:0] QeM, output logic [`DIVb+3:0] X, output logic [`DIVN-2:0] Dpreproc ); @@ -50,36 +52,56 @@ module fdivsqrtpreproc ( logic [`NF-1:0] PreprocB, PreprocY; logic [`NF+1:0] SqrtX; logic [`DIVb+3:0] DivX; - logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt; + logic [`DIVBLEN:0] L; logic [`NE+1:0] Qe; // Intdiv signals - logic [`DIVN-1:0] ZeroBufX, ZeroBufY; + logic [`DIVb-1:0] ZeroBufX, ZeroBufY; logic [`XLEN-1:0] PosA, PosB; - logic Signed, Aneg, Bneg; + logic As, Bs, OTFCSwapTemp; + logic [`XLEN-1:0] A64, B64; + logic [`DIVBLEN:0] ZeroDiff, IntBits, RightShiftX; + logic [`DIVBLEN:0] pPlusr, pPrCeil; + logic [`LOGRK-1:0] pPrTrunc; + logic [`DIVb+3:0] PreShiftX; // ***can probably merge X LZC with conversion // cout the number of leading zeros - // Muxes needed for Int; add after Cedar Commit - assign ZeroBufX = MDUE ? {ForwardedSrcAE, {`DIVN-`XLEN{1'b0}}} : {Xm, {`DIVN-`NF-1{1'b0}}}; - assign ZeroBufY = MDUE ? {ForwardedSrcBE, {`DIVN-`XLEN{1'b0}}} : {Ym, {`DIVN-`NF-1{1'b0}}}; - lzc #(`NF+1) lzcX (Xm, XZeroCnt); - lzc #(`NF+1) lzcY (Ym, YZeroCnt); - assign Signed = Funct3E[0]; - assign Aneg = ForwardedSrcAE[`XLEN-1] & Signed; - assign Bneg = ForwardedSrcBE[`XLEN-1] & Signed; - assign PosA = Aneg ? -ForwardedSrcAE : ForwardedSrcAE; - assign PosB = Bneg ? -ForwardedSrcBE : ForwardedSrcBE; + assign As = ForwardedSrcAE[`XLEN-1] & Funct3E[0]; + assign Bs = ForwardedSrcBE[`XLEN-1] & Funct3E[0]; + assign A64 = W64E ? {{(`XLEN-32){As}}, ForwardedSrcAE[31:0]} : ForwardedSrcAE; + assign B64 = W64E ? {{(`XLEN-32){Bs}}, ForwardedSrcBE[31:0]} : ForwardedSrcBE; - assign PreprocX = Xm[`NF-1:0]<> `LOGRK) + {{`DIVBLEN-1{1'b0}}, |(pPrTrunc)}; + assign n = (pPrCeil << `LOGK) - 1; + assign IntBits = (`DIVBLEN)'(`RK) + p; + assign RightShiftX = (`DIVBLEN)'(`RK) - {{(`DIVBLEN-`RK){1'b0}}, IntBits[`RK-1:0]}; + + assign SqrtX = Xe[0]^L[0] ? {1'b0, ~XZero, PreprocX} : {~XZero, PreprocX, 1'b0}; assign DivX = {3'b000, ~XZero, PreprocX, {`DIVb-`NF{1'b0}}}; // *** explain why X is shifted between radices (initial assignment of WS=RX) - if (`RADIX == 2) assign X = Sqrt ? {3'b111, SqrtX, {`DIVb-1-`NF{1'b0}}} : DivX; - else assign X = Sqrt ? {2'b11, SqrtX, {`DIVb-1-`NF{1'b0}}, 1'b0} : DivX; + if (`RADIX == 2) assign PreShiftX = Sqrt ? {3'b111, SqrtX, {`DIVb-1-`NF{1'b0}}} : DivX; + else assign PreShiftX = Sqrt ? {2'b11, SqrtX, {`DIVb-1-`NF{1'b0}}, 1'b0} : DivX; + assign X = MDUE ? PreShiftX >> RightShiftX : PreShiftX; assign Dpreproc = {PreprocY, {`DIVN-1-`NF{1'b0}}}; // radix 2 radix 4 @@ -92,17 +114,18 @@ module fdivsqrtpreproc ( // r = 1 or 2 // DIVRESLEN/(r*`DIVCOPIES) flopen #(`NE+2) expflop(clk, DivStartE, Qe, QeM); - expcalc expcalc(.Fmt, .Xe, .Ye, .Sqrt, .XZero, .XZeroCnt, .YZeroCnt, .Qe); + flopen #(1) swapflop(clk, DivStartE, OTFCSwapTemp, OTFCSwap); + expcalc expcalc(.Fmt, .Xe, .Ye, .Sqrt, .XZero, .L, .m, .Qe); endmodule module expcalc( - input logic [`FMTBITS-1:0] Fmt, + input logic [`FMTBITS-1:0] Fmt, input logic [`NE-1:0] Xe, Ye, - input logic Sqrt, - input logic XZero, - input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt, - output logic [`NE+1:0] Qe + input logic Sqrt, + input logic XZero, + input logic [`DIVBLEN:0] L, m, + output logic [`NE+1:0] Qe ); logic [`NE-2:0] Bias; logic [`NE+1:0] SXExp; @@ -133,10 +156,10 @@ module expcalc( 2'h2: Bias = (`NE-1)'(`H_BIAS); endcase end - assign SXExp = {2'b0, Xe} - {{`NE+1-$unsigned($clog2(`NF+2)){1'b0}}, XZeroCnt} - (`NE+1)'(`BIAS); + assign SXExp = {2'b0, Xe} - {{(`NE+1-`DIVBLEN){1'b0}}, L} - (`NE+2)'(`BIAS); assign SExp = {SXExp[`NE+1], SXExp[`NE+1:1]} + {2'b0, Bias}; // correct exponent for denormalized input's normalization shifts - assign DExp = ({2'b0, Xe} - {{`NE+1-$unsigned($clog2(`NF+2)){1'b0}}, XZeroCnt} - {2'b0, Ye} + {{`NE+1-$unsigned($clog2(`NF+2)){1'b0}}, YZeroCnt} + {3'b0, Bias})&{`NE+2{~XZero}}; + assign DExp = ({2'b0, Xe} - {{(`NE+1-`DIVBLEN){1'b0}}, L} - {2'b0, Ye} + {{(`NE+1-`DIVBLEN){1'b0}}, m} + {3'b0, Bias}) & {`NE+2{~XZero}}; assign Qe = Sqrt ? SExp : DExp; endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtstage2.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtstage2.sv index 8ed1664af..09f82da81 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtstage2.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtstage2.sv @@ -61,7 +61,7 @@ module fdivsqrtstage2 ( // 0001 = -2 fdivsqrtqsel2 qsel2(WS[`DIVb+3:`DIVb], WC[`DIVb+3:`DIVb], up, uz, un); - // Sqrt F generatin + // Sqrt F generation fdivsqrtfgen2 fgen2(.up, .uz, .C(CNext), .U, .UM, .F); // Divisor multiple diff --git a/pipelined/src/mmu/hptw.sv b/pipelined/src/mmu/hptw.sv index 7b303ff43..e2b2573ed 100644 --- a/pipelined/src/mmu/hptw.sv +++ b/pipelined/src/mmu/hptw.sv @@ -42,7 +42,7 @@ module hptw input logic [1:0] STATUS_MPP, input logic [1:0] PrivilegeModeW, (* mark_debug = "true" *) input logic ITLBMissOrDAFaultNoTrapF, DTLBMissOrDAFaultNoTrapM, // TLB Miss - input logic [`XLEN-1:0] HPTWReadPTE, // page table entry from LSU + input logic [`XLEN-1:0] HPTWReadPTE, // page table entry from LSU *** change to ReadDataM input logic DCacheStallM, // stall from LSU output logic [`XLEN-1:0] PTE, // page table entry to TLBs output logic [1:0] PageType, // page type to TLBs @@ -106,7 +106,6 @@ module hptw if(`HPTW_WRITES_SUPPORTED) begin : hptwwrites - logic SV39Mode; logic ReadAccess, WriteAccess; logic InvalidRead, InvalidWrite; logic UpperBitsUnequalPageFault; @@ -136,19 +135,9 @@ module hptw assign ImproperPrivilege = ((EffectivePrivilegeMode == `U_MODE) & ~PTE_U) | ((EffectivePrivilegeMode == `S_MODE) & PTE_U & (~STATUS_SUM & DTLBWalk)); - // *** turn into module common with code in tlbcontrol. - if (`XLEN==64) begin:rv64 - assign SV39Mode = (SATP_REGW[`XLEN-1:`XLEN-`SVMODE_BITS] == `SV39); - // page fault if upper bits aren't all the same - logic UpperEqual39, UpperEqual48; - assign UpperEqual39 = &(TranslationVAdr[63:38]) | ~|(TranslationVAdr[63:38]); - assign UpperEqual48 = &(TranslationVAdr[63:47]) | ~|(TranslationVAdr[63:47]); - assign UpperBitsUnequalPageFault = SV39Mode ? ~UpperEqual39 : ~UpperEqual48; - end else begin - assign SV39Mode = 0; - assign UpperBitsUnequalPageFault = 0; - end - + // Check for page faults + vm64check vm64check(.SATP_MODE(SATP_REGW[`XLEN-1:`XLEN-`SVMODE_BITS]), .VAdr(TranslationVAdr), + .SV39Mode(), .UpperBitsUnequalPageFault); assign InvalidRead = ReadAccess & ~Readable & (~STATUS_MXR | ~Executable); assign InvalidWrite = WriteAccess & ~Writable; assign OtherPageFault = DTLBWalk? ImproperPrivilege | InvalidRead | InvalidWrite | UpperBitsUnequalPageFault | Misaligned | ~Valid : @@ -190,26 +179,26 @@ module hptw // HPTWAdr muxing if (`XLEN==32) begin // RV32 - logic [9:0] VPN; - logic [`PPN_BITS-1:0] PPN; - assign VPN = ((WalkerState == L1_ADR) | (WalkerState == L1_RD)) ? TranslationVAdr[31:22] : TranslationVAdr[21:12]; // select VPN field based on HPTW state - assign PPN = ((WalkerState == L1_ADR) | (WalkerState == L1_RD)) ? BasePageTablePPN : CurrentPPN; - assign HPTWReadAdr = {PPN, VPN, 2'b00}; - assign HPTWSize = 3'b010; + logic [9:0] VPN; + logic [`PPN_BITS-1:0] PPN; + assign VPN = ((WalkerState == L1_ADR) | (WalkerState == L1_RD)) ? TranslationVAdr[31:22] : TranslationVAdr[21:12]; // select VPN field based on HPTW state + assign PPN = ((WalkerState == L1_ADR) | (WalkerState == L1_RD)) ? BasePageTablePPN : CurrentPPN; + assign HPTWReadAdr = {PPN, VPN, 2'b00}; + assign HPTWSize = 3'b010; end else begin // RV64 - logic [8:0] VPN; - logic [`PPN_BITS-1:0] PPN; - always_comb - case (WalkerState) // select VPN field based on HPTW state - L3_ADR, L3_RD: VPN = TranslationVAdr[47:39]; - L2_ADR, L2_RD: VPN = TranslationVAdr[38:30]; - L1_ADR, L1_RD: VPN = TranslationVAdr[29:21]; - default: VPN = TranslationVAdr[20:12]; - endcase - assign PPN = ((WalkerState == L3_ADR) | (WalkerState == L3_RD) | - (SvMode != `SV48 & ((WalkerState == L2_ADR) | (WalkerState == L2_RD)))) ? BasePageTablePPN : CurrentPPN; - assign HPTWReadAdr = {PPN, VPN, 3'b000}; - assign HPTWSize = 3'b011; + logic [8:0] VPN; + logic [`PPN_BITS-1:0] PPN; + always_comb + case (WalkerState) // select VPN field based on HPTW state + L3_ADR, L3_RD: VPN = TranslationVAdr[47:39]; + L2_ADR, L2_RD: VPN = TranslationVAdr[38:30]; + L1_ADR, L1_RD: VPN = TranslationVAdr[29:21]; + default: VPN = TranslationVAdr[20:12]; + endcase + assign PPN = ((WalkerState == L3_ADR) | (WalkerState == L3_RD) | + (SvMode != `SV48 & ((WalkerState == L2_ADR) | (WalkerState == L2_RD)))) ? BasePageTablePPN : CurrentPPN; + assign HPTWReadAdr = {PPN, VPN, 3'b000}; + assign HPTWSize = 3'b011; end // Initial state and misalignment for RV32/64 @@ -228,44 +217,33 @@ module hptw end // Page Table Walker FSM - // If the setup time on the D$ RAM is short, it should be possible to merge the LEVELx_READ and LEVELx states - // to decrease the latency of the HPTW. However, if the D$ is a cycle limiter, it's better to leave the - // HPTW as shown below to keep the D$ setup time out of the critical path. - // *** Is this really true. Talk with Ross. Seems like it's the next state logic on critical path instead. - // *** address TYPE(statetype) flopenl #(.TYPE(statetype)) WalkerStateReg(clk, reset, 1'b1, NextWalkerState, IDLE, WalkerState); always_comb - case (WalkerState) - IDLE: if (TLBMiss) NextWalkerState = InitialWalkerState; - else NextWalkerState = IDLE; - L3_ADR: NextWalkerState = L3_RD; // first access in SV48 - L3_RD: if (DCacheStallM) NextWalkerState = L3_RD; - else NextWalkerState = L2_ADR; - L2_ADR: if (InitialWalkerState == L2_ADR) NextWalkerState = L2_RD; // first access in SV39 - else if (ValidLeafPTE & ~Misaligned) NextWalkerState = LEAF; // could shortcut this by a cyle for all Lx_ADR superpages - else if (ValidNonLeafPTE) NextWalkerState = L2_RD; - else NextWalkerState = LEAF; - L2_RD: if (DCacheStallM) NextWalkerState = L2_RD; - else NextWalkerState = L1_ADR; - L1_ADR: if (InitialWalkerState == L1_ADR) NextWalkerState = L1_RD; // first access in SV32 - else if (ValidLeafPTE & ~Misaligned) NextWalkerState = LEAF; // could shortcut this by a cyle for all Lx_ADR superpages - else if (ValidNonLeafPTE) NextWalkerState = L1_RD; - else NextWalkerState = LEAF; - L1_RD: if (DCacheStallM) NextWalkerState = L1_RD; - else NextWalkerState = L0_ADR; - L0_ADR: if (ValidLeafPTE & ~Misaligned) NextWalkerState = LEAF; // could shortcut this by a cyle for all Lx_ADR superpages - else if (ValidNonLeafPTE) NextWalkerState = L0_RD; - else NextWalkerState = LEAF; - L0_RD: if (DCacheStallM) NextWalkerState = L0_RD; - else NextWalkerState = LEAF; - LEAF: if (DAPageFault) NextWalkerState = UPDATE_PTE; - else NextWalkerState = IDLE; - UPDATE_PTE: if(`HPTW_WRITES_SUPPORTED & DCacheStallM) NextWalkerState = UPDATE_PTE; - else NextWalkerState = LEAF; - default: begin - NextWalkerState = IDLE; // should never be reached - end - endcase // case (WalkerState) + case (WalkerState) + IDLE: if (TLBMiss) NextWalkerState = InitialWalkerState; + else NextWalkerState = IDLE; + L3_ADR: NextWalkerState = L3_RD; // first access in SV48 + L3_RD: if (DCacheStallM) NextWalkerState = L3_RD; + else NextWalkerState = L2_ADR; + L2_ADR: if (InitialWalkerState == L2_ADR | ValidNonLeafPTE) NextWalkerState = L2_RD; // first access in SV39 + else NextWalkerState = LEAF; + L2_RD: if (DCacheStallM) NextWalkerState = L2_RD; + else NextWalkerState = L1_ADR; + L1_ADR: if (InitialWalkerState == L1_ADR | ValidNonLeafPTE) NextWalkerState = L1_RD; // first access in SV32 + else if (ValidNonLeafPTE) NextWalkerState = L1_RD; + else NextWalkerState = LEAF; + L1_RD: if (DCacheStallM) NextWalkerState = L1_RD; + else NextWalkerState = L0_ADR; + L0_ADR: if (ValidNonLeafPTE) NextWalkerState = L0_RD; + else NextWalkerState = LEAF; + L0_RD: if (DCacheStallM) NextWalkerState = L0_RD; + else NextWalkerState = LEAF; + LEAF: if (DAPageFault) NextWalkerState = UPDATE_PTE; + else NextWalkerState = IDLE; + UPDATE_PTE: if(`HPTW_WRITES_SUPPORTED & DCacheStallM) NextWalkerState = UPDATE_PTE; + else NextWalkerState = LEAF; + default: NextWalkerState = IDLE; // should never be reached + endcase // case (WalkerState) assign IgnoreRequestTLB = WalkerState == IDLE & TLBMiss; assign SelHPTW = WalkerState != IDLE; diff --git a/pipelined/src/mmu/tlb.sv b/pipelined/src/mmu/tlb.sv index 6954e1d95..2f4fd5560 100644 --- a/pipelined/src/mmu/tlb.sv +++ b/pipelined/src/mmu/tlb.sv @@ -116,16 +116,16 @@ module tlb #(parameter TLB_ENTRIES = 8, // we cache Misaligned along with the PTE? This only has to be computed once // in the hptw as it is always the same regardless of the VPN. if(`XLEN == 32) begin - assign MegapageMisaligned = |(PPN[9:0]); // must have zero PPN0 - assign Misaligned = (HitPageType == 2'b01) & MegapageMisaligned; + assign MegapageMisaligned = |(PPN[9:0]); // must have zero PPN0 + assign Misaligned = (HitPageType == 2'b01) & MegapageMisaligned; end else begin - logic GigapageMisaligned, TerapageMisaligned; - assign TerapageMisaligned = |(PPN[26:0]); // must have zero PPN2, PPN1, PPN0 - assign GigapageMisaligned = |(PPN[17:0]); // must have zero PPN1 and PPN0 - assign MegapageMisaligned = |(PPN[8:0]); // must have zero PPN0 - assign Misaligned = ((HitPageType == 2'b11) & TerapageMisaligned) | - ((HitPageType == 2'b10) & GigapageMisaligned) | - ((HitPageType == 2'b01) & MegapageMisaligned); + logic GigapageMisaligned, TerapageMisaligned; + assign TerapageMisaligned = |(PPN[26:0]); // must have zero PPN2, PPN1, PPN0 + assign GigapageMisaligned = |(PPN[17:0]); // must have zero PPN1 and PPN0 + assign MegapageMisaligned = |(PPN[8:0]); // must have zero PPN0 + assign Misaligned = ((HitPageType == 2'b11) & TerapageMisaligned) | + ((HitPageType == 2'b10) & GigapageMisaligned) | + ((HitPageType == 2'b01) & MegapageMisaligned); end assign VPN = VAdr[`VPN_BITS+11:12]; @@ -137,7 +137,7 @@ module tlb #(parameter TLB_ENTRIES = 8, tlblru #(TLB_ENTRIES) lru(.clk, .reset, .TLBWrite, .TLBFlush, .Matches, .CAMHit, .WriteEnables); tlbcam #(TLB_ENTRIES, `VPN_BITS + `ASID_BITS, `VPN_SEGMENT_BITS) - tlbcam(.clk, .reset, .VPN, .PageTypeWriteVal, .SV39Mode, .TLBFlush, .WriteEnables, .PTE_Gs, + tlbcam(.clk, .reset, .VPN, .PageTypeWriteVal, .SV39Mode, .TLBFlush, .WriteEnables, .PTE_Gs, .SATP_ASID, .Matches, .HitPageType, .CAMHit); tlbram #(TLB_ENTRIES) tlbram(.clk, .reset, .PTE, .Matches, .WriteEnables, .PPN, .PTEAccessBits, .PTE_Gs); diff --git a/pipelined/src/mmu/tlbcontrol.sv b/pipelined/src/mmu/tlbcontrol.sv index 5a9e4852d..8b3da2f35 100644 --- a/pipelined/src/mmu/tlbcontrol.sv +++ b/pipelined/src/mmu/tlbcontrol.sv @@ -68,22 +68,12 @@ module tlbcontrol #(parameter ITLB = 0) ( // Grab the sv mode from SATP and determine whether translation should occur assign EffectivePrivilegeMode = (ITLB == 1) ? PrivilegeModeW : (STATUS_MPRV ? STATUS_MPP : PrivilegeModeW); // DTLB uses MPP mode when MPRV is 1 assign Translate = (SATP_MODE != `NO_TRANSLATE) & (EffectivePrivilegeMode != `M_MODE) & ~DisableTranslation; - if (`XLEN==64) begin:rv64 - assign SV39Mode = (SATP_MODE == `SV39); - // page fault if upper bits aren't all the same - logic UpperEqual39, UpperEqual48; - assign UpperEqual39 = &(VAdr[63:38]) | ~|(VAdr[63:38]); - assign UpperEqual48 = &(VAdr[63:47]) | ~|(VAdr[63:47]); - assign UpperBitsUnequalPageFault = SV39Mode ? ~UpperEqual39 : ~UpperEqual48; - end else begin - assign SV39Mode = 0; - assign UpperBitsUnequalPageFault = 0; - end // Determine whether TLB is being used assign TLBAccess = ReadAccess | WriteAccess; // Check whether upper bits of virtual addresss are all equal + vm64check vm64check(.SATP_MODE, .VAdr, .SV39Mode, .UpperBitsUnequalPageFault); // unswizzle useful PTE bits assign {PTE_D, PTE_A} = PTEAccessBits[7:6]; @@ -99,7 +89,7 @@ module tlbcontrol #(parameter ITLB = 0) ( assign DAPageFault = Translate & TLBHit & ~PTE_A & ~TLBPageFault; assign TLBPageFault = (Translate & TLBHit & (ImproperPrivilege | ~PTE_X | UpperBitsUnequalPageFault | Misaligned | ~PTE_V)); end else begin - // fault for software handling if access bit is off + // fault for software handling if access bit is off assign DAPageFault = ~PTE_A; assign TLBPageFault = (Translate & TLBHit & (ImproperPrivilege | ~PTE_X | DAPageFault | UpperBitsUnequalPageFault | Misaligned | ~PTE_V)); end diff --git a/pipelined/src/mmu/vm64check.sv b/pipelined/src/mmu/vm64check.sv new file mode 100644 index 000000000..cedeb5267 --- /dev/null +++ b/pipelined/src/mmu/vm64check.sv @@ -0,0 +1,50 @@ +/////////////////////////////////////////// +// vm64check.sv +// +// Written: David_Harris@hmc.edu 4 November 2022 +// Modified: +// +// Purpose: Check for good upper address bits in RV64 mode +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// MIT LICENSE +// Permission is hereby granted, free of charge, to any person obtaining a copy of this +// software and associated documentation files (the "Software"), to deal in the Software +// without restriction, including without limitation the rights to use, copy, modify, merge, +// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons +// to whom the Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or +// substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. +//////////////////////////////////////////////////////////////////////////////////////////////// + +`include "wally-config.vh" + +module vm64check ( + input logic [`SVMODE_BITS-1:0] SATP_MODE, + input logic [`XLEN-1:0] VAdr, + output logic SV39Mode, UpperBitsUnequalPageFault +); + + if (`XLEN==64) begin:rv64 + assign SV39Mode = (SATP_MODE == `SV39); + // page fault if upper bits aren't all the same + logic UpperEqual39, UpperEqual48; + assign UpperEqual39 = &(VAdr[63:38]) | ~|(VAdr[63:38]); + assign UpperEqual48 = &(VAdr[63:47]) | ~|(VAdr[63:47]); + assign UpperBitsUnequalPageFault = SV39Mode ? ~UpperEqual39 : ~UpperEqual48; + end else begin + assign SV39Mode = 0; + assign UpperBitsUnequalPageFault = 0; + end +endmodule diff --git a/pipelined/testbench/testbench-fp.sv b/pipelined/testbench/testbench-fp.sv index 748be2280..228bc88f9 100644 --- a/pipelined/testbench/testbench-fp.sv +++ b/pipelined/testbench/testbench-fp.sv @@ -718,6 +718,7 @@ module testbenchfp; if (TEST === "div" | TEST === "sqrt" | TEST === "all") begin: fdivsqrt fdivsqrt fdivsqrt(.clk, .reset, .XsE(Xs), .FmtE(ModFmt), .XmE(Xm), .YmE(Ym), .XeE(Xe), .YeE(Ye), .SqrtE(OpCtrlVal[0]), .SqrtM(OpCtrlVal[0]), .XInfE(XInf), .YInfE(YInf), .XZeroE(XZero), .YZeroE(YZero), .XNaNE(XNaN), .YNaNE(YNaN), .DivStartE(DivStart), + .MDUE(1'b0), .W64E(1'b0), .StallE(1'b0), .StallM(1'b0), .DivSM(DivSticky), .DivBusy, .QeM(DivCalcExp), .QmM(Quot), .DivDone); end diff --git a/pipelined/testbench/tests.vh b/pipelined/testbench/tests.vh index 91d3dcf12..633ecb81d 100644 --- a/pipelined/testbench/tests.vh +++ b/pipelined/testbench/tests.vh @@ -55,9 +55,9 @@ string tvpaths[] = '{ "bd_speedopt_speed/src/matmult-int/matmult-int", // "bd_speedopt_speed/src/md5sum/md5sum", //commenting out tests from embench 2.0. When embench 2.0 launches stabilty, add these tests back "bd_speedopt_speed/src/minver/minver", - "bd_speedopt_speed/src/nbody/nbody", "bd_speedopt_speed/src/nettle-aes/nettle-aes", "bd_speedopt_speed/src/nettle-sha256/nettle-sha256", + "bd_speedopt_speed/src/nbody/nbody", "bd_speedopt_speed/src/nsichneu/nsichneu", "bd_speedopt_speed/src/picojpeg/picojpeg", // "bd_speedopt_speed/src/primecount/primecount", diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-TEST-LIB-32.h b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-TEST-LIB-32.h index 454d05be5..ca197876c 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-TEST-LIB-32.h +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-TEST-LIB-32.h @@ -1072,9 +1072,9 @@ uart_data_wait: li t3, 0x10000002 // IIR li a4, 0x61 uart_read_LSR_IIR: - lb t4, 0(t3) // save IIR before reading LSR mgith clear it + lbu t4, 0(t3) // save IIR before reading LSR might clear it // check if IIR is the rxfifotimeout interrupt. if it is, then read the fifo then go back and repeat this. - li t5, 6 + li t5, 0xCC // Value in IIR for Fifo Enabled, with timeout interrupt pending beq t4, t5, uart_rxfifo_timout lb t5, 0(t2) // read LSR andi t6, t5, 0x61 // wait until all transmissions are done and data is ready @@ -1083,7 +1083,6 @@ uart_read_LSR_IIR: uart_rxfifo_timout: li t4, 0x10000000 // read from the fifo lb t5, 0(t4) - lb t5, 0(t4) //read the fifo until empty j uart_read_LSR_IIR