From 9b8bcb8e57f72dd67ad7dbb74e66727ae9d9050d Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Wed, 23 Jun 2021 16:43:22 -0500 Subject: [PATCH 01/38] Split the ReadDataW bus into two parts in preparation for the data cache. On the AHB side it is now HRDATAW and on the CPU to data cache side it is ReadDataW. lsu.sv now handles the connection between the two. Also reorganized the inputs and outputs of lsu and pagetablewalker into connects between CPU, pagetablewalker, and AHB. Finally add DisableTranslation to TLB as the pagetablewalker will need to force no translation when active regardless of the state of SATP. With Kip. --- wally-pipelined/src/ebu/ahblite.sv | 17 +++-- wally-pipelined/src/ieu/ieu.sv | 23 ++++--- wally-pipelined/src/ifu/ifu.sv | 1 + wally-pipelined/src/lsu/lsu.sv | 66 ++++++++++++------- wally-pipelined/src/mmu/mmu.sv | 3 +- wally-pipelined/src/mmu/pagetablewalker.sv | 12 +++- wally-pipelined/src/mmu/tlb.sv | 1 + .../src/wally/wallypipelinedhart.sv | 28 +++++--- 8 files changed, 96 insertions(+), 55 deletions(-) diff --git a/wally-pipelined/src/ebu/ahblite.sv b/wally-pipelined/src/ebu/ahblite.sv index c59dfa9b..7a486a96 100644 --- a/wally-pipelined/src/ebu/ahblite.sv +++ b/wally-pipelined/src/ebu/ahblite.sv @@ -62,7 +62,7 @@ module ahblite ( // Signals to PMA checker (metadata of proposed access) output logic AtomicAccessM, ExecuteAccessF, WriteAccessM, ReadAccessM, // Return from bus - output logic [`XLEN-1:0] ReadDataW, + output logic [`XLEN-1:0] HRDATAW, // AHB-Lite external signals input logic [`AHBW-1:0] HRDATA, input logic HREADY, HRESP, @@ -87,7 +87,7 @@ module ahblite ( logic GrantData; logic [31:0] AccessAddress; logic [2:0] AccessSize, PTESize, ISize; - logic [`AHBW-1:0] HRDATAMasked, ReadDataM, CapturedData, ReadDataWnext, WriteData; + logic [`AHBW-1:0] HRDATAMasked, ReadDataM, CapturedHRDATAMasked, HRDATANext, WriteData; logic IReady, DReady; logic CaptureDataM,CapturedDataAvailable; @@ -195,14 +195,13 @@ module ahblite ( assign 
MemAckW = (BusState == MEMREAD) && (NextBusState != MEMREAD) || (BusState == MEMWRITE) && (NextBusState != MEMWRITE) || ((BusState == ATOMICREAD) && (NextBusState != ATOMICREAD)) || ((BusState == ATOMICWRITE) && (NextBusState != ATOMICWRITE)); assign MMUReadPTE = HRDATA; - assign ReadDataM = HRDATAMasked; // changed from W to M dh 2/7/2021 // Carefully decide when to update ReadDataW // ReadDataMstored holds the most recent memory read. // We need to wait until the pipeline actually advances before we can update the contents of ReadDataW // (or else the W stage will accidentally get the M stage's data when the pipeline does advance). assign CaptureDataM = ((BusState == MEMREAD) && (NextBusState != MEMREAD)) || ((BusState == ATOMICREAD) && (NextBusState != ATOMICREAD)); - flopenr #(`XLEN) ReadDataNewWReg(clk, reset, CaptureDataM, ReadDataM, CapturedData); + flopenr #(`XLEN) ReadDataNewWReg(clk, reset, CaptureDataM, HRDATAMasked, CapturedHRDATAMasked); always @(posedge HCLK, negedge HRESETn) if (~HRESETn) @@ -211,11 +210,11 @@ module ahblite ( CapturedDataAvailable <= #1 (StallW) ? 
(CaptureDataM | CapturedDataAvailable) : 1'b0; always_comb casez({StallW && (BusState != ATOMICREAD),CapturedDataAvailable}) - 2'b00: ReadDataWnext = ReadDataM; - 2'b01: ReadDataWnext = CapturedData; - 2'b1?: ReadDataWnext = ReadDataW; + 2'b00: HRDATANext = HRDATAMasked; + 2'b01: HRDATANext = CapturedHRDATAMasked; + 2'b1?: HRDATANext = HRDATAW; endcase - flopr #(`XLEN) ReadDataOldWReg(clk, reset, ReadDataWnext, ReadDataW); + flopr #(`XLEN) ReadDataOldWReg(clk, reset, HRDATANext, HRDATAW); // Extract and sign-extend subwords if necessary subwordread swr(.*); @@ -226,7 +225,7 @@ module ahblite ( logic [`XLEN-1:0] AMOResult; // amoalu amoalu(.a(HRDATA), .b(WriteDataM), .funct(Funct7M), .width(MemSizeM), // .result(AMOResult)); - amoalu amoalu(.srca(ReadDataW), .srcb(WriteDataM), .funct(Funct7M), .width(MemSizeM), + amoalu amoalu(.srca(HRDATAW), .srcb(WriteDataM), .funct(Funct7M), .width(MemSizeM), .result(AMOResult)); mux2 #(`XLEN) wdmux(WriteDataM, AMOResult, AtomicMaskedM[1], WriteData); end else diff --git a/wally-pipelined/src/ieu/ieu.sv b/wally-pipelined/src/ieu/ieu.sv index 0bd9d598..bcffce8a 100644 --- a/wally-pipelined/src/ieu/ieu.sv +++ b/wally-pipelined/src/ieu/ieu.sv @@ -41,16 +41,19 @@ module ieu ( output logic [2:0] Funct3E, output logic [`XLEN-1:0] SrcAE, SrcBE, // Memory stage interface - input logic DataMisalignedM, - input logic DataAccessFaultM, - input logic SquashSCW, - input logic FWriteIntM, - input logic [`XLEN-1:0] FWriteDataM, - output logic [1:0] MemRWM, - output logic [1:0] AtomicM, - output logic [`XLEN-1:0] MemAdrM, WriteDataM, - output logic [`XLEN-1:0] SrcAM, - output logic [2:0] Funct3M, + input logic DataMisalignedM, // from LSU + input logic SquashSCW, // from LSU + output logic [1:0] MemRWM, // read/write control goes to LSU + output logic [1:0] AtomicM, // atomic control goes to LSU + output logic [`XLEN-1:0] MemAdrM, WriteDataM, // Address and write data to LSU + + output logic [2:0] Funct3M, // size and signedness to LSU + + + 
input logic FWriteIntM, // from FPU + input logic [`XLEN-1:0] FWriteDataM, // from FPU + output logic [`XLEN-1:0] SrcAM, // to privilege and fpu + // Writeback stage input logic [`XLEN-1:0] CSRReadValW, ReadDataW, MulDivResultW, input logic FWriteIntW, diff --git a/wally-pipelined/src/ifu/ifu.sv b/wally-pipelined/src/ifu/ifu.sv index afae5ff4..46d7d0ea 100644 --- a/wally-pipelined/src/ifu/ifu.sv +++ b/wally-pipelined/src/ifu/ifu.sv @@ -122,6 +122,7 @@ module ifu ( .AtomicAccessM(1'b0), .WriteAccessM(1'b0), .ReadAccessM(1'b0), // *** is this the right way force these bits constant? should they be someething else? .SquashBusAccess(ISquashBusAccessF), .HSELRegions(IHSELRegionsF), + .DisableTranslation(1'b0), .*); diff --git a/wally-pipelined/src/lsu/lsu.sv b/wally-pipelined/src/lsu/lsu.sv index ffa79adf..eb6dae26 100644 --- a/wally-pipelined/src/lsu/lsu.sv +++ b/wally-pipelined/src/lsu/lsu.sv @@ -33,37 +33,53 @@ module lsu ( input logic StallM, FlushM, StallW, FlushW, //output logic DataStall, // Memory Stage + + // connected to cpu (controls) input logic [1:0] MemRWM, - input logic [`XLEN-1:0] MemAdrM, input logic [2:0] Funct3M, - //input logic [`XLEN-1:0] ReadDataW, - input logic [`XLEN-1:0] WriteDataM, input logic [1:0] AtomicM, - input logic CommitM, - output logic [`PA_BITS-1:0] MemPAdrM, - output logic MemReadM, MemWriteM, - output logic [1:0] AtomicMaskedM, - output logic DataMisalignedM, - output logic CommittedM, - // Writeback Stage - input logic MemAckW, - input logic [`XLEN-1:0] ReadDataW, + output logic CommittedM, output logic SquashSCW, + output logic DataMisalignedM, + input logic DisableTranslation, + + // address and write data + input logic [`XLEN-1:0] MemAdrM, + input logic [`XLEN-1:0] WriteDataM, + output logic [`XLEN-1:0] ReadDataW, // from ahb + + // cpu privilege + input logic [1:0] PrivilegeModeW, + input logic DTLBFlushM, // faults - input logic NonBusTrapM, - input logic DataAccessFaultM, + input logic NonBusTrapM, output logic 
DTLBLoadPageFaultM, DTLBStorePageFaultM, output logic LoadMisalignedFaultM, LoadAccessFaultM, + // cpu hazard unit (trap) output logic StoreMisalignedFaultM, StoreAccessFaultM, - + + // connect to ahb + input logic CommitM, // should this be generated in the abh interface? + output logic [`PA_BITS-1:0] MemPAdrM, // to ahb + output logic MemReadM, MemWriteM, + output logic [1:0] AtomicMaskedM, + input logic MemAckW, // from ahb + input logic [`XLEN-1:0] HRDATAW, // from ahb + + // mmu management - input logic [1:0] PrivilegeModeW, + + // page table walker input logic [`XLEN-1:0] PageTableEntryM, input logic [1:0] PageTypeM, - input logic [`XLEN-1:0] SATP_REGW, - input logic STATUS_MXR, STATUS_SUM, - input logic DTLBWriteM, DTLBFlushM, - output logic DTLBMissM, DTLBHitM, + input logic [`XLEN-1:0] SATP_REGW, // from csr + input logic STATUS_MXR, STATUS_SUM, // from csr + input logic DTLBWriteM, + output logic DTLBMissM, + + + + output logic DTLBHitM, // not connected // PMA/PMP (inside mmu) signals input logic [31:0] HADDR, // *** replace all of these H inputs with physical adress once pma checkers have been edited to use paddr as well. @@ -94,7 +110,11 @@ module lsu ( logic PMPInstrAccessFaultF, PMAInstrAccessFaultF; // *** these are just so that the mmu has somewhere to put these outputs since they aren't used in dmem // *** if you're allowed to parameterize outputs/ inputs existence, these are an easy delete. - + + // for time being until we have a dcache the AHB Lite read bus HRDATAW will be connected to the + // CPU's read data input ReadDataW. 
+ assign ReadDataW = HRDATAW; + mmu #(.ENTRY_BITS(`DTLB_ENTRY_BITS), .IMMU(0)) dmmu(.TLBAccessType(MemRWM), .VirtualAddress(MemAdrM), .Size(Funct3M[1:0]), .PTEWriteVal(PageTableEntryM), .PageTypeWriteVal(PageTypeM), .TLBWrite(DTLBWriteM), .TLBFlush(DTLBFlushM), @@ -135,9 +155,9 @@ module lsu ( // Determine if address is valid assign LoadMisalignedFaultM = DataMisalignedM & MemRWM[1]; - assign LoadAccessFaultM = DataAccessFaultM & MemRWM[1]; + assign LoadAccessFaultM = MemRWM[1]; assign StoreMisalignedFaultM = DataMisalignedM & MemRWM[0]; - assign StoreAccessFaultM = DataAccessFaultM & MemRWM[0]; + assign StoreAccessFaultM = MemRWM[0]; // Handle atomic load reserved / store conditional generate diff --git a/wally-pipelined/src/mmu/mmu.sv b/wally-pipelined/src/mmu/mmu.sv index ff315f12..e6d003b3 100644 --- a/wally-pipelined/src/mmu/mmu.sv +++ b/wally-pipelined/src/mmu/mmu.sv @@ -44,6 +44,7 @@ module mmu #(parameter ENTRY_BITS = 3, // x1 - TLB is accessed for a write // 11 - TLB is accessed for both read and write input logic [1:0] TLBAccessType, + input logic DisableTranslation, // Virtual address input input logic [`XLEN-1:0] VirtualAddress, @@ -96,4 +97,4 @@ module mmu #(parameter ENTRY_BITS = 3, assign SquashBusAccess = PMASquashBusAccess || PMPSquashBusAccess; -endmodule \ No newline at end of file +endmodule diff --git a/wally-pipelined/src/mmu/pagetablewalker.sv b/wally-pipelined/src/mmu/pagetablewalker.sv index 70ca1ac3..785a4aa7 100644 --- a/wally-pipelined/src/mmu/pagetablewalker.sv +++ b/wally-pipelined/src/mmu/pagetablewalker.sv @@ -49,13 +49,19 @@ module pagetablewalker ( output logic [1:0] PageTypeF, PageTypeM, output logic ITLBWriteF, DTLBWriteM, - // Signals from ahblite (PTEs from memory) + + + + // *** modify to send to LSU input logic [`XLEN-1:0] MMUReadPTE, input logic MMUReady, - // Signals to ahblite (memory addresses to access) + // *** modify to send to LSU output logic [`XLEN-1:0] MMUPAdr, - output logic MMUTranslate, + output logic 
MMUTranslate, // *** rename to HPTWReq + + + // Stall signal output logic MMUStall, diff --git a/wally-pipelined/src/mmu/tlb.sv b/wally-pipelined/src/mmu/tlb.sv index 127dc5a5..9431fc62 100644 --- a/wally-pipelined/src/mmu/tlb.sv +++ b/wally-pipelined/src/mmu/tlb.sv @@ -65,6 +65,7 @@ module tlb #(parameter ENTRY_BITS = 3, // x1 - TLB is accessed for a write // 11 - TLB is accessed for both read and write input logic [1:0] TLBAccessType, + input logic DisableTranslation, // Virtual address input input logic [`XLEN-1:0] VirtualAddress, diff --git a/wally-pipelined/src/wally/wallypipelinedhart.sv b/wally-pipelined/src/wally/wallypipelinedhart.sv index b32770b9..303fd5ad 100644 --- a/wally-pipelined/src/wally/wallypipelinedhart.sv +++ b/wally-pipelined/src/wally/wallypipelinedhart.sv @@ -153,6 +153,8 @@ module wallypipelinedhart ( logic[`XLEN-1:0] WriteDatatmpM; logic [4:0] InstrClassM; + + logic [`XLEN-1:0] HRDATAW; ifu ifu(.InstrInF(InstrRData), .*); // instruction fetch unit: PC, branch prediction, instruction cache @@ -161,15 +163,6 @@ module wallypipelinedhart ( mux2 #(`XLEN) OutputInput2mux(WriteDataM, FWriteDataM, FMemRWM[0], WriteDatatmpM); - lsu lsu(.MemRWM(MemRWM|FMemRWM), .WriteDataM(WriteDatatmpM),.*); // data cache unit - - ahblite ebu( - //.InstrReadF(1'b0), - //.InstrRData(InstrF), // hook up InstrF later - .WriteDataM(WriteDatatmpM), - .MemSizeM(Funct3M[1:0]), .UnsignedLoadM(Funct3M[2]), - .Funct7M(InstrM[31:25]), - .*); pagetablewalker pagetablewalker(.*); // can send addresses to ahblite, send out pagetablestall // *** can connect to hazard unit @@ -181,6 +174,23 @@ module wallypipelinedhart ( .MemSizeM(Funct3M[1:0]), .UnsignedLoadM(Funct3M[2]), .*); */ + // arbiter between IEU and pagetablewalker + + + lsu lsu(.MemRWM(MemRWM|FMemRWM), .WriteDataM(WriteDatatmpM),.*, + .ReadDataW(ReadDataW), + .DisableTranslation(1'b0) // *** will connect to page table walker arbiter +); // data cache unit + + ahblite ebu( + //.InstrReadF(1'b0), + 
//.InstrRData(InstrF), // hook up InstrF later + .WriteDataM(WriteDatatmpM), + .MemSizeM(Funct3M[1:0]), .UnsignedLoadM(Funct3M[2]), + .Funct7M(InstrM[31:25]), + .HRDATAW(HRDATAW), + .*); + muldiv mdu(.*); // multiply and divide unit From 286b4b5b26af5f5978e02004e6ec1d28e2449333 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Wed, 23 Jun 2021 17:03:54 -0500 Subject: [PATCH 02/38] Partial addition of page table walker arbiter. --- wally-pipelined/src/lsu/lsu.sv | 2 ++ wally-pipelined/src/lsu/lsuArb.sv | 55 +++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+) create mode 100644 wally-pipelined/src/lsu/lsuArb.sv diff --git a/wally-pipelined/src/lsu/lsu.sv b/wally-pipelined/src/lsu/lsu.sv index eb6dae26..f20327bc 100644 --- a/wally-pipelined/src/lsu/lsu.sv +++ b/wally-pipelined/src/lsu/lsu.sv @@ -41,6 +41,8 @@ module lsu ( output logic CommittedM, output logic SquashSCW, output logic DataMisalignedM, + + // part of the page table walker input logic DisableTranslation, // address and write data diff --git a/wally-pipelined/src/lsu/lsuArb.sv b/wally-pipelined/src/lsu/lsuArb.sv new file mode 100644 index 00000000..14dcd6f7 --- /dev/null +++ b/wally-pipelined/src/lsu/lsuArb.sv @@ -0,0 +1,55 @@ +/////////////////////////////////////////// +// lsuArb.sv +// +// Written: Ross THompson and Kip Macsai-Goren +// Modified: kmacsaigoren@hmc.edu June 23, 2021 +// +// Purpose: LSU arbiter between the CPU's demand request for data memory and +// the page table walker +// +// A component of the Wally configurable RISC-V project. 
+// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, +// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT +// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+/////////////////////////////////////////// + +`include "wally-config.vh" + +module lsuArb + (input logic clk, reset, + + // signals from page table walker + output logic [`XLEN-1:0] MMUReadPTE, + input logic MMUTranslate, // *** rename to HPTWReq + output logic MMUReady, + input logic [`XLEN-1:0] MMUPAdr, + + // signal from CPU + input logic [1:0] MemRWM, + input logic [2:0] Funct3M, + input logic [1:0] AtomicM, + // back to CPU + output logic CommittedM, + output logic SquashSCW, + output logic DataMisalignedM, + + // to LSU + output logic DisableTranslation, + output logic [1:0] MemRWMtoLSU, + output logic [2:0] Funct3MtoLSU, + output logic [1:0] AtomicMtoLSU, + + + +endmodule From c8f80967a61a698ad73fd69396804af2a5ba2f62 Mon Sep 17 00:00:00 2001 From: Kip Macsai-Goren Date: Wed, 23 Jun 2021 19:59:06 -0400 Subject: [PATCH 03/38] added a few very simple arbitrations in the lsuArb that pass regression. cleaned up a few unused signals. Added several comments and concerns to lsuarb so I can remember where my thoughts were at the end of the day. --- wally-pipelined/src/ifu/ifu.sv | 2 +- wally-pipelined/src/lsu/lsu.sv | 6 +- wally-pipelined/src/lsu/lsuArb.sv | 85 ++++++++++++++----- wally-pipelined/src/mmu/mmu.sv | 2 +- wally-pipelined/src/mmu/pagetablewalker.sv | 2 +- .../src/wally/wallypipelinedhart.sv | 21 +++-- 6 files changed, 87 insertions(+), 31 deletions(-) diff --git a/wally-pipelined/src/ifu/ifu.sv b/wally-pipelined/src/ifu/ifu.sv index 46d7d0ea..909644a8 100644 --- a/wally-pipelined/src/ifu/ifu.sv +++ b/wally-pipelined/src/ifu/ifu.sv @@ -76,7 +76,7 @@ module ifu ( // pmp/pma (inside mmu) signals. *** temporarily from AHB bus but eventually replace with internal versions pre H input logic [31:0] HADDR, - input logic [2:0] HSIZE, HBURST, + input logic [2:0] HSIZE, input logic HWRITE, input logic ExecuteAccessF, //read, write, and atomic access are all set to zero because this mmu is onlt working with instructinos in the F stage. 
input logic [63:0] PMPCFG01_REGW, PMPCFG23_REGW, // *** all of these come from the privileged unit, so they're gonna have to come over into ifu and dmem diff --git a/wally-pipelined/src/lsu/lsu.sv b/wally-pipelined/src/lsu/lsu.sv index f20327bc..f53bc7c8 100644 --- a/wally-pipelined/src/lsu/lsu.sv +++ b/wally-pipelined/src/lsu/lsu.sv @@ -42,9 +42,6 @@ module lsu ( output logic SquashSCW, output logic DataMisalignedM, - // part of the page table walker - input logic DisableTranslation, - // address and write data input logic [`XLEN-1:0] MemAdrM, input logic [`XLEN-1:0] WriteDataM, @@ -78,6 +75,7 @@ module lsu ( input logic STATUS_MXR, STATUS_SUM, // from csr input logic DTLBWriteM, output logic DTLBMissM, + input logic DisableTranslation, // used to stop intermediate PTE physical addresses being saved to TLB. @@ -85,7 +83,7 @@ module lsu ( // PMA/PMP (inside mmu) signals input logic [31:0] HADDR, // *** replace all of these H inputs with physical adress once pma checkers have been edited to use paddr as well. - input logic [2:0] HSIZE, HBURST, + input logic [2:0] HSIZE, input logic HWRITE, input logic AtomicAccessM, WriteAccessM, ReadAccessM, // execute access is hardwired to zero in this mmu because we're only working with data in the M stage. 
input logic [63:0] PMPCFG01_REGW, PMPCFG23_REGW, // *** all of these come from the privileged unit, so thwyre gonna have to come over into ifu and dmem diff --git a/wally-pipelined/src/lsu/lsuArb.sv b/wally-pipelined/src/lsu/lsuArb.sv index 14dcd6f7..832ea423 100644 --- a/wally-pipelined/src/lsu/lsuArb.sv +++ b/wally-pipelined/src/lsu/lsuArb.sv @@ -26,29 +26,76 @@ `include "wally-config.vh" -module lsuArb - (input logic clk, reset, +module lsuArb ( + input logic clk, reset, // signals from page table walker - output logic [`XLEN-1:0] MMUReadPTE, - input logic MMUTranslate, // *** rename to HPTWReq - output logic MMUReady, - input logic [`XLEN-1:0] MMUPAdr, +// output logic [`XLEN-1:0] MMUReadPTE, // *** it seems like this is the value out of the ahblite that gets sent back to the ptw. I don;t think it needs to get checked until the next paddr has been extracted from it. + input logic MMUTranslate, // *** rename to HPTWReq +// output logic MMUReady, // *** Similar reason to mmuReadPTE + input logic [`XLEN-1:0] MMUPAdr, - // signal from CPU - input logic [1:0] MemRWM, - input logic [2:0] Funct3M, - input logic [1:0] AtomicM, - // back to CPU - output logic CommittedM, - output logic SquashSCW, - output logic DataMisalignedM, + // signal from CPU + input logic [1:0] MemRWM, + input logic [2:0] Funct3M, + input logic [1:0] AtomicM, + input logic [`XLEN-1:0] MemAdrM, // memory addrress to be checked coming from the CPU. *** this will be used to arbitrate to decide HADDR going into the PM checks, but it also gets sent in its normal form to the lsu because we need the virtual address for the tlb. 
+ // back to CPU - // to LSU - output logic DisableTranslation, - output logic [1:0] MemRWMtoLSU, - output logic [2:0] Funct3MtoLSU, - output logic [1:0] AtomicMtoLSU, + /* *** unused for not (23 June 2021) + output logic CommittedM, + output logic SquashSCW, + output logic DataMisalignedM, +*/ + // to LSU + output logic DisableTranslation, + output logic [1:0] MemRWMtoLSU, + output logic [2:0] Funct3MtoLSU, + output logic [1:0] AtomicMtoLSU + + /* *********** KMG: A lot of the rest of the signals that need to be arbitrated are going to be very annoying + these are the ones that used to get sent from the ahb to the pma checkers. but our eventual + goal is to have many of them sent thru the pmp/pma FIRST before the bus can get to them. + + deciding how to choose the right Haddr for the PM checkers will be difficult since they currently get + HADDR from the ahblite which seems like it could come from any number of sources, while we will eventually be narrowing it down to two possible sources. + + other problems arise when some signals like HSIZE are used in the PM checks but there's also a differnent size input to the tlb and both of these get to go through the mmu. + which one should be chosen for which device? can the be merged somehow? + +*/ + + /*// pmp/pma specifics sent through lsu + output logic [`XLEN-1:0] HADDRtoLSU, + output logic [2:0] HSIZEtoLSU // *** May not actually need to be arbitrated, since I'm +*/ +); + +/* *** these are all the signals that get sent to the pmp/pma chackers straight from the ahblite. We want to switch it around so the + checkers get these signals first and then the newly checked values can get sent to the ahblite. + input logic [31:0] HADDR, // *** replace all of these H inputs with physical adress once pma checkers have been edited to use paddr as well. 
+ input logic [2:0] HSIZE, + input logic HWRITE, + input logic AtomicAccessM, WriteAccessM, ReadAccessM, // execute access is hardwired to zero in this mmu because we're only working with data in the M stage. +*/ + + generate + if (`XLEN == 32) begin + + assign Funct3MtoLSU = MMUTranslate ? 3'b010 : Funct3M; // *** is this the right thing for the msB? + + end else begin + + assign Funct3MtoLSU = MMUTranslate ? 3'b011 : Funct3M; // *** is this the right thing for the msB? + + end + endgenerate + + assign AtomicMtoLSU = MMUTranslate ? 2'b00 : AtomicM; + assign MemRWMtoLSU = MemRWM; // *** along with the rest of the lsu, the mmu uses memrwm in it's pure form so I think we can just forward it through + assign DisableTranslation = MMUTranslate; +// assign HADDRtoLSU = MMUTranslate ? MMUPAdr : MemAdrM; // *** Potentially a huge breaking point since the PM checks always get HADDR from ahblite and not necessarily just these two sources. this will need to be looked over when we fix PM to only take physical addresses. +// assign HSIZEtoLSU = {1'b0, Funct3MtoLSU[1:0]}; // the Hsize is always just the funct3M indicating the size of the data transfer. 
diff --git a/wally-pipelined/src/mmu/mmu.sv b/wally-pipelined/src/mmu/mmu.sv index e6d003b3..60f46b04 100644 --- a/wally-pipelined/src/mmu/mmu.sv +++ b/wally-pipelined/src/mmu/mmu.sv @@ -68,7 +68,7 @@ module mmu #(parameter ENTRY_BITS = 3, // PMA checker signals input logic [31:0] HADDR, - input logic [2:0] HSIZE, HBURST, + input logic [2:0] HSIZE, input logic HWRITE, input logic AtomicAccessM, ExecuteAccessF, WriteAccessM, ReadAccessM, input logic [63:0] PMPCFG01_REGW, PMPCFG23_REGW, // *** all of these come from the privileged unit, so thwyre gonna have to come over into ifu and dmem diff --git a/wally-pipelined/src/mmu/pagetablewalker.sv b/wally-pipelined/src/mmu/pagetablewalker.sv index 785a4aa7..abafd2ce 100644 --- a/wally-pipelined/src/mmu/pagetablewalker.sv +++ b/wally-pipelined/src/mmu/pagetablewalker.sv @@ -52,7 +52,7 @@ module pagetablewalker ( - // *** modify to send to LSU + // *** modify to send to LSU // *** KMG: These are inputs/results from the ahblite whose addresses should have already been checked, so I don't think they need to be sent through the LSU input logic [`XLEN-1:0] MMUReadPTE, input logic MMUReady, diff --git a/wally-pipelined/src/wally/wallypipelinedhart.sv b/wally-pipelined/src/wally/wallypipelinedhart.sv index 303fd5ad..a82b84e0 100644 --- a/wally-pipelined/src/wally/wallypipelinedhart.sv +++ b/wally-pipelined/src/wally/wallypipelinedhart.sv @@ -155,7 +155,12 @@ module wallypipelinedhart ( logic [4:0] InstrClassM; logic [`XLEN-1:0] HRDATAW; - + + // IEU vs HPTW arbitration signals to send to LSU + logic DisableTranslation; + logic [1:0] MemRWMtoLSU; + logic [2:0] Funct3MtoLSU; + logic [1:0] AtomicMtoLSU; ifu ifu(.InstrInF(InstrRData), .*); // instruction fetch unit: PC, branch prediction, instruction cache @@ -174,13 +179,19 @@ module wallypipelinedhart ( .MemSizeM(Funct3M[1:0]), .UnsignedLoadM(Funct3M[2]), .*); */ + // arbiter between IEU and pagetablewalker + lsuArb arbiter(.MMUTranslate(MMUTranslate), .MMUPAdr(MMUPAdr), 
.MemRWM(MemRWM|FMemRWM), + .Funct3M(Funct3M), .AtomicM(AtomicM), .MemAdrM(MemAdrM), + // outputs to LSU + .DisableTranslation(DisableTranslation), .MemRWMtoLSU(MemRWMtoLSU), .Funct3MtoLSU(Funct3MtoLSU), + .AtomicMtoLSU(AtomicMtoLSU), .*); - lsu lsu(.MemRWM(MemRWM|FMemRWM), .WriteDataM(WriteDatatmpM),.*, - .ReadDataW(ReadDataW), - .DisableTranslation(1'b0) // *** will connect to page table walker arbiter -); // data cache unit + lsu lsu(.MemRWM(MemRWMtoLSU), .AtomicM(AtomicMtoLSU), .Funct3M(Funct3MtoLSU), + .DisableTranslation(DisableTranslation), + .WriteDataM(WriteDatatmpM), + .ReadDataW(ReadDataW), .* ); // data cache unit ahblite ebu( //.InstrReadF(1'b0), From aeeaf6d91997006e4f89dc12a38ec56a3a3b1099 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Thu, 24 Jun 2021 13:05:22 -0500 Subject: [PATCH 04/38] Progress. --- .../config/busybear/wally-config.vh | 3 +- wally-pipelined/src/ebu/ahblite.sv | 4 +- wally-pipelined/src/hazard/hazard.sv | 26 +- wally-pipelined/src/ifu/ifu.sv | 31 +- wally-pipelined/src/lsu/lsu.sv | 48 +-- wally-pipelined/src/lsu/lsuArb.sv | 147 +++++---- .../src/wally/wallypipelinedhart.sv | 310 ++++++++++-------- 7 files changed, 335 insertions(+), 234 deletions(-) diff --git a/wally-pipelined/config/busybear/wally-config.vh b/wally-pipelined/config/busybear/wally-config.vh index 0db13778..e6532ee5 100644 --- a/wally-pipelined/config/busybear/wally-config.vh +++ b/wally-pipelined/config/busybear/wally-config.vh @@ -30,7 +30,8 @@ `define BUILDROOT 0 `define BUSYBEAR 1 `define LINUX_FIX_READ {'h10000005} -`define LINUX_TEST_VECTORS "/courses/e190ax/busybear_boot/" +`define LINUX_TEST_VECTORS "../../../busybear_boot/" +//`define LINUX_TEST_VECTORS "/courses/e190ax/busybear_boot/" // RV32 or RV64: XLEN = 32 or 64 `define XLEN 64 diff --git a/wally-pipelined/src/ebu/ahblite.sv b/wally-pipelined/src/ebu/ahblite.sv index 7a486a96..c459da91 100644 --- a/wally-pipelined/src/ebu/ahblite.sv +++ b/wally-pipelined/src/ebu/ahblite.sv @@ -80,7 +80,6 @@ 
module ahblite ( output logic [3:0] HSIZED, output logic HWRITED, // Stalls - output logic /*InstrUpdate, */DataStall, output logic CommitM, MemAckW ); @@ -152,9 +151,12 @@ module ahblite ( // stall signals // Note that we need to extend both stalls when MMUTRANSLATE goes to idle, // since translation might not be complete. + // *** Ross Thompson remove this datastall +/* -----\/----- EXCLUDED -----\/----- assign #2 DataStall = ((NextBusState == MEMREAD) || (NextBusState == MEMWRITE) || (NextBusState == ATOMICREAD) || (NextBusState == ATOMICWRITE) || MMUStall); + -----/\----- EXCLUDED -----/\----- */ //assign #1 InstrStall = ((NextBusState == INSTRREAD) || (NextBusState == INSTRREADC) || // MMUStall); diff --git a/wally-pipelined/src/hazard/hazard.sv b/wally-pipelined/src/hazard/hazard.sv index 016d8e1a..c61db2dc 100644 --- a/wally-pipelined/src/hazard/hazard.sv +++ b/wally-pipelined/src/hazard/hazard.sv @@ -31,7 +31,7 @@ module hazard( // Detect hazards input logic BPPredWrongE, CSRWritePendingDEM, RetM, TrapM, input logic LoadStallD, MulDivStallD, CSRRdStallD, - input logic DataStall, ICacheStallF, + input logic DCacheStall, ICacheStallF, input logic FPUStallD, input logic DivBusyE,FDivBusyE, // Stall & flush outputs @@ -55,16 +55,16 @@ module hazard( // A stage must stall if the next stage is stalled // If any stages are stalled, the first stage that isn't stalled must flush. 
- assign StallFCause = CSRWritePendingDEM && ~(TrapM || RetM || BPPredWrongE); - assign StallDCause = (LoadStallD || MulDivStallD || CSRRdStallD || FPUStallD) && ~(TrapM || RetM || BPPredWrongE); // stall in decode if instruction is a load/mul/csr dependent on previous - assign StallECause = DivBusyE || FDivBusyE; + assign StallFCause = CSRWritePendingDEM && ~(TrapM | RetM | BPPredWrongE); + assign StallDCause = (LoadStallD | MulDivStallD | CSRRdStallD | FPUStallD) & ~(TrapM | RetM | BPPredWrongE); // stall in decode if instruction is a load/mul/csr dependent on previous + assign StallECause = DivBusyE | FDivBusyE; assign StallMCause = 0; - assign StallWCause = DataStall || ICacheStallF; + assign StallWCause = DCacheStall | ICacheStallF; - assign StallF = StallFCause || StallD; - assign StallD = StallDCause || StallE; - assign StallE = StallECause || StallM; - assign StallM = StallMCause || StallW; + assign StallF = StallFCause | StallD; + assign StallD = StallDCause | StallE; + assign StallE = StallECause | StallM; + assign StallM = StallMCause | StallW; assign StallW = StallWCause; //assign FirstUnstalledD = (~StallD & StallF & ~MulDivStallD); @@ -76,8 +76,8 @@ module hazard( // Each stage flushes if the previous stage is the last one stalled (for cause) or the system has reason to flush assign FlushF = BPPredWrongE; - assign FlushD = FirstUnstalledD || TrapM || RetM || BPPredWrongE; - assign FlushE = FirstUnstalledE || TrapM || RetM || BPPredWrongE; - assign FlushM = FirstUnstalledM || TrapM || RetM; - assign FlushW = FirstUnstalledW || TrapM; + assign FlushD = FirstUnstalledD | TrapM | RetM | BPPredWrongE; + assign FlushE = FirstUnstalledE | TrapM | RetM | BPPredWrongE; + assign FlushM = FirstUnstalledM | TrapM | RetM; + assign FlushW = FirstUnstalledW | TrapM; endmodule diff --git a/wally-pipelined/src/ifu/ifu.sv b/wally-pipelined/src/ifu/ifu.sv index 909644a8..2fa5cbfd 100644 --- a/wally-pipelined/src/ifu/ifu.sv +++ b/wally-pipelined/src/ifu/ifu.sv @@ -78,7 
+78,6 @@ module ifu ( input logic [31:0] HADDR, input logic [2:0] HSIZE, input logic HWRITE, - input logic ExecuteAccessF, //read, write, and atomic access are all set to zero because this mmu is onlt working with instructinos in the F stage. input logic [63:0] PMPCFG01_REGW, PMPCFG23_REGW, // *** all of these come from the privileged unit, so they're gonna have to come over into ifu and dmem input var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0], @@ -114,16 +113,26 @@ module ifu ( assign PCPF = {8'b0, PCPFmmu}; endgenerate - mmu #(.ENTRY_BITS(`ITLB_ENTRY_BITS), .IMMU(1)) itlb(.TLBAccessType(2'b10), .VirtualAddress(PCF), .Size(2'b10), - .PTEWriteVal(PageTableEntryF), .PageTypeWriteVal(PageTypeF), - .TLBWrite(ITLBWriteF), .TLBFlush(ITLBFlushF), - .PhysicalAddress(PCPFmmu), .TLBMiss(ITLBMissF), - .TLBHit(ITLBHitF), .TLBPageFault(ITLBInstrPageFaultF), - - .AtomicAccessM(1'b0), .WriteAccessM(1'b0), .ReadAccessM(1'b0), // *** is this the right way force these bits constant? should they be someething else? 
- .SquashBusAccess(ISquashBusAccessF), .HSELRegions(IHSELRegionsF), - .DisableTranslation(1'b0), - .*); + mmu #(.ENTRY_BITS(`ITLB_ENTRY_BITS), .IMMU(1)) + itlb(.TLBAccessType(2'b10), + .VirtualAddress(PCF), + .Size(2'b10), + .PTEWriteVal(PageTableEntryF), + .PageTypeWriteVal(PageTypeF), + .TLBWrite(ITLBWriteF), + .TLBFlush(ITLBFlushF), + .PhysicalAddress(PCPFmmu), + .TLBMiss(ITLBMissF), + .TLBHit(ITLBHitF), + .TLBPageFault(ITLBInstrPageFaultF), + .ExecuteAccessF(1'b1), + .AtomicAccessM(1'b0), + .WriteAccessM(1'b0), + .ReadAccessM(1'b0), + .SquashBusAccess(ISquashBusAccessF), + .HSELRegions(IHSELRegionsF), + .DisableTranslation(1'b0), + .*); // branch predictor signals diff --git a/wally-pipelined/src/lsu/lsu.sv b/wally-pipelined/src/lsu/lsu.sv index f53bc7c8..490e0460 100644 --- a/wally-pipelined/src/lsu/lsu.sv +++ b/wally-pipelined/src/lsu/lsu.sv @@ -31,7 +31,7 @@ module lsu ( input logic clk, reset, input logic StallM, FlushM, StallW, FlushW, - //output logic DataStall, + output logic DataStall, // Memory Stage // connected to cpu (controls) @@ -115,28 +115,38 @@ module lsu ( // CPU's read data input ReadDataW. assign ReadDataW = HRDATAW; - mmu #(.ENTRY_BITS(`DTLB_ENTRY_BITS), .IMMU(0)) dmmu(.TLBAccessType(MemRWM), .VirtualAddress(MemAdrM), .Size(Funct3M[1:0]), - .PTEWriteVal(PageTableEntryM), .PageTypeWriteVal(PageTypeM), - .TLBWrite(DTLBWriteM), .TLBFlush(DTLBFlushM), - .PhysicalAddress(MemPAdrM), .TLBMiss(DTLBMissM), - .TLBHit(DTLBHitM), .TLBPageFault(DTLBPageFaultM), - - .ExecuteAccessF(1'b0), - .SquashBusAccess(DSquashBusAccessM), .HSELRegions(DHSELRegionsM), - .*); // *** the pma/pmp instruction acess faults don't really matter here. is it possible to parameterize which outputs exist? 
+ mmu #(.ENTRY_BITS(`DTLB_ENTRY_BITS), .IMMU(0)) + dmmu(.TLBAccessType(MemRWM), + .VirtualAddress(MemAdrM), + .Size(Funct3M[1:0]), + .PTEWriteVal(PageTableEntryM), + .PageTypeWriteVal(PageTypeM), + .TLBWrite(DTLBWriteM), + .TLBFlush(DTLBFlushM), + .PhysicalAddress(MemPAdrM), + .TLBMiss(DTLBMissM), + .TLBHit(DTLBHitM), + .TLBPageFault(DTLBPageFaultM), + .ExecuteAccessF(1'b0), + .AtomicAccessM(|AtomicM), + .WriteAccessM(MemRWM[0]), + .ReadAccessM(MemRWM[1]), + .SquashBusAccess(DSquashBusAccessM), + .HSELRegions(DHSELRegionsM), + .*); // *** the pma/pmp instruction acess faults don't really matter here. is it possible to parameterize which outputs exist? // Specify which type of page fault is occurring assign DTLBLoadPageFaultM = DTLBPageFaultM & MemRWM[1]; assign DTLBStorePageFaultM = DTLBPageFaultM & MemRWM[0]; - // Determine if an Unaligned access is taking place - always_comb - case(Funct3M[1:0]) - 2'b00: DataMisalignedM = 0; // lb, sb, lbu - 2'b01: DataMisalignedM = MemAdrM[0]; // lh, sh, lhu - 2'b10: DataMisalignedM = MemAdrM[1] | MemAdrM[0]; // lw, sw, flw, fsw, lwu - 2'b11: DataMisalignedM = |MemAdrM[2:0]; // ld, sd, fld, fsd - endcase + // Determine if an Unaligned access is taking place + always_comb + case(Funct3M[1:0]) + 2'b00: DataMisalignedM = 0; // lb, sb, lbu + 2'b01: DataMisalignedM = MemAdrM[0]; // lh, sh, lhu + 2'b10: DataMisalignedM = MemAdrM[1] | MemAdrM[0]; // lw, sw, flw, fsw, lwu + 2'b11: DataMisalignedM = |MemAdrM[2:0]; // ld, sd, fld, fsd + endcase // Squash unaligned data accesses and failed store conditionals // *** this is also the place to squash if the cache is hit @@ -185,7 +195,7 @@ module lsu ( endgenerate // Data stall - //assign DataStall = 0; + assign DataStall = CurrState != STATE_READY; // Ross Thompson April 22, 2021 // for now we need to handle the issue where the data memory interface repeately diff --git a/wally-pipelined/src/lsu/lsuArb.sv b/wally-pipelined/src/lsu/lsuArb.sv index 832ea423..709b3f06 100644 --- 
a/wally-pipelined/src/lsu/lsuArb.sv +++ b/wally-pipelined/src/lsu/lsuArb.sv @@ -26,77 +26,112 @@ `include "wally-config.vh" -module lsuArb ( - input logic clk, reset, +module lsuArb + (input logic clk, reset, - // signals from page table walker -// output logic [`XLEN-1:0] MMUReadPTE, // *** it seems like this is the value out of the ahblite that gets sent back to the ptw. I don;t think it needs to get checked until the next paddr has been extracted from it. - input logic MMUTranslate, // *** rename to HPTWReq -// output logic MMUReady, // *** Similar reason to mmuReadPTE - input logic [`XLEN-1:0] MMUPAdr, + // from page table walker + input logic HPTWTranslate, + input logic [`XLEN-1:0] HPTWPAdr, + // to page table walker. + output logic [`XLEN-1:0] HPTWReadPTE, + output logic HPTWReady, - // signal from CPU - input logic [1:0] MemRWM, - input logic [2:0] Funct3M, - input logic [1:0] AtomicM, - input logic [`XLEN-1:0] MemAdrM, // memory addrress to be checked coming from the CPU. *** this will be used to arbitrate to decide HADDR going into the PM checks, but it also gets sent in its normal form to the lsu because we need the virtual address for the tlb. 
- // back to CPU + // from CPU + input logic [1:0] MemRWM, + input logic [2:0] Funct3M, + input logic [1:0] AtomicM, + input logic [`XLEN-1:0] MemAdrM, + input logic [`XLEN-1:0] WriteDataM, + // to CPU + output logic [`XLEN-1:0] ReadDataW, + output logic CommittedM, + output logic SquashSCW, + output logic DataMisalignedM, + output logic DCacheStall, + + // to LSU + output logic DisableTranslation, + output logic [1:0] MemRWMtoLSU, + output logic [2:0] Funct3MtoLSU, + output logic [1:0] AtomicMtoLSU, + output logic [`XLEN-1:0] MemAdrMtoLSU, + output logic [`XLEN-1:0] WriteDataMtoLSU, + // from LSU + input logic CommittedMfromLSU, + input logic SquashSCWfromLSU, + input logic DataMisalignedMfromLSU, + input logic [`XLEN-1:0] ReadDataWFromLSU, + input logic DataStall + + ); + + // HPTWTranslate is the request for memory by the page table walker. When + // this is high the page table walker gains priority over the CPU's data + // input. Note the ptw only makes a request after an instruction or data + // tlb miss. It is entirely possible the dcache is currently processing + // a data cache miss when an instruction tlb miss occurs. If an instruction + // in the E stage causes a d cache miss, the d cache will immediately start + // processing the request. Simultaneously the ITLB misses. By the time + // the TLB miss causes the page table walker to issue the first request + // to data memory the d cache is already busy. We can interlock by + // leveraging Stall as a d cache busy. We will need an FSM to handle this. 
- /* *** unused for not (23 June 2021) - output logic CommittedM, - output logic SquashSCW, - output logic DataMisalignedM, -*/ - // to LSU - output logic DisableTranslation, - output logic [1:0] MemRWMtoLSU, - output logic [2:0] Funct3MtoLSU, - output logic [1:0] AtomicMtoLSU + localparam StateReady = 0; + localparam StatePTWPending = 1; + localparam StatePTWActive = 2; // must differ from StatePTWPending - /* *********** KMG: A lot of the rest of the signals that need to be arbitrated are going to be very annoying - these are the ones that used to get sent from the ahb to the pma checkers. but our eventual - goal is to have many of them sent thru the pmp/pma FIRST before the bus can get to them. + logic [1:0] CurrState, NextState; + logic SelPTW; + - deciding how to choose the right Haddr for the PM checkers will be difficult since they currently get - HADDR from the ahblite which seems like it could come from any number of sources, while we will eventually be narrowing it down to two possible sources. + flopr #(2) StateReg( + .clk(clk), + .reset(reset), + .d(NextState), + .q(CurrState)); - other problems arise when some signals like HSIZE are used in the PM checks but there's also a differnent size input to the tlb and both of these get to go through the mmu. - which one should be chosen for which device? can the be merged somehow? 
+ always_comb begin + case(CurrState) + StateReady: + if (HPTWTranslate & DataStall) NextState = StatePTWPending; + else if (HPTWTranslate & ~DataStall) NextState = StatePTWActive; + else NextState = StateReady; + StatePTWPending: + if (~DataStall) NextState = StatePTWActive; + else NextState = StatePTWPending; + StatePTWActive: + if (~DataStall) NextState = StateReady; + else NextState = StatePTWActive; + default: NextState = StateReady; + endcase // case (CurrState) + end -*/ - /*// pmp/pma specifics sent through lsu - output logic [`XLEN-1:0] HADDRtoLSU, - output logic [2:0] HSIZEtoLSU // *** May not actually need to be arbitrated, since I'm -*/ -); - -/* *** these are all the signals that get sent to the pmp/pma chackers straight from the ahblite. We want to switch it around so the - checkers get these signals first and then the newly checked values can get sent to the ahblite. - input logic [31:0] HADDR, // *** replace all of these H inputs with physical adress once pma checkers have been edited to use paddr as well. - input logic [2:0] HSIZE, - input logic HWRITE, - input logic AtomicAccessM, WriteAccessM, ReadAccessM, // execute access is hardwired to zero in this mmu because we're only working with data in the M stage. -*/ + // multiplex the outputs to LSU + assign DisableTranslation = SelPTW; // change names between SelPTW would be confusing in DTLB. + assign SelPTW = CurrState == StatePTWActive; + assign MemRWMtoLSU = SelPTW ? 2'b10 : MemRWM; generate if (`XLEN == 32) begin - - assign Funct3MtoLSU = MMUTranslate ? 3'b010 : Funct3M; // *** is this the right thing for the msB? - + assign Funct3MtoLSU = SelPTW ? 3'b010 : Funct3M; end else begin - - assign Funct3MtoLSU = MMUTranslate ? 3'b011 : Funct3M; // *** is this the right thing for the msB? - + assign Funct3MtoLSU = SelPTW ? 3'b011 : Funct3M; end endgenerate - assign AtomicMtoLSU = MMUTranslate ? 
2'b00 : AtomicM; - assign MemRWMtoLSU = MemRWM; // *** along with the rest of the lsu, the mmu uses memrwm in it's pure form so I think we can just forward it through - assign DisableTranslation = MMUTranslate; -// assign HADDRtoLSU = MMUTranslate ? MMUPAdr : MemAdrM; // *** Potentially a huge breaking point since the PM checks always get HADDR from ahblite and not necessarily just these two sources. this will need to be looked over when we fix PM to only take physical addresses. -// assign HSIZEtoLSU = {1'b0, Funct3MtoLSU[1:0]}; // the Hsize is always just the funct3M indicating the size of the data transfer. + assign AtomicMtoLSU = SelPTW ? 2'b00 : AtomicM; + assign MemAdrMtoLSU = SelPTW ? HPTWPAdr : MemAdrM; + assign WriteDataMtoLSU = SelPTW ? `XLEN'b0 : WriteDataM; + // demux the inputs from LSU to walker or cpu's data port. - + assign ReadDataW = SelPTW ? `XLEN'b0 : ReadDataWFromLSU; // probably can avoid this demux + assign HPTWReadPTE = SelPTW ? ReadDataWFromLSU : `XLEN'b0 ; // probably can avoid this demux + assign CommittedM = SelPTW ? 1'b0 : CommittedMfromLSU; + assign SquashSCW = SelPTW ? 1'b0 : SquashSCWfromLSU; + assign DataMisalignedM = SelPTW ? 1'b0 : DataMisalignedMfromLSU; + assign HPTWReady = ~ DataStall; + assign DCacheStall = DataStall; // *** this is probably going to change. 
+ endmodule diff --git a/wally-pipelined/src/wally/wallypipelinedhart.sv b/wally-pipelined/src/wally/wallypipelinedhart.sv index a82b84e0..479625a5 100644 --- a/wally-pipelined/src/wally/wallypipelinedhart.sv +++ b/wally-pipelined/src/wally/wallypipelinedhart.sv @@ -26,142 +26,153 @@ `include "wally-config.vh" /* verilator lint_on UNUSED */ -module wallypipelinedhart ( - input logic clk, reset, - output logic [`XLEN-1:0] PCF, -// input logic [31:0] InstrF, - // Privileged - input logic TimerIntM, ExtIntM, SwIntM, - input logic InstrAccessFaultF, - input logic DataAccessFaultM, - input logic [63:0] MTIME_CLINT, MTIMECMP_CLINT, - // Bus Interface - input logic [15:0] rd2, // bogus, delete when real multicycle fetch works - input logic [`AHBW-1:0] HRDATA, - input logic HREADY, HRESP, - output logic HCLK, HRESETn, - output logic [31:0] HADDR, - output logic [`AHBW-1:0] HWDATA, - output logic HWRITE, - output logic [2:0] HSIZE, - output logic [2:0] HBURST, - output logic [3:0] HPROT, - output logic [1:0] HTRANS, - output logic HMASTLOCK, - output logic [5:0] HSELRegions, - // Delayed signals for subword write - output logic [2:0] HADDRD, - output logic [3:0] HSIZED, - output logic HWRITED -); +module wallypipelinedhart + ( + input logic clk, reset, + output logic [`XLEN-1:0] PCF, + // input logic [31:0] InstrF, + // Privileged + input logic TimerIntM, ExtIntM, SwIntM, + input logic InstrAccessFaultF, + input logic DataAccessFaultM, + input logic [63:0] MTIME_CLINT, MTIMECMP_CLINT, + // Bus Interface + input logic [15:0] rd2, // bogus, delete when real multicycle fetch works + input logic [`AHBW-1:0] HRDATA, + input logic HREADY, HRESP, + output logic HCLK, HRESETn, + output logic [31:0] HADDR, + output logic [`AHBW-1:0] HWDATA, + output logic HWRITE, + output logic [2:0] HSIZE, + output logic [2:0] HBURST, + output logic [3:0] HPROT, + output logic [1:0] HTRANS, + output logic HMASTLOCK, + output logic [5:0] HSELRegions, + // Delayed signals for subword write + output 
logic [2:0] HADDRD, + output logic [3:0] HSIZED, + output logic HWRITED + ); - // logic [1:0] ForwardAE, ForwardBE; - logic StallF, StallD, StallE, StallM, StallW; - logic FlushF, FlushD, FlushE, FlushM, FlushW; - logic RetM, TrapM, NonBusTrapM; + // logic [1:0] ForwardAE, ForwardBE; + logic StallF, StallD, StallE, StallM, StallW; + logic FlushF, FlushD, FlushE, FlushM, FlushW; + logic RetM, TrapM, NonBusTrapM; // new signals that must connect through DP - logic MulDivE, W64E; - logic CSRReadM, CSRWriteM, PrivilegedM; - logic [1:0] AtomicM; - logic [`XLEN-1:0] SrcAE, SrcBE; - logic [`XLEN-1:0] SrcAM; - logic [2:0] Funct3E; + logic MulDivE, W64E; + logic CSRReadM, CSRWriteM, PrivilegedM; + logic [1:0] AtomicM; + logic [`XLEN-1:0] SrcAE, SrcBE; + logic [`XLEN-1:0] SrcAM; + logic [2:0] Funct3E; // logic [31:0] InstrF; - logic [31:0] InstrD, InstrE, InstrM, InstrW; - logic [`XLEN-1:0] PCD, PCE, PCM, PCLinkE, PCLinkW; - logic [`XLEN-1:0] PCTargetE; - logic [`XLEN-1:0] CSRReadValW, MulDivResultW; - logic [`XLEN-1:0] PrivilegedNextPCM; - logic [1:0] MemRWM; - logic InstrValidM, InstrValidW; - logic InstrMisalignedFaultM; - logic DataMisalignedM; - logic IllegalBaseInstrFaultD, IllegalIEUInstrFaultD; - logic ITLBInstrPageFaultF, DTLBLoadPageFaultM, DTLBStorePageFaultM; - logic WalkerInstrPageFaultF, WalkerLoadPageFaultM, WalkerStorePageFaultM; - logic LoadMisalignedFaultM, LoadAccessFaultM; - logic StoreMisalignedFaultM, StoreAccessFaultM; - logic [`XLEN-1:0] InstrMisalignedAdrM; + logic [31:0] InstrD, InstrE, InstrM, InstrW; + logic [`XLEN-1:0] PCD, PCE, PCM, PCLinkE, PCLinkW; + logic [`XLEN-1:0] PCTargetE; + logic [`XLEN-1:0] CSRReadValW, MulDivResultW; + logic [`XLEN-1:0] PrivilegedNextPCM; + logic [1:0] MemRWM; + logic InstrValidM, InstrValidW; + logic InstrMisalignedFaultM; + logic DataMisalignedM; + logic IllegalBaseInstrFaultD, IllegalIEUInstrFaultD; + logic ITLBInstrPageFaultF, DTLBLoadPageFaultM, DTLBStorePageFaultM; + logic WalkerInstrPageFaultF, 
WalkerLoadPageFaultM, WalkerStorePageFaultM; + logic LoadMisalignedFaultM, LoadAccessFaultM; + logic StoreMisalignedFaultM, StoreAccessFaultM; + logic [`XLEN-1:0] InstrMisalignedAdrM; - logic PCSrcE; - logic CSRWritePendingDEM; - logic FPUStallD, LoadStallD, MulDivStallD, CSRRdStallD; - logic DivDoneE; - logic DivBusyE; - logic DivDoneW; - logic [4:0] SetFflagsM; - logic [2:0] FRM_REGW; - logic FloatRegWriteW; - logic [1:0] FMemRWM; - logic RegWriteD; - logic [`XLEN-1:0] FWriteDataM; - logic SquashSCW; - logic FStallD; - logic FWriteIntE, FWriteIntW, FWriteIntM; - logic FDivBusyE; - logic IllegalFPUInstrD, IllegalFPUInstrE; - logic [`XLEN-1:0] FPUResultW; + logic PCSrcE; + logic CSRWritePendingDEM; + logic FPUStallD, LoadStallD, MulDivStallD, CSRRdStallD; + logic DivDoneE; + logic DivBusyE; + logic DivDoneW; + logic [4:0] SetFflagsM; + logic [2:0] FRM_REGW; + logic FloatRegWriteW; + logic [1:0] FMemRWM; + logic RegWriteD; + logic [`XLEN-1:0] FWriteDataM; + logic SquashSCW; + logic FStallD; + logic FWriteIntE, FWriteIntW, FWriteIntM; + logic FDivBusyE; + logic IllegalFPUInstrD, IllegalFPUInstrE; + logic [`XLEN-1:0] FPUResultW; // memory management unit signals - logic ITLBWriteF, DTLBWriteM; - logic ITLBFlushF, DTLBFlushM; - logic ITLBMissF, ITLBHitF; - logic DTLBMissM, DTLBHitM; - logic [`XLEN-1:0] SATP_REGW; - logic STATUS_MXR, STATUS_SUM; - logic [1:0] PrivilegeModeW; - logic [`XLEN-1:0] PageTableEntryF, PageTableEntryM; - logic [1:0] PageTypeF, PageTypeM; + logic ITLBWriteF, DTLBWriteM; + logic ITLBFlushF, DTLBFlushM; + logic ITLBMissF, ITLBHitF; + logic DTLBMissM, DTLBHitM; + logic [`XLEN-1:0] SATP_REGW; + logic STATUS_MXR, STATUS_SUM; + logic [1:0] PrivilegeModeW; + logic [`XLEN-1:0] PageTableEntryF, PageTableEntryM; + logic [1:0] PageTypeF, PageTypeM; // PMA checker signals - logic AtomicAccessM, ExecuteAccessF, WriteAccessM, ReadAccessM; - logic PMPInstrAccessFaultF, PMPLoadAccessFaultM, PMPStoreAccessFaultM; - logic PMAInstrAccessFaultF, 
PMALoadAccessFaultM, PMAStoreAccessFaultM; - logic DSquashBusAccessM, ISquashBusAccessF; - logic [5:0] DHSELRegionsM, IHSELRegionsF; - var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0]; - logic [63:0] PMPCFG01_REGW, PMPCFG23_REGW; // signals being sent from privileged unit to pmp/pma in dmem and ifu. + logic AtomicAccessM, ExecuteAccessF, WriteAccessM, ReadAccessM; + logic PMPInstrAccessFaultF, PMPLoadAccessFaultM, PMPStoreAccessFaultM; + logic PMAInstrAccessFaultF, PMALoadAccessFaultM, PMAStoreAccessFaultM; + logic DSquashBusAccessM, ISquashBusAccessF; + logic [5:0] DHSELRegionsM, IHSELRegionsF; + var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0]; + logic [63:0] PMPCFG01_REGW, PMPCFG23_REGW; // signals being sent from privileged unit to pmp/pma in dmem and ifu. assign HSELRegions = ExecuteAccessF ? IHSELRegionsF : DHSELRegionsM; // *** this is a pure guess on how one of these should be selected. it passes tests, but is it the right way to do this? // IMem stalls - logic ICacheStallF; - logic [`XLEN-1:0] MMUPAdr, MMUReadPTE; - logic MMUStall; - logic MMUTranslate, MMUReady; + logic ICacheStallF; + logic DCacheStall; + logic [`XLEN-1:0] MMUPAdr, MMUReadPTE; + logic MMUStall; + logic MMUTranslate, MMUReady; // bus interface to dmem - logic MemReadM, MemWriteM; - logic [1:0] AtomicMaskedM; - logic [2:0] Funct3M; - logic [`XLEN-1:0] MemAdrM, WriteDataM; - logic [`PA_BITS-1:0] MemPAdrM; - logic [`XLEN-1:0] ReadDataW; - logic [`PA_BITS-1:0] InstrPAdrF; - logic [`XLEN-1:0] InstrRData; - logic InstrReadF; - logic DataStall; - logic InstrAckF, MemAckW; - logic CommitM, CommittedM; + logic MemReadM, MemWriteM; + logic [1:0] AtomicMaskedM; + logic [2:0] Funct3M; + logic [`XLEN-1:0] MemAdrM, WriteDataM; + logic [`PA_BITS-1:0] MemPAdrM; + logic [`XLEN-1:0] ReadDataW; + logic [`PA_BITS-1:0] InstrPAdrF; + logic [`XLEN-1:0] InstrRData; + logic InstrReadF; + logic DataStall; + logic InstrAckF, MemAckW; + logic CommitM, CommittedM; - logic BPPredWrongE; - logic 
BPPredDirWrongM; - logic BTBPredPCWrongM; - logic RASPredPCWrongM; - logic BPPredClassNonCFIWrongM; + logic BPPredWrongE; + logic BPPredDirWrongM; + logic BTBPredPCWrongM; + logic RASPredPCWrongM; + logic BPPredClassNonCFIWrongM; - logic[`XLEN-1:0] WriteDatatmpM; + logic [`XLEN-1:0] WriteDatatmpM; - logic [4:0] InstrClassM; + logic [4:0] InstrClassM; - logic [`XLEN-1:0] HRDATAW; + logic [`XLEN-1:0] HRDATAW; // IEU vs HPTW arbitration signals to send to LSU - logic DisableTranslation; - logic [1:0] MemRWMtoLSU; - logic [2:0] Funct3MtoLSU; - logic [1:0] AtomicMtoLSU; - + logic DisableTranslation; + logic [1:0] MemRWMtoLSU; + logic [2:0] Funct3MtoLSU; + logic [1:0] AtomicMtoLSU; + logic [`XLEN-1:0] MemAdrMtoLSU; + logic [`XLEN-1:0] WriteDataMtoLSU; + logic [`XLEN-1:0] ReadDataWFromLSU; + logic CommittedMfromLSU; + logic SquashSCWfromLSU; + logic DataMisalignedMfromLSU; + + + + ifu ifu(.InstrInF(InstrRData), .*); // instruction fetch unit: PC, branch prediction, instruction cache ieu ieu(.*); // integer execution unit: integer register file, datapath and controller @@ -181,28 +192,61 @@ module wallypipelinedhart ( // arbiter between IEU and pagetablewalker - lsuArb arbiter(.MMUTranslate(MMUTranslate), .MMUPAdr(MMUPAdr), .MemRWM(MemRWM|FMemRWM), - .Funct3M(Funct3M), .AtomicM(AtomicM), .MemAdrM(MemAdrM), - // outputs to LSU - .DisableTranslation(DisableTranslation), .MemRWMtoLSU(MemRWMtoLSU), .Funct3MtoLSU(Funct3MtoLSU), - .AtomicMtoLSU(AtomicMtoLSU), .*); + lsuArb arbiter(// HPTW connection + .HPTWTranslate(MMUTranslate), + .HPTWPAdr(MMUPAdr), + .HPTWReadPTE(MMUReadPTE), + .HPTWReady(MMUReady), + // CPU connection + .MemRWM(MemRWM|FMemRWM), + .Funct3M(Funct3M), + .AtomicM(AtomicM), + .MemAdrM(MemAdrM), + .WriteDataM(WriteDatatmpM), + .ReadDataW(ReadDataW), + .CommittedM(CommittedM), + .SquashSCW(SquashSCW), + .DataMisalignedM(DataMisalignedM), + .DCacheStall(DCacheStall), + // LSU + .DisableTranslation(DisableTranslation), + .MemRWMtoLSU(MemRWMtoLSU), + 
.Funct3MtoLSU(Funct3MtoLSU), + .AtomicMtoLSU(AtomicMtoLSU), + .MemAdrMtoLSU(MemAdrMtoLSU), + .WriteDataMtoLSU(WriteDataMtoLSU), + .CommittedMfromLSU(CommittedMfromLSU), + .SquashSCWfromLSU(SquashSCWfromLSU), + .DataMisalignedMfromLSU(DataMisalignedMfromLSU), + .ReadDataWFromLSU(ReadDataWFromLSU), + .DataStall(DataStall), + .*); - lsu lsu(.MemRWM(MemRWMtoLSU), .AtomicM(AtomicMtoLSU), .Funct3M(Funct3MtoLSU), - .DisableTranslation(DisableTranslation), - .WriteDataM(WriteDatatmpM), - .ReadDataW(ReadDataW), .* ); // data cache unit + lsu lsu(.MemRWM(MemRWMtoLSU), + .Funct3M(Funct3MtoLSU), + .AtomicM(AtomicMtoLSU), + .MemAdrM(MemAdrMtoLSU), + .WriteDataM(WriteDataMtoLSU), + .ReadDataW(ReadDataWFromLSU), + + .CommittedM(CommittedMfromLSU), + .SquashSCW(SquashSCWfromLSU), + .DataMisalignedM(DataMisalignedMfromLSU), + .DisableTranslation(DisableTranslation), + + .DataStall(DataStall), .* ); // data cache unit ahblite ebu( - //.InstrReadF(1'b0), - //.InstrRData(InstrF), // hook up InstrF later - .WriteDataM(WriteDatatmpM), - .MemSizeM(Funct3M[1:0]), .UnsignedLoadM(Funct3M[2]), - .Funct7M(InstrM[31:25]), + //.InstrReadF(1'b0), + //.InstrRData(InstrF), // hook up InstrF later + .WriteDataM(WriteDatatmpM), + .MemSizeM(Funct3M[1:0]), .UnsignedLoadM(Funct3M[2]), + .Funct7M(InstrM[31:25]), .HRDATAW(HRDATAW), - .*); + .*); - + muldiv mdu(.*); // multiply and divide unit hazard hzu(.*); // global stall and flush control @@ -216,5 +260,5 @@ module wallypipelinedhart ( // presently stub out SetFlagsM and FloatRegWriteW //assign SetFflagsM = 0; //assign FloatRegWriteW = 0; - + endmodule From c02141697d6170cd25b1b65537fe71a4a72b78b0 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Thu, 24 Jun 2021 13:47:10 -0500 Subject: [PATCH 05/38] Fixed combo loop in between the page table walker and i/dtlb. 
--- wally-pipelined/src/mmu/pagetablewalker.sv | 72 ++++++++++++++-------- 1 file changed, 48 insertions(+), 24 deletions(-) diff --git a/wally-pipelined/src/mmu/pagetablewalker.sv b/wally-pipelined/src/mmu/pagetablewalker.sv index abafd2ce..21749ec5 100644 --- a/wally-pipelined/src/mmu/pagetablewalker.sv +++ b/wally-pipelined/src/mmu/pagetablewalker.sv @@ -36,7 +36,7 @@ module pagetablewalker ( // Control signals - input logic HCLK, HRESETn, + input logic clk, reset, input logic [`XLEN-1:0] SATP_REGW, // Signals from TLBs (addresses to translate) @@ -73,6 +73,10 @@ module pagetablewalker ( ); // Internal signals + // register TLBs translation miss requests + logic [`XLEN-1:0] TranslationVAdrQ; + logic ITLBMissFQ, DTLBMissMQ; + logic [`PPN_BITS-1:0] BasePageTablePPN; logic [`XLEN-1:0] TranslationVAdr; logic [`XLEN-1:0] SavedPTE, CurrentPTE; @@ -98,8 +102,28 @@ module pagetablewalker ( assign MemStore = MemRWM[0]; // Prefer data address translations over instruction address translations - assign TranslationVAdr = (DTLBMissM) ? MemAdrM : PCF; - assign MMUTranslate = DTLBMissM || ITLBMissF; + assign TranslationVAdr = (DTLBMissM) ? 
MemAdrM : PCF; // *** need to register TranslationVAdr + flopenr #(`XLEN) + TranslationVAdrReg(.clk(clk), + .reset(reset), + .en(1'b1), // *** use enable later to save power + .d(TranslationVAdr), + .q(TranslationVAdrQ)); + + flopr #(1) + DTLBMissMReg(.clk(clk), + .reset(reset), + .d(DTLBMissM), + .q(DTLBMissMQ)); + + flopr #(1) + ITLBMissMReg(.clk(clk), + .reset(reset), + .d(ITLBMissF), + .q(ITLBMissFQ)); + + + assign MMUTranslate = DTLBMissMQ | ITLBMissFQ; // unswizzle PTE bits assign {Dirty, Accessed, Global, User, @@ -108,7 +132,7 @@ module pagetablewalker ( // Assign PTE descriptors common across all XLEN values assign LeafPTE = Executable | Writable | Readable; assign ValidPTE = Valid && ~(Writable && ~Readable); - assign AccessAlert = ~Accessed || (MemStore && ~Dirty); + assign AccessAlert = ~Accessed | (MemStore & ~Dirty); // Assign specific outputs to general outputs assign PageTableEntryF = PageTableEntry; @@ -129,7 +153,7 @@ localparam LEVEL0 = 3'h0; if (`XLEN == 32) begin logic [9:0] VPN1, VPN0; - flopenl #(3) mmureg(HCLK, ~HRESETn, 1'b1, NextWalkerState, IDLE, WalkerState); + flopenl #(3) mmureg(clk, reset, 1'b1, NextWalkerState, IDLE, WalkerState); // State transition logic always_comb begin @@ -162,8 +186,8 @@ localparam LEVEL0 = 3'h0; assign MegapageMisaligned = |(CurrentPPN[9:0]); assign BadMegapage = MegapageMisaligned || AccessAlert; // *** Implement better access/dirty scheme - assign VPN1 = TranslationVAdr[31:22]; - assign VPN0 = TranslationVAdr[21:12]; + assign VPN1 = TranslationVAdrQ[31:22]; + assign VPN0 = TranslationVAdrQ[21:12]; // Assign combinational outputs always_comb begin @@ -193,14 +217,14 @@ localparam LEVEL0 = 3'h0; TranslationPAdr = {CurrentPPN, VPN0, 2'b00}; PageTableEntry = CurrentPTE; PageType = (WalkerState == LEVEL1) ? 
2'b01 : 2'b00; - DTLBWriteM = DTLBMissM; - ITLBWriteF = ~DTLBMissM; // Prefer data over instructions + DTLBWriteM = DTLBMissMQ; + ITLBWriteF = ~DTLBMissMQ; // Prefer data over instructions end FAULT: begin TranslationPAdr = {CurrentPPN, VPN0, 2'b00}; - WalkerInstrPageFaultF = ~DTLBMissM; - WalkerLoadPageFaultM = DTLBMissM && ~MemStore; - WalkerStorePageFaultM = DTLBMissM && MemStore; + WalkerInstrPageFaultF = ~DTLBMissMQ; + WalkerLoadPageFaultM = DTLBMissMQ && ~MemStore; + WalkerStorePageFaultM = DTLBMissMQ && MemStore; MMUStall = '0; // Drop the stall early to enter trap handling code end default: begin @@ -210,7 +234,7 @@ localparam LEVEL0 = 3'h0; end // Capture page table entry from ahblite - flopenr #(32) ptereg(HCLK, ~HRESETn, MMUReady, MMUReadPTE, SavedPTE); + flopenr #(32) ptereg(clk, reset, MMUReady, MMUReadPTE, SavedPTE); mux2 #(32) ptemux(SavedPTE, MMUReadPTE, MMUReady, CurrentPTE); assign CurrentPPN = CurrentPTE[`PPN_BITS+9:10]; @@ -227,7 +251,7 @@ localparam LEVEL0 = 3'h0; logic TerapageMisaligned, GigapageMisaligned, BadTerapage, BadGigapage; - flopenl #(3) mmureg(HCLK, ~HRESETn, 1'b1, NextWalkerState, IDLE, WalkerState); + flopenl #(3) mmureg(clk, reset, 1'b1, NextWalkerState, IDLE, WalkerState); always_comb begin case (WalkerState) @@ -294,10 +318,10 @@ localparam LEVEL0 = 3'h0; assign BadGigapage = GigapageMisaligned || AccessAlert; // *** Implement better access/dirty scheme assign BadMegapage = MegapageMisaligned || AccessAlert; // *** Implement better access/dirty scheme - assign VPN3 = TranslationVAdr[47:39]; - assign VPN2 = TranslationVAdr[38:30]; - assign VPN1 = TranslationVAdr[29:21]; - assign VPN0 = TranslationVAdr[20:12]; + assign VPN3 = TranslationVAdrQ[47:39]; + assign VPN2 = TranslationVAdrQ[38:30]; + assign VPN1 = TranslationVAdrQ[29:21]; + assign VPN0 = TranslationVAdrQ[20:12]; always_comb begin // default values @@ -338,15 +362,15 @@ localparam LEVEL0 = 3'h0; PageType = (WalkerState == LEVEL3) ? 2'b11 : ((WalkerState == LEVEL2) ? 
2'b10 : ((WalkerState == LEVEL1) ? 2'b01 : 2'b00)); - DTLBWriteM = DTLBMissM; - ITLBWriteF = ~DTLBMissM; // Prefer data over instructions + DTLBWriteM = DTLBMissMQ; + ITLBWriteF = ~DTLBMissMQ; // Prefer data over instructions end FAULT: begin // Keep physical address alive to prevent HADDR dropping to 0 TranslationPAdr = {CurrentPPN, VPN0, 3'b000}; - WalkerInstrPageFaultF = ~DTLBMissM; - WalkerLoadPageFaultM = DTLBMissM && ~MemStore; - WalkerStorePageFaultM = DTLBMissM && MemStore; + WalkerInstrPageFaultF = ~DTLBMissMQ; + WalkerLoadPageFaultM = DTLBMissMQ && ~MemStore; + WalkerStorePageFaultM = DTLBMissMQ && MemStore; MMUStall = '0; // Drop the stall early to enter trap handling code end default: begin @@ -356,7 +380,7 @@ localparam LEVEL0 = 3'h0; end // Capture page table entry from ahblite - flopenr #(`XLEN) ptereg(HCLK, ~HRESETn, MMUReady, MMUReadPTE, SavedPTE); + flopenr #(`XLEN) ptereg(clk, reset, MMUReady, MMUReadPTE, SavedPTE); mux2 #(`XLEN) ptemux(SavedPTE, MMUReadPTE, MMUReady, CurrentPTE); assign CurrentPPN = CurrentPTE[`PPN_BITS+9:10]; From 6bab454b17a68b33c9a988476a8f69e60372b859 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Thu, 24 Jun 2021 14:42:59 -0500 Subject: [PATCH 06/38] Works until pma checker breaks the simulation by reading HADDR rather than data physical address. 
--- .../regression/wally-pipelined-ross.do | 4 +- wally-pipelined/regression/wave.do | 149 +++++++++--------- wally-pipelined/src/ebu/ahblite.sv | 22 ++- wally-pipelined/src/lsu/lsu.sv | 2 +- 4 files changed, 88 insertions(+), 89 deletions(-) diff --git a/wally-pipelined/regression/wally-pipelined-ross.do b/wally-pipelined/regression/wally-pipelined-ross.do index 90a4f5c2..15a515de 100644 --- a/wally-pipelined/regression/wally-pipelined-ross.do +++ b/wally-pipelined/regression/wally-pipelined-ross.do @@ -35,8 +35,8 @@ switch $argc { } # start and run simulation # remove +acc flag for faster sim during regressions if there is no need to access internal signals -vopt +acc -gDEBUG=1 work.testbench -o workopt -vsim workopt +vopt -fsmdebug +acc -gDEBUG=1 work.testbench -o workopt +vsim workopt -fsmdebug do wave.do diff --git a/wally-pipelined/regression/wave.do b/wally-pipelined/regression/wave.do index 9210a1a9..88879334 100644 --- a/wally-pipelined/regression/wave.do +++ b/wally-pipelined/regression/wave.do @@ -20,14 +20,14 @@ add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/LoadPageFaultM add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/StorePageFaultM add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/InterruptM -add wave -noupdate -expand -group HDU -group hazards /testbench/dut/hart/hzu/BPPredWrongE -add wave -noupdate -expand -group HDU -group hazards /testbench/dut/hart/hzu/CSRWritePendingDEM -add wave -noupdate -expand -group HDU -group hazards /testbench/dut/hart/hzu/RetM -add wave -noupdate -expand -group HDU -group hazards /testbench/dut/hart/hzu/TrapM -add wave -noupdate -expand -group HDU -group hazards /testbench/dut/hart/hzu/LoadStallD -add wave -noupdate -expand -group HDU -group hazards /testbench/dut/hart/hzu/ICacheStallF -add wave -noupdate -expand -group HDU -group hazards 
/testbench/dut/hart/hzu/DataStall -add wave -noupdate -expand -group HDU -group hazards /testbench/dut/hart/MulDivStallD +add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/hzu/BPPredWrongE +add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/hzu/CSRWritePendingDEM +add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/hzu/RetM +add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/hzu/TrapM +add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/hzu/LoadStallD +add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/hzu/ICacheStallF +add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/DataStall +add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/MulDivStallD add wave -noupdate -expand -group HDU -expand -group Flush -color Yellow /testbench/dut/hart/hzu/FlushF add wave -noupdate -expand -group HDU -expand -group Flush -color Yellow /testbench/dut/hart/FlushD add wave -noupdate -expand -group HDU -expand -group Flush -color Yellow /testbench/dut/hart/FlushE @@ -129,19 +129,9 @@ add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/zero add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/neg add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/lt add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/ltu -add wave -noupdate -group {dcache memory} /testbench/dut/hart/dmem/MemReadM -add wave -noupdate -group {dcache memory} /testbench/dut/hart/dmem/MemWriteM -add wave -noupdate -group {dcache memory} /testbench/dut/hart/dmem/MemAckW -add wave -noupdate -group dcache -expand -group {cpu request} /testbench/dut/hart/dmem/MemRWM -add wave -noupdate -group dcache -expand -group {cpu request} /testbench/dut/hart/dmem/AtomicM add wave -noupdate -group dcache -expand -group {cpu request} /testbench/dut/hart/MemAdrM -add wave 
-noupdate -group dcache -expand -group {cpu request} /testbench/dut/hart/dmem/ReadDataW add wave -noupdate -group dcache -expand -group {cpu request} /testbench/dut/hart/WriteDataM -add wave -noupdate -group dcache -color Gray90 /testbench/dut/hart/dmem/CurrState add wave -noupdate -group dcache /testbench/dut/hart/MemPAdrM -add wave -noupdate -group dcache /testbench/dut/hart/dmem/MemAccessM -add wave -noupdate -group dcache /testbench/dut/hart/dmem/AtomicMaskedM -add wave -noupdate -group dcache /testbench/dut/hart/dmem/MemAckW add wave -noupdate -group Forward /testbench/dut/hart/ieu/fw/Rs1D add wave -noupdate -group Forward /testbench/dut/hart/ieu/fw/Rs2D add wave -noupdate -group Forward /testbench/dut/hart/ieu/fw/Rs1E @@ -184,66 +174,69 @@ add wave -noupdate -group divider /testbench/dut/hart/mdu/genblk1/div/N add wave -noupdate -group divider /testbench/dut/hart/mdu/genblk1/div/D add wave -noupdate -group divider /testbench/dut/hart/mdu/genblk1/div/Q add wave -noupdate -group divider /testbench/dut/hart/mdu/genblk1/div/rem0 -add wave -noupdate -expand -group icache -color Orange /testbench/dut/hart/ifu/icache/controller/CurrState -add wave -noupdate -expand -group icache /testbench/dut/hart/ifu/icache/controller/NextState -add wave -noupdate -expand -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/hit -add wave -noupdate -expand -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/spill -add wave -noupdate -expand -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/ICacheStallF -add wave -noupdate -expand -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/SavePC -add wave -noupdate -expand -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/spillSave -add wave -noupdate -expand -group icache -expand -group {fsm out and control} 
/testbench/dut/hart/ifu/icache/controller/UnalignedSelect -add wave -noupdate -expand -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/PCMux -add wave -noupdate -expand -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/spillSave -add wave -noupdate -expand -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/CntReset -add wave -noupdate -expand -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/PreCntEn -add wave -noupdate -expand -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/CntEn -add wave -noupdate -expand -group icache -expand -group {icache parameters} -radix unsigned /testbench/dut/hart/ifu/icache/cachemem/NUMLINES -add wave -noupdate -expand -group icache -expand -group {icache parameters} -radix unsigned /testbench/dut/hart/ifu/icache/cachemem/BLOCKLEN -add wave -noupdate -expand -group icache -expand -group {icache parameters} -radix unsigned /testbench/dut/hart/ifu/icache/cachemem/BLOCKBYTELEN -add wave -noupdate -expand -group icache -expand -group {icache parameters} -radix unsigned /testbench/dut/hart/ifu/icache/cachemem/OFFSETLEN -add wave -noupdate -expand -group icache -expand -group {icache parameters} -radix unsigned /testbench/dut/hart/ifu/icache/cachemem/INDEXLEN -add wave -noupdate -expand -group icache -expand -group {icache parameters} -radix unsigned /testbench/dut/hart/ifu/icache/cachemem/TAGLEN -add wave -noupdate -expand -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/FetchCountFlag -add wave -noupdate -expand -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/FetchCount -add wave -noupdate -expand -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/InstrPAdrF -add wave -noupdate -expand -group icache -expand -group memory 
/testbench/dut/hart/ifu/icache/controller/InstrReadF -add wave -noupdate -expand -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/InstrAckF -add wave -noupdate -expand -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/InstrInF -add wave -noupdate -expand -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/ICacheMemWriteEnable -add wave -noupdate -expand -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/ICacheMemWriteData -add wave -noupdate -expand -group icache -expand -group memory -group {tag read} /testbench/dut/hart/ifu/icache/cachemem/DataValidBit -add wave -noupdate -expand -group icache -expand -group memory -group {tag read} /testbench/dut/hart/ifu/icache/cachemem/cachetags/ReadData -add wave -noupdate -expand -group icache -expand -group memory -group {tag write} /testbench/dut/hart/ifu/icache/cachemem/WriteEnable -add wave -noupdate -expand -group icache -expand -group memory -group {tag write} /testbench/dut/hart/ifu/icache/cachemem/WriteLine -add wave -noupdate -expand -group icache -expand -group memory -group {tag write} /testbench/dut/hart/ifu/icache/cachemem/cachetags/StoredData -add wave -noupdate -expand -group icache -expand -group {instr to cpu} /testbench/dut/hart/ifu/icache/controller/FinalInstrRawF -add wave -noupdate -expand -group icache -expand -group {instr to cpu} /testbench/dut/hart/ifu/icache/controller/AlignedInstrRawD -add wave -noupdate -expand -group icache -expand -group {instr to cpu} /testbench/dut/hart/ifu/icache/controller/FlushDLastCyclen -add wave -noupdate -expand -group icache -expand -group {instr to cpu} /testbench/dut/hart/ifu/icache/controller/InstrRawD -add wave -noupdate -expand -group icache -expand -group pc /testbench/dut/hart/ifu/icache/controller/PCPF -add wave -noupdate -expand -group icache -expand -group pc /testbench/dut/hart/ifu/icache/controller/PCPreFinalF -add wave -noupdate -expand -group 
icache -expand -group pc /testbench/dut/hart/ifu/icache/controller/PCPFinalF -add wave -noupdate -group AHB /testbench/dut/hart/ebu/BusState -add wave -noupdate -group AHB /testbench/dut/hart/ebu/HCLK -add wave -noupdate -group AHB /testbench/dut/hart/ebu/HRDATA -add wave -noupdate -group AHB /testbench/dut/hart/ebu/HREADY -add wave -noupdate -group AHB /testbench/dut/hart/ebu/HRESP -add wave -noupdate -group AHB /testbench/dut/hart/ebu/HADDR -add wave -noupdate -group AHB /testbench/dut/hart/ebu/HWDATA -add wave -noupdate -group AHB /testbench/dut/hart/ebu/HWRITE -add wave -noupdate -group AHB /testbench/dut/hart/ebu/HSIZE -add wave -noupdate -group AHB /testbench/dut/hart/ebu/HBURST -add wave -noupdate -group AHB /testbench/dut/hart/ebu/HPROT -add wave -noupdate -group AHB /testbench/dut/hart/ebu/HTRANS -add wave -noupdate -group AHB /testbench/dut/hart/ebu/HMASTLOCK -add wave -noupdate -group AHB /testbench/dut/hart/ebu/HADDRD -add wave -noupdate -group AHB /testbench/dut/hart/ebu/HSIZED -add wave -noupdate -group AHB /testbench/dut/hart/ebu/HWRITED -add wave -noupdate -group csr -color Aquamarine -label {br executed} -radix unsigned {/testbench/dut/hart/priv/csr/genblk1/counters/genblk2/HPMCOUNTER_REGW[5]} -add wave -noupdate -group csr -color Aquamarine -label {br miss predicted} -radix unsigned {/testbench/dut/hart/priv/csr/genblk1/counters/genblk2/HPMCOUNTER_REGW[4]} -add wave -noupdate -group csr -childformat {{{/testbench/dut/hart/priv/csr/genblk1/counters/genblk2/HPMCOUNTER_REGW[5]} -radix unsigned} {{/testbench/dut/hart/priv/csr/genblk1/counters/genblk2/HPMCOUNTER_REGW[4]} -radix unsigned}} -subitemconfig {{/testbench/dut/hart/priv/csr/genblk1/counters/genblk2/HPMCOUNTER_REGW[5]} {-height 16 -radix unsigned} {/testbench/dut/hart/priv/csr/genblk1/counters/genblk2/HPMCOUNTER_REGW[4]} {-height 16 -radix unsigned}} /testbench/dut/hart/priv/csr/genblk1/counters/genblk2/HPMCOUNTER_REGW +add wave -noupdate -group icache -color Orange 
/testbench/dut/hart/ifu/icache/controller/CurrState +add wave -noupdate -group icache /testbench/dut/hart/ifu/icache/controller/NextState +add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/hit +add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/spill +add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/ICacheStallF +add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/SavePC +add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/spillSave +add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/UnalignedSelect +add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/PCMux +add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/spillSave +add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/CntReset +add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/PreCntEn +add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/CntEn +add wave -noupdate -group icache -expand -group {icache parameters} -radix unsigned /testbench/dut/hart/ifu/icache/cachemem/NUMLINES +add wave -noupdate -group icache -expand -group {icache parameters} -radix unsigned /testbench/dut/hart/ifu/icache/cachemem/BLOCKLEN +add wave -noupdate -group icache -expand -group {icache parameters} -radix unsigned /testbench/dut/hart/ifu/icache/cachemem/BLOCKBYTELEN +add wave -noupdate -group icache -expand -group {icache parameters} -radix unsigned 
/testbench/dut/hart/ifu/icache/cachemem/OFFSETLEN +add wave -noupdate -group icache -expand -group {icache parameters} -radix unsigned /testbench/dut/hart/ifu/icache/cachemem/INDEXLEN +add wave -noupdate -group icache -expand -group {icache parameters} -radix unsigned /testbench/dut/hart/ifu/icache/cachemem/TAGLEN +add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/FetchCountFlag +add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/FetchCount +add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/InstrPAdrF +add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/InstrReadF +add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/InstrAckF +add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/InstrInF +add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/ICacheMemWriteEnable +add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/ICacheMemWriteData +add wave -noupdate -group icache -expand -group memory -group {tag read} /testbench/dut/hart/ifu/icache/cachemem/DataValidBit +add wave -noupdate -group icache -expand -group memory -group {tag read} /testbench/dut/hart/ifu/icache/cachemem/cachetags/ReadData +add wave -noupdate -group icache -expand -group memory -group {tag write} /testbench/dut/hart/ifu/icache/cachemem/WriteEnable +add wave -noupdate -group icache -expand -group memory -group {tag write} /testbench/dut/hart/ifu/icache/cachemem/WriteLine +add wave -noupdate -group icache -expand -group memory -group {tag write} /testbench/dut/hart/ifu/icache/cachemem/cachetags/StoredData +add wave -noupdate -group icache -expand -group {instr to cpu} /testbench/dut/hart/ifu/icache/controller/FinalInstrRawF +add wave -noupdate 
-group icache -expand -group pc /testbench/dut/hart/ifu/icache/controller/PCPF +add wave -noupdate -group icache -expand -group pc /testbench/dut/hart/ifu/icache/controller/PCPreFinalF +add wave -noupdate -expand -group AHB /testbench/dut/hart/ebu/BusState +add wave -noupdate -expand -group AHB /testbench/dut/hart/ebu/ProposedNextBusState +add wave -noupdate -expand -group AHB /testbench/dut/hart/ebu/NextBusState +add wave -noupdate -expand -group AHB /testbench/dut/hart/ebu/DSquashBusAccessM +add wave -noupdate -expand -group AHB /testbench/dut/hart/ebu/ISquashBusAccessF +add wave -noupdate -expand -group AHB -expand -group {input requests} /testbench/dut/hart/ebu/AtomicMaskedM +add wave -noupdate -expand -group AHB -expand -group {input requests} /testbench/dut/hart/ebu/MemReadM +add wave -noupdate -expand -group AHB -expand -group {input requests} /testbench/dut/hart/ebu/MemWriteM +add wave -noupdate -expand -group AHB -expand -group {input requests} /testbench/dut/hart/ebu/InstrReadF +add wave -noupdate -expand -group AHB /testbench/dut/hart/ebu/HCLK +add wave -noupdate -expand -group AHB /testbench/dut/hart/ebu/HRESETn +add wave -noupdate -expand -group AHB /testbench/dut/hart/ebu/HRDATA +add wave -noupdate -expand -group AHB /testbench/dut/hart/ebu/HREADY +add wave -noupdate -expand -group AHB /testbench/dut/hart/ebu/HRESP +add wave -noupdate -expand -group AHB /testbench/dut/hart/ebu/HADDR +add wave -noupdate -expand -group AHB /testbench/dut/hart/ebu/HWDATA +add wave -noupdate -expand -group AHB /testbench/dut/hart/ebu/HWRITE +add wave -noupdate -expand -group AHB /testbench/dut/hart/ebu/HSIZE +add wave -noupdate -expand -group AHB /testbench/dut/hart/ebu/HBURST +add wave -noupdate -expand -group AHB /testbench/dut/hart/ebu/HPROT +add wave -noupdate -expand -group AHB /testbench/dut/hart/ebu/HTRANS +add wave -noupdate -expand -group AHB /testbench/dut/hart/ebu/HMASTLOCK +add wave -noupdate -expand -group AHB /testbench/dut/hart/ebu/HADDRD +add wave 
-noupdate -expand -group AHB /testbench/dut/hart/ebu/HSIZED +add wave -noupdate -expand -group AHB /testbench/dut/hart/ebu/HWRITED +add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/CurrState TreeUpdate [SetDefaultTree] -WaveRestoreCursors {{Cursor 4} {32648010 ns} 0} {{Cursor 5} {12105831 ns} 0} +WaveRestoreCursors {{Cursor 4} {32648010 ns} 0} {{Cursor 5} {4216 ns} 0} quietly wave cursor active 2 configure wave -namecolwidth 250 configure wave -valuecolwidth 189 @@ -259,4 +252,4 @@ configure wave -griddelta 40 configure wave -timeline 0 configure wave -timelineunits ns update -WaveRestoreZoom {0 ns} {30754715 ns} +WaveRestoreZoom {4167 ns} {4406 ns} diff --git a/wally-pipelined/src/ebu/ahblite.sv b/wally-pipelined/src/ebu/ahblite.sv index c459da91..39cc70d2 100644 --- a/wally-pipelined/src/ebu/ahblite.sv +++ b/wally-pipelined/src/ebu/ahblite.sv @@ -52,11 +52,13 @@ module ahblite ( input logic [`XLEN-1:0] WriteDataM, input logic [1:0] MemSizeM, // Signals from MMU +/* -----\/----- EXCLUDED -----\/----- input logic MMUStall, input logic [`XLEN-1:0] MMUPAdr, input logic MMUTranslate, output logic [`XLEN-1:0] MMUReadPTE, output logic MMUReady, + -----/\----- EXCLUDED -----/\----- */ // Signals from PMA checker input logic DSquashBusAccessM, ISquashBusAccessF, // Signals to PMA checker (metadata of proposed access) @@ -114,14 +116,16 @@ module ahblite ( // interface that might be used in place of the ahblite. 
always_comb case (BusState) - IDLE: if (MMUTranslate) ProposedNextBusState = MMUTRANSLATE; - else if (AtomicMaskedM[1]) ProposedNextBusState = ATOMICREAD; + IDLE: /*if (MMUTranslate) ProposedNextBusState = MMUTRANSLATE; + else*/ if (AtomicMaskedM[1]) ProposedNextBusState = ATOMICREAD; else if (MemReadM) ProposedNextBusState = MEMREAD; // Memory has priority over instructions else if (MemWriteM) ProposedNextBusState = MEMWRITE; else if (InstrReadF) ProposedNextBusState = INSTRREAD; else ProposedNextBusState = IDLE; +/* -----\/----- EXCLUDED -----\/----- MMUTRANSLATE: if (~HREADY) ProposedNextBusState = MMUTRANSLATE; else ProposedNextBusState = IDLE; + -----/\----- EXCLUDED -----/\----- */ ATOMICREAD: if (~HREADY) ProposedNextBusState = ATOMICREAD; else ProposedNextBusState = ATOMICWRITE; ATOMICWRITE: if (~HREADY) ProposedNextBusState = ATOMICWRITE; @@ -142,8 +146,8 @@ module ahblite ( assign AtomicAccessM = (ProposedNextBusState == ATOMICREAD) || (ProposedNextBusState == ATOMICWRITE); assign ExecuteAccessF = (ProposedNextBusState == INSTRREAD); assign WriteAccessM = (ProposedNextBusState == MEMWRITE) || (ProposedNextBusState == ATOMICWRITE); - assign ReadAccessM = (ProposedNextBusState == MEMREAD) || (ProposedNextBusState == ATOMICREAD) || - (ProposedNextBusState == MMUTRANSLATE); + assign ReadAccessM = (ProposedNextBusState == MEMREAD) || (ProposedNextBusState == ATOMICREAD);// || +// (ProposedNextBusState == MMUTRANSLATE); // The PMA and PMP checkers can decide to squash the access assign NextBusState = (DSquashBusAccessM || ISquashBusAccessF) ? IDLE : ProposedNextBusState; @@ -165,14 +169,16 @@ module ahblite ( assign #1 GrantData = (ProposedNextBusState == MEMREAD) || (ProposedNextBusState == MEMWRITE) || (ProposedNextBusState == ATOMICREAD) || (ProposedNextBusState == ATOMICWRITE); assign #1 AccessAddress = (GrantData) ? MemPAdrM[31:0] : InstrPAdrF[31:0]; - assign #1 HADDR = (MMUTranslate) ? MMUPAdr[31:0] : AccessAddress; + //assign #1 HADDR = (MMUTranslate) ? 
MMUPAdr[31:0] : AccessAddress; + assign #1 HADDR = AccessAddress; generate if (`XLEN == 32) assign PTESize = 3'b010; // in rv32, PTEs are 4 bytes else assign PTESize = 3'b011; // in rv64, PTEs are 8 bytes endgenerate assign ISize = 3'b010; // 32 bit instructions for now; later improve for filling cache with full width; ignored on reads anyway assign #1 AccessSize = (GrantData) ? {1'b0, MemSizeM} : ISize; - assign #1 HSIZE = (MMUTranslate) ? PTESize : AccessSize; + //assign #1 HSIZE = (MMUTranslate) ? PTESize : AccessSize; + assign #1 HSIZE = AccessSize; assign HBURST = 3'b000; // Single burst only supported; consider generalizing for cache fillsfH assign HPROT = 4'b0011; // not used; see Section 3.7 assign HTRANS = (NextBusState != IDLE) ? 2'b10 : 2'b00; // NONSEQ if reading or writing, IDLE otherwise @@ -188,7 +194,7 @@ module ahblite ( // Route signals to Instruction and Data Caches // *** assumes AHBW = XLEN - assign MMUReady = (BusState == MMUTRANSLATE && HREADY); + //assign MMUReady = (BusState == MMUTRANSLATE && HREADY); assign InstrRData = HRDATA; assign InstrAckF = (BusState == INSTRREAD) && (NextBusState != INSTRREAD); @@ -196,7 +202,7 @@ module ahblite ( // *** Bracker 6/5/21: why is this W stage? assign MemAckW = (BusState == MEMREAD) && (NextBusState != MEMREAD) || (BusState == MEMWRITE) && (NextBusState != MEMWRITE) || ((BusState == ATOMICREAD) && (NextBusState != ATOMICREAD)) || ((BusState == ATOMICWRITE) && (NextBusState != ATOMICWRITE)); - assign MMUReadPTE = HRDATA; + //assign MMUReadPTE = HRDATA; // Carefully decide when to update ReadDataW // ReadDataMstored holds the most recent memory read. 
// We need to wait until the pipeline actually advances before we can update the contents of ReadDataW diff --git a/wally-pipelined/src/lsu/lsu.sv b/wally-pipelined/src/lsu/lsu.sv index 490e0460..c537393b 100644 --- a/wally-pipelined/src/lsu/lsu.sv +++ b/wally-pipelined/src/lsu/lsu.sv @@ -195,7 +195,7 @@ module lsu ( endgenerate // Data stall - assign DataStall = CurrState != STATE_READY; + assign DataStall = (CurrState == STATE_FETCH) || (CurrState == STATE_FETCH_AMO); // Ross Thompson April 22, 2021 // for now we need to handle the issue where the data memory interface repeately From ac597d78c8d579a1efca814d23e680945eba4870 Mon Sep 17 00:00:00 2001 From: Kip Macsai-Goren Date: Thu, 24 Jun 2021 19:59:29 -0400 Subject: [PATCH 07/38] Removed AHB address, etc signals from physical memory checkers, replaced with physical address from cpu or ptw. Passes lint but not simulations. --- wally-pipelined/src/ifu/ifu.sv | 30 +++++++++++--------- wally-pipelined/src/lsu/lsu.sv | 18 ++++++------ wally-pipelined/src/mmu/adrdec.sv | 20 ++++++------- wally-pipelined/src/mmu/adrdecs.sv | 21 +++++++------- wally-pipelined/src/mmu/mmu.sv | 12 ++++---- wally-pipelined/src/mmu/pmachecker.sv | 31 ++++++++++---------- wally-pipelined/src/mmu/pmpadrdec.sv | 11 +++---- wally-pipelined/src/mmu/pmpchecker.sv | 41 ++++++++++++++------------- 8 files changed, 96 insertions(+), 88 deletions(-) diff --git a/wally-pipelined/src/ifu/ifu.sv b/wally-pipelined/src/ifu/ifu.sv index 2fa5cbfd..6cf6220f 100644 --- a/wally-pipelined/src/ifu/ifu.sv +++ b/wally-pipelined/src/ifu/ifu.sv @@ -75,15 +75,15 @@ module ifu ( output logic ITLBMissF, ITLBHitF, // pmp/pma (inside mmu) signals. 
*** temporarily from AHB bus but eventually replace with internal versions pre H - input logic [31:0] HADDR, - input logic [2:0] HSIZE, - input logic HWRITE, +// input logic [31:0] HADDR, +// input logic [2:0] HSIZE, +// input logic HWRITE, input logic [63:0] PMPCFG01_REGW, PMPCFG23_REGW, // *** all of these come from the privileged unit, so they're gonna have to come over into ifu and dmem input var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0], output logic PMPInstrAccessFaultF, PMAInstrAccessFaultF, - output logic ISquashBusAccessF, - output logic [5:0] IHSELRegionsF + output logic ISquashBusAccessF +// output logic [5:0] IHSELRegionsF ); @@ -104,13 +104,17 @@ module ifu ( logic PMPLoadAccessFaultM, PMPStoreAccessFaultM; // *** these are just so that the mmu has somewhere to put these outputs, they're unused in this stage // if you're allowed to parameterize outputs/ inputs existence, these are an easy delete. - logic [`PA_BITS-1:0] PCPFmmu; + logic [`PA_BITS-1:0] PCPFmmu, PCNextFPhys; // used to either truncate or expand PCPF and PCNextF into `PA_BITS width. 
+; generate - if (`XLEN==32) + if (`XLEN==32) begin assign PCPF = PCPFmmu[31:0]; - else + assign PCNextFPhys = {{(`PA_BITS-`XLEN){1'b0}}, PCNextF}; + end else begin assign PCPF = {8'b0, PCPFmmu}; + assign PCNextFPhys = PCNextF[`PA_BITS-1:0]; + end endgenerate mmu #(.ENTRY_BITS(`ITLB_ENTRY_BITS), .IMMU(1)) @@ -125,12 +129,12 @@ module ifu ( .TLBMiss(ITLBMissF), .TLBHit(ITLBHitF), .TLBPageFault(ITLBInstrPageFaultF), - .ExecuteAccessF(1'b1), + .InstrReadF(InstrReadF), .AtomicAccessM(1'b0), - .WriteAccessM(1'b0), - .ReadAccessM(1'b0), + .MemReadM(1'b0), + .MemWriteM(1'b0), .SquashBusAccess(ISquashBusAccessF), - .HSELRegions(IHSELRegionsF), +// .HSELRegions(IHSELRegionsF), .DisableTranslation(1'b0), .*); @@ -147,7 +151,7 @@ module ifu ( // assign InstrReadF = 1; // *** & ICacheMissF; add later icache icache(.*, - .PCNextF(PCNextF[`PA_BITS-1:0]), + .PCNextF(PCNextFPhys), .PCPF(PCPFmmu)); flopenl #(32) AlignedInstrRawDFlop(clk, reset | reset_q, ~StallD, FlushD ? nop : FinalInstrRawF, nop, InstrRawD); diff --git a/wally-pipelined/src/lsu/lsu.sv b/wally-pipelined/src/lsu/lsu.sv index c537393b..096dd07c 100644 --- a/wally-pipelined/src/lsu/lsu.sv +++ b/wally-pipelined/src/lsu/lsu.sv @@ -92,8 +92,8 @@ module lsu ( output logic PMALoadAccessFaultM, PMAStoreAccessFaultM, output logic PMPLoadAccessFaultM, PMPStoreAccessFaultM, // *** can these be parameterized? we dont need the m stage ones for the immu and vice versa. 
- output logic DSquashBusAccessM, - output logic [5:0] DHSELRegionsM + output logic DSquashBusAccessM +// output logic [5:0] DHSELRegionsM ); @@ -127,12 +127,12 @@ module lsu ( .TLBMiss(DTLBMissM), .TLBHit(DTLBHitM), .TLBPageFault(DTLBPageFaultM), - .ExecuteAccessF(1'b0), - .AtomicAccessM(|AtomicM), - .WriteAccessM(MemRWM[0]), - .ReadAccessM(MemRWM[1]), + .InstrReadF(1'b0), + .AtomicAccessM(AtomicMaskedM[1]), + .MemWriteM(MemRWM[0]), + .MemReadM(MemRWM[1]), .SquashBusAccess(DSquashBusAccessM), - .HSELRegions(DHSELRegionsM), +// .SelRegions(DHSELRegionsM), .*); // *** the pma/pmp instruction acess faults don't really matter here. is it possible to parameterize which outputs exist? // Specify which type of page fault is occurring @@ -214,13 +214,13 @@ module lsu ( else NextState = STATE_READY; STATE_FETCH_AMO: if (MemAckW) NextState = STATE_FETCH; else NextState = STATE_FETCH_AMO; - STATE_FETCH: if (MemAckW & ~StallW) NextState = STATE_READY; + STATE_FETCH: if (MemAckW & ~StallW) NextState = STATE_READY; // StallW will stay high if datastall stays high, so right now, once we get into STATE_FETCH, datastall goes high, and we never leave else if (MemAckW & StallW) NextState = STATE_STALLED; else NextState = STATE_FETCH; STATE_STALLED: if (~StallW) NextState = STATE_READY; else NextState = STATE_STALLED; default: NextState = STATE_READY; - endcase // case (CurrState) + endcase end endmodule diff --git a/wally-pipelined/src/mmu/adrdec.sv b/wally-pipelined/src/mmu/adrdec.sv index e2c63731..5995d8e3 100644 --- a/wally-pipelined/src/mmu/adrdec.sv +++ b/wally-pipelined/src/mmu/adrdec.sv @@ -26,13 +26,13 @@ `include "wally-config.vh" module adrdec ( - input logic [31:0] HADDR, - input logic [31:0] Base, Range, - input logic Supported, - input logic AccessValid, - input logic [2:0] Size, - input logic [3:0] SizeMask, - output logic HSEL + input logic [`PA_BITS-1:0] PhysicalAddress, + input logic [`PA_BITS-1:0] Base, Range, + input logic Supported, + input logic AccessValid, + 
input logic [1:0] Size, + input logic [3:0] SizeMask, + output logic Sel ); logic Match; @@ -41,12 +41,12 @@ module adrdec ( // determine if an address is in a range starting at the base // for example, if Base = 0x04002000 and range = 0x00000FFF, // then anything address between 0x04002000 and 0x04002FFF should match (HSEL=1) - assign Match = &((HADDR ~^ Base) | Range); + assign Match = &((PhysicalAddress ~^ Base) | Range); // determine if legal size of access is being made (byte, halfword, word, doubleword) - assign SizeValid = SizeMask[Size[1:0]]; + assign SizeValid = SizeMask[Size]; - assign HSEL = Match && Supported && AccessValid && SizeValid; + assign Sel = Match && Supported && AccessValid && SizeValid; endmodule diff --git a/wally-pipelined/src/mmu/adrdecs.sv b/wally-pipelined/src/mmu/adrdecs.sv index 17f78d51..8585a4ee 100644 --- a/wally-pipelined/src/mmu/adrdecs.sv +++ b/wally-pipelined/src/mmu/adrdecs.sv @@ -26,19 +26,20 @@ `include "wally-config.vh" module adrdecs ( - input logic [31:0] HADDR, // *** will need to use PAdr in mmu, stick with HADDR in uncore - input logic AccessRW, AccessRX, AccessRWX, - input logic [2:0] HSIZE, - output logic [5:0] HSELRegions + input logic [`PA_BITS-1:0] PhysicalAddress, + input logic AccessRW, AccessRX, AccessRWX, + input logic [1:0] Size, + output logic [5:0] SelRegions ); // Determine which region of physical memory (if any) is being accessed // *** eventually uncomment Access signals - adrdec boottimdec(HADDR, `BOOTTIM_BASE, `BOOTTIM_RANGE, `BOOTTIM_SUPPORTED, 1'b1/*AccessRX*/, HSIZE, 4'b1111, HSELRegions[5]); - adrdec timdec(HADDR, `TIM_BASE, `TIM_RANGE, `TIM_SUPPORTED, 1'b1/*AccessRWX*/, HSIZE, 4'b1111, HSELRegions[4]); - adrdec clintdec(HADDR, `CLINT_BASE, `CLINT_RANGE, `CLINT_SUPPORTED, AccessRW, HSIZE, 4'b1111, HSELRegions[3]); - adrdec gpiodec(HADDR, `GPIO_BASE, `GPIO_RANGE, `GPIO_SUPPORTED, AccessRW, HSIZE, 4'b0100, HSELRegions[2]); - adrdec uartdec(HADDR, `UART_BASE, `UART_RANGE, `UART_SUPPORTED, AccessRW, 
HSIZE, 4'b0001, HSELRegions[1]); - adrdec plicdec(HADDR, `PLIC_BASE, `PLIC_RANGE, `PLIC_SUPPORTED, AccessRW, HSIZE, 4'b0100, HSELRegions[0]); + adrdec boottimdec(PhysicalAddress, `BOOTTIM_BASE, `BOOTTIM_RANGE, `BOOTTIM_SUPPORTED, 1'b1/*AccessRX*/, Size, 4'b1111, SelRegions[5]); + adrdec timdec(PhysicalAddress, `TIM_BASE, `TIM_RANGE, `TIM_SUPPORTED, 1'b1/*AccessRWX*/, Size, 4'b1111, SelRegions[4]); + adrdec clintdec(PhysicalAddress, `CLINT_BASE, `CLINT_RANGE, `CLINT_SUPPORTED, AccessRW, Size, 4'b1111, SelRegions[3]); + adrdec gpiodec(PhysicalAddress, `GPIO_BASE, `GPIO_RANGE, `GPIO_SUPPORTED, AccessRW, Size, 4'b0100, SelRegions[2]); + adrdec uartdec(PhysicalAddress, `UART_BASE, `UART_RANGE, `UART_SUPPORTED, AccessRW, Size, 4'b0001, SelRegions[1]); + adrdec plicdec(PhysicalAddress, `PLIC_BASE, `PLIC_RANGE, `PLIC_SUPPORTED, AccessRW, Size, 4'b0100, SelRegions[0]); + endmodule diff --git a/wally-pipelined/src/mmu/mmu.sv b/wally-pipelined/src/mmu/mmu.sv index 60f46b04..b6224cc4 100644 --- a/wally-pipelined/src/mmu/mmu.sv +++ b/wally-pipelined/src/mmu/mmu.sv @@ -67,17 +67,17 @@ module mmu #(parameter ENTRY_BITS = 3, output logic TLBPageFault, // PMA checker signals - input logic [31:0] HADDR, - input logic [2:0] HSIZE, - input logic HWRITE, - input logic AtomicAccessM, ExecuteAccessF, WriteAccessM, ReadAccessM, +// input logic [31:0] HADDR, +// input logic [2:0] HSIZE, +// input logic HWRITE, + input logic AtomicAccessM, InstrReadF, MemWriteM, MemReadM, input logic [63:0] PMPCFG01_REGW, PMPCFG23_REGW, // *** all of these come from the privileged unit, so thwyre gonna have to come over into ifu and dmem input var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0], output logic SquashBusAccess, // *** send to privileged unit output logic PMPInstrAccessFaultF, PMPLoadAccessFaultM, PMPStoreAccessFaultM, - output logic PMAInstrAccessFaultF, PMALoadAccessFaultM, PMAStoreAccessFaultM, - output logic [5:0] HSELRegions + output logic PMAInstrAccessFaultF, PMALoadAccessFaultM, 
PMAStoreAccessFaultM +// output logic [5:0] SelRegions ); diff --git a/wally-pipelined/src/mmu/pmachecker.sv b/wally-pipelined/src/mmu/pmachecker.sv index 1d8cc3ee..da8efe97 100644 --- a/wally-pipelined/src/mmu/pmachecker.sv +++ b/wally-pipelined/src/mmu/pmachecker.sv @@ -32,17 +32,15 @@ module pmachecker ( input logic [`PA_BITS-1:0] PhysicalAddress, input logic [1:0] Size, - input logic [31:0] HADDR, - input logic [2:0] HSIZE, +// input logic [31:0] HADDR, +// input logic [2:0] HSIZE, // input logic [2:0] HBURST, // *** in AHBlite, HBURST is hardwired to zero for single bursts only allowed. consider removing from this module if unused. - input logic AtomicAccessM, ExecuteAccessF, WriteAccessM, ReadAccessM, // *** atomicaccessM is unused but might want to stay in for future use. + input logic AtomicAccessM, InstrReadF, MemWriteM, MemReadM, // *** atomicaccessM is unused but might want to stay in for future use. output logic Cacheable, Idempotent, AtomicAllowed, output logic PMASquashBusAccess, - output logic [5:0] HSELRegions, - output logic PMAInstrAccessFaultF, output logic PMALoadAccessFaultM, output logic PMAStoreAccessFaultM @@ -51,24 +49,25 @@ module pmachecker ( // logic BootTim, Tim, CLINT, GPIO, UART, PLIC; logic PMAAccessFault; logic AccessRW, AccessRWX, AccessRX; + logic [5:0] SelRegions; // Determine what type of access is being made - assign AccessRW = ReadAccessM | WriteAccessM; - assign AccessRWX = ReadAccessM | WriteAccessM | ExecuteAccessF; - assign AccessRX = ReadAccessM | ExecuteAccessF; + assign AccessRW = MemReadM | MemWriteM; + assign AccessRWX = MemReadM | MemWriteM | InstrReadF; + assign AccessRX = MemReadM | InstrReadF; // Determine which region of physical memory (if any) is being accessed - adrdecs adrdecs(HADDR, AccessRW, AccessRX, AccessRWX, HSIZE, HSELRegions); + adrdecs adrdecs(PhysicalAddress, AccessRW, AccessRX, AccessRWX, Size, SelRegions); // Only RAM memory regions are cacheable - assign Cacheable = HSELRegions[5] | 
HSELRegions[4]; - assign Idempotent = HSELRegions[4]; - assign AtomicAllowed = HSELRegions[4]; + assign Cacheable = SelRegions[5] | SelRegions[4]; + assign Idempotent = SelRegions[4]; + assign AtomicAllowed = SelRegions[4]; // Detect access faults - assign PMAAccessFault = (~|HSELRegions) && AccessRWX; - assign PMAInstrAccessFaultF = ExecuteAccessF && PMAAccessFault; - assign PMALoadAccessFaultM = ReadAccessM && PMAAccessFault; - assign PMAStoreAccessFaultM = WriteAccessM && PMAAccessFault; + assign PMAAccessFault = (~|SelRegions) & AccessRWX; + assign PMAInstrAccessFaultF = InstrReadF & PMAAccessFault; + assign PMALoadAccessFaultM = MemReadM & PMAAccessFault; + assign PMAStoreAccessFaultM = MemWriteM & PMAAccessFault; assign PMASquashBusAccess = PMAAccessFault; endmodule diff --git a/wally-pipelined/src/mmu/pmpadrdec.sv b/wally-pipelined/src/mmu/pmpadrdec.sv index 87f5d8f1..3286368f 100644 --- a/wally-pipelined/src/mmu/pmpadrdec.sv +++ b/wally-pipelined/src/mmu/pmpadrdec.sv @@ -30,7 +30,8 @@ `include "wally-config.vh" module pmpadrdec ( - input logic [31:0] HADDR, // *** replace with PAdr + input logic [`PA_BITS-1:0] PhysicalAddress, +// input logic [31:0] HADDR, // *** replace with PAdr input logic [1:0] AdrMode, input logic [`XLEN-1:0] CurrentPMPAdr, input logic AdrAtLeastPreviousPMP, @@ -45,15 +46,15 @@ module pmpadrdec ( logic TORMatch, NAMatch; logic AdrBelowCurrentPMP; logic [`PA_BITS-1:0] CurrentAdrFull; - logic [`PA_BITS-1:0] FakePhysAdr; +// logic [`PA_BITS-1:0] FakePhysAdr; // ***replace this when the true physical address from MMU is available - assign FakePhysAdr = {{(`PA_BITS-32){1'b0}}, HADDR}; +// assign FakePhysAdr = {{(`PA_BITS-32){1'b0}}, HADDR}; // Top-of-range (TOR) // Append two implicit trailing 0's to PMPAdr value assign CurrentAdrFull = {CurrentPMPAdr[`PA_BITS-3:0], 2'b00}; - assign AdrBelowCurrentPMP = /*HADDR */FakePhysAdr < CurrentAdrFull; // *** make sure unsigned comparison works correctly + assign AdrBelowCurrentPMP = PhysicalAddress 
< CurrentAdrFull; // *** make sure unsigned comparison works correctly assign AdrAtLeastCurrentPMP = ~AdrBelowCurrentPMP; assign TORMatch = AdrAtLeastPreviousPMP && AdrBelowCurrentPMP; @@ -73,7 +74,7 @@ module pmpadrdec ( endgenerate // verilator lint_on UNOPTFLAT - assign NAMatch = &((FakePhysAdr ~^ CurrentAdrFull) | Mask); + assign NAMatch = &((PhysicalAddress ~^ CurrentAdrFull) | Mask); /* generate if (`XLEN == 32 || `XLEN == 64) begin // ***redo for various sizes diff --git a/wally-pipelined/src/mmu/pmpchecker.sv b/wally-pipelined/src/mmu/pmpchecker.sv index f88d56fa..08153a99 100644 --- a/wally-pipelined/src/mmu/pmpchecker.sv +++ b/wally-pipelined/src/mmu/pmpchecker.sv @@ -30,8 +30,8 @@ module pmpchecker ( // input logic clk, reset, //*** it seems like clk, reset is also not needed here? - - input logic [31:0] HADDR, + input logic [`PA_BITS-1:0] PhysicalAddress, +// input logic [31:0] HADDR, input logic [1:0] PrivilegeModeW, @@ -50,7 +50,7 @@ module pmpchecker ( // we don't have to pass around 16 whole registers. 
input var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0], - input logic ExecuteAccessF, WriteAccessM, ReadAccessM, + input logic InstrReadF, MemWriteM, MemReadM, output logic PMPSquashBusAccess, @@ -84,17 +84,20 @@ module pmpchecker ( assign {PMPCFG[7], PMPCFG[6], PMPCFG[5], PMPCFG[4], PMPCFG[3], PMPCFG[2], PMPCFG[1], PMPCFG[0]} = PMPCFG01_REGW; - pmpadrdec pmpadrdec(.HADDR(HADDR), .AdrMode(PMPCFG[0][4:3]), + pmpadrdec pmpadrdec(.PhysicalAddress(PhysicalAddress), + .AdrMode(PMPCFG[0][4:3]), .CurrentPMPAdr(PMPADDR_ARRAY_REGW[0]), .AdrAtLeastPreviousPMP(1'b1), .AdrAtLeastCurrentPMP(AboveRegion[0]), .Match(Regions[0])); + assign ActiveRegion[0] = |PMPCFG[0][4:3]; generate // *** only for PMP_ENTRIES > 0 genvar i; for (i = 1; i < `PMP_ENTRIES; i++) begin - pmpadrdec pmpadrdec(.HADDR(HADDR), .AdrMode(PMPCFG[i][4:3]), + pmpadrdec pmpadrdec(.PhysicalAddress(PhysicalAddress), + .AdrMode(PMPCFG[i][4:3]), .CurrentPMPAdr(PMPADDR_ARRAY_REGW[i]), .AdrAtLeastPreviousPMP(AboveRegion[i-1]), .AdrAtLeastCurrentPMP(AboveRegion[i]), @@ -131,26 +134,26 @@ module pmpchecker ( default: MatchedRegion = 0; // Should only occur if there is no match endcase - assign L_Bit = PMPCFG[MatchedRegion][7] && Match; - assign X_Bit = PMPCFG[MatchedRegion][2] && Match; - assign W_Bit = PMPCFG[MatchedRegion][1] && Match; - assign R_Bit = PMPCFG[MatchedRegion][0] && Match; + assign L_Bit = PMPCFG[MatchedRegion][7] & Match; + assign X_Bit = PMPCFG[MatchedRegion][2] & Match; + assign W_Bit = PMPCFG[MatchedRegion][1] & Match; + assign R_Bit = PMPCFG[MatchedRegion][0] & Match; - assign InvalidExecute = ExecuteAccessF && ~X_Bit; - assign InvalidWrite = WriteAccessM && ~W_Bit; - assign InvalidRead = ReadAccessM && ~R_Bit; + assign InvalidExecute = InstrReadF & ~X_Bit; + assign InvalidWrite = MemWriteM & ~W_Bit; + assign InvalidRead = MemReadM & ~R_Bit; // *** don't cause faults when there are no PMPs assign PMPInstrAccessFaultF = (PrivilegeModeW == `M_MODE) ? 
- Match && L_Bit && InvalidExecute : - EnforcePMP && InvalidExecute; + Match & L_Bit & InvalidExecute : + EnforcePMP & InvalidExecute; assign PMPStoreAccessFaultM = (PrivilegeModeW == `M_MODE) ? - Match && L_Bit && InvalidWrite : - EnforcePMP && InvalidWrite; + Match & L_Bit & InvalidWrite : + EnforcePMP & InvalidWrite; assign PMPLoadAccessFaultM = (PrivilegeModeW == `M_MODE) ? - Match && L_Bit && InvalidRead : - EnforcePMP && InvalidRead; + Match & L_Bit & InvalidRead : + EnforcePMP & InvalidRead; - assign PMPSquashBusAccess = PMPInstrAccessFaultF || PMPLoadAccessFaultM || PMPStoreAccessFaultM; + assign PMPSquashBusAccess = PMPInstrAccessFaultF | PMPLoadAccessFaultM | PMPStoreAccessFaultM; endmodule From d7e518991e901c149b102f2156be6fcc671fae19 Mon Sep 17 00:00:00 2001 From: Kip Macsai-Goren Date: Thu, 24 Jun 2021 20:01:11 -0400 Subject: [PATCH 08/38] Light cleanup of signals, style. Changed several signals to account for new Phys Addr sizes as opposed to HADDR. --- wally-pipelined/config/rv32ic/wally-config.vh | 29 ++++++++++--------- wally-pipelined/config/rv64ic/wally-config.vh | 29 ++++++++++--------- wally-pipelined/src/lsu/lsuArb.sv | 16 +++++----- wally-pipelined/src/uncore/dtim.sv | 4 +-- wally-pipelined/src/uncore/uncore.sv | 2 +- .../src/wally/wallypipelinedhart.sv | 4 +-- 6 files changed, 43 insertions(+), 41 deletions(-) diff --git a/wally-pipelined/config/rv32ic/wally-config.vh b/wally-pipelined/config/rv32ic/wally-config.vh index b6878061..29cd973a 100644 --- a/wally-pipelined/config/rv32ic/wally-config.vh +++ b/wally-pipelined/config/rv32ic/wally-config.vh @@ -61,26 +61,27 @@ // Peripheral memory space extends from BASE to BASE+RANGE // Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits +// *** each of these is `PA_BITS wide. is this paramaterizable INSIDE the config file? 
`define BOOTTIM_SUPPORTED 1'b1 -`define BOOTTIM_BASE 32'h00000000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder -`define BOOTTIM_RANGE 32'h00003FFF -//`define BOOTTIM_BASE 32'h00001000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder -//`define BOOTTIM_RANGE 32'h00000FFF +`define BOOTTIM_BASE 34'h00000000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder +`define BOOTTIM_RANGE 34'h00003FFF +//`define BOOTTIM_BASE 34'h00001000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder +//`define BOOTTIM_RANGE 34'h00000FFF `define TIM_SUPPORTED 1'b1 -`define TIM_BASE 32'h80000000 -`define TIM_RANGE 32'h07FFFFFF +`define TIM_BASE 34'h80000000 +`define TIM_RANGE 34'h07FFFFFF `define CLINT_SUPPORTED 1'b1 -`define CLINT_BASE 32'h02000000 -`define CLINT_RANGE 32'h0000FFFF +`define CLINT_BASE 34'h02000000 +`define CLINT_RANGE 34'h0000FFFF `define GPIO_SUPPORTED 1'b1 -`define GPIO_BASE 32'h10012000 -`define GPIO_RANGE 32'h000000FF +`define GPIO_BASE 34'h10012000 +`define GPIO_RANGE 34'h000000FF `define UART_SUPPORTED 1'b1 -`define UART_BASE 32'h10000000 -`define UART_RANGE 32'h00000007 +`define UART_BASE 34'h10000000 +`define UART_RANGE 34'h00000007 `define PLIC_SUPPORTED 1'b1 -`define PLIC_BASE 32'h0C000000 -`define PLIC_RANGE 32'h03FFFFFF +`define PLIC_BASE 34'h0C000000 +`define PLIC_RANGE 34'h03FFFFFF // Bus Interface width `define AHBW 32 diff --git a/wally-pipelined/config/rv64ic/wally-config.vh b/wally-pipelined/config/rv64ic/wally-config.vh index 954e126b..6e5b8b29 100644 --- a/wally-pipelined/config/rv64ic/wally-config.vh +++ b/wally-pipelined/config/rv64ic/wally-config.vh @@ -65,26 +65,27 @@ // 
Peripheral memory space extends from BASE to BASE+RANGE // Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits +// *** each of these is `PA_BITS wide. is this paramaterizable INSIDE the config file? `define BOOTTIM_SUPPORTED 1'b1 -`define BOOTTIM_BASE 32'h00000000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder -`define BOOTTIM_RANGE 32'h00003FFF -//`define BOOTTIM_BASE 32'h00001000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder -//`define BOOTTIM_RANGE 32'h00000FFF +`define BOOTTIM_RANGE 56'h00003FFF +`define BOOTTIM_BASE 56'h00000000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder +//`define BOOTTIM_BASE 56'h00001000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder +//`define BOOTTIM_RANGE 56'h00000FFF `define TIM_SUPPORTED 1'b1 -`define TIM_BASE 32'h80000000 -`define TIM_RANGE 32'h07FFFFFF +`define TIM_BASE 56'h80000000 +`define TIM_RANGE 56'h07FFFFFF `define CLINT_SUPPORTED 1'b1 -`define CLINT_BASE 32'h02000000 -`define CLINT_RANGE 32'h0000FFFF +`define CLINT_BASE 56'h02000000 +`define CLINT_RANGE 56'h0000FFFF `define GPIO_SUPPORTED 1'b1 -`define GPIO_BASE 32'h10012000 -`define GPIO_RANGE 32'h000000FF +`define GPIO_BASE 56'h10012000 +`define GPIO_RANGE 56'h000000FF `define UART_SUPPORTED 1'b1 -`define UART_BASE 32'h10000000 -`define UART_RANGE 32'h00000007 +`define UART_BASE 56'h10000000 +`define UART_RANGE 56'h00000007 `define PLIC_SUPPORTED 1'b1 -`define PLIC_BASE 32'h0C000000 -`define PLIC_RANGE 32'h03FFFFFF +`define PLIC_BASE 56'h0C000000 +`define PLIC_RANGE 56'h03FFFFFF // Test modes diff --git a/wally-pipelined/src/lsu/lsuArb.sv 
b/wally-pipelined/src/lsu/lsuArb.sv index 709b3f06..158bdbb2 100644 --- a/wally-pipelined/src/lsu/lsuArb.sv +++ b/wally-pipelined/src/lsu/lsuArb.sv @@ -93,17 +93,17 @@ module lsuArb always_comb begin case(CurrState) StateReady: - if (HPTWTranslate & DataStall) NextState = StatePTWPending; + if (HPTWTranslate & DataStall) NextState = StatePTWPending; else if (HPTWTranslate & ~DataStall) NextState = StatePTWActive; - else NextState = StateReady; + else NextState = StateReady; StatePTWPending: - if (~DataStall) NextState = StatePTWActive; - else NextState = StatePTWPending; + if (~DataStall) NextState = StatePTWActive; + else NextState = StatePTWPending; StatePTWActive: - if (~DataStall) NextState = StateReady; - else NextState = StatePTWActive; - default: NextState = StateReady; - endcase // case (CurrState) + if (~DataStall) NextState = StateReady; + else NextState = StatePTWActive; + default: NextState = StateReady; + endcase end diff --git a/wally-pipelined/src/uncore/dtim.sv b/wally-pipelined/src/uncore/dtim.sv index 6b474dae..40864567 100644 --- a/wally-pipelined/src/uncore/dtim.sv +++ b/wally-pipelined/src/uncore/dtim.sv @@ -37,8 +37,8 @@ module dtim #(parameter BASE=0, RANGE = 65535) ( output logic HRESPTim, HREADYTim ); - localparam integer MemStartAddr = BASE>>(1+`XLEN/32); - localparam integer MemEndAddr = (RANGE+BASE)>>1+(`XLEN/32); + localparam MemStartAddr = BASE>>(1+`XLEN/32); + localparam MemEndAddr = (RANGE+BASE)>>1+(`XLEN/32); logic [`XLEN-1:0] RAM[BASE>>(1+`XLEN/32):(RANGE+BASE)>>1+(`XLEN/32)]; logic [31:0] HWADDR, A; diff --git a/wally-pipelined/src/uncore/uncore.sv b/wally-pipelined/src/uncore/uncore.sv index cb0a8c2a..91aee66b 100644 --- a/wally-pipelined/src/uncore/uncore.sv +++ b/wally-pipelined/src/uncore/uncore.sv @@ -74,7 +74,7 @@ module uncore ( // Determine which region of physical memory (if any) is being accessed // Use a trimmed down portion of the PMA checker - only the address decoders - adrdecs adrdecs(HADDR, 1'b1, 1'b1, 1'b1, HSIZE, 
HSELRegions); + adrdecs adrdecs({{(`PA_BITS-32){1'b0}}, HADDR}, 1'b1, 1'b1, 1'b1, HSIZE[1:0], HSELRegions); // unswizzle HSEL signals assign {HSELBootTim, HSELTim, HSELCLINT, HSELGPIO, HSELUART, HSELPLIC} = HSELRegions; diff --git a/wally-pipelined/src/wally/wallypipelinedhart.sv b/wally-pipelined/src/wally/wallypipelinedhart.sv index 479625a5..e494d346 100644 --- a/wally-pipelined/src/wally/wallypipelinedhart.sv +++ b/wally-pipelined/src/wally/wallypipelinedhart.sv @@ -120,10 +120,10 @@ module wallypipelinedhart logic PMPInstrAccessFaultF, PMPLoadAccessFaultM, PMPStoreAccessFaultM; logic PMAInstrAccessFaultF, PMALoadAccessFaultM, PMAStoreAccessFaultM; logic DSquashBusAccessM, ISquashBusAccessF; - logic [5:0] DHSELRegionsM, IHSELRegionsF; +// logic [5:0] DHSELRegionsM, IHSELRegionsF; var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0]; logic [63:0] PMPCFG01_REGW, PMPCFG23_REGW; // signals being sent from privileged unit to pmp/pma in dmem and ifu. - assign HSELRegions = ExecuteAccessF ? IHSELRegionsF : DHSELRegionsM; // *** this is a pure guess on how one of these should be selected. it passes tests, but is it the right way to do this? +// assign HSELRegions = ExecuteAccessF ? IHSELRegionsF : DHSELRegionsM; // *** this is a pure guess on how one of these should be selected. it passes tests, but is it the right way to do this? // IMem stalls logic ICacheStallF; From d6c19e73f4f3c6dddfd95af02c0b69744e79e29e Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Fri, 25 Jun 2021 11:00:42 -0500 Subject: [PATCH 09/38] Regression test runs further. The LSU state machine which fakes the Dcache had a few bugs. MemAccessM needed to be squashed on bus faults. 
--- .../config/busybear/wally-config.vh | 3 +- wally-pipelined/regression/wave.do | 134 ++++++++++++------ wally-pipelined/src/lsu/lsu.sv | 6 +- 3 files changed, 93 insertions(+), 50 deletions(-) diff --git a/wally-pipelined/config/busybear/wally-config.vh b/wally-pipelined/config/busybear/wally-config.vh index e6532ee5..0db13778 100644 --- a/wally-pipelined/config/busybear/wally-config.vh +++ b/wally-pipelined/config/busybear/wally-config.vh @@ -30,8 +30,7 @@ `define BUILDROOT 0 `define BUSYBEAR 1 `define LINUX_FIX_READ {'h10000005} -`define LINUX_TEST_VECTORS "../../../busybear_boot/" -//`define LINUX_TEST_VECTORS "/courses/e190ax/busybear_boot/" +`define LINUX_TEST_VECTORS "/courses/e190ax/busybear_boot/" // RV32 or RV64: XLEN = 32 or 64 `define XLEN 64 diff --git a/wally-pipelined/regression/wave.do b/wally-pipelined/regression/wave.do index 88879334..80edad36 100644 --- a/wally-pipelined/regression/wave.do +++ b/wally-pipelined/regression/wave.do @@ -7,37 +7,37 @@ add wave -noupdate -expand -group {Execution Stage} /testbench/FunctionName/Func add wave -noupdate -expand -group {Execution Stage} /testbench/dut/hart/ifu/PCE add wave -noupdate -expand -group {Execution Stage} /testbench/InstrEName add wave -noupdate -expand -group {Execution Stage} /testbench/dut/hart/ifu/InstrE -add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/InstrMisalignedFaultM -add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/InstrAccessFaultM -add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/IllegalInstrFaultM -add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/BreakpointFaultM -add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/LoadMisalignedFaultM -add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/StoreMisalignedFaultM -add wave -noupdate -expand -group HDU -group traps 
/testbench/dut/hart/priv/trap/LoadAccessFaultM -add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/StoreAccessFaultM -add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/EcallFaultM -add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/InstrPageFaultM -add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/LoadPageFaultM -add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/StorePageFaultM -add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/InterruptM -add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/hzu/BPPredWrongE -add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/hzu/CSRWritePendingDEM -add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/hzu/RetM -add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/hzu/TrapM -add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/hzu/LoadStallD -add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/hzu/ICacheStallF -add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/DataStall -add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/MulDivStallD -add wave -noupdate -expand -group HDU -expand -group Flush -color Yellow /testbench/dut/hart/hzu/FlushF -add wave -noupdate -expand -group HDU -expand -group Flush -color Yellow /testbench/dut/hart/FlushD -add wave -noupdate -expand -group HDU -expand -group Flush -color Yellow /testbench/dut/hart/FlushE -add wave -noupdate -expand -group HDU -expand -group Flush -color Yellow /testbench/dut/hart/FlushM -add wave -noupdate -expand -group HDU -expand -group Flush -color Yellow /testbench/dut/hart/FlushW -add wave -noupdate -expand -group HDU -expand -group Stall -color Orange 
/testbench/dut/hart/StallF -add wave -noupdate -expand -group HDU -expand -group Stall -color Orange /testbench/dut/hart/StallD -add wave -noupdate -expand -group HDU -expand -group Stall -color Orange /testbench/dut/hart/StallE -add wave -noupdate -expand -group HDU -expand -group Stall -color Orange /testbench/dut/hart/StallM -add wave -noupdate -expand -group HDU -expand -group Stall -color Orange /testbench/dut/hart/StallW +add wave -noupdate -group HDU -group traps /testbench/dut/hart/priv/trap/InstrMisalignedFaultM +add wave -noupdate -group HDU -group traps /testbench/dut/hart/priv/trap/InstrAccessFaultM +add wave -noupdate -group HDU -group traps /testbench/dut/hart/priv/trap/IllegalInstrFaultM +add wave -noupdate -group HDU -group traps /testbench/dut/hart/priv/trap/BreakpointFaultM +add wave -noupdate -group HDU -group traps /testbench/dut/hart/priv/trap/LoadMisalignedFaultM +add wave -noupdate -group HDU -group traps /testbench/dut/hart/priv/trap/StoreMisalignedFaultM +add wave -noupdate -group HDU -group traps /testbench/dut/hart/priv/trap/LoadAccessFaultM +add wave -noupdate -group HDU -group traps /testbench/dut/hart/priv/trap/StoreAccessFaultM +add wave -noupdate -group HDU -group traps /testbench/dut/hart/priv/trap/EcallFaultM +add wave -noupdate -group HDU -group traps /testbench/dut/hart/priv/trap/InstrPageFaultM +add wave -noupdate -group HDU -group traps /testbench/dut/hart/priv/trap/LoadPageFaultM +add wave -noupdate -group HDU -group traps /testbench/dut/hart/priv/trap/StorePageFaultM +add wave -noupdate -group HDU -group traps /testbench/dut/hart/priv/trap/InterruptM +add wave -noupdate -group HDU -expand -group hazards /testbench/dut/hart/hzu/BPPredWrongE +add wave -noupdate -group HDU -expand -group hazards /testbench/dut/hart/hzu/CSRWritePendingDEM +add wave -noupdate -group HDU -expand -group hazards /testbench/dut/hart/hzu/RetM +add wave -noupdate -group HDU -expand -group hazards /testbench/dut/hart/hzu/TrapM +add wave -noupdate -group 
HDU -expand -group hazards /testbench/dut/hart/hzu/LoadStallD +add wave -noupdate -group HDU -expand -group hazards /testbench/dut/hart/hzu/ICacheStallF +add wave -noupdate -group HDU -expand -group hazards /testbench/dut/hart/DataStall +add wave -noupdate -group HDU -expand -group hazards /testbench/dut/hart/MulDivStallD +add wave -noupdate -group HDU -group Flush -color Yellow /testbench/dut/hart/hzu/FlushF +add wave -noupdate -group HDU -group Flush -color Yellow /testbench/dut/hart/FlushD +add wave -noupdate -group HDU -group Flush -color Yellow /testbench/dut/hart/FlushE +add wave -noupdate -group HDU -group Flush -color Yellow /testbench/dut/hart/FlushM +add wave -noupdate -group HDU -group Flush -color Yellow /testbench/dut/hart/FlushW +add wave -noupdate -group HDU -group Stall -color Orange /testbench/dut/hart/StallF +add wave -noupdate -group HDU -group Stall -color Orange /testbench/dut/hart/StallD +add wave -noupdate -group HDU -group Stall -color Orange /testbench/dut/hart/StallE +add wave -noupdate -group HDU -group Stall -color Orange /testbench/dut/hart/StallM +add wave -noupdate -group HDU -group Stall -color Orange /testbench/dut/hart/StallW add wave -noupdate -group Bpred -color Orange /testbench/dut/hart/ifu/bpred/bpred/Predictor/DirPredictor/GHR add wave -noupdate -group Bpred -expand -group {branch update selection inputs} /testbench/dut/hart/ifu/bpred/bpred/Predictor/DirPredictor/BPPredF add wave -noupdate -group Bpred -expand -group {branch update selection inputs} {/testbench/dut/hart/ifu/bpred/bpred/Predictor/DirPredictor/InstrClassE[0]} @@ -117,18 +117,18 @@ add wave -noupdate -group RegFile -group {write regfile mux} /testbench/dut/hart add wave -noupdate -group RegFile -group {write regfile mux} /testbench/dut/hart/ieu/dp/CSRReadValW add wave -noupdate -group RegFile -group {write regfile mux} /testbench/dut/hart/ieu/dp/ResultSrcW add wave -noupdate -group RegFile -group {write regfile mux} /testbench/dut/hart/ieu/dp/ResultW -add wave 
-noupdate -group alu /testbench/dut/hart/ieu/dp/alu/a -add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/b -add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/alucontrol -add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/result -add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/flags -add wave -noupdate -group alu -divider internals -add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/overflow -add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/carry -add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/zero -add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/neg -add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/lt -add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/ltu +add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/a +add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/b +add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/alucontrol +add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/result +add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/flags +add wave -noupdate -expand -group alu -divider internals +add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/overflow +add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/carry +add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/zero +add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/neg +add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/lt +add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/ltu add wave -noupdate -group dcache -expand -group {cpu request} /testbench/dut/hart/MemAdrM add wave -noupdate -group dcache -expand -group {cpu request} /testbench/dut/hart/WriteDataM add wave -noupdate -group dcache /testbench/dut/hart/MemPAdrM @@ -235,8 +235,52 @@ add wave -noupdate -expand -group AHB 
/testbench/dut/hart/ebu/HADDRD add wave -noupdate -expand -group AHB /testbench/dut/hart/ebu/HSIZED add wave -noupdate -expand -group AHB /testbench/dut/hart/ebu/HWRITED add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/CurrState +add wave -noupdate -expand -group lsu /testbench/dut/hart/arbiter/MemAdrM +add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/MemPAdrM +add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/DSquashBusAccessM +add wave -noupdate -group plic /testbench/dut/uncore/genblk2/plic/HCLK +add wave -noupdate -group plic /testbench/dut/uncore/genblk2/plic/HSELPLIC +add wave -noupdate -group plic /testbench/dut/uncore/genblk2/plic/HADDR +add wave -noupdate -group plic /testbench/dut/uncore/genblk2/plic/HWRITE +add wave -noupdate -group plic /testbench/dut/uncore/genblk2/plic/HREADY +add wave -noupdate -group plic /testbench/dut/uncore/genblk2/plic/HTRANS +add wave -noupdate -group plic /testbench/dut/uncore/genblk2/plic/HWDATA +add wave -noupdate -group plic /testbench/dut/uncore/genblk2/plic/UARTIntr +add wave -noupdate -group plic /testbench/dut/uncore/genblk2/plic/GPIOIntr +add wave -noupdate -group plic /testbench/dut/uncore/genblk2/plic/HREADPLIC +add wave -noupdate -group plic /testbench/dut/uncore/genblk2/plic/HRESPPLIC +add wave -noupdate -group plic /testbench/dut/uncore/genblk2/plic/HREADYPLIC +add wave -noupdate -group plic /testbench/dut/uncore/genblk2/plic/ExtIntM +add wave -noupdate -group GPIO /testbench/dut/uncore/genblk3/gpio/HCLK +add wave -noupdate -group GPIO /testbench/dut/uncore/genblk3/gpio/HSELGPIO +add wave -noupdate -group GPIO /testbench/dut/uncore/genblk3/gpio/HADDR +add wave -noupdate -group GPIO /testbench/dut/uncore/genblk3/gpio/HWDATA +add wave -noupdate -group GPIO /testbench/dut/uncore/genblk3/gpio/HWRITE +add wave -noupdate -group GPIO /testbench/dut/uncore/genblk3/gpio/HREADY +add wave -noupdate -group GPIO /testbench/dut/uncore/genblk3/gpio/HTRANS +add wave -noupdate -group GPIO 
/testbench/dut/uncore/genblk3/gpio/HREADGPIO +add wave -noupdate -group GPIO /testbench/dut/uncore/genblk3/gpio/HRESPGPIO +add wave -noupdate -group GPIO /testbench/dut/uncore/genblk3/gpio/HREADYGPIO +add wave -noupdate -group GPIO /testbench/dut/uncore/genblk3/gpio/GPIOPinsIn +add wave -noupdate -group GPIO /testbench/dut/uncore/genblk3/gpio/GPIOPinsOut +add wave -noupdate -group GPIO /testbench/dut/uncore/genblk3/gpio/GPIOPinsEn +add wave -noupdate -group GPIO /testbench/dut/uncore/genblk3/gpio/GPIOIntr +add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/HCLK +add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/HSELCLINT +add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/HADDR +add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/HWRITE +add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/HWDATA +add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/HREADY +add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/HTRANS +add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/HREADCLINT +add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/HRESPCLINT +add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/HREADYCLINT +add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/MTIME +add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/MTIMECMP +add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/TimerIntM +add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/SwIntM TreeUpdate [SetDefaultTree] -WaveRestoreCursors {{Cursor 4} {32648010 ns} 0} {{Cursor 5} {4216 ns} 0} +WaveRestoreCursors {{Cursor 4} {32648010 ns} 0} {{Cursor 5} {14425 ns} 0} quietly wave cursor active 2 configure wave -namecolwidth 250 configure wave -valuecolwidth 189 @@ -252,4 +296,4 @@ configure wave -griddelta 40 configure wave -timeline 0 configure wave -timelineunits ns update -WaveRestoreZoom 
{4167 ns} {4406 ns} +WaveRestoreZoom {0 ns} {2330991 ns} diff --git a/wally-pipelined/src/lsu/lsu.sv b/wally-pipelined/src/lsu/lsu.sv index 096dd07c..0ab4022c 100644 --- a/wally-pipelined/src/lsu/lsu.sv +++ b/wally-pipelined/src/lsu/lsu.sv @@ -156,7 +156,7 @@ module lsu ( assign MemReadM = MemRWM[1] & ~NonBusTrapM & CurrState != STATE_STALLED; assign MemWriteM = MemRWM[0] & ~NonBusTrapM && ~SquashSCM & CurrState != STATE_STALLED; assign AtomicMaskedM = CurrState != STATE_STALLED ? AtomicM : 2'b00 ; - assign MemAccessM = |MemRWM; + assign MemAccessM = MemReadM | MemWriteM; // Determine if M stage committed // Reset whenever unstalled. Set when access successfully occurs @@ -195,7 +195,7 @@ module lsu ( endgenerate // Data stall - assign DataStall = (CurrState == STATE_FETCH) || (CurrState == STATE_FETCH_AMO); + assign DataStall = (NextState == STATE_FETCH) || (NextState == STATE_FETCH_AMO); // Ross Thompson April 22, 2021 // for now we need to handle the issue where the data memory interface repeately @@ -209,7 +209,7 @@ module lsu ( always_comb begin case (CurrState) - STATE_READY: if (MemRWM[1] & MemRWM[0]) NextState = STATE_FETCH_AMO; // *** should be some misalign check + STATE_READY: if (|AtomicMaskedM) NextState = STATE_FETCH_AMO; // *** should be some misalign check else if (MemAccessM & ~DataMisalignedM) NextState = STATE_FETCH; else NextState = STATE_READY; STATE_FETCH_AMO: if (MemAckW) NextState = STATE_FETCH; From b4a788c3413a60eb1c6973815bb161208a2ac17d Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Fri, 25 Jun 2021 14:49:27 -0500 Subject: [PATCH 10/38] Working through a combo loop. 
--- wally-pipelined/regression/wave.do | 119 +++++++++++++++-------------- wally-pipelined/src/lsu/lsu.sv | 72 +++++++++++++---- 2 files changed, 117 insertions(+), 74 deletions(-) diff --git a/wally-pipelined/regression/wave.do b/wally-pipelined/regression/wave.do index 80edad36..09257c18 100644 --- a/wally-pipelined/regression/wave.do +++ b/wally-pipelined/regression/wave.do @@ -7,37 +7,37 @@ add wave -noupdate -expand -group {Execution Stage} /testbench/FunctionName/Func add wave -noupdate -expand -group {Execution Stage} /testbench/dut/hart/ifu/PCE add wave -noupdate -expand -group {Execution Stage} /testbench/InstrEName add wave -noupdate -expand -group {Execution Stage} /testbench/dut/hart/ifu/InstrE -add wave -noupdate -group HDU -group traps /testbench/dut/hart/priv/trap/InstrMisalignedFaultM -add wave -noupdate -group HDU -group traps /testbench/dut/hart/priv/trap/InstrAccessFaultM -add wave -noupdate -group HDU -group traps /testbench/dut/hart/priv/trap/IllegalInstrFaultM -add wave -noupdate -group HDU -group traps /testbench/dut/hart/priv/trap/BreakpointFaultM -add wave -noupdate -group HDU -group traps /testbench/dut/hart/priv/trap/LoadMisalignedFaultM -add wave -noupdate -group HDU -group traps /testbench/dut/hart/priv/trap/StoreMisalignedFaultM -add wave -noupdate -group HDU -group traps /testbench/dut/hart/priv/trap/LoadAccessFaultM -add wave -noupdate -group HDU -group traps /testbench/dut/hart/priv/trap/StoreAccessFaultM -add wave -noupdate -group HDU -group traps /testbench/dut/hart/priv/trap/EcallFaultM -add wave -noupdate -group HDU -group traps /testbench/dut/hart/priv/trap/InstrPageFaultM -add wave -noupdate -group HDU -group traps /testbench/dut/hart/priv/trap/LoadPageFaultM -add wave -noupdate -group HDU -group traps /testbench/dut/hart/priv/trap/StorePageFaultM -add wave -noupdate -group HDU -group traps /testbench/dut/hart/priv/trap/InterruptM -add wave -noupdate -group HDU -expand -group hazards /testbench/dut/hart/hzu/BPPredWrongE -add 
wave -noupdate -group HDU -expand -group hazards /testbench/dut/hart/hzu/CSRWritePendingDEM -add wave -noupdate -group HDU -expand -group hazards /testbench/dut/hart/hzu/RetM -add wave -noupdate -group HDU -expand -group hazards /testbench/dut/hart/hzu/TrapM -add wave -noupdate -group HDU -expand -group hazards /testbench/dut/hart/hzu/LoadStallD -add wave -noupdate -group HDU -expand -group hazards /testbench/dut/hart/hzu/ICacheStallF -add wave -noupdate -group HDU -expand -group hazards /testbench/dut/hart/DataStall -add wave -noupdate -group HDU -expand -group hazards /testbench/dut/hart/MulDivStallD -add wave -noupdate -group HDU -group Flush -color Yellow /testbench/dut/hart/hzu/FlushF -add wave -noupdate -group HDU -group Flush -color Yellow /testbench/dut/hart/FlushD -add wave -noupdate -group HDU -group Flush -color Yellow /testbench/dut/hart/FlushE -add wave -noupdate -group HDU -group Flush -color Yellow /testbench/dut/hart/FlushM -add wave -noupdate -group HDU -group Flush -color Yellow /testbench/dut/hart/FlushW -add wave -noupdate -group HDU -group Stall -color Orange /testbench/dut/hart/StallF -add wave -noupdate -group HDU -group Stall -color Orange /testbench/dut/hart/StallD -add wave -noupdate -group HDU -group Stall -color Orange /testbench/dut/hart/StallE -add wave -noupdate -group HDU -group Stall -color Orange /testbench/dut/hart/StallM -add wave -noupdate -group HDU -group Stall -color Orange /testbench/dut/hart/StallW +add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/InstrMisalignedFaultM +add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/InstrAccessFaultM +add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/IllegalInstrFaultM +add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/BreakpointFaultM +add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/LoadMisalignedFaultM +add wave -noupdate -expand 
-group HDU -group traps /testbench/dut/hart/priv/trap/StoreMisalignedFaultM +add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/LoadAccessFaultM +add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/StoreAccessFaultM +add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/EcallFaultM +add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/InstrPageFaultM +add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/LoadPageFaultM +add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/StorePageFaultM +add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/InterruptM +add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/hzu/BPPredWrongE +add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/hzu/CSRWritePendingDEM +add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/hzu/RetM +add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/hzu/TrapM +add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/hzu/LoadStallD +add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/hzu/ICacheStallF +add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/DataStall +add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/MulDivStallD +add wave -noupdate -expand -group HDU -group Flush -color Yellow /testbench/dut/hart/hzu/FlushF +add wave -noupdate -expand -group HDU -group Flush -color Yellow /testbench/dut/hart/FlushD +add wave -noupdate -expand -group HDU -group Flush -color Yellow /testbench/dut/hart/FlushE +add wave -noupdate -expand -group HDU -group Flush -color Yellow /testbench/dut/hart/FlushM +add wave -noupdate -expand -group HDU -group Flush -color Yellow /testbench/dut/hart/FlushW +add 
wave -noupdate -expand -group HDU -group Stall -color Orange /testbench/dut/hart/StallF +add wave -noupdate -expand -group HDU -group Stall -color Orange /testbench/dut/hart/StallD +add wave -noupdate -expand -group HDU -group Stall -color Orange /testbench/dut/hart/StallE +add wave -noupdate -expand -group HDU -group Stall -color Orange /testbench/dut/hart/StallM +add wave -noupdate -expand -group HDU -group Stall -color Orange /testbench/dut/hart/StallW add wave -noupdate -group Bpred -color Orange /testbench/dut/hart/ifu/bpred/bpred/Predictor/DirPredictor/GHR add wave -noupdate -group Bpred -expand -group {branch update selection inputs} /testbench/dut/hart/ifu/bpred/bpred/Predictor/DirPredictor/BPPredF add wave -noupdate -group Bpred -expand -group {branch update selection inputs} {/testbench/dut/hart/ifu/bpred/bpred/Predictor/DirPredictor/InstrClassE[0]} @@ -209,35 +209,38 @@ add wave -noupdate -group icache -expand -group memory -group {tag write} /testb add wave -noupdate -group icache -expand -group {instr to cpu} /testbench/dut/hart/ifu/icache/controller/FinalInstrRawF add wave -noupdate -group icache -expand -group pc /testbench/dut/hart/ifu/icache/controller/PCPF add wave -noupdate -group icache -expand -group pc /testbench/dut/hart/ifu/icache/controller/PCPreFinalF -add wave -noupdate -expand -group AHB /testbench/dut/hart/ebu/BusState -add wave -noupdate -expand -group AHB /testbench/dut/hart/ebu/ProposedNextBusState -add wave -noupdate -expand -group AHB /testbench/dut/hart/ebu/NextBusState -add wave -noupdate -expand -group AHB /testbench/dut/hart/ebu/DSquashBusAccessM -add wave -noupdate -expand -group AHB /testbench/dut/hart/ebu/ISquashBusAccessF -add wave -noupdate -expand -group AHB -expand -group {input requests} /testbench/dut/hart/ebu/AtomicMaskedM -add wave -noupdate -expand -group AHB -expand -group {input requests} /testbench/dut/hart/ebu/MemReadM -add wave -noupdate -expand -group AHB -expand -group {input requests} 
/testbench/dut/hart/ebu/MemWriteM -add wave -noupdate -expand -group AHB -expand -group {input requests} /testbench/dut/hart/ebu/InstrReadF -add wave -noupdate -expand -group AHB /testbench/dut/hart/ebu/HCLK -add wave -noupdate -expand -group AHB /testbench/dut/hart/ebu/HRESETn -add wave -noupdate -expand -group AHB /testbench/dut/hart/ebu/HRDATA -add wave -noupdate -expand -group AHB /testbench/dut/hart/ebu/HREADY -add wave -noupdate -expand -group AHB /testbench/dut/hart/ebu/HRESP -add wave -noupdate -expand -group AHB /testbench/dut/hart/ebu/HADDR -add wave -noupdate -expand -group AHB /testbench/dut/hart/ebu/HWDATA -add wave -noupdate -expand -group AHB /testbench/dut/hart/ebu/HWRITE -add wave -noupdate -expand -group AHB /testbench/dut/hart/ebu/HSIZE -add wave -noupdate -expand -group AHB /testbench/dut/hart/ebu/HBURST -add wave -noupdate -expand -group AHB /testbench/dut/hart/ebu/HPROT -add wave -noupdate -expand -group AHB /testbench/dut/hart/ebu/HTRANS -add wave -noupdate -expand -group AHB /testbench/dut/hart/ebu/HMASTLOCK -add wave -noupdate -expand -group AHB /testbench/dut/hart/ebu/HADDRD -add wave -noupdate -expand -group AHB /testbench/dut/hart/ebu/HSIZED -add wave -noupdate -expand -group AHB /testbench/dut/hart/ebu/HWRITED +add wave -noupdate -group AHB /testbench/dut/hart/ebu/BusState +add wave -noupdate -group AHB /testbench/dut/hart/ebu/ProposedNextBusState +add wave -noupdate -group AHB /testbench/dut/hart/ebu/NextBusState +add wave -noupdate -group AHB /testbench/dut/hart/ebu/DSquashBusAccessM +add wave -noupdate -group AHB /testbench/dut/hart/ebu/ISquashBusAccessF +add wave -noupdate -group AHB -expand -group {input requests} /testbench/dut/hart/ebu/AtomicMaskedM +add wave -noupdate -group AHB -expand -group {input requests} /testbench/dut/hart/ebu/MemReadM +add wave -noupdate -group AHB -expand -group {input requests} /testbench/dut/hart/ebu/MemWriteM +add wave -noupdate -group AHB -expand -group {input requests} 
/testbench/dut/hart/ebu/InstrReadF +add wave -noupdate -group AHB /testbench/dut/hart/ebu/HCLK +add wave -noupdate -group AHB /testbench/dut/hart/ebu/HRESETn +add wave -noupdate -group AHB /testbench/dut/hart/ebu/HRDATA +add wave -noupdate -group AHB /testbench/dut/hart/ebu/HREADY +add wave -noupdate -group AHB /testbench/dut/hart/ebu/HRESP +add wave -noupdate -group AHB /testbench/dut/hart/ebu/HADDR +add wave -noupdate -group AHB /testbench/dut/hart/ebu/HWDATA +add wave -noupdate -group AHB /testbench/dut/hart/ebu/HWRITE +add wave -noupdate -group AHB /testbench/dut/hart/ebu/HSIZE +add wave -noupdate -group AHB /testbench/dut/hart/ebu/HBURST +add wave -noupdate -group AHB /testbench/dut/hart/ebu/HPROT +add wave -noupdate -group AHB /testbench/dut/hart/ebu/HTRANS +add wave -noupdate -group AHB /testbench/dut/hart/ebu/HMASTLOCK +add wave -noupdate -group AHB /testbench/dut/hart/ebu/HADDRD +add wave -noupdate -group AHB /testbench/dut/hart/ebu/HSIZED +add wave -noupdate -group AHB /testbench/dut/hart/ebu/HWRITED add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/CurrState add wave -noupdate -expand -group lsu /testbench/dut/hart/arbiter/MemAdrM add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/MemPAdrM +add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/WriteDataM +add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/AtomicMaskedM add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/DSquashBusAccessM +add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/MemAckW add wave -noupdate -group plic /testbench/dut/uncore/genblk2/plic/HCLK add wave -noupdate -group plic /testbench/dut/uncore/genblk2/plic/HSELPLIC add wave -noupdate -group plic /testbench/dut/uncore/genblk2/plic/HADDR @@ -280,7 +283,7 @@ add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/MTIMECMP add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/TimerIntM add wave -noupdate -group CLINT 
/testbench/dut/uncore/genblk1/clint/SwIntM TreeUpdate [SetDefaultTree] -WaveRestoreCursors {{Cursor 4} {32648010 ns} 0} {{Cursor 5} {14425 ns} 0} +WaveRestoreCursors {{Cursor 4} {32648010 ns} 0} {{Cursor 5} {10763646 ns} 0} quietly wave cursor active 2 configure wave -namecolwidth 250 configure wave -valuecolwidth 189 @@ -296,4 +299,4 @@ configure wave -griddelta 40 configure wave -timeline 0 configure wave -timelineunits ns update -WaveRestoreZoom {0 ns} {2330991 ns} +WaveRestoreZoom {10763302 ns} {10763880 ns} diff --git a/wally-pipelined/src/lsu/lsu.sv b/wally-pipelined/src/lsu/lsu.sv index 0ab4022c..8063ae48 100644 --- a/wally-pipelined/src/lsu/lsu.sv +++ b/wally-pipelined/src/lsu/lsu.sv @@ -100,13 +100,14 @@ module lsu ( logic SquashSCM; logic DTLBPageFaultM; logic MemAccessM; - logic [1:0] CurrState, NextState; + logic [2:0] CurrState, NextState; logic preCommittedM; localparam STATE_READY = 0; localparam STATE_FETCH = 1; - localparam STATE_FETCH_AMO = 2; - localparam STATE_STALLED = 3; + localparam STATE_FETCH_AMO_1 = 2; + localparam STATE_FETCH_AMO_2 = 3; + localparam STATE_STALLED = 4; logic PMPInstrAccessFaultF, PMAInstrAccessFaultF; // *** these are just so that the mmu has somewhere to put these outputs since they aren't used in dmem // *** if you're allowed to parameterize outputs/ inputs existence, these are an easy delete. @@ -195,31 +196,70 @@ module lsu ( endgenerate // Data stall - assign DataStall = (NextState == STATE_FETCH) || (NextState == STATE_FETCH_AMO); + //assign DataStall = (NextState == STATE_FETCH) || (NextState == STATE_FETCH_AMO_1) || (NextState == STATE_FETCH_AMO_2); // Ross Thompson April 22, 2021 // for now we need to handle the issue where the data memory interface repeately // requests data from memory rather than issuing a single request. 
- flopr #(2) stateReg(.clk(clk), + flopr #(3) stateReg(.clk(clk), .reset(reset), .d(NextState), .q(CurrState)); always_comb begin case (CurrState) - STATE_READY: if (|AtomicMaskedM) NextState = STATE_FETCH_AMO; // *** should be some misalign check - else if (MemAccessM & ~DataMisalignedM) NextState = STATE_FETCH; - else NextState = STATE_READY; - STATE_FETCH_AMO: if (MemAckW) NextState = STATE_FETCH; - else NextState = STATE_FETCH_AMO; - STATE_FETCH: if (MemAckW & ~StallW) NextState = STATE_READY; // StallW will stay high if datastall stays high, so right now, once we get into STATE_FETCH, datastall goes high, and we never leave - else if (MemAckW & StallW) NextState = STATE_STALLED; - else NextState = STATE_FETCH; - STATE_STALLED: if (~StallW) NextState = STATE_READY; - else NextState = STATE_STALLED; - default: NextState = STATE_READY; + STATE_READY: + if (|AtomicMaskedM) begin + NextState = STATE_FETCH_AMO_1; // *** should be some misalign check + DataStall = 1'b1; + end else if (MemAccessM & ~DataMisalignedM) begin + NextState = STATE_FETCH; + DataStall = 1'b1; + end else begin + NextState = STATE_READY; + DataStall = 1'b0; + end + STATE_FETCH_AMO_1: + DataStall = 1'b1; + if (MemAckW) begin + NextState = STATE_FETCH_AMO_2; + end else begin + NextState = STATE_FETCH_AMO_1; + end + STATE_FETCH_AMO_2: begin + DataStall = 1'b1; + if (MemAckW & ~StallW) begin + NextState = STATE_FETCH_AMO_2; + end else if (MemAckW & StallW) begin + NextState = STATE_STALLED; + end else begin + NextState = STATE_FETCH_AMO_2; + end + end + STATE_FETCH: begin + DataStall = 1'b1; + if (MemAckW & ~StallW) begin + NextState = STATE_READY; + end else if (MemAckW & StallW) begin + NextState = STATE_STALLED; + end else begin + NextState = STATE_FETCH; + end + end + STATE_STALLED: begin + DataStall = 1'b0; + if (~StallW) begin + NextState = STATE_READY; + end else begin + NextState = STATE_STALLED; + end + end + default: begin + DataStall = 1'b0; + NextState = STATE_READY; + end endcase end 
From 57a70748001799f3e9e2dcdba1d8055fb998cd2d Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Fri, 25 Jun 2021 15:07:41 -0500 Subject: [PATCH 11/38] Some progress. Had to change how the page table walker got its ready. --- wally-pipelined/src/lsu/lsu.sv | 6 +++++- wally-pipelined/src/lsu/lsuArb.sv | 3 ++- wally-pipelined/src/wally/wallypipelinedhart.sv | 7 ++++++- 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/wally-pipelined/src/lsu/lsu.sv b/wally-pipelined/src/lsu/lsu.sv index 8063ae48..37a44a93 100644 --- a/wally-pipelined/src/lsu/lsu.sv +++ b/wally-pipelined/src/lsu/lsu.sv @@ -32,6 +32,7 @@ module lsu ( input logic clk, reset, input logic StallM, FlushM, StallW, FlushW, output logic DataStall, + output logic HPTWReady, // Memory Stage // connected to cpu (controls) @@ -197,6 +198,8 @@ module lsu ( // Data stall //assign DataStall = (NextState == STATE_FETCH) || (NextState == STATE_FETCH_AMO_1) || (NextState == STATE_FETCH_AMO_2); + assign HPTWReady = (CurrState == STATE_READY); + // Ross Thompson April 22, 2021 // for now we need to handle the issue where the data memory interface repeately @@ -221,13 +224,14 @@ module lsu ( NextState = STATE_READY; DataStall = 1'b0; end - STATE_FETCH_AMO_1: + STATE_FETCH_AMO_1: begin DataStall = 1'b1; if (MemAckW) begin NextState = STATE_FETCH_AMO_2; end else begin NextState = STATE_FETCH_AMO_1; end + end STATE_FETCH_AMO_2: begin DataStall = 1'b1; if (MemAckW & ~StallW) begin diff --git a/wally-pipelined/src/lsu/lsuArb.sv b/wally-pipelined/src/lsu/lsuArb.sv index 158bdbb2..e1a3b996 100644 --- a/wally-pipelined/src/lsu/lsuArb.sv +++ b/wally-pipelined/src/lsu/lsuArb.sv @@ -61,6 +61,7 @@ module lsuArb input logic SquashSCWfromLSU, input logic DataMisalignedMfromLSU, input logic [`XLEN-1:0] ReadDataWFromLSU, + input logic HPTWReadyfromLSU, input logic DataStall ); @@ -131,7 +132,7 @@ module lsuArb assign CommittedM = SelPTW ? 1'b0 : CommittedMfromLSU; assign SquashSCW = SelPTW ? 
1'b0 : SquashSCWfromLSU; assign DataMisalignedM = SelPTW ? 1'b0 : DataMisalignedMfromLSU; - assign HPTWReady = ~ DataStall; + assign HPTWReady = HPTWReadyfromLSU; assign DCacheStall = DataStall; // *** this is probably going to change. endmodule diff --git a/wally-pipelined/src/wally/wallypipelinedhart.sv b/wally-pipelined/src/wally/wallypipelinedhart.sv index e494d346..3985adae 100644 --- a/wally-pipelined/src/wally/wallypipelinedhart.sv +++ b/wally-pipelined/src/wally/wallypipelinedhart.sv @@ -131,6 +131,8 @@ module wallypipelinedhart logic [`XLEN-1:0] MMUPAdr, MMUReadPTE; logic MMUStall; logic MMUTranslate, MMUReady; + logic HPTWReadyfromLSU; + // bus interface to dmem logic MemReadM, MemWriteM; @@ -219,6 +221,7 @@ module wallypipelinedhart .SquashSCWfromLSU(SquashSCWfromLSU), .DataMisalignedMfromLSU(DataMisalignedMfromLSU), .ReadDataWFromLSU(ReadDataWFromLSU), + .HPTWReadyfromLSU(HPTWReadyfromLSU), .DataStall(DataStall), .*); @@ -235,7 +238,9 @@ module wallypipelinedhart .DataMisalignedM(DataMisalignedMfromLSU), .DisableTranslation(DisableTranslation), - .DataStall(DataStall), .* ); // data cache unit + .DataStall(DataStall), + .HPTWReady(HPTWReadyfromLSU), + .* ); // data cache unit ahblite ebu( //.InstrReadF(1'b0), From d80ebab9410ada17490c13500ab6ca96df6972f6 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Fri, 25 Jun 2021 15:42:07 -0500 Subject: [PATCH 12/38] AMO and LR/SC instructions now working correctly. Page table walking is not working. 
--- wally-pipelined/regression/wave.do | 4 ++-- wally-pipelined/src/lsu/lsu.sv | 5 ++++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/wally-pipelined/regression/wave.do b/wally-pipelined/regression/wave.do index 09257c18..a95dc5f9 100644 --- a/wally-pipelined/regression/wave.do +++ b/wally-pipelined/regression/wave.do @@ -283,7 +283,7 @@ add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/MTIMECMP add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/TimerIntM add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/SwIntM TreeUpdate [SetDefaultTree] -WaveRestoreCursors {{Cursor 4} {32648010 ns} 0} {{Cursor 5} {10763646 ns} 0} +WaveRestoreCursors {{Cursor 4} {32648010 ns} 0} {{Cursor 5} {11165332 ns} 0} {{Cursor 3} {7672141 ns} 0} quietly wave cursor active 2 configure wave -namecolwidth 250 configure wave -valuecolwidth 189 @@ -299,4 +299,4 @@ configure wave -griddelta 40 configure wave -timeline 0 configure wave -timelineunits ns update -WaveRestoreZoom {10763302 ns} {10763880 ns} +WaveRestoreZoom {11156770 ns} {11173894 ns} diff --git a/wally-pipelined/src/lsu/lsu.sv b/wally-pipelined/src/lsu/lsu.sv index 37a44a93..4689b422 100644 --- a/wally-pipelined/src/lsu/lsu.sv +++ b/wally-pipelined/src/lsu/lsu.sv @@ -214,9 +214,12 @@ module lsu ( always_comb begin case (CurrState) STATE_READY: - if (|AtomicMaskedM) begin + if (AtomicMaskedM[1]) begin NextState = STATE_FETCH_AMO_1; // *** should be some misalign check DataStall = 1'b1; + end else if((MemReadM & AtomicM[0]) | (MemWriteM & AtomicM[0])) begin + NextState = STATE_FETCH_AMO_2; + DataStall = 1'b1; end else if (MemAccessM & ~DataMisalignedM) begin NextState = STATE_FETCH; DataStall = 1'b1; From bc9c944ba0a4a0f5840c5693d3e4997ee5428ad1 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Mon, 28 Jun 2021 17:26:11 -0500 Subject: [PATCH 13/38] Don't use this branch; walker still broken.
--- wally-pipelined/regression/wave.do | 28 +++- wally-pipelined/src/lsu/lsu.sv | 97 +++++++------ wally-pipelined/src/lsu/lsuArb.sv | 12 +- wally-pipelined/src/mmu/pagetablewalker.sv | 131 ++++++++++++------ wally-pipelined/src/mmu/tlb.sv | 2 +- .../src/wally/wallypipelinedhart.sv | 17 ++- 6 files changed, 194 insertions(+), 93 deletions(-) diff --git a/wally-pipelined/regression/wave.do b/wally-pipelined/regression/wave.do index a95dc5f9..a051f071 100644 --- a/wally-pipelined/regression/wave.do +++ b/wally-pipelined/regression/wave.do @@ -218,9 +218,11 @@ add wave -noupdate -group AHB -expand -group {input requests} /testbench/dut/har add wave -noupdate -group AHB -expand -group {input requests} /testbench/dut/hart/ebu/MemReadM add wave -noupdate -group AHB -expand -group {input requests} /testbench/dut/hart/ebu/MemWriteM add wave -noupdate -group AHB -expand -group {input requests} /testbench/dut/hart/ebu/InstrReadF +add wave -noupdate -group AHB -expand -group {input requests} /testbench/dut/hart/ebu/MemSizeM add wave -noupdate -group AHB /testbench/dut/hart/ebu/HCLK add wave -noupdate -group AHB /testbench/dut/hart/ebu/HRESETn add wave -noupdate -group AHB /testbench/dut/hart/ebu/HRDATA +add wave -noupdate -group AHB /testbench/dut/hart/ebu/HRDATANext add wave -noupdate -group AHB /testbench/dut/hart/ebu/HREADY add wave -noupdate -group AHB /testbench/dut/hart/ebu/HRESP add wave -noupdate -group AHB /testbench/dut/hart/ebu/HADDR @@ -234,9 +236,12 @@ add wave -noupdate -group AHB /testbench/dut/hart/ebu/HMASTLOCK add wave -noupdate -group AHB /testbench/dut/hart/ebu/HADDRD add wave -noupdate -group AHB /testbench/dut/hart/ebu/HSIZED add wave -noupdate -group AHB /testbench/dut/hart/ebu/HWRITED +add wave -noupdate -group AHB /testbench/dut/hart/ebu/StallW add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/CurrState -add wave -noupdate -expand -group lsu /testbench/dut/hart/arbiter/MemAdrM +add wave -noupdate -expand -group lsu 
/testbench/dut/hart/lsu/DataStall +add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/MemAdrM add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/MemPAdrM +add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/ReadDataW add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/WriteDataM add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/AtomicMaskedM add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/DSquashBusAccessM @@ -282,8 +287,25 @@ add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/MTIME add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/MTIMECMP add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/TimerIntM add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/SwIntM +add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/PRegEn +add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/WalkerState +add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/MMUReady +add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/HPTWStall +add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/TranslationPAdr +add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/MMUReadPTE +add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/MMUPAdr +add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/CurrentPTE +add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/ValidPTE +add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/LeafPTE +add wave -noupdate -expand -group {LSU ARB} /testbench/dut/hart/arbiter/HPTWTranslate +add wave -noupdate -expand -group {LSU ARB} /testbench/dut/hart/arbiter/HPTWPAdr +add wave -noupdate -expand -group {LSU ARB} /testbench/dut/hart/arbiter/HPTWReadPTE +add wave -noupdate -expand -group {LSU 
ARB} /testbench/dut/hart/arbiter/HPTWReady +add wave -noupdate -expand -group {LSU ARB} -expand -group toLSU /testbench/dut/hart/arbiter/MemAdrMtoLSU +add wave -noupdate -expand -group {LSU ARB} /testbench/dut/hart/arbiter/SelPTW +add wave -noupdate /testbench/dut/hart/lsu/DataStall TreeUpdate [SetDefaultTree] -WaveRestoreCursors {{Cursor 4} {32648010 ns} 0} {{Cursor 5} {11165332 ns} 0} {{Cursor 3} {7672141 ns} 0} +WaveRestoreCursors {{Cursor 4} {32648010 ns} 0} {{Cursor 5} {11172098 ns} 0} {{Cursor 3} {7672141 ns} 0} quietly wave cursor active 2 configure wave -namecolwidth 250 configure wave -valuecolwidth 189 @@ -299,4 +321,4 @@ configure wave -griddelta 40 configure wave -timeline 0 configure wave -timelineunits ns update -WaveRestoreZoom {11156770 ns} {11173894 ns} +WaveRestoreZoom {11171939 ns} {11172253 ns} diff --git a/wally-pipelined/src/lsu/lsu.sv b/wally-pipelined/src/lsu/lsu.sv index 4689b422..dc554e52 100644 --- a/wally-pipelined/src/lsu/lsu.sv +++ b/wally-pipelined/src/lsu/lsu.sv @@ -29,71 +29,73 @@ // *** Ross Thompson amo misalignment check? 
module lsu ( - input logic clk, reset, - input logic StallM, FlushM, StallW, FlushW, - output logic DataStall, - output logic HPTWReady, + input logic clk, reset, + input logic StallM, FlushM, StallW, FlushW, + output logic DataStall, + output logic HPTWReady, // Memory Stage // connected to cpu (controls) - input logic [1:0] MemRWM, - input logic [2:0] Funct3M, - input logic [1:0] AtomicM, - output logic CommittedM, - output logic SquashSCW, - output logic DataMisalignedM, + input logic [1:0] MemRWM, + input logic [2:0] Funct3M, + input logic [1:0] AtomicM, + output logic CommittedM, + output logic SquashSCW, + output logic DataMisalignedM, // address and write data - input logic [`XLEN-1:0] MemAdrM, - input logic [`XLEN-1:0] WriteDataM, - output logic [`XLEN-1:0] ReadDataW, // from ahb + input logic [`XLEN-1:0] MemAdrM, + input logic [`XLEN-1:0] WriteDataM, + output logic [`XLEN-1:0] ReadDataW, // cpu privilege - input logic [1:0] PrivilegeModeW, - input logic DTLBFlushM, + input logic [1:0] PrivilegeModeW, + input logic DTLBFlushM, // faults - input logic NonBusTrapM, - output logic DTLBLoadPageFaultM, DTLBStorePageFaultM, - output logic LoadMisalignedFaultM, LoadAccessFaultM, + input logic NonBusTrapM, + output logic DTLBLoadPageFaultM, DTLBStorePageFaultM, + output logic LoadMisalignedFaultM, LoadAccessFaultM, // cpu hazard unit (trap) - output logic StoreMisalignedFaultM, StoreAccessFaultM, + output logic StoreMisalignedFaultM, StoreAccessFaultM, // connect to ahb - input logic CommitM, // should this be generated in the abh interface? - output logic [`PA_BITS-1:0] MemPAdrM, // to ahb - output logic MemReadM, MemWriteM, - output logic [1:0] AtomicMaskedM, - input logic MemAckW, // from ahb - input logic [`XLEN-1:0] HRDATAW, // from ahb + input logic CommitM, // should this be generated in the abh interface? 
+ output logic [`PA_BITS-1:0] MemPAdrM, // to ahb + output logic MemReadM, MemWriteM, + output logic [1:0] AtomicMaskedM, + input logic MemAckW, // from ahb + input logic [`XLEN-1:0] HRDATAW, // from ahb + output logic [2:0] Funct3MfromLSU, + output logic StallWfromLSU, // mmu management // page table walker - input logic [`XLEN-1:0] PageTableEntryM, - input logic [1:0] PageTypeM, - input logic [`XLEN-1:0] SATP_REGW, // from csr - input logic STATUS_MXR, STATUS_SUM, // from csr - input logic DTLBWriteM, - output logic DTLBMissM, - input logic DisableTranslation, // used to stop intermediate PTE physical addresses being saved to TLB. + input logic [`XLEN-1:0] PageTableEntryM, + input logic [1:0] PageTypeM, + input logic [`XLEN-1:0] SATP_REGW, // from csr + input logic STATUS_MXR, STATUS_SUM, // from csr + input logic DTLBWriteM, + output logic DTLBMissM, + input logic DisableTranslation, // used to stop intermediate PTE physical addresses being saved to TLB. - output logic DTLBHitM, // not connected + output logic DTLBHitM, // not connected // PMA/PMP (inside mmu) signals - input logic [31:0] HADDR, // *** replace all of these H inputs with physical adress once pma checkers have been edited to use paddr as well. - input logic [2:0] HSIZE, - input logic HWRITE, - input logic AtomicAccessM, WriteAccessM, ReadAccessM, // execute access is hardwired to zero in this mmu because we're only working with data in the M stage. - input logic [63:0] PMPCFG01_REGW, PMPCFG23_REGW, // *** all of these come from the privileged unit, so thwyre gonna have to come over into ifu and dmem - input var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0], // *** this one especially has a large note attached to it in pmpchecker. + input logic [31:0] HADDR, // *** replace all of these H inputs with physical adress once pma checkers have been edited to use paddr as well. 
+ input logic [2:0] HSIZE, + input logic HWRITE, + input logic AtomicAccessM, WriteAccessM, ReadAccessM, // execute access is hardwired to zero in this mmu because we're only working with data in the M stage. + input logic [63:0] PMPCFG01_REGW, PMPCFG23_REGW, // *** all of these come from the privileged unit, so thwyre gonna have to come over into ifu and dmem + input var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0], // *** this one especially has a large note attached to it in pmpchecker. - output logic PMALoadAccessFaultM, PMAStoreAccessFaultM, - output logic PMPLoadAccessFaultM, PMPStoreAccessFaultM, // *** can these be parameterized? we dont need the m stage ones for the immu and vice versa. + output logic PMALoadAccessFaultM, PMAStoreAccessFaultM, + output logic PMPLoadAccessFaultM, PMPStoreAccessFaultM, // *** can these be parameterized? we dont need the m stage ones for the immu and vice versa. - output logic DSquashBusAccessM + output logic DSquashBusAccessM // output logic [5:0] DHSELRegionsM ); @@ -246,13 +248,15 @@ module lsu ( end end STATE_FETCH: begin - DataStall = 1'b1; if (MemAckW & ~StallW) begin NextState = STATE_READY; + DataStall = 1'b0; end else if (MemAckW & StallW) begin NextState = STATE_STALLED; + DataStall = 1'b1; end else begin NextState = STATE_FETCH; + DataStall = 1'b1; end end STATE_STALLED: begin @@ -268,7 +272,12 @@ module lsu ( NextState = STATE_READY; end endcase - end + end // always_comb + + // *** for now just pass through size + assign Funct3MfromLSU = Funct3M; + assign StallWfromLSU = StallW; + endmodule diff --git a/wally-pipelined/src/lsu/lsuArb.sv b/wally-pipelined/src/lsu/lsuArb.sv index e1a3b996..d7e62782 100644 --- a/wally-pipelined/src/lsu/lsuArb.sv +++ b/wally-pipelined/src/lsu/lsuArb.sv @@ -35,6 +35,7 @@ module lsuArb // to page table walker. 
output logic [`XLEN-1:0] HPTWReadPTE, output logic HPTWReady, + output logic HPTWStall, // from CPU input logic [1:0] MemRWM, @@ -42,6 +43,7 @@ module lsuArb input logic [1:0] AtomicM, input logic [`XLEN-1:0] MemAdrM, input logic [`XLEN-1:0] WriteDataM, + input logic StallW, // to CPU output logic [`XLEN-1:0] ReadDataW, output logic CommittedM, @@ -56,12 +58,13 @@ module lsuArb output logic [1:0] AtomicMtoLSU, output logic [`XLEN-1:0] MemAdrMtoLSU, output logic [`XLEN-1:0] WriteDataMtoLSU, + output logic StallWtoLSU, // from LSU input logic CommittedMfromLSU, input logic SquashSCWfromLSU, input logic DataMisalignedMfromLSU, input logic [`XLEN-1:0] ReadDataWFromLSU, - input logic HPTWReadyfromLSU, + input logic HPTWReadyfromLSU, input logic DataStall ); @@ -124,6 +127,7 @@ module lsuArb assign AtomicMtoLSU = SelPTW ? 2'b00 : AtomicM; assign MemAdrMtoLSU = SelPTW ? HPTWPAdr : MemAdrM; assign WriteDataMtoLSU = SelPTW ? `XLEN'b0 : WriteDataM; + assign StallWtoLSU = SelPTW ? 1'b0 : StallW; // demux the inputs from LSU to walker or cpu's data port. @@ -133,6 +137,10 @@ module lsuArb assign SquashSCW = SelPTW ? 1'b0 : SquashSCWfromLSU; assign DataMisalignedM = SelPTW ? 1'b0 : DataMisalignedMfromLSU; assign HPTWReady = HPTWReadyfromLSU; - assign DCacheStall = DataStall; // *** this is probably going to change. + // *** need to rename DcacheStall and Datastall. + // not clear at all. I think it should be LSUStall from the LSU, + // which is demuxed to HPTWStall and CPUDataStall? (not sure on this last one). + assign HPTWStall = SelPTW ? DataStall : 1'b1; + assign DCacheStall = SelPTW ? 1'b0 : DataStall; // *** this is probably going to change. 
endmodule diff --git a/wally-pipelined/src/mmu/pagetablewalker.sv b/wally-pipelined/src/mmu/pagetablewalker.sv index 21749ec5..eecd46f1 100644 --- a/wally-pipelined/src/mmu/pagetablewalker.sv +++ b/wally-pipelined/src/mmu/pagetablewalker.sv @@ -55,6 +55,7 @@ module pagetablewalker ( // *** modify to send to LSU // *** KMG: These are inputs/results from the ahblite whose addresses should have already been checked, so I don't think they need to be sent through the LSU input logic [`XLEN-1:0] MMUReadPTE, input logic MMUReady, + input logic HPTWStall, // *** modify to send to LSU output logic [`XLEN-1:0] MMUPAdr, @@ -140,14 +141,22 @@ module pagetablewalker ( assign PageTypeF = PageType; assign PageTypeM = PageType; -localparam LEVEL0 = 3'h0; - localparam LEVEL1 = 3'h1; + localparam LEVEL0_WDV = 4'h0; + localparam LEVEL0 = 4'h8; + localparam LEVEL1_WDV = 4'h1; + localparam LEVEL1 = 4'h9; + localparam LEVEL2_WDV = 4'h2; + localparam LEVEL2 = 4'hA; + localparam LEVEL3_WDV = 4'h3; + localparam LEVEL3 = 4'hB; // space left for more levels - localparam LEAF = 3'h5; - localparam IDLE = 3'h6; - localparam FAULT = 3'h7; + localparam LEAF = 4'h5; + localparam IDLE = 4'h6; + localparam FAULT = 4'h7; - logic [2:0] WalkerState, NextWalkerState; + logic [3:0] WalkerState, NextWalkerState; + + logic PRegEn; generate if (`XLEN == 32) begin @@ -155,27 +164,32 @@ localparam LEVEL0 = 3'h0; flopenl #(3) mmureg(clk, reset, 1'b1, NextWalkerState, IDLE, WalkerState); + assign PRegEn = (WalkerState == LEVEL1 || WalkerState == LEVEL0) && ~HPTWStall; + // State transition logic always_comb begin case (WalkerState) - IDLE: if (MMUTranslate) NextWalkerState = LEVEL1; + IDLE: if (MMUTranslate) NextWalkerState = LEVEL1_WDV; else NextWalkerState = IDLE; - LEVEL1: if (~MMUReady) NextWalkerState = LEVEL1; + LEVEL1_WDV: if (HPTWStall) NextWalkerState = LEVEL1_WDV; + else NextWalkerState = LEVEL1; + LEVEL1: // *** According to the architecture, we should // fault upon finding a superpage that is 
misaligned or has 0 // access bit. The following commented line of code is // supposed to perform that check. However, it is untested. - else if (ValidPTE && LeafPTE && ~BadMegapage) NextWalkerState = LEAF; + if (ValidPTE && LeafPTE && ~BadMegapage) NextWalkerState = LEAF; // else if (ValidPTE && LeafPTE) NextWalkerState = LEAF; // *** Once the above line is properly tested, delete this line. - else if (ValidPTE && ~LeafPTE) NextWalkerState = LEVEL0; + else if (ValidPTE && ~LeafPTE) NextWalkerState = LEVEL0_WDV; else NextWalkerState = FAULT; - LEVEL0: if (~MMUReady) NextWalkerState = LEVEL0; - else if (ValidPTE && LeafPTE && ~AccessAlert) + LEVEL0_WDV: if (HPTWStall) NextWalkerState = LEVEL0_WDV; + else NextWalkerState = LEVEL0; + LEVEL0: if (ValidPTE && LeafPTE && ~AccessAlert) NextWalkerState = LEAF; else NextWalkerState = FAULT; - LEAF: if (MMUTranslate) NextWalkerState = LEVEL1; + LEAF: if (MMUTranslate) NextWalkerState = LEVEL1_WDV; else NextWalkerState = IDLE; - FAULT: if (MMUTranslate) NextWalkerState = LEVEL1; + FAULT: if (MMUTranslate) NextWalkerState = LEVEL1_WDV; else NextWalkerState = IDLE; // Default case should never happen, but is included for linter. 
default: NextWalkerState = IDLE; @@ -201,7 +215,7 @@ localparam LEVEL0 = 3'h0; WalkerLoadPageFaultM = '0; WalkerStorePageFaultM = '0; MMUStall = '1; - + case (NextWalkerState) IDLE: begin MMUStall = '0; @@ -209,9 +223,15 @@ localparam LEVEL0 = 3'h0; LEVEL1: begin TranslationPAdr = {BasePageTablePPN, VPN1, 2'b00}; end + LEVEL1_WDV: begin + TranslationPAdr = {BasePageTablePPN, VPN1, 2'b00}; + end LEVEL0: begin TranslationPAdr = {CurrentPPN, VPN0, 2'b00}; end + LEVEL0_WDV: begin + TranslationPAdr = {CurrentPPN, VPN0, 2'b00}; + end LEAF: begin // Keep physical address alive to prevent HADDR dropping to 0 TranslationPAdr = {CurrentPPN, VPN0, 2'b00}; @@ -233,9 +253,16 @@ localparam LEVEL0 = 3'h0; endcase end - // Capture page table entry from ahblite - flopenr #(32) ptereg(clk, reset, MMUReady, MMUReadPTE, SavedPTE); - mux2 #(32) ptemux(SavedPTE, MMUReadPTE, MMUReady, CurrentPTE); + // Capture page table entry from data cache + // *** may need to delay reading this value until the next clock cycle. + // The clk to q latency of the SRAM in the data cache will be long. + // I cannot see directly using this value. This is no different than + // a load delay hazard. This will require rewriting the walker fsm. + // also need a new signal to save. Should be a mealy output of the fsm + // request followed by ~stall. 
+ flopenr #(32) ptereg(clk, reset, PRegEn, MMUReadPTE, SavedPTE); + //mux2 #(32) ptemux(SavedPTE, MMUReadPTE, PRegEn, CurrentPTE); + assign CurrentPTE = SavedPTE; assign CurrentPPN = CurrentPTE[`PPN_BITS+9:10]; // Assign outputs to ahblite @@ -244,61 +271,70 @@ localparam LEVEL0 = 3'h0; assign MMUPAdr = TranslationPAdr[31:0]; end else begin - localparam LEVEL2 = 3'h2; - localparam LEVEL3 = 3'h3; logic [8:0] VPN3, VPN2, VPN1, VPN0; logic TerapageMisaligned, GigapageMisaligned, BadTerapage, BadGigapage; - flopenl #(3) mmureg(clk, reset, 1'b1, NextWalkerState, IDLE, WalkerState); + flopenl #(4) mmureg(clk, reset, 1'b1, NextWalkerState, IDLE, WalkerState); + + assign PRegEn = (WalkerState == LEVEL1 || WalkerState == LEVEL0 || + WalkerState == LEVEL2 || WalkerState == LEVEL3) && ~HPTWStall; always_comb begin case (WalkerState) - IDLE: if (MMUTranslate && SvMode == `SV48) NextWalkerState = LEVEL3; - else if (MMUTranslate && SvMode == `SV39) NextWalkerState = LEVEL2; + IDLE: if (MMUTranslate && SvMode == `SV48) NextWalkerState = LEVEL3_WDV; + else if (MMUTranslate && SvMode == `SV39) NextWalkerState = LEVEL2_WDV; else NextWalkerState = IDLE; - LEVEL3: if (~MMUReady) NextWalkerState = LEVEL3; + LEVEL3_WDV: if (HPTWStall) NextWalkerState = LEVEL3_WDV; + else NextWalkerState = LEVEL3; + LEVEL3: // *** According to the architecture, we should // fault upon finding a superpage that is misaligned or has 0 // access bit. The following commented line of code is // supposed to perform that check. However, it is untested. - else if (ValidPTE && LeafPTE && ~BadTerapage) NextWalkerState = LEAF; + if (ValidPTE && LeafPTE && ~BadTerapage) NextWalkerState = LEAF; // else if (ValidPTE && LeafPTE) NextWalkerState = LEAF; // *** Once the above line is properly tested, delete this line. 
- else if (ValidPTE && ~LeafPTE) NextWalkerState = LEVEL2; + else if (ValidPTE && ~LeafPTE) NextWalkerState = LEVEL2_WDV; else NextWalkerState = FAULT; - LEVEL2: if (~MMUReady) NextWalkerState = LEVEL2; + LEVEL2_WDV: if (HPTWStall) NextWalkerState = LEVEL2_WDV; + else NextWalkerState = LEVEL2; + LEVEL2: // *** According to the architecture, we should // fault upon finding a superpage that is misaligned or has 0 // access bit. The following commented line of code is // supposed to perform that check. However, it is untested. - else if (ValidPTE && LeafPTE && ~BadGigapage) NextWalkerState = LEAF; + if (ValidPTE && LeafPTE && ~BadGigapage) NextWalkerState = LEAF; // else if (ValidPTE && LeafPTE) NextWalkerState = LEAF; // *** Once the above line is properly tested, delete this line. - else if (ValidPTE && ~LeafPTE) NextWalkerState = LEVEL1; + else if (ValidPTE && ~LeafPTE) NextWalkerState = LEVEL1_WDV; else NextWalkerState = FAULT; - LEVEL1: if (~MMUReady) NextWalkerState = LEVEL1; + LEVEL1_WDV: if (HPTWStall) NextWalkerState = LEVEL1_WDV; + else NextWalkerState = LEVEL1; + LEVEL1: // *** According to the architecture, we should // fault upon finding a superpage that is misaligned or has 0 // access bit. The following commented line of code is // supposed to perform that check. However, it is untested. - else if (ValidPTE && LeafPTE && ~BadMegapage) NextWalkerState = LEAF; + if (ValidPTE && LeafPTE && ~BadMegapage) NextWalkerState = LEAF; // else if (ValidPTE && LeafPTE) NextWalkerState = LEAF; // *** Once the above line is properly tested, delete this line. 
- else if (ValidPTE && ~LeafPTE) NextWalkerState = LEVEL0; + else if (ValidPTE && ~LeafPTE) NextWalkerState = LEVEL0_WDV; else NextWalkerState = FAULT; - LEVEL0: if (~MMUReady) NextWalkerState = LEVEL0; - else if (ValidPTE && LeafPTE && ~AccessAlert) NextWalkerState = LEAF; + LEVEL0_WDV: if (HPTWStall) NextWalkerState = LEVEL0_WDV; + else NextWalkerState = LEVEL0; + LEVEL0: + if (ValidPTE && LeafPTE && ~AccessAlert) NextWalkerState = LEAF; else NextWalkerState = FAULT; - LEAF: if (MMUTranslate && SvMode == `SV48) NextWalkerState = LEVEL3; - else if (MMUTranslate && SvMode == `SV39) NextWalkerState = LEVEL2; + LEAF: if (MMUTranslate && SvMode == `SV48) NextWalkerState = LEVEL3_WDV; + else if (MMUTranslate && SvMode == `SV39) NextWalkerState = LEVEL2_WDV; else NextWalkerState = IDLE; - FAULT: if (MMUTranslate && SvMode == `SV48) NextWalkerState = LEVEL3; - else if (MMUTranslate && SvMode == `SV39) NextWalkerState = LEVEL2; + FAULT: if (MMUTranslate && SvMode == `SV48) NextWalkerState = LEVEL3_WDV; + else if (MMUTranslate && SvMode == `SV39) NextWalkerState = LEVEL2_WDV; else NextWalkerState = IDLE; // Default case should never happen, but is included for linter. default: NextWalkerState = IDLE; @@ -346,15 +382,29 @@ localparam LEVEL0 = 3'h0; // *** this is a huge breaking point. if we're going through level3 every time, even when sv48 is off, // what should translationPAdr be when level3 is just off? end + LEVEL3_WDV: begin + TranslationPAdr = {BasePageTablePPN, VPN3, 3'b000}; + // *** this is a huge breaking point. if we're going through level3 every time, even when sv48 is off, + // what should translationPAdr be when level3 is just off? + end LEVEL2: begin TranslationPAdr = {(SvMode == `SV48) ? CurrentPPN : BasePageTablePPN, VPN2, 3'b000}; end + LEVEL2_WDV: begin + TranslationPAdr = {(SvMode == `SV48) ? 
CurrentPPN : BasePageTablePPN, VPN2, 3'b000}; + end LEVEL1: begin TranslationPAdr = {CurrentPPN, VPN1, 3'b000}; end + LEVEL1_WDV: begin + TranslationPAdr = {CurrentPPN, VPN1, 3'b000}; + end LEVEL0: begin TranslationPAdr = {CurrentPPN, VPN0, 3'b000}; end + LEVEL0_WDV: begin + TranslationPAdr = {CurrentPPN, VPN0, 3'b000}; + end LEAF: begin // Keep physical address alive to prevent HADDR dropping to 0 TranslationPAdr = {CurrentPPN, VPN0, 3'b000}; @@ -380,8 +430,9 @@ localparam LEVEL0 = 3'h0; end // Capture page table entry from ahblite - flopenr #(`XLEN) ptereg(clk, reset, MMUReady, MMUReadPTE, SavedPTE); - mux2 #(`XLEN) ptemux(SavedPTE, MMUReadPTE, MMUReady, CurrentPTE); + flopenr #(`XLEN) ptereg(clk, reset, PRegEn, MMUReadPTE, SavedPTE); + //mux2 #(`XLEN) ptemux(SavedPTE, MMUReadPTE, PRegEn, CurrentPTE); + assign CurrentPTE = SavedPTE; assign CurrentPPN = CurrentPTE[`PPN_BITS+9:10]; // Assign outputs to ahblite diff --git a/wally-pipelined/src/mmu/tlb.sv b/wally-pipelined/src/mmu/tlb.sv index 9431fc62..1cf63906 100644 --- a/wally-pipelined/src/mmu/tlb.sv +++ b/wally-pipelined/src/mmu/tlb.sv @@ -136,7 +136,7 @@ module tlb #(parameter ENTRY_BITS = 3, endgenerate // Whether translation should occur - assign Translate = (SvMode != `NO_TRANSLATE) & (PrivilegeModeW != `M_MODE); + assign Translate = (SvMode != `NO_TRANSLATE) & (PrivilegeModeW != `M_MODE) & ~ DisableTranslation; // Determine how the TLB is currently being used // Note that we use ReadAccess for both loads and instruction fetches diff --git a/wally-pipelined/src/wally/wallypipelinedhart.sv b/wally-pipelined/src/wally/wallypipelinedhart.sv index 3985adae..28b8ccde 100644 --- a/wally-pipelined/src/wally/wallypipelinedhart.sv +++ b/wally-pipelined/src/wally/wallypipelinedhart.sv @@ -132,6 +132,7 @@ module wallypipelinedhart logic MMUStall; logic MMUTranslate, MMUReady; logic HPTWReadyfromLSU; + logic HPTWStall; // bus interface to dmem @@ -171,6 +172,9 @@ module wallypipelinedhart logic CommittedMfromLSU; logic 
SquashSCWfromLSU; logic DataMisalignedMfromLSU; + logic StallWtoLSU; + logic StallWfromLSU; + logic [2:0] Funct3MfromLSU; @@ -199,11 +203,13 @@ module wallypipelinedhart .HPTWPAdr(MMUPAdr), .HPTWReadPTE(MMUReadPTE), .HPTWReady(MMUReady), + .HPTWStall(HPTWStall), // CPU connection .MemRWM(MemRWM|FMemRWM), .Funct3M(Funct3M), .AtomicM(AtomicM), .MemAdrM(MemAdrM), + .StallW(StallW), .WriteDataM(WriteDatatmpM), .ReadDataW(ReadDataW), .CommittedM(CommittedM), @@ -216,7 +222,8 @@ module wallypipelinedhart .Funct3MtoLSU(Funct3MtoLSU), .AtomicMtoLSU(AtomicMtoLSU), .MemAdrMtoLSU(MemAdrMtoLSU), - .WriteDataMtoLSU(WriteDataMtoLSU), + .WriteDataMtoLSU(WriteDataMtoLSU), + .StallWtoLSU(StallWtoLSU), .CommittedMfromLSU(CommittedMfromLSU), .SquashSCWfromLSU(SquashSCWfromLSU), .DataMisalignedMfromLSU(DataMisalignedMfromLSU), @@ -232,6 +239,7 @@ module wallypipelinedhart .MemAdrM(MemAdrMtoLSU), .WriteDataM(WriteDataMtoLSU), .ReadDataW(ReadDataWFromLSU), + .StallW(StallWtoLSU), .CommittedM(CommittedMfromLSU), .SquashSCW(SquashSCWfromLSU), @@ -239,16 +247,19 @@ module wallypipelinedhart .DisableTranslation(DisableTranslation), .DataStall(DataStall), - .HPTWReady(HPTWReadyfromLSU), + .HPTWReady(HPTWReadyfromLSU), + .Funct3MfromLSU(Funct3MfromLSU), + .StallWfromLSU(StallWfromLSU), .* ); // data cache unit ahblite ebu( //.InstrReadF(1'b0), //.InstrRData(InstrF), // hook up InstrF later .WriteDataM(WriteDatatmpM), - .MemSizeM(Funct3M[1:0]), .UnsignedLoadM(Funct3M[2]), + .MemSizeM(Funct3MfromLSU[1:0]), .UnsignedLoadM(Funct3MfromLSU[2]), .Funct7M(InstrM[31:25]), .HRDATAW(HRDATAW), + .StallW(StallWfromLSU), .*); From dd84f2958e1c32b8e0a2cef380bcf4be95a00c72 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Tue, 29 Jun 2021 22:33:57 -0500 Subject: [PATCH 14/38] Page table walker now walks the table. Added interlock so the icache stalls. Page table walker not walking correctly, goes to fault state. 
--- wally-pipelined/regression/wave.do | 181 +++++++++++------- wally-pipelined/src/cache/ICacheCntrl.sv | 21 +- wally-pipelined/src/cache/icache.sv | 21 +- wally-pipelined/src/ebu/ahblite.sv | 5 +- wally-pipelined/src/lsu/lsuArb.sv | 8 +- wally-pipelined/src/mmu/pagetablewalker.sv | 22 +-- .../src/wally/wallypipelinedhart.sv | 1 + 7 files changed, 158 insertions(+), 101 deletions(-) diff --git a/wally-pipelined/regression/wave.do b/wally-pipelined/regression/wave.do index a051f071..78d6f1a6 100644 --- a/wally-pipelined/regression/wave.do +++ b/wally-pipelined/regression/wave.do @@ -33,11 +33,11 @@ add wave -noupdate -expand -group HDU -group Flush -color Yellow /testbench/dut/ add wave -noupdate -expand -group HDU -group Flush -color Yellow /testbench/dut/hart/FlushE add wave -noupdate -expand -group HDU -group Flush -color Yellow /testbench/dut/hart/FlushM add wave -noupdate -expand -group HDU -group Flush -color Yellow /testbench/dut/hart/FlushW -add wave -noupdate -expand -group HDU -group Stall -color Orange /testbench/dut/hart/StallF -add wave -noupdate -expand -group HDU -group Stall -color Orange /testbench/dut/hart/StallD -add wave -noupdate -expand -group HDU -group Stall -color Orange /testbench/dut/hart/StallE -add wave -noupdate -expand -group HDU -group Stall -color Orange /testbench/dut/hart/StallM -add wave -noupdate -expand -group HDU -group Stall -color Orange /testbench/dut/hart/StallW +add wave -noupdate -expand -group HDU -expand -group Stall -color Orange /testbench/dut/hart/StallF +add wave -noupdate -expand -group HDU -expand -group Stall -color Orange /testbench/dut/hart/StallD +add wave -noupdate -expand -group HDU -expand -group Stall -color Orange /testbench/dut/hart/StallE +add wave -noupdate -expand -group HDU -expand -group Stall -color Orange /testbench/dut/hart/StallM +add wave -noupdate -expand -group HDU -expand -group Stall -color Orange /testbench/dut/hart/StallW add wave -noupdate -group Bpred -color Orange 
/testbench/dut/hart/ifu/bpred/bpred/Predictor/DirPredictor/GHR add wave -noupdate -group Bpred -expand -group {branch update selection inputs} /testbench/dut/hart/ifu/bpred/bpred/Predictor/DirPredictor/BPPredF add wave -noupdate -group Bpred -expand -group {branch update selection inputs} {/testbench/dut/hart/ifu/bpred/bpred/Predictor/DirPredictor/InstrClassE[0]} @@ -89,6 +89,7 @@ add wave -noupdate -expand -group {instruction pipeline} /testbench/InstrFName add wave -noupdate -expand -group {instruction pipeline} /testbench/dut/hart/ifu/InstrD add wave -noupdate -expand -group {instruction pipeline} /testbench/dut/hart/ifu/InstrE add wave -noupdate -expand -group {instruction pipeline} /testbench/dut/hart/ifu/InstrM +add wave -noupdate -expand -group {instruction pipeline} /testbench/InstrW add wave -noupdate -group {PCNext Generation} /testbench/dut/hart/ifu/PCNextF add wave -noupdate -group {PCNext Generation} /testbench/dut/hart/ifu/PCF add wave -noupdate -group {PCNext Generation} /testbench/dut/hart/ifu/PCPlus2or4F @@ -104,7 +105,7 @@ add wave -noupdate -group {Decode Stage} /testbench/dut/hart/ieu/c/RegWriteD add wave -noupdate -group {Decode Stage} /testbench/dut/hart/ieu/dp/RdD add wave -noupdate -group {Decode Stage} /testbench/dut/hart/ieu/dp/Rs1D add wave -noupdate -group {Decode Stage} /testbench/dut/hart/ieu/dp/Rs2D -add wave -noupdate -group RegFile /testbench/dut/hart/ieu/dp/regf/rf +add wave -noupdate -group RegFile -expand /testbench/dut/hart/ieu/dp/regf/rf add wave -noupdate -group RegFile /testbench/dut/hart/ieu/dp/regf/a1 add wave -noupdate -group RegFile /testbench/dut/hart/ieu/dp/regf/a2 add wave -noupdate -group RegFile /testbench/dut/hart/ieu/dp/regf/a3 @@ -117,18 +118,18 @@ add wave -noupdate -group RegFile -group {write regfile mux} /testbench/dut/hart add wave -noupdate -group RegFile -group {write regfile mux} /testbench/dut/hart/ieu/dp/CSRReadValW add wave -noupdate -group RegFile -group {write regfile mux} 
/testbench/dut/hart/ieu/dp/ResultSrcW add wave -noupdate -group RegFile -group {write regfile mux} /testbench/dut/hart/ieu/dp/ResultW -add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/a -add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/b -add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/alucontrol -add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/result -add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/flags -add wave -noupdate -expand -group alu -divider internals -add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/overflow -add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/carry -add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/zero -add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/neg -add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/lt -add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/ltu +add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/a +add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/b +add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/alucontrol +add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/result +add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/flags +add wave -noupdate -group alu -divider internals +add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/overflow +add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/carry +add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/zero +add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/neg +add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/lt +add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/ltu add wave -noupdate -group dcache -expand -group {cpu request} /testbench/dut/hart/MemAdrM add wave -noupdate -group dcache -expand -group {cpu request} /testbench/dut/hart/WriteDataM add 
wave -noupdate -group dcache /testbench/dut/hart/MemPAdrM @@ -174,41 +175,45 @@ add wave -noupdate -group divider /testbench/dut/hart/mdu/genblk1/div/N add wave -noupdate -group divider /testbench/dut/hart/mdu/genblk1/div/D add wave -noupdate -group divider /testbench/dut/hart/mdu/genblk1/div/Q add wave -noupdate -group divider /testbench/dut/hart/mdu/genblk1/div/rem0 -add wave -noupdate -group icache -color Orange /testbench/dut/hart/ifu/icache/controller/CurrState -add wave -noupdate -group icache /testbench/dut/hart/ifu/icache/controller/NextState -add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/hit -add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/spill -add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/ICacheStallF -add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/SavePC -add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/spillSave -add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/UnalignedSelect -add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/PCMux -add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/spillSave -add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/CntReset -add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/PreCntEn -add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/CntEn -add wave -noupdate -group icache -expand -group {icache parameters} -radix unsigned 
/testbench/dut/hart/ifu/icache/cachemem/NUMLINES -add wave -noupdate -group icache -expand -group {icache parameters} -radix unsigned /testbench/dut/hart/ifu/icache/cachemem/BLOCKLEN -add wave -noupdate -group icache -expand -group {icache parameters} -radix unsigned /testbench/dut/hart/ifu/icache/cachemem/BLOCKBYTELEN -add wave -noupdate -group icache -expand -group {icache parameters} -radix unsigned /testbench/dut/hart/ifu/icache/cachemem/OFFSETLEN -add wave -noupdate -group icache -expand -group {icache parameters} -radix unsigned /testbench/dut/hart/ifu/icache/cachemem/INDEXLEN -add wave -noupdate -group icache -expand -group {icache parameters} -radix unsigned /testbench/dut/hart/ifu/icache/cachemem/TAGLEN -add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/FetchCountFlag -add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/FetchCount -add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/InstrPAdrF -add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/InstrReadF -add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/InstrAckF -add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/InstrInF -add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/ICacheMemWriteEnable -add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/ICacheMemWriteData -add wave -noupdate -group icache -expand -group memory -group {tag read} /testbench/dut/hart/ifu/icache/cachemem/DataValidBit -add wave -noupdate -group icache -expand -group memory -group {tag read} /testbench/dut/hart/ifu/icache/cachemem/cachetags/ReadData -add wave -noupdate -group icache -expand -group memory -group {tag write} /testbench/dut/hart/ifu/icache/cachemem/WriteEnable 
-add wave -noupdate -group icache -expand -group memory -group {tag write} /testbench/dut/hart/ifu/icache/cachemem/WriteLine -add wave -noupdate -group icache -expand -group memory -group {tag write} /testbench/dut/hart/ifu/icache/cachemem/cachetags/StoredData -add wave -noupdate -group icache -expand -group {instr to cpu} /testbench/dut/hart/ifu/icache/controller/FinalInstrRawF -add wave -noupdate -group icache -expand -group pc /testbench/dut/hart/ifu/icache/controller/PCPF -add wave -noupdate -group icache -expand -group pc /testbench/dut/hart/ifu/icache/controller/PCPreFinalF +add wave -noupdate -expand -group icache -color Orange /testbench/dut/hart/ifu/icache/controller/CurrState +add wave -noupdate -expand -group icache /testbench/dut/hart/ifu/icache/controller/NextState +add wave -noupdate -expand -group icache /testbench/dut/hart/ifu/ITLBMissF +add wave -noupdate -expand -group icache -group {tag read} /testbench/dut/hart/ifu/icache/cachemem/DataValidBit +add wave -noupdate -expand -group icache -group {tag read} /testbench/dut/hart/ifu/icache/cachemem/cachetags/ReadData +add wave -noupdate -expand -group icache -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/hit +add wave -noupdate -expand -group icache -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/spill +add wave -noupdate -expand -group icache -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/ICacheStallF +add wave -noupdate -expand -group icache -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/SavePC +add wave -noupdate -expand -group icache -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/spillSave +add wave -noupdate -expand -group icache -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/UnalignedSelect +add wave -noupdate -expand -group icache -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/PCMux +add wave -noupdate -expand -group icache -group 
{fsm out and control} /testbench/dut/hart/ifu/icache/controller/spillSave +add wave -noupdate -expand -group icache -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/CntReset +add wave -noupdate -expand -group icache -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/PreCntEn +add wave -noupdate -expand -group icache -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/CntEn +add wave -noupdate -expand -group icache -group {icache parameters} -radix unsigned /testbench/dut/hart/ifu/icache/cachemem/NUMLINES +add wave -noupdate -expand -group icache -group {icache parameters} -radix unsigned /testbench/dut/hart/ifu/icache/cachemem/BLOCKLEN +add wave -noupdate -expand -group icache -group {icache parameters} -radix unsigned /testbench/dut/hart/ifu/icache/cachemem/BLOCKBYTELEN +add wave -noupdate -expand -group icache -group {icache parameters} -radix unsigned /testbench/dut/hart/ifu/icache/cachemem/OFFSETLEN +add wave -noupdate -expand -group icache -group {icache parameters} -radix unsigned /testbench/dut/hart/ifu/icache/cachemem/INDEXLEN +add wave -noupdate -expand -group icache -group {icache parameters} -radix unsigned /testbench/dut/hart/ifu/icache/cachemem/TAGLEN +add wave -noupdate -expand -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/FetchCountFlag +add wave -noupdate -expand -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/FetchCount +add wave -noupdate -expand -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/InstrPAdrF +add wave -noupdate -expand -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/InstrReadF +add wave -noupdate -expand -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/InstrAckF +add wave -noupdate -expand -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/InstrInF +add wave -noupdate -expand -group icache -expand 
-group memory /testbench/dut/hart/ifu/icache/controller/ICacheMemWriteEnable +add wave -noupdate -expand -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/ICacheMemWriteData +add wave -noupdate -expand -group icache -expand -group memory -group {tag write} /testbench/dut/hart/ifu/icache/cachemem/WriteEnable +add wave -noupdate -expand -group icache -expand -group memory -group {tag write} /testbench/dut/hart/ifu/icache/cachemem/WriteLine +add wave -noupdate -expand -group icache -expand -group memory -group {tag write} /testbench/dut/hart/ifu/icache/cachemem/cachetags/StoredData +add wave -noupdate -expand -group icache -expand -group {instr to cpu} /testbench/dut/hart/ifu/icache/controller/FinalInstrRawF +add wave -noupdate -expand -group icache -expand -group pc /testbench/dut/hart/ifu/icache/controller/PCPF +add wave -noupdate -expand -group icache -expand -group pc /testbench/dut/hart/ifu/icache/controller/PCPreFinalF +add wave -noupdate -group AHB -expand -group read /testbench/dut/hart/ebu/HRDATA +add wave -noupdate -group AHB -expand -group read /testbench/dut/hart/ebu/HRDATAMasked +add wave -noupdate -group AHB -expand -group read /testbench/dut/hart/ebu/HRDATANext add wave -noupdate -group AHB /testbench/dut/hart/ebu/BusState add wave -noupdate -group AHB /testbench/dut/hart/ebu/ProposedNextBusState add wave -noupdate -group AHB /testbench/dut/hart/ebu/NextBusState @@ -237,15 +242,17 @@ add wave -noupdate -group AHB /testbench/dut/hart/ebu/HADDRD add wave -noupdate -group AHB /testbench/dut/hart/ebu/HSIZED add wave -noupdate -group AHB /testbench/dut/hart/ebu/HWRITED add wave -noupdate -group AHB /testbench/dut/hart/ebu/StallW -add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/CurrState -add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/DataStall -add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/MemAdrM -add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/MemPAdrM -add wave 
-noupdate -expand -group lsu /testbench/dut/hart/lsu/ReadDataW -add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/WriteDataM -add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/AtomicMaskedM -add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/DSquashBusAccessM -add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/MemAckW +add wave -noupdate -group lsu /testbench/dut/hart/lsu/CurrState +add wave -noupdate -group lsu /testbench/dut/hart/lsu/DataStall +add wave -noupdate -group lsu /testbench/dut/hart/lsu/MemAdrM +add wave -noupdate -group lsu /testbench/dut/hart/lsu/MemPAdrM +add wave -noupdate -group lsu /testbench/dut/hart/lsu/ReadDataW +add wave -noupdate -group lsu /testbench/dut/hart/lsu/WriteDataM +add wave -noupdate -group lsu /testbench/dut/hart/lsu/AtomicMaskedM +add wave -noupdate -group lsu /testbench/dut/hart/lsu/DSquashBusAccessM +add wave -noupdate -group lsu /testbench/dut/hart/lsu/HRDATAW +add wave -noupdate -group lsu /testbench/dut/hart/lsu/MemAckW +add wave -noupdate -group lsu /testbench/dut/hart/lsu/StallW add wave -noupdate -group plic /testbench/dut/uncore/genblk2/plic/HCLK add wave -noupdate -group plic /testbench/dut/uncore/genblk2/plic/HSELPLIC add wave -noupdate -group plic /testbench/dut/uncore/genblk2/plic/HADDR @@ -297,16 +304,48 @@ add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/M add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/CurrentPTE add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/ValidPTE add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/LeafPTE -add wave -noupdate -expand -group {LSU ARB} /testbench/dut/hart/arbiter/HPTWTranslate -add wave -noupdate -expand -group {LSU ARB} /testbench/dut/hart/arbiter/HPTWPAdr -add wave -noupdate -expand -group {LSU ARB} /testbench/dut/hart/arbiter/HPTWReadPTE -add wave -noupdate -expand -group {LSU ARB} 
/testbench/dut/hart/arbiter/HPTWReady -add wave -noupdate -expand -group {LSU ARB} -expand -group toLSU /testbench/dut/hart/arbiter/MemAdrMtoLSU -add wave -noupdate -expand -group {LSU ARB} /testbench/dut/hart/arbiter/SelPTW +add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/MMUStall +add wave -noupdate -group {LSU ARB} /testbench/dut/hart/arbiter/HPTWTranslate +add wave -noupdate -group {LSU ARB} /testbench/dut/hart/arbiter/HPTWPAdr +add wave -noupdate -group {LSU ARB} /testbench/dut/hart/arbiter/HPTWReadPTE +add wave -noupdate -group {LSU ARB} /testbench/dut/hart/arbiter/HPTWReady +add wave -noupdate -group {LSU ARB} -expand -group toLSU /testbench/dut/hart/arbiter/MemAdrMtoLSU +add wave -noupdate -group {LSU ARB} /testbench/dut/hart/arbiter/SelPTW add wave -noupdate /testbench/dut/hart/lsu/DataStall +add wave -noupdate -expand -group csr /testbench/dut/hart/priv/csr/MIP_REGW +add wave -noupdate /testbench/dut/uncore/genblk2/plic/ExtIntM +add wave -noupdate -group uart /testbench/dut/uncore/genblk4/uart/HCLK +add wave -noupdate -group uart /testbench/dut/uncore/genblk4/uart/HRESETn +add wave -noupdate -group uart /testbench/dut/uncore/genblk4/uart/HSELUART +add wave -noupdate -group uart /testbench/dut/uncore/genblk4/uart/HADDR +add wave -noupdate -group uart /testbench/dut/uncore/genblk4/uart/HWRITE +add wave -noupdate -group uart /testbench/dut/uncore/genblk4/uart/HWDATA +add wave -noupdate -group uart /testbench/dut/uncore/genblk4/uart/HREADUART +add wave -noupdate -group uart /testbench/dut/uncore/genblk4/uart/HRESPUART +add wave -noupdate -group uart /testbench/dut/uncore/genblk4/uart/HREADYUART +add wave -noupdate -group uart /testbench/dut/uncore/genblk4/uart/SIN +add wave -noupdate -group uart /testbench/dut/uncore/genblk4/uart/DSRb +add wave -noupdate -group uart /testbench/dut/uncore/genblk4/uart/DCDb +add wave -noupdate -group uart /testbench/dut/uncore/genblk4/uart/CTSb +add wave -noupdate -group uart 
/testbench/dut/uncore/genblk4/uart/RIb +add wave -noupdate -group uart -expand -group outputs /testbench/dut/uncore/genblk4/uart/SOUT +add wave -noupdate -group uart -expand -group outputs /testbench/dut/uncore/genblk4/uart/RTSb +add wave -noupdate -group uart -expand -group outputs /testbench/dut/uncore/genblk4/uart/DTRb +add wave -noupdate -group uart -expand -group outputs /testbench/dut/uncore/genblk4/uart/OUT1b +add wave -noupdate -group uart -expand -group outputs /testbench/dut/uncore/genblk4/uart/OUT2b +add wave -noupdate -group uart -expand -group outputs /testbench/dut/uncore/genblk4/uart/INTR +add wave -noupdate -group uart -expand -group outputs /testbench/dut/uncore/genblk4/uart/TXRDYb +add wave -noupdate -group uart -expand -group outputs /testbench/dut/uncore/genblk4/uart/RXRDYb +add wave -noupdate /testbench/dut/uncore/genblk2/plic/pendingPGrouped +add wave -noupdate /testbench/dut/uncore/genblk2/plic/intPending +add wave -noupdate /testbench/dut/uncore/genblk2/plic/nextIntPending +add wave -noupdate /testbench/dut/uncore/genblk2/plic/requests +add wave -noupdate /testbench/dut/uncore/genblk2/plic/GPIOIntr +add wave -noupdate /testbench/dut/uncore/genblk2/plic/UARTIntr +add wave -noupdate /testbench/dut/uncore/genblk4/uart/u/intrpending TreeUpdate [SetDefaultTree] -WaveRestoreCursors {{Cursor 4} {32648010 ns} 0} {{Cursor 5} {11172098 ns} 0} {{Cursor 3} {7672141 ns} 0} -quietly wave cursor active 2 +WaveRestoreCursors {{Cursor 5} {9729816 ns} 0} {{Cursor 6} {7857655 ns} 0} {{Cursor 7} {7869135 ns} 1} {{Cursor 8} {7868621 ns} 0} {{Cursor 9} {7868621 ns} 0} {{Cursor 10} {7865190 ns} 0} {{Cursor 11} {7867237 ns} 0} +quietly wave cursor active 1 configure wave -namecolwidth 250 configure wave -valuecolwidth 189 configure wave -justifyvalue left @@ -321,4 +360,4 @@ configure wave -griddelta 40 configure wave -timeline 0 configure wave -timelineunits ns update -WaveRestoreZoom {11171939 ns} {11172253 ns} +WaveRestoreZoom {9729788 ns} {9730412 ns} diff 
--git a/wally-pipelined/src/cache/ICacheCntrl.sv b/wally-pipelined/src/cache/ICacheCntrl.sv index f290f0ad..78bdb46a 100644 --- a/wally-pipelined/src/cache/ICacheCntrl.sv +++ b/wally-pipelined/src/cache/ICacheCntrl.sv @@ -40,8 +40,8 @@ module ICacheCntrl #(parameter BLOCKLEN = 256) ( input logic [31:0] ICacheMemReadData, input logic ICacheMemReadValid, // The address at which we want to search the cache memory - output logic [`PA_BITS-1:0] PCTagF, - output logic [`PA_BITS-1:0] PCNextIndexF, + output logic [`PA_BITS-1:0] PCTagF, + output logic [`PA_BITS-1:0] PCNextIndexF, output logic ICacheReadEn, // Load data into the cache output logic ICacheMemWriteEnable, @@ -56,13 +56,15 @@ module ICacheCntrl #(parameter BLOCKLEN = 256) ( // Outputs to pipeline control stuff output logic ICacheStallF, EndFetchState, + input logic ITLBMissF, + input logic ITLBWriteF, // Signals to/from ahblite interface // A read containing the requested data input logic [`XLEN-1:0] InstrInF, input logic InstrAckF, // The read we request from main memory - output logic [`PA_BITS-1:0] InstrPAdrF, + output logic [`PA_BITS-1:0] InstrPAdrF, output logic InstrReadF ); @@ -109,6 +111,8 @@ module ICacheCntrl #(parameter BLOCKLEN = 256) ( localparam STATE_INVALIDATE = 18; // *** not sure if invalidate or evict? invalidate by cache block or address? 
+ localparam STATE_TLB_MISS = 19; + localparam AHBByteLength = `XLEN / 8; localparam AHBOFFETWIDTH = $clog2(AHBByteLength); @@ -209,7 +213,9 @@ module ICacheCntrl #(parameter BLOCKLEN = 256) ( STATE_READY: begin PCMux = 2'b00; ICacheReadEn = 1'b1; - if (hit & ~spill) begin + if (ITLBMissF) begin + NextState = STATE_TLB_MISS; + end else if (hit & ~spill) begin SavePC = 1'b1; ICacheStallF = 1'b0; NextState = STATE_READY; @@ -363,6 +369,13 @@ module ICacheCntrl #(parameter BLOCKLEN = 256) ( ICacheStallF = 1'b0; NextState = STATE_READY; end + STATE_TLB_MISS: begin + if (ITLBWriteF) begin + NextState = STATE_READY; + end else begin + NextState = STATE_TLB_MISS; + end + end default: begin PCMux = 2'b01; NextState = STATE_READY; diff --git a/wally-pipelined/src/cache/icache.sv b/wally-pipelined/src/cache/icache.sv index abf828fc..89b2ff9e 100644 --- a/wally-pipelined/src/cache/icache.sv +++ b/wally-pipelined/src/cache/icache.sv @@ -28,24 +28,27 @@ module icache ( // Basic pipeline stuff - input logic clk, reset, - input logic StallF, StallD, - input logic FlushD, + input logic clk, reset, + input logic StallF, StallD, + input logic FlushD, input logic [`PA_BITS-1:0] PCNextF, input logic [`PA_BITS-1:0] PCPF, // Data read in from the ebu unit - input logic [`XLEN-1:0] InstrInF, - input logic InstrAckF, + input logic [`XLEN-1:0] InstrInF, + input logic InstrAckF, // Read requested from the ebu unit output logic [`PA_BITS-1:0] InstrPAdrF, - output logic InstrReadF, + output logic InstrReadF, // High if the instruction currently in the fetch stage is compressed - output logic CompressedF, + output logic CompressedF, // High if the icache is requesting a stall - output logic ICacheStallF, + output logic ICacheStallF, + input logic ITLBMissF, + input logic ITLBWriteF, + // The raw (not decompressed) instruction that was requested // If this instruction is compressed, upper 16 bits may be the next 16 bits or may be zeros - output logic [31:0] FinalInstrRawF + output logic [31:0] 
FinalInstrRawF ); // Configuration parameters diff --git a/wally-pipelined/src/ebu/ahblite.sv b/wally-pipelined/src/ebu/ahblite.sv index 39cc70d2..735590e1 100644 --- a/wally-pipelined/src/ebu/ahblite.sv +++ b/wally-pipelined/src/ebu/ahblite.sv @@ -51,6 +51,7 @@ module ahblite ( input logic MemReadM, MemWriteM, input logic [`XLEN-1:0] WriteDataM, input logic [1:0] MemSizeM, + //output logic DataStall, // Signals from MMU /* -----\/----- EXCLUDED -----\/----- input logic MMUStall, @@ -158,9 +159,9 @@ module ahblite ( // *** Ross Thompson remove this datastall /* -----\/----- EXCLUDED -----\/----- assign #2 DataStall = ((NextBusState == MEMREAD) || (NextBusState == MEMWRITE) || - (NextBusState == ATOMICREAD) || (NextBusState == ATOMICWRITE) || - MMUStall); + (NextBusState == ATOMICREAD) || (NextBusState == ATOMICWRITE)); -----/\----- EXCLUDED -----/\----- */ + //assign #1 InstrStall = ((NextBusState == INSTRREAD) || (NextBusState == INSTRREADC) || // MMUStall); diff --git a/wally-pipelined/src/lsu/lsuArb.sv b/wally-pipelined/src/lsu/lsuArb.sv index d7e62782..0d26af4c 100644 --- a/wally-pipelined/src/lsu/lsuArb.sv +++ b/wally-pipelined/src/lsu/lsuArb.sv @@ -86,6 +86,7 @@ module lsuArb logic [1:0] CurrState, NextState; logic SelPTW; + logic HPTWStallD; flopr #(2) StateReg( @@ -140,7 +141,12 @@ module lsuArb // *** need to rename DcacheStall and Datastall. // not clear at all. I think it should be LSUStall from the LSU, // which is demuxed to HPTWStall and CPUDataStall? (not sure on this last one). - assign HPTWStall = SelPTW ? DataStall : 1'b1; + assign HPTWStallD = SelPTW ? DataStall : 1'b1; + flopr #(1) HPTWStallReg (.clk(clk), + .reset(reset), + .d(HPTWStallD), + .q(HPTWStall)); + assign DCacheStall = SelPTW ? 1'b0 : DataStall; // *** this is probably going to change. 
endmodule diff --git a/wally-pipelined/src/mmu/pagetablewalker.sv b/wally-pipelined/src/mmu/pagetablewalker.sv index eecd46f1..5bc15df3 100644 --- a/wally-pipelined/src/mmu/pagetablewalker.sv +++ b/wally-pipelined/src/mmu/pagetablewalker.sv @@ -164,7 +164,7 @@ module pagetablewalker ( flopenl #(3) mmureg(clk, reset, 1'b1, NextWalkerState, IDLE, WalkerState); - assign PRegEn = (WalkerState == LEVEL1 || WalkerState == LEVEL0) && ~HPTWStall; + assign PRegEn = (WalkerState == LEVEL1_WDV || WalkerState == LEVEL0_WDV) && ~HPTWStall; // State transition logic always_comb begin @@ -184,13 +184,11 @@ module pagetablewalker ( else NextWalkerState = FAULT; LEVEL0_WDV: if (HPTWStall) NextWalkerState = LEVEL0_WDV; else NextWalkerState = LEVEL0; - LEVEL0: if (ValidPTE && LeafPTE && ~AccessAlert) + LEVEL0: if (ValidPTE & LeafPTE & ~AccessAlert) NextWalkerState = LEAF; else NextWalkerState = FAULT; - LEAF: if (MMUTranslate) NextWalkerState = LEVEL1_WDV; - else NextWalkerState = IDLE; - FAULT: if (MMUTranslate) NextWalkerState = LEVEL1_WDV; - else NextWalkerState = IDLE; + LEAF: NextWalkerState = IDLE; + FAULT: NextWalkerState = IDLE; // Default case should never happen, but is included for linter. 
default: NextWalkerState = IDLE; endcase @@ -278,8 +276,8 @@ module pagetablewalker ( flopenl #(4) mmureg(clk, reset, 1'b1, NextWalkerState, IDLE, WalkerState); - assign PRegEn = (WalkerState == LEVEL1 || WalkerState == LEVEL0 || - WalkerState == LEVEL2 || WalkerState == LEVEL3) && ~HPTWStall; + assign PRegEn = (WalkerState == LEVEL1_WDV || WalkerState == LEVEL0_WDV || + WalkerState == LEVEL2_WDV || WalkerState == LEVEL3_WDV) && ~HPTWStall; always_comb begin case (WalkerState) @@ -329,13 +327,9 @@ module pagetablewalker ( if (ValidPTE && LeafPTE && ~AccessAlert) NextWalkerState = LEAF; else NextWalkerState = FAULT; - LEAF: if (MMUTranslate && SvMode == `SV48) NextWalkerState = LEVEL3_WDV; - else if (MMUTranslate && SvMode == `SV39) NextWalkerState = LEVEL2_WDV; - else NextWalkerState = IDLE; + LEAF: NextWalkerState = IDLE; - FAULT: if (MMUTranslate && SvMode == `SV48) NextWalkerState = LEVEL3_WDV; - else if (MMUTranslate && SvMode == `SV39) NextWalkerState = LEVEL2_WDV; - else NextWalkerState = IDLE; + FAULT: NextWalkerState = IDLE; // Default case should never happen, but is included for linter. default: NextWalkerState = IDLE; endcase diff --git a/wally-pipelined/src/wally/wallypipelinedhart.sv b/wally-pipelined/src/wally/wallypipelinedhart.sv index 28b8ccde..e23fd511 100644 --- a/wally-pipelined/src/wally/wallypipelinedhart.sv +++ b/wally-pipelined/src/wally/wallypipelinedhart.sv @@ -250,6 +250,7 @@ module wallypipelinedhart .HPTWReady(HPTWReadyfromLSU), .Funct3MfromLSU(Funct3MfromLSU), .StallWfromLSU(StallWfromLSU), +// .DataStall(LSUStall), .* ); // data cache unit ahblite ebu( From b2d8ba67421226b66d8ad8e91a1f7ff3e62897a0 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Wed, 30 Jun 2021 11:24:26 -0500 Subject: [PATCH 15/38] The icache now correctly interlocks with the PTW on TLB miss. 
--- wally-pipelined/regression/wave.do | 195 +++++++++--------- wally-pipelined/src/lsu/lsuArb.sv | 16 +- wally-pipelined/src/mmu/pagetablewalker.sv | 38 ++-- .../src/wally/wallypipelinedhart.sv | 5 +- 4 files changed, 132 insertions(+), 122 deletions(-) diff --git a/wally-pipelined/regression/wave.do b/wally-pipelined/regression/wave.do index 78d6f1a6..20a21864 100644 --- a/wally-pipelined/regression/wave.do +++ b/wally-pipelined/regression/wave.do @@ -7,37 +7,37 @@ add wave -noupdate -expand -group {Execution Stage} /testbench/FunctionName/Func add wave -noupdate -expand -group {Execution Stage} /testbench/dut/hart/ifu/PCE add wave -noupdate -expand -group {Execution Stage} /testbench/InstrEName add wave -noupdate -expand -group {Execution Stage} /testbench/dut/hart/ifu/InstrE -add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/InstrMisalignedFaultM -add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/InstrAccessFaultM -add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/IllegalInstrFaultM -add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/BreakpointFaultM -add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/LoadMisalignedFaultM -add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/StoreMisalignedFaultM -add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/LoadAccessFaultM -add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/StoreAccessFaultM -add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/EcallFaultM -add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/InstrPageFaultM -add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/LoadPageFaultM -add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/StorePageFaultM -add wave 
-noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/InterruptM -add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/hzu/BPPredWrongE -add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/hzu/CSRWritePendingDEM -add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/hzu/RetM -add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/hzu/TrapM -add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/hzu/LoadStallD -add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/hzu/ICacheStallF -add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/DataStall -add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/MulDivStallD -add wave -noupdate -expand -group HDU -group Flush -color Yellow /testbench/dut/hart/hzu/FlushF -add wave -noupdate -expand -group HDU -group Flush -color Yellow /testbench/dut/hart/FlushD -add wave -noupdate -expand -group HDU -group Flush -color Yellow /testbench/dut/hart/FlushE -add wave -noupdate -expand -group HDU -group Flush -color Yellow /testbench/dut/hart/FlushM -add wave -noupdate -expand -group HDU -group Flush -color Yellow /testbench/dut/hart/FlushW -add wave -noupdate -expand -group HDU -expand -group Stall -color Orange /testbench/dut/hart/StallF -add wave -noupdate -expand -group HDU -expand -group Stall -color Orange /testbench/dut/hart/StallD -add wave -noupdate -expand -group HDU -expand -group Stall -color Orange /testbench/dut/hart/StallE -add wave -noupdate -expand -group HDU -expand -group Stall -color Orange /testbench/dut/hart/StallM -add wave -noupdate -expand -group HDU -expand -group Stall -color Orange /testbench/dut/hart/StallW +add wave -noupdate -group HDU -group traps /testbench/dut/hart/priv/trap/InstrMisalignedFaultM +add wave -noupdate -group HDU -group traps 
/testbench/dut/hart/priv/trap/InstrAccessFaultM +add wave -noupdate -group HDU -group traps /testbench/dut/hart/priv/trap/IllegalInstrFaultM +add wave -noupdate -group HDU -group traps /testbench/dut/hart/priv/trap/BreakpointFaultM +add wave -noupdate -group HDU -group traps /testbench/dut/hart/priv/trap/LoadMisalignedFaultM +add wave -noupdate -group HDU -group traps /testbench/dut/hart/priv/trap/StoreMisalignedFaultM +add wave -noupdate -group HDU -group traps /testbench/dut/hart/priv/trap/LoadAccessFaultM +add wave -noupdate -group HDU -group traps /testbench/dut/hart/priv/trap/StoreAccessFaultM +add wave -noupdate -group HDU -group traps /testbench/dut/hart/priv/trap/EcallFaultM +add wave -noupdate -group HDU -group traps /testbench/dut/hart/priv/trap/InstrPageFaultM +add wave -noupdate -group HDU -group traps /testbench/dut/hart/priv/trap/LoadPageFaultM +add wave -noupdate -group HDU -group traps /testbench/dut/hart/priv/trap/StorePageFaultM +add wave -noupdate -group HDU -group traps /testbench/dut/hart/priv/trap/InterruptM +add wave -noupdate -group HDU -expand -group hazards /testbench/dut/hart/hzu/BPPredWrongE +add wave -noupdate -group HDU -expand -group hazards /testbench/dut/hart/hzu/CSRWritePendingDEM +add wave -noupdate -group HDU -expand -group hazards /testbench/dut/hart/hzu/RetM +add wave -noupdate -group HDU -expand -group hazards /testbench/dut/hart/hzu/TrapM +add wave -noupdate -group HDU -expand -group hazards /testbench/dut/hart/hzu/LoadStallD +add wave -noupdate -group HDU -expand -group hazards /testbench/dut/hart/hzu/ICacheStallF +add wave -noupdate -group HDU -expand -group hazards /testbench/dut/hart/DataStall +add wave -noupdate -group HDU -expand -group hazards /testbench/dut/hart/MulDivStallD +add wave -noupdate -group HDU -group Flush -color Yellow /testbench/dut/hart/hzu/FlushF +add wave -noupdate -group HDU -group Flush -color Yellow /testbench/dut/hart/FlushD +add wave -noupdate -group HDU -group Flush -color Yellow 
/testbench/dut/hart/FlushE +add wave -noupdate -group HDU -group Flush -color Yellow /testbench/dut/hart/FlushM +add wave -noupdate -group HDU -group Flush -color Yellow /testbench/dut/hart/FlushW +add wave -noupdate -group HDU -expand -group Stall -color Orange /testbench/dut/hart/StallF +add wave -noupdate -group HDU -expand -group Stall -color Orange /testbench/dut/hart/StallD +add wave -noupdate -group HDU -expand -group Stall -color Orange /testbench/dut/hart/StallE +add wave -noupdate -group HDU -expand -group Stall -color Orange /testbench/dut/hart/StallM +add wave -noupdate -group HDU -expand -group Stall -color Orange /testbench/dut/hart/StallW add wave -noupdate -group Bpred -color Orange /testbench/dut/hart/ifu/bpred/bpred/Predictor/DirPredictor/GHR add wave -noupdate -group Bpred -expand -group {branch update selection inputs} /testbench/dut/hart/ifu/bpred/bpred/Predictor/DirPredictor/BPPredF add wave -noupdate -group Bpred -expand -group {branch update selection inputs} {/testbench/dut/hart/ifu/bpred/bpred/Predictor/DirPredictor/InstrClassE[0]} @@ -130,9 +130,6 @@ add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/zero add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/neg add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/lt add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/ltu -add wave -noupdate -group dcache -expand -group {cpu request} /testbench/dut/hart/MemAdrM -add wave -noupdate -group dcache -expand -group {cpu request} /testbench/dut/hart/WriteDataM -add wave -noupdate -group dcache /testbench/dut/hart/MemPAdrM add wave -noupdate -group Forward /testbench/dut/hart/ieu/fw/Rs1D add wave -noupdate -group Forward /testbench/dut/hart/ieu/fw/Rs2D add wave -noupdate -group Forward /testbench/dut/hart/ieu/fw/Rs1E @@ -175,42 +172,43 @@ add wave -noupdate -group divider /testbench/dut/hart/mdu/genblk1/div/N add wave -noupdate -group divider /testbench/dut/hart/mdu/genblk1/div/D add wave -noupdate -group divider 
/testbench/dut/hart/mdu/genblk1/div/Q add wave -noupdate -group divider /testbench/dut/hart/mdu/genblk1/div/rem0 -add wave -noupdate -expand -group icache -color Orange /testbench/dut/hart/ifu/icache/controller/CurrState -add wave -noupdate -expand -group icache /testbench/dut/hart/ifu/icache/controller/NextState -add wave -noupdate -expand -group icache /testbench/dut/hart/ifu/ITLBMissF -add wave -noupdate -expand -group icache -group {tag read} /testbench/dut/hart/ifu/icache/cachemem/DataValidBit -add wave -noupdate -expand -group icache -group {tag read} /testbench/dut/hart/ifu/icache/cachemem/cachetags/ReadData -add wave -noupdate -expand -group icache -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/hit -add wave -noupdate -expand -group icache -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/spill -add wave -noupdate -expand -group icache -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/ICacheStallF -add wave -noupdate -expand -group icache -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/SavePC -add wave -noupdate -expand -group icache -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/spillSave -add wave -noupdate -expand -group icache -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/UnalignedSelect -add wave -noupdate -expand -group icache -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/PCMux -add wave -noupdate -expand -group icache -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/spillSave -add wave -noupdate -expand -group icache -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/CntReset -add wave -noupdate -expand -group icache -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/PreCntEn -add wave -noupdate -expand -group icache -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/CntEn -add wave -noupdate -expand -group 
icache -group {icache parameters} -radix unsigned /testbench/dut/hart/ifu/icache/cachemem/NUMLINES -add wave -noupdate -expand -group icache -group {icache parameters} -radix unsigned /testbench/dut/hart/ifu/icache/cachemem/BLOCKLEN -add wave -noupdate -expand -group icache -group {icache parameters} -radix unsigned /testbench/dut/hart/ifu/icache/cachemem/BLOCKBYTELEN -add wave -noupdate -expand -group icache -group {icache parameters} -radix unsigned /testbench/dut/hart/ifu/icache/cachemem/OFFSETLEN -add wave -noupdate -expand -group icache -group {icache parameters} -radix unsigned /testbench/dut/hart/ifu/icache/cachemem/INDEXLEN -add wave -noupdate -expand -group icache -group {icache parameters} -radix unsigned /testbench/dut/hart/ifu/icache/cachemem/TAGLEN -add wave -noupdate -expand -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/FetchCountFlag -add wave -noupdate -expand -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/FetchCount -add wave -noupdate -expand -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/InstrPAdrF -add wave -noupdate -expand -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/InstrReadF -add wave -noupdate -expand -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/InstrAckF -add wave -noupdate -expand -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/InstrInF -add wave -noupdate -expand -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/ICacheMemWriteEnable -add wave -noupdate -expand -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/ICacheMemWriteData -add wave -noupdate -expand -group icache -expand -group memory -group {tag write} /testbench/dut/hart/ifu/icache/cachemem/WriteEnable -add wave -noupdate -expand -group icache -expand -group memory -group {tag write} /testbench/dut/hart/ifu/icache/cachemem/WriteLine -add 
wave -noupdate -expand -group icache -expand -group memory -group {tag write} /testbench/dut/hart/ifu/icache/cachemem/cachetags/StoredData -add wave -noupdate -expand -group icache -expand -group {instr to cpu} /testbench/dut/hart/ifu/icache/controller/FinalInstrRawF -add wave -noupdate -expand -group icache -expand -group pc /testbench/dut/hart/ifu/icache/controller/PCPF -add wave -noupdate -expand -group icache -expand -group pc /testbench/dut/hart/ifu/icache/controller/PCPreFinalF +add wave -noupdate -group icache -color Orange /testbench/dut/hart/ifu/icache/controller/CurrState +add wave -noupdate -group icache /testbench/dut/hart/ifu/icache/controller/NextState +add wave -noupdate -group icache /testbench/dut/hart/ifu/ITLBMissF +add wave -noupdate -group icache /testbench/dut/hart/ifu/icache/ITLBWriteF +add wave -noupdate -group icache -group {tag read} /testbench/dut/hart/ifu/icache/cachemem/DataValidBit +add wave -noupdate -group icache -group {tag read} /testbench/dut/hart/ifu/icache/cachemem/cachetags/ReadData +add wave -noupdate -group icache -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/hit +add wave -noupdate -group icache -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/spill +add wave -noupdate -group icache -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/ICacheStallF +add wave -noupdate -group icache -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/SavePC +add wave -noupdate -group icache -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/spillSave +add wave -noupdate -group icache -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/UnalignedSelect +add wave -noupdate -group icache -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/PCMux +add wave -noupdate -group icache -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/spillSave +add wave -noupdate -group icache -group {fsm out 
and control} /testbench/dut/hart/ifu/icache/controller/CntReset +add wave -noupdate -group icache -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/PreCntEn +add wave -noupdate -group icache -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/CntEn +add wave -noupdate -group icache -group {icache parameters} -radix unsigned /testbench/dut/hart/ifu/icache/cachemem/NUMLINES +add wave -noupdate -group icache -group {icache parameters} -radix unsigned /testbench/dut/hart/ifu/icache/cachemem/BLOCKLEN +add wave -noupdate -group icache -group {icache parameters} -radix unsigned /testbench/dut/hart/ifu/icache/cachemem/BLOCKBYTELEN +add wave -noupdate -group icache -group {icache parameters} -radix unsigned /testbench/dut/hart/ifu/icache/cachemem/OFFSETLEN +add wave -noupdate -group icache -group {icache parameters} -radix unsigned /testbench/dut/hart/ifu/icache/cachemem/INDEXLEN +add wave -noupdate -group icache -group {icache parameters} -radix unsigned /testbench/dut/hart/ifu/icache/cachemem/TAGLEN +add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/FetchCountFlag +add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/FetchCount +add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/InstrPAdrF +add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/InstrReadF +add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/InstrAckF +add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/InstrInF +add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/ICacheMemWriteEnable +add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/ICacheMemWriteData +add wave -noupdate -group icache -expand -group memory 
-group {tag write} /testbench/dut/hart/ifu/icache/cachemem/WriteEnable +add wave -noupdate -group icache -expand -group memory -group {tag write} /testbench/dut/hart/ifu/icache/cachemem/WriteLine +add wave -noupdate -group icache -expand -group memory -group {tag write} /testbench/dut/hart/ifu/icache/cachemem/cachetags/StoredData +add wave -noupdate -group icache -expand -group {instr to cpu} /testbench/dut/hart/ifu/icache/controller/FinalInstrRawF +add wave -noupdate -group icache -expand -group pc /testbench/dut/hart/ifu/icache/controller/PCPF +add wave -noupdate -group icache -expand -group pc /testbench/dut/hart/ifu/icache/controller/PCPreFinalF add wave -noupdate -group AHB -expand -group read /testbench/dut/hart/ebu/HRDATA add wave -noupdate -group AHB -expand -group read /testbench/dut/hart/ebu/HRDATAMasked add wave -noupdate -group AHB -expand -group read /testbench/dut/hart/ebu/HRDATANext @@ -242,17 +240,18 @@ add wave -noupdate -group AHB /testbench/dut/hart/ebu/HADDRD add wave -noupdate -group AHB /testbench/dut/hart/ebu/HSIZED add wave -noupdate -group AHB /testbench/dut/hart/ebu/HWRITED add wave -noupdate -group AHB /testbench/dut/hart/ebu/StallW -add wave -noupdate -group lsu /testbench/dut/hart/lsu/CurrState -add wave -noupdate -group lsu /testbench/dut/hart/lsu/DataStall -add wave -noupdate -group lsu /testbench/dut/hart/lsu/MemAdrM -add wave -noupdate -group lsu /testbench/dut/hart/lsu/MemPAdrM -add wave -noupdate -group lsu /testbench/dut/hart/lsu/ReadDataW -add wave -noupdate -group lsu /testbench/dut/hart/lsu/WriteDataM -add wave -noupdate -group lsu /testbench/dut/hart/lsu/AtomicMaskedM -add wave -noupdate -group lsu /testbench/dut/hart/lsu/DSquashBusAccessM -add wave -noupdate -group lsu /testbench/dut/hart/lsu/HRDATAW -add wave -noupdate -group lsu /testbench/dut/hart/lsu/MemAckW -add wave -noupdate -group lsu /testbench/dut/hart/lsu/StallW +add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/CurrState +add wave -noupdate -expand 
-group lsu /testbench/dut/hart/lsu/MemRWM +add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/DataStall +add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/MemAdrM +add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/MemPAdrM +add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/ReadDataW +add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/WriteDataM +add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/AtomicMaskedM +add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/DSquashBusAccessM +add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/HRDATAW +add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/MemAckW +add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/StallW add wave -noupdate -group plic /testbench/dut/uncore/genblk2/plic/HCLK add wave -noupdate -group plic /testbench/dut/uncore/genblk2/plic/HSELPLIC add wave -noupdate -group plic /testbench/dut/uncore/genblk2/plic/HADDR @@ -295,22 +294,26 @@ add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/MTIMECMP add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/TimerIntM add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/SwIntM add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/PRegEn -add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/WalkerState +add wave -noupdate -expand -group ptwalker -color Gold /testbench/dut/hart/pagetablewalker/WalkerState add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/MMUReady add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/HPTWStall -add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/TranslationPAdr -add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/MMUReadPTE +add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/MMUTranslate +add wave 
-noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/HPTWRead +add wave -noupdate -expand -group ptwalker -divider data add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/MMUPAdr +add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/MMUReadPTE add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/CurrentPTE +add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/TranslationPAdr add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/ValidPTE add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/LeafPTE add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/MMUStall -add wave -noupdate -group {LSU ARB} /testbench/dut/hart/arbiter/HPTWTranslate -add wave -noupdate -group {LSU ARB} /testbench/dut/hart/arbiter/HPTWPAdr -add wave -noupdate -group {LSU ARB} /testbench/dut/hart/arbiter/HPTWReadPTE -add wave -noupdate -group {LSU ARB} /testbench/dut/hart/arbiter/HPTWReady -add wave -noupdate -group {LSU ARB} -expand -group toLSU /testbench/dut/hart/arbiter/MemAdrMtoLSU -add wave -noupdate -group {LSU ARB} /testbench/dut/hart/arbiter/SelPTW +add wave -noupdate -expand -group {LSU ARB} -color Gold /testbench/dut/hart/arbiter/CurrState +add wave -noupdate -expand -group {LSU ARB} /testbench/dut/hart/arbiter/HPTWTranslate +add wave -noupdate -expand -group {LSU ARB} /testbench/dut/hart/arbiter/HPTWPAdr +add wave -noupdate -expand -group {LSU ARB} /testbench/dut/hart/arbiter/HPTWReadPTE +add wave -noupdate -expand -group {LSU ARB} /testbench/dut/hart/arbiter/HPTWReady +add wave -noupdate -expand -group {LSU ARB} -expand -group toLSU /testbench/dut/hart/arbiter/MemAdrMtoLSU +add wave -noupdate -expand -group {LSU ARB} /testbench/dut/hart/arbiter/SelPTW add wave -noupdate /testbench/dut/hart/lsu/DataStall add wave -noupdate -expand -group csr /testbench/dut/hart/priv/csr/MIP_REGW add wave 
-noupdate /testbench/dut/uncore/genblk2/plic/ExtIntM @@ -336,15 +339,9 @@ add wave -noupdate -group uart -expand -group outputs /testbench/dut/uncore/genb add wave -noupdate -group uart -expand -group outputs /testbench/dut/uncore/genblk4/uart/INTR add wave -noupdate -group uart -expand -group outputs /testbench/dut/uncore/genblk4/uart/TXRDYb add wave -noupdate -group uart -expand -group outputs /testbench/dut/uncore/genblk4/uart/RXRDYb -add wave -noupdate /testbench/dut/uncore/genblk2/plic/pendingPGrouped -add wave -noupdate /testbench/dut/uncore/genblk2/plic/intPending -add wave -noupdate /testbench/dut/uncore/genblk2/plic/nextIntPending -add wave -noupdate /testbench/dut/uncore/genblk2/plic/requests -add wave -noupdate /testbench/dut/uncore/genblk2/plic/GPIOIntr -add wave -noupdate /testbench/dut/uncore/genblk2/plic/UARTIntr -add wave -noupdate /testbench/dut/uncore/genblk4/uart/u/intrpending +add wave -noupdate -expand -group dtlb /testbench/dut/hart/lsu/dmmu/TLBMiss TreeUpdate [SetDefaultTree] -WaveRestoreCursors {{Cursor 5} {9729816 ns} 0} {{Cursor 6} {7857655 ns} 0} {{Cursor 7} {7869135 ns} 1} {{Cursor 8} {7868621 ns} 0} {{Cursor 9} {7868621 ns} 0} {{Cursor 10} {7865190 ns} 0} {{Cursor 11} {7867237 ns} 0} +WaveRestoreCursors {{Cursor 5} {11172515 ns} 0} {{Cursor 8} {9673965 ns} 0} quietly wave cursor active 1 configure wave -namecolwidth 250 configure wave -valuecolwidth 189 @@ -360,4 +357,4 @@ configure wave -griddelta 40 configure wave -timeline 0 configure wave -timelineunits ns update -WaveRestoreZoom {9729788 ns} {9730412 ns} +WaveRestoreZoom {11172446 ns} {11172732 ns} diff --git a/wally-pipelined/src/lsu/lsuArb.sv b/wally-pipelined/src/lsu/lsuArb.sv index 0d26af4c..9298d79d 100644 --- a/wally-pipelined/src/lsu/lsuArb.sv +++ b/wally-pipelined/src/lsu/lsuArb.sv @@ -31,6 +31,7 @@ module lsuArb // from page table walker input logic HPTWTranslate, + input logic HPTWRead, input logic [`XLEN-1:0] HPTWPAdr, // to page table walker. 
output logic [`XLEN-1:0] HPTWReadPTE, @@ -82,7 +83,7 @@ module lsuArb localparam StateReady = 0; localparam StatePTWPending = 1; - localparam StatePTWActive = 1; + localparam StatePTWActive = 2; logic [1:0] CurrState, NextState; logic SelPTW; @@ -102,11 +103,12 @@ module lsuArb else if (HPTWTranslate & ~DataStall) NextState = StatePTWActive; else NextState = StateReady; StatePTWPending: - if (~DataStall) NextState = StatePTWActive; - else NextState = StatePTWPending; + if (HPTWTranslate & ~DataStall) NextState = StatePTWActive; + else if (HPTWTranslate & DataStall) NextState = StatePTWPending; + else NextState = StateReady; StatePTWActive: - if (~DataStall) NextState = StateReady; - else NextState = StatePTWActive; + if (HPTWTranslate) NextState = StatePTWActive; + else NextState = StateReady; default: NextState = StateReady; endcase end @@ -114,8 +116,8 @@ module lsuArb // multiplex the outputs to LSU assign DisableTranslation = SelPTW; // change names between SelPTW would be confusing in DTLB. - assign SelPTW = CurrState == StatePTWActive; - assign MemRWMtoLSU = SelPTW ? 2'b10 : MemRWM; + assign SelPTW = (CurrState == StatePTWActive) || (CurrState == StateReady && HPTWTranslate); + assign MemRWMtoLSU = SelPTW ? 
{HPTWRead, 1'b0} : MemRWM; generate if (`XLEN == 32) begin diff --git a/wally-pipelined/src/mmu/pagetablewalker.sv b/wally-pipelined/src/mmu/pagetablewalker.sv index 5bc15df3..f5e1d85c 100644 --- a/wally-pipelined/src/mmu/pagetablewalker.sv +++ b/wally-pipelined/src/mmu/pagetablewalker.sv @@ -36,41 +36,42 @@ module pagetablewalker ( // Control signals - input logic clk, reset, - input logic [`XLEN-1:0] SATP_REGW, + input logic clk, reset, + input logic [`XLEN-1:0] SATP_REGW, // Signals from TLBs (addresses to translate) - input logic [`XLEN-1:0] PCF, MemAdrM, - input logic ITLBMissF, DTLBMissM, - input logic [1:0] MemRWM, + input logic [`XLEN-1:0] PCF, MemAdrM, + input logic ITLBMissF, DTLBMissM, + input logic [1:0] MemRWM, // Outputs to the TLBs (PTEs to write) output logic [`XLEN-1:0] PageTableEntryF, PageTableEntryM, - output logic [1:0] PageTypeF, PageTypeM, - output logic ITLBWriteF, DTLBWriteM, + output logic [1:0] PageTypeF, PageTypeM, + output logic ITLBWriteF, DTLBWriteM, // *** modify to send to LSU // *** KMG: These are inputs/results from the ahblite whose addresses should have already been checked, so I don't think they need to be sent through the LSU - input logic [`XLEN-1:0] MMUReadPTE, - input logic MMUReady, - input logic HPTWStall, + input logic [`XLEN-1:0] MMUReadPTE, + input logic MMUReady, + input logic HPTWStall, // *** modify to send to LSU output logic [`XLEN-1:0] MMUPAdr, - output logic MMUTranslate, // *** rename to HPTWReq + output logic MMUTranslate, // *** rename to HPTWReq + output logic HPTWRead, // Stall signal - output logic MMUStall, + output logic MMUStall, // Faults - output logic WalkerInstrPageFaultF, - output logic WalkerLoadPageFaultM, - output logic WalkerStorePageFaultM + output logic WalkerInstrPageFaultF, + output logic WalkerLoadPageFaultM, + output logic WalkerStorePageFaultM ); // Internal signals @@ -201,6 +202,9 @@ module pagetablewalker ( assign VPN1 = TranslationVAdrQ[31:22]; assign VPN0 = TranslationVAdrQ[21:12]; 
+ assign HPTWRead = (WalkerState == IDLE && MMUTranslate) || + WalkerState == LEVEL2 || WalkerState == LEVEL1; + // Assign combinational outputs always_comb begin // default values @@ -279,6 +283,10 @@ module pagetablewalker ( assign PRegEn = (WalkerState == LEVEL1_WDV || WalkerState == LEVEL0_WDV || WalkerState == LEVEL2_WDV || WalkerState == LEVEL3_WDV) && ~HPTWStall; + assign HPTWRead = (WalkerState == IDLE && MMUTranslate) || WalkerState == LEVEL3 || + WalkerState == LEVEL2 || WalkerState == LEVEL1; + + always_comb begin case (WalkerState) IDLE: if (MMUTranslate && SvMode == `SV48) NextWalkerState = LEVEL3_WDV; diff --git a/wally-pipelined/src/wally/wallypipelinedhart.sv b/wally-pipelined/src/wally/wallypipelinedhart.sv index e23fd511..edcb7203 100644 --- a/wally-pipelined/src/wally/wallypipelinedhart.sv +++ b/wally-pipelined/src/wally/wallypipelinedhart.sv @@ -131,6 +131,7 @@ module wallypipelinedhart logic [`XLEN-1:0] MMUPAdr, MMUReadPTE; logic MMUStall; logic MMUTranslate, MMUReady; + logic HPTWRead; logic HPTWReadyfromLSU; logic HPTWStall; @@ -186,7 +187,8 @@ module wallypipelinedhart mux2 #(`XLEN) OutputInput2mux(WriteDataM, FWriteDataM, FMemRWM[0], WriteDatatmpM); - pagetablewalker pagetablewalker(.*); // can send addresses to ahblite, send out pagetablestall + pagetablewalker pagetablewalker(.HPTWRead(HPTWRead), + .*); // can send addresses to ahblite, send out pagetablestall // *** can connect to hazard unit // changing from this to the line above breaks the program. auipc at 104 fails; seems to be flushed. 
// Would need to insertinstruction as InstrD, not InstrF @@ -200,6 +202,7 @@ module wallypipelinedhart // arbiter between IEU and pagetablewalker lsuArb arbiter(// HPTW connection .HPTWTranslate(MMUTranslate), + .HPTWRead(HPTWRead), .HPTWPAdr(MMUPAdr), .HPTWReadPTE(MMUReadPTE), .HPTWReady(MMUReady), From 9ec624702d97a61de7a107400dd79838c5040709 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Wed, 30 Jun 2021 16:25:03 -0500 Subject: [PATCH 16/38] Major rewrite of ptw to remove combo loop. --- wally-pipelined/regression/wave.do | 42 ++- wally-pipelined/src/lsu/lsu.sv | 41 ++- wally-pipelined/src/lsu/lsuArb.sv | 29 +- wally-pipelined/src/mmu/pagetablewalker.sv | 283 ++++++++++++------ .../testbench/testbench-imperas.sv | 2 +- 5 files changed, 272 insertions(+), 125 deletions(-) diff --git a/wally-pipelined/regression/wave.do b/wally-pipelined/regression/wave.do index 20a21864..eef15339 100644 --- a/wally-pipelined/regression/wave.do +++ b/wally-pipelined/regression/wave.do @@ -240,7 +240,7 @@ add wave -noupdate -group AHB /testbench/dut/hart/ebu/HADDRD add wave -noupdate -group AHB /testbench/dut/hart/ebu/HSIZED add wave -noupdate -group AHB /testbench/dut/hart/ebu/HWRITED add wave -noupdate -group AHB /testbench/dut/hart/ebu/StallW -add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/CurrState +add wave -noupdate -expand -group lsu -color Gold /testbench/dut/hart/lsu/CurrState add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/MemRWM add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/DataStall add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/MemAdrM @@ -293,27 +293,36 @@ add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/MTIME add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/MTIMECMP add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/TimerIntM add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/SwIntM -add wave -noupdate -expand -group ptwalker 
/testbench/dut/hart/pagetablewalker/PRegEn -add wave -noupdate -expand -group ptwalker -color Gold /testbench/dut/hart/pagetablewalker/WalkerState -add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/MMUReady -add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/HPTWStall add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/MMUTranslate +add wave -noupdate -expand -group ptwalker -color Gold /testbench/dut/hart/pagetablewalker/WalkerState +add wave -noupdate -expand -group ptwalker -color Salmon /testbench/dut/hart/pagetablewalker/HPTWStall add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/HPTWRead -add wave -noupdate -expand -group ptwalker -divider data add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/MMUPAdr -add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/MMUReadPTE -add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/CurrentPTE +add wave -noupdate -expand -group ptwalker -expand -group pte /testbench/dut/hart/pagetablewalker/MMUReadPTE +add wave -noupdate -expand -group ptwalker -expand -group pte /testbench/dut/hart/pagetablewalker/PRegEn +add wave -noupdate -expand -group ptwalker -expand -group pte /testbench/dut/hart/pagetablewalker/CurrentPTE +add wave -noupdate -expand -group ptwalker -divider data add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/TranslationPAdr add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/ValidPTE add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/LeafPTE add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/MMUStall +add wave -noupdate -expand -group ptwalker -expand -group {fsm outputs} /testbench/dut/hart/pagetablewalker/TranslationPAdr +add wave -noupdate -expand -group ptwalker -expand -group {fsm outputs} 
/testbench/dut/hart/pagetablewalker/PageTableEntry +add wave -noupdate -expand -group ptwalker -expand -group {fsm outputs} /testbench/dut/hart/pagetablewalker/PageType +add wave -noupdate -expand -group ptwalker -expand -group {fsm outputs} /testbench/dut/hart/pagetablewalker/ITLBWriteF +add wave -noupdate -expand -group ptwalker -expand -group {fsm outputs} /testbench/dut/hart/pagetablewalker/DTLBWriteM +add wave -noupdate -expand -group ptwalker -expand -group {fsm outputs} /testbench/dut/hart/pagetablewalker/WalkerInstrPageFaultF +add wave -noupdate -expand -group ptwalker -expand -group {fsm outputs} /testbench/dut/hart/pagetablewalker/WalkerLoadPageFaultM +add wave -noupdate -expand -group ptwalker -expand -group {fsm outputs} /testbench/dut/hart/pagetablewalker/WalkerStorePageFaultM +add wave -noupdate -expand -group ptwalker -expand -group {fsm outputs} /testbench/dut/hart/pagetablewalker/MMUStall add wave -noupdate -expand -group {LSU ARB} -color Gold /testbench/dut/hart/arbiter/CurrState -add wave -noupdate -expand -group {LSU ARB} /testbench/dut/hart/arbiter/HPTWTranslate -add wave -noupdate -expand -group {LSU ARB} /testbench/dut/hart/arbiter/HPTWPAdr -add wave -noupdate -expand -group {LSU ARB} /testbench/dut/hart/arbiter/HPTWReadPTE -add wave -noupdate -expand -group {LSU ARB} /testbench/dut/hart/arbiter/HPTWReady -add wave -noupdate -expand -group {LSU ARB} -expand -group toLSU /testbench/dut/hart/arbiter/MemAdrMtoLSU add wave -noupdate -expand -group {LSU ARB} /testbench/dut/hart/arbiter/SelPTW +add wave -noupdate -expand -group {LSU ARB} -expand -group hptw /testbench/dut/hart/arbiter/HPTWTranslate +add wave -noupdate -expand -group {LSU ARB} -expand -group hptw /testbench/dut/hart/arbiter/HPTWRead +add wave -noupdate -expand -group {LSU ARB} -expand -group hptw /testbench/dut/hart/arbiter/HPTWPAdr +add wave -noupdate -expand -group {LSU ARB} -expand -group hptw /testbench/dut/hart/arbiter/HPTWReadPTE +add wave -noupdate -expand -group {LSU ARB} 
-expand -group hptw /testbench/dut/hart/arbiter/HPTWReady +add wave -noupdate -expand -group {LSU ARB} -expand -group toLSU /testbench/dut/hart/arbiter/MemAdrMtoLSU add wave -noupdate /testbench/dut/hart/lsu/DataStall add wave -noupdate -expand -group csr /testbench/dut/hart/priv/csr/MIP_REGW add wave -noupdate /testbench/dut/uncore/genblk2/plic/ExtIntM @@ -340,9 +349,10 @@ add wave -noupdate -group uart -expand -group outputs /testbench/dut/uncore/genb add wave -noupdate -group uart -expand -group outputs /testbench/dut/uncore/genblk4/uart/TXRDYb add wave -noupdate -group uart -expand -group outputs /testbench/dut/uncore/genblk4/uart/RXRDYb add wave -noupdate -expand -group dtlb /testbench/dut/hart/lsu/dmmu/TLBMiss +add wave -noupdate -expand -group itlb /testbench/dut/hart/ifu/ITLBMissF TreeUpdate [SetDefaultTree] -WaveRestoreCursors {{Cursor 5} {11172515 ns} 0} {{Cursor 8} {9673965 ns} 0} -quietly wave cursor active 1 +WaveRestoreCursors {{Cursor 5} {11172515 ns} 0} {{Cursor 8} {2967 ns} 0} +quietly wave cursor active 2 configure wave -namecolwidth 250 configure wave -valuecolwidth 189 configure wave -justifyvalue left @@ -357,4 +367,4 @@ configure wave -griddelta 40 configure wave -timeline 0 configure wave -timelineunits ns update -WaveRestoreZoom {11172446 ns} {11172732 ns} +WaveRestoreZoom {2729 ns} {3045 ns} diff --git a/wally-pipelined/src/lsu/lsu.sv b/wally-pipelined/src/lsu/lsu.sv index dc554e52..cd37ce94 100644 --- a/wally-pipelined/src/lsu/lsu.sv +++ b/wally-pipelined/src/lsu/lsu.sv @@ -103,14 +103,17 @@ module lsu ( logic SquashSCM; logic DTLBPageFaultM; logic MemAccessM; - logic [2:0] CurrState, NextState; + logic preCommittedM; - localparam STATE_READY = 0; - localparam STATE_FETCH = 1; - localparam STATE_FETCH_AMO_1 = 2; - localparam STATE_FETCH_AMO_2 = 3; - localparam STATE_STALLED = 4; + typedef enum {STATE_READY, + STATE_FETCH, + STATE_FETCH_AMO_1, + STATE_FETCH_AMO_2, + STATE_STALLED, + STATE_TLB_MISS} statetype; + statetype CurrState, 
NextState; + logic PMPInstrAccessFaultF, PMAInstrAccessFaultF; // *** these are just so that the mmu has somewhere to put these outputs since they aren't used in dmem // *** if you're allowed to parameterize outputs/ inputs existence, these are an easy delete. @@ -208,15 +211,20 @@ module lsu ( // requests data from memory rather than issuing a single request. - flopr #(3) stateReg(.clk(clk), - .reset(reset), - .d(NextState), - .q(CurrState)); + flopenl #(.TYPE(statetype)) stateReg(.clk(clk), + .load(reset), + .en(1'b1), + .d(NextState), + .val(STATE_READY), + .q(CurrState)); always_comb begin case (CurrState) STATE_READY: - if (AtomicMaskedM[1]) begin + if (DTLBMissM) begin + NextState = STATE_READY; + DataStall = 1'b0; + end else if (AtomicMaskedM[1]) begin NextState = STATE_FETCH_AMO_1; // *** should be some misalign check DataStall = 1'b1; end else if((MemReadM & AtomicM[0]) | (MemWriteM & AtomicM[0])) begin @@ -248,15 +256,13 @@ module lsu ( end end STATE_FETCH: begin + DataStall = 1'b1; if (MemAckW & ~StallW) begin NextState = STATE_READY; - DataStall = 1'b0; end else if (MemAckW & StallW) begin NextState = STATE_STALLED; - DataStall = 1'b1; end else begin NextState = STATE_FETCH; - DataStall = 1'b1; end end STATE_STALLED: begin @@ -267,6 +273,13 @@ module lsu ( NextState = STATE_STALLED; end end + STATE_TLB_MISS: begin + if (DTLBWriteM) begin + NextState = STATE_READY; + end else begin + NextState = STATE_TLB_MISS; + end + end default: begin DataStall = 1'b0; NextState = STATE_READY; diff --git a/wally-pipelined/src/lsu/lsuArb.sv b/wally-pipelined/src/lsu/lsuArb.sv index 9298d79d..dce509f7 100644 --- a/wally-pipelined/src/lsu/lsuArb.sv +++ b/wally-pipelined/src/lsu/lsuArb.sv @@ -81,26 +81,31 @@ module lsuArb // to data memory the d cache is already busy. We can interlock by // leveraging Stall as a d cache busy. We will need an FSM to handle this. 
- localparam StateReady = 0; - localparam StatePTWPending = 1; - localparam StatePTWActive = 2; + typedef enum{StateReady, + StatePTWPending, + StatePTWActive} statetype; + - logic [1:0] CurrState, NextState; + statetype CurrState, NextState; logic SelPTW; logic HPTWStallD; - flopr #(2) StateReg( - .clk(clk), - .reset(reset), - .d(NextState), - .q(CurrState)); + flopenl #(.TYPE(statetype)) StateReg(.clk(clk), + .load(reset), + .en(1'b1), + .d(NextState), + .val(StateReady), + .q(CurrState)); always_comb begin case(CurrState) StateReady: +/* -----\/----- EXCLUDED -----\/----- if (HPTWTranslate & DataStall) NextState = StatePTWPending; - else if (HPTWTranslate & ~DataStall) NextState = StatePTWActive; + else + -----/\----- EXCLUDED -----/\----- */ + if (HPTWTranslate) NextState = StatePTWActive; else NextState = StateReady; StatePTWPending: if (HPTWTranslate & ~DataStall) NextState = StatePTWActive; @@ -143,11 +148,15 @@ module lsuArb // *** need to rename DcacheStall and Datastall. // not clear at all. I think it should be LSUStall from the LSU, // which is demuxed to HPTWStall and CPUDataStall? (not sure on this last one). + assign HPTWStall = SelPTW ? DataStall : 1'b1; + //assign HPTWStallD = SelPTW ? DataStall : 1'b1; +/* -----\/----- EXCLUDED -----\/----- assign HPTWStallD = SelPTW ? DataStall : 1'b1; flopr #(1) HPTWStallReg (.clk(clk), .reset(reset), .d(HPTWStallD), .q(HPTWStall)); + -----/\----- EXCLUDED -----/\----- */ assign DCacheStall = SelPTW ? 1'b0 : DataStall; // *** this is probably going to change. 
diff --git a/wally-pipelined/src/mmu/pagetablewalker.sv b/wally-pipelined/src/mmu/pagetablewalker.sv index f5e1d85c..f0f30301 100644 --- a/wally-pipelined/src/mmu/pagetablewalker.sv +++ b/wally-pipelined/src/mmu/pagetablewalker.sv @@ -126,6 +126,7 @@ module pagetablewalker ( assign MMUTranslate = DTLBMissMQ | ITLBMissFQ; + //assign MMUTranslate = DTLBMissM | ITLBMissF; // unswizzle PTE bits assign {Dirty, Accessed, Global, User, @@ -142,20 +143,19 @@ module pagetablewalker ( assign PageTypeF = PageType; assign PageTypeM = PageType; - localparam LEVEL0_WDV = 4'h0; - localparam LEVEL0 = 4'h8; - localparam LEVEL1_WDV = 4'h1; - localparam LEVEL1 = 4'h9; - localparam LEVEL2_WDV = 4'h2; - localparam LEVEL2 = 4'hA; - localparam LEVEL3_WDV = 4'h3; - localparam LEVEL3 = 4'hB; - // space left for more levels - localparam LEAF = 4'h5; - localparam IDLE = 4'h6; - localparam FAULT = 4'h7; + typedef enum {LEVEL0_WDV, + LEVEL0, + LEVEL1_WDV, + LEVEL1, + LEVEL2_WDV, + LEVEL2, + LEVEL3_WDV, + LEVEL3, + LEAF, + IDLE, + FAULT} statetype; - logic [3:0] WalkerState, NextWalkerState; + statetype WalkerState, NextWalkerState; logic PRegEn; @@ -163,7 +163,7 @@ module pagetablewalker ( if (`XLEN == 32) begin logic [9:0] VPN1, VPN0; - flopenl #(3) mmureg(clk, reset, 1'b1, NextWalkerState, IDLE, WalkerState); + flopenl #(.TYPE(statetype)) mmureg(clk, reset, 1'b1, NextWalkerState, IDLE, WalkerState); assign PRegEn = (WalkerState == LEVEL1_WDV || WalkerState == LEVEL0_WDV) && ~HPTWStall; @@ -202,13 +202,13 @@ module pagetablewalker ( assign VPN1 = TranslationVAdrQ[31:22]; assign VPN0 = TranslationVAdrQ[21:12]; - assign HPTWRead = (WalkerState == IDLE && MMUTranslate) || - WalkerState == LEVEL2 || WalkerState == LEVEL1; + //assign HPTWRead = (WalkerState == IDLE && MMUTranslate) || +// WalkerState == LEVEL2 || WalkerState == LEVEL1; // Assign combinational outputs always_comb begin // default values - TranslationPAdr = '0; + //TranslationPAdr = '0; PageTableEntry = '0; PageType ='0; DTLBWriteM 
= '0; @@ -216,38 +216,38 @@ module pagetablewalker ( WalkerInstrPageFaultF = '0; WalkerLoadPageFaultM = '0; WalkerStorePageFaultM = '0; - MMUStall = '1; + //MMUStall = '1; case (NextWalkerState) IDLE: begin - MMUStall = '0; + //MMUStall = '0; end LEVEL1: begin - TranslationPAdr = {BasePageTablePPN, VPN1, 2'b00}; + //TranslationPAdr = {BasePageTablePPN, VPN1, 2'b00}; end LEVEL1_WDV: begin - TranslationPAdr = {BasePageTablePPN, VPN1, 2'b00}; + //TranslationPAdr = {BasePageTablePPN, VPN1, 2'b00}; end LEVEL0: begin - TranslationPAdr = {CurrentPPN, VPN0, 2'b00}; + //TranslationPAdr = {CurrentPPN, VPN0, 2'b00}; end LEVEL0_WDV: begin - TranslationPAdr = {CurrentPPN, VPN0, 2'b00}; + //TranslationPAdr = {CurrentPPN, VPN0, 2'b00}; end LEAF: begin // Keep physical address alive to prevent HADDR dropping to 0 - TranslationPAdr = {CurrentPPN, VPN0, 2'b00}; + //TranslationPAdr = {CurrentPPN, VPN0, 2'b00}; PageTableEntry = CurrentPTE; PageType = (WalkerState == LEVEL1) ? 2'b01 : 2'b00; DTLBWriteM = DTLBMissMQ; ITLBWriteF = ~DTLBMissMQ; // Prefer data over instructions end FAULT: begin - TranslationPAdr = {CurrentPPN, VPN0, 2'b00}; + //TranslationPAdr = {CurrentPPN, VPN0, 2'b00}; WalkerInstrPageFaultF = ~DTLBMissMQ; WalkerLoadPageFaultM = DTLBMissMQ && ~MemStore; WalkerStorePageFaultM = DTLBMissMQ && MemStore; - MMUStall = '0; // Drop the stall early to enter trap handling code + // MMUStall = '0; // Drop the stall early to enter trap handling code end default: begin // nothing @@ -278,68 +278,179 @@ module pagetablewalker ( logic TerapageMisaligned, GigapageMisaligned, BadTerapage, BadGigapage; - flopenl #(4) mmureg(clk, reset, 1'b1, NextWalkerState, IDLE, WalkerState); + flopenl #(.TYPE(statetype)) mmureg(clk, reset, 1'b1, NextWalkerState, IDLE, WalkerState); +/* -----\/----- EXCLUDED -----\/----- assign PRegEn = (WalkerState == LEVEL1_WDV || WalkerState == LEVEL0_WDV || WalkerState == LEVEL2_WDV || WalkerState == LEVEL3_WDV) && ~HPTWStall; + -----/\----- EXCLUDED -----/\----- 
*/ - assign HPTWRead = (WalkerState == IDLE && MMUTranslate) || WalkerState == LEVEL3 || - WalkerState == LEVEL2 || WalkerState == LEVEL1; + //assign HPTWRead = (WalkerState == IDLE && MMUTranslate) || WalkerState == LEVEL3 || +// WalkerState == LEVEL2 || WalkerState == LEVEL1; always_comb begin + PRegEn = 1'b0; + TranslationPAdr = '0; + HPTWRead = 1'b0; + MMUStall = 1'b1; + + WalkerInstrPageFaultF = 1'b0; + WalkerLoadPageFaultM = 1'b0; + WalkerStorePageFaultM = 1'b0; + case (WalkerState) - IDLE: if (MMUTranslate && SvMode == `SV48) NextWalkerState = LEVEL3_WDV; - else if (MMUTranslate && SvMode == `SV39) NextWalkerState = LEVEL2_WDV; - else NextWalkerState = IDLE; + IDLE: begin + if (MMUTranslate && SvMode == `SV48) begin + NextWalkerState = LEVEL3_WDV; + TranslationPAdr = {BasePageTablePPN, VPN3, 3'b000}; + HPTWRead = 1'b1; + end else if (MMUTranslate && SvMode == `SV39) begin + NextWalkerState = LEVEL2_WDV; + TranslationPAdr = {BasePageTablePPN, VPN2, 3'b000}; + HPTWRead = 1'b1; + end else begin + NextWalkerState = IDLE; + TranslationPAdr = '0; + MMUStall = 1'b0; + end + end - LEVEL3_WDV: if (HPTWStall) NextWalkerState = LEVEL3_WDV; - else NextWalkerState = LEVEL3; - LEVEL3: + LEVEL3_WDV: begin + TranslationPAdr = {BasePageTablePPN, VPN3, 3'b000}; + //HPTWRead = 1'b1; + if (HPTWStall) begin + NextWalkerState = LEVEL3_WDV; + end else begin + NextWalkerState = LEVEL3; + PRegEn = 1'b1; + end + end + + LEVEL3: begin + // *** According to the architecture, we should + // fault upon finding a superpage that is misaligned or has 0 + // access bit. The following commented line of code is + // supposed to perform that check. However, it is untested. + if (ValidPTE && LeafPTE && ~BadTerapage) begin + NextWalkerState = LEAF; + end + // else if (ValidPTE && LeafPTE) NextWalkerState = LEAF; // *** Once the above line is properly tested, delete this line. + else if (ValidPTE && ~LeafPTE) begin + NextWalkerState = LEVEL2_WDV; + TranslationPAdr = {(SvMode == `SV48) ? 
CurrentPPN : BasePageTablePPN, VPN2, 3'b000}; + HPTWRead = 1'b1; + end else begin + NextWalkerState = FAULT; + WalkerInstrPageFaultF = ~DTLBMissMQ; + WalkerLoadPageFaultM = DTLBMissMQ && ~MemStore; + WalkerStorePageFaultM = DTLBMissMQ && MemStore; + end + + end + + LEVEL2_WDV: begin + TranslationPAdr = {(SvMode == `SV48) ? CurrentPPN : BasePageTablePPN, VPN2, 3'b000}; + //HPTWRead = 1'b1; + if (HPTWStall) begin + NextWalkerState = LEVEL2_WDV; + end else begin + NextWalkerState = LEVEL2; + PRegEn = 1'b1; + end + end + + LEVEL2: begin // *** According to the architecture, we should // fault upon finding a superpage that is misaligned or has 0 // access bit. The following commented line of code is // supposed to perform that check. However, it is untested. - if (ValidPTE && LeafPTE && ~BadTerapage) NextWalkerState = LEAF; - // else if (ValidPTE && LeafPTE) NextWalkerState = LEAF; // *** Once the above line is properly tested, delete this line. - else if (ValidPTE && ~LeafPTE) NextWalkerState = LEVEL2_WDV; - else NextWalkerState = FAULT; + if (ValidPTE && LeafPTE && ~BadGigapage) begin + NextWalkerState = LEAF; + end + // else if (ValidPTE && LeafPTE) NextWalkerState = LEAF; // *** Once the above line is properly tested, delete this line. + else if (ValidPTE && ~LeafPTE) begin + NextWalkerState = LEVEL1_WDV; + TranslationPAdr = {CurrentPPN, VPN1, 3'b000}; + HPTWRead = 1'b1; + end else begin + NextWalkerState = FAULT; + WalkerInstrPageFaultF = ~DTLBMissMQ; + WalkerLoadPageFaultM = DTLBMissMQ && ~MemStore; + WalkerStorePageFaultM = DTLBMissMQ && MemStore; + end - LEVEL2_WDV: if (HPTWStall) NextWalkerState = LEVEL2_WDV; - else NextWalkerState = LEVEL2; - LEVEL2: - // *** According to the architecture, we should - // fault upon finding a superpage that is misaligned or has 0 - // access bit. The following commented line of code is - // supposed to perform that check. However, it is untested. 
- if (ValidPTE && LeafPTE && ~BadGigapage) NextWalkerState = LEAF; - // else if (ValidPTE && LeafPTE) NextWalkerState = LEAF; // *** Once the above line is properly tested, delete this line. - else if (ValidPTE && ~LeafPTE) NextWalkerState = LEVEL1_WDV; - else NextWalkerState = FAULT; + end - LEVEL1_WDV: if (HPTWStall) NextWalkerState = LEVEL1_WDV; - else NextWalkerState = LEVEL1; - LEVEL1: - // *** According to the architecture, we should - // fault upon finding a superpage that is misaligned or has 0 - // access bit. The following commented line of code is - // supposed to perform that check. However, it is untested. - if (ValidPTE && LeafPTE && ~BadMegapage) NextWalkerState = LEAF; - // else if (ValidPTE && LeafPTE) NextWalkerState = LEAF; // *** Once the above line is properly tested, delete this line. - else if (ValidPTE && ~LeafPTE) NextWalkerState = LEVEL0_WDV; - else NextWalkerState = FAULT; + LEVEL1_WDV: begin + TranslationPAdr = {CurrentPPN, VPN1, 3'b000}; + //HPTWRead = 1'b1; + if (HPTWStall) begin + NextWalkerState = LEVEL1_WDV; + end else begin + NextWalkerState = LEVEL1; + PRegEn = 1'b1; + end + end - LEVEL0_WDV: if (HPTWStall) NextWalkerState = LEVEL0_WDV; - else NextWalkerState = LEVEL0; - LEVEL0: - if (ValidPTE && LeafPTE && ~AccessAlert) NextWalkerState = LEAF; - else NextWalkerState = FAULT; + LEVEL1: begin + // *** According to the architecture, we should + // fault upon finding a superpage that is misaligned or has 0 + // access bit. The following commented line of code is + // supposed to perform that check. However, it is untested. + if (ValidPTE && LeafPTE && ~BadMegapage) begin + NextWalkerState = LEAF; + end + // else if (ValidPTE && LeafPTE) NextWalkerState = LEAF; // *** Once the above line is properly tested, delete this line. 
+ else if (ValidPTE && ~LeafPTE) begin + NextWalkerState = LEVEL0_WDV; + TranslationPAdr = {CurrentPPN, VPN0, 3'b000}; + HPTWRead = 1'b1; + end else begin + NextWalkerState = FAULT; + WalkerInstrPageFaultF = ~DTLBMissMQ; + WalkerLoadPageFaultM = DTLBMissMQ && ~MemStore; + WalkerStorePageFaultM = DTLBMissMQ && MemStore; + end + end + + LEVEL0_WDV: begin + TranslationPAdr = {CurrentPPN, VPN0, 3'b000}; + //HPTWRead = 1'b1; + if (HPTWStall) begin + NextWalkerState = LEVEL0_WDV; + end else begin + NextWalkerState = LEVEL0; + PRegEn = 1'b1; + end + end + + LEVEL0: begin + if (ValidPTE && LeafPTE && ~AccessAlert) begin + NextWalkerState = LEAF; + end else begin + NextWalkerState = FAULT; + WalkerInstrPageFaultF = ~DTLBMissMQ; + WalkerLoadPageFaultM = DTLBMissMQ && ~MemStore; + WalkerStorePageFaultM = DTLBMissMQ && MemStore; + end + end - LEAF: NextWalkerState = IDLE; + LEAF: begin + NextWalkerState = IDLE; + MMUStall = 1'b0; + end + + FAULT: begin + NextWalkerState = IDLE; + MMUStall = 1'b0; + end + + // Default case should never happen + default: begin + NextWalkerState = IDLE; + end - FAULT: NextWalkerState = IDLE; - // Default case should never happen, but is included for linter. - default: NextWalkerState = IDLE; endcase end @@ -363,53 +474,55 @@ module pagetablewalker ( always_comb begin // default values - TranslationPAdr = '0; + //TranslationPAdr = '0; PageTableEntry = '0; PageType = '0; DTLBWriteM = '0; ITLBWriteF = '0; +/* -----\/----- EXCLUDED -----\/----- WalkerInstrPageFaultF = '0; WalkerLoadPageFaultM = '0; WalkerStorePageFaultM = '0; + -----/\----- EXCLUDED -----/\----- */ // The MMU defaults to stalling the processor - MMUStall = '1; + //MMUStall = '1; case (NextWalkerState) IDLE: begin - MMUStall = '0; + //MMUStall = '0; end LEVEL3: begin - TranslationPAdr = {BasePageTablePPN, VPN3, 3'b000}; + //TranslationPAdr = {BasePageTablePPN, VPN3, 3'b000}; // *** this is a huge breaking point. 
if we're going through level3 every time, even when sv48 is off, // what should translationPAdr be when level3 is just off? end LEVEL3_WDV: begin - TranslationPAdr = {BasePageTablePPN, VPN3, 3'b000}; + //TranslationPAdr = {BasePageTablePPN, VPN3, 3'b000}; // *** this is a huge breaking point. if we're going through level3 every time, even when sv48 is off, // what should translationPAdr be when level3 is just off? end LEVEL2: begin - TranslationPAdr = {(SvMode == `SV48) ? CurrentPPN : BasePageTablePPN, VPN2, 3'b000}; + //TranslationPAdr = {(SvMode == `SV48) ? CurrentPPN : BasePageTablePPN, VPN2, 3'b000}; end LEVEL2_WDV: begin - TranslationPAdr = {(SvMode == `SV48) ? CurrentPPN : BasePageTablePPN, VPN2, 3'b000}; + //TranslationPAdr = {(SvMode == `SV48) ? CurrentPPN : BasePageTablePPN, VPN2, 3'b000}; end LEVEL1: begin - TranslationPAdr = {CurrentPPN, VPN1, 3'b000}; + //TranslationPAdr = {CurrentPPN, VPN1, 3'b000}; end LEVEL1_WDV: begin - TranslationPAdr = {CurrentPPN, VPN1, 3'b000}; + //TranslationPAdr = {CurrentPPN, VPN1, 3'b000}; end LEVEL0: begin - TranslationPAdr = {CurrentPPN, VPN0, 3'b000}; + //TranslationPAdr = {CurrentPPN, VPN0, 3'b000}; end LEVEL0_WDV: begin - TranslationPAdr = {CurrentPPN, VPN0, 3'b000}; + //TranslationPAdr = {CurrentPPN, VPN0, 3'b000}; end LEAF: begin // Keep physical address alive to prevent HADDR dropping to 0 - TranslationPAdr = {CurrentPPN, VPN0, 3'b000}; + //TranslationPAdr = {CurrentPPN, VPN0, 3'b000}; PageTableEntry = CurrentPTE; PageType = (WalkerState == LEVEL3) ? 2'b11 : ((WalkerState == LEVEL2) ? 
2'b10 : @@ -419,11 +532,13 @@ module pagetablewalker ( end FAULT: begin // Keep physical address alive to prevent HADDR dropping to 0 - TranslationPAdr = {CurrentPPN, VPN0, 3'b000}; + //TranslationPAdr = {CurrentPPN, VPN0, 3'b000}; +/* -----\/----- EXCLUDED -----\/----- WalkerInstrPageFaultF = ~DTLBMissMQ; WalkerLoadPageFaultM = DTLBMissMQ && ~MemStore; WalkerStorePageFaultM = DTLBMissMQ && MemStore; - MMUStall = '0; // Drop the stall early to enter trap handling code + -----/\----- EXCLUDED -----/\----- */ + //MMUStall = '0; // Drop the stall early to enter trap handling code end default: begin // nothing diff --git a/wally-pipelined/testbench/testbench-imperas.sv b/wally-pipelined/testbench/testbench-imperas.sv index 1bbe6124..95ae9343 100644 --- a/wally-pipelined/testbench/testbench-imperas.sv +++ b/wally-pipelined/testbench/testbench-imperas.sv @@ -538,9 +538,9 @@ string tests32f[] = '{ else tests = {tests, tests64iNOc}; if (`M_SUPPORTED) tests = {tests, tests64m}; if (`A_SUPPORTED) tests = {tests, tests64a}; - if (`MEM_VIRTMEM) tests = {tests, tests64mmu}; if (`D_SUPPORTED) tests = {tests64d, tests}; if (`F_SUPPORTED) tests = {tests64f, tests}; + if (`MEM_VIRTMEM) tests = {tests64mmu, tests}; end //tests = {tests64a, tests}; end else begin // RV32 From 002c32d2ad0ae2a43a0cfe58eee6ee35a22c9228 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Wed, 30 Jun 2021 17:02:36 -0500 Subject: [PATCH 17/38] The icache ptw interlock is actually correct now. There needed to be a 1 cycle delay. 
--- wally-pipelined/regression/wave.do | 4 +- wally-pipelined/src/cache/ICacheCntrl.sv | 7 +++- wally-pipelined/src/mmu/pagetablewalker.sv | 48 +++++++++++++--------- 3 files changed, 37 insertions(+), 22 deletions(-) diff --git a/wally-pipelined/regression/wave.do b/wally-pipelined/regression/wave.do index eef15339..4669e48c 100644 --- a/wally-pipelined/regression/wave.do +++ b/wally-pipelined/regression/wave.do @@ -351,7 +351,7 @@ add wave -noupdate -group uart -expand -group outputs /testbench/dut/uncore/genb add wave -noupdate -expand -group dtlb /testbench/dut/hart/lsu/dmmu/TLBMiss add wave -noupdate -expand -group itlb /testbench/dut/hart/ifu/ITLBMissF TreeUpdate [SetDefaultTree] -WaveRestoreCursors {{Cursor 5} {11172515 ns} 0} {{Cursor 8} {2967 ns} 0} +WaveRestoreCursors {{Cursor 5} {11172515 ns} 0} {{Cursor 8} {3207 ns} 0} quietly wave cursor active 2 configure wave -namecolwidth 250 configure wave -valuecolwidth 189 @@ -367,4 +367,4 @@ configure wave -griddelta 40 configure wave -timeline 0 configure wave -timelineunits ns update -WaveRestoreZoom {2729 ns} {3045 ns} +WaveRestoreZoom {2930 ns} {3454 ns} diff --git a/wally-pipelined/src/cache/ICacheCntrl.sv b/wally-pipelined/src/cache/ICacheCntrl.sv index 78bdb46a..bc5c30b3 100644 --- a/wally-pipelined/src/cache/ICacheCntrl.sv +++ b/wally-pipelined/src/cache/ICacheCntrl.sv @@ -112,6 +112,8 @@ module ICacheCntrl #(parameter BLOCKLEN = 256) ( localparam STATE_INVALIDATE = 18; // *** not sure if invalidate or evict? invalidate by cache block or address? 
localparam STATE_TLB_MISS = 19; + localparam STATE_TLB_MISS_DONE = 20; + localparam AHBByteLength = `XLEN / 8; @@ -371,11 +373,14 @@ module ICacheCntrl #(parameter BLOCKLEN = 256) ( end STATE_TLB_MISS: begin if (ITLBWriteF) begin - NextState = STATE_READY; + NextState = STATE_TLB_MISS_DONE; end else begin NextState = STATE_TLB_MISS; end end + STATE_TLB_MISS_DONE : begin + NextState = STATE_READY; + end default: begin PCMux = 2'b01; NextState = STATE_READY; diff --git a/wally-pipelined/src/mmu/pagetablewalker.sv b/wally-pipelined/src/mmu/pagetablewalker.sv index f0f30301..e917e83c 100644 --- a/wally-pipelined/src/mmu/pagetablewalker.sv +++ b/wally-pipelined/src/mmu/pagetablewalker.sv @@ -96,7 +96,25 @@ module pagetablewalker ( // Outputs of walker logic [`XLEN-1:0] PageTableEntry; logic [1:0] PageType; + logic StartWalk; + logic EndWalk; + + typedef enum {LEVEL0_WDV, + LEVEL0, + LEVEL1_WDV, + LEVEL1, + LEVEL2_WDV, + LEVEL2, + LEVEL3_WDV, + LEVEL3, + LEAF, + IDLE, + FAULT} statetype; + statetype WalkerState, NextWalkerState; + + logic PRegEn; + assign SvMode = SATP_REGW[`XLEN-1:`XLEN-`SVMODE_BITS]; assign BasePageTablePPN = SATP_REGW[`PPN_BITS-1:0]; @@ -108,23 +126,30 @@ module pagetablewalker ( flopenr #(`XLEN) TranslationVAdrReg(.clk(clk), .reset(reset), - .en(1'b1), // *** use enable later to save power + .en(StartWalk), // *** use enable later to save power .d(TranslationVAdr), .q(TranslationVAdrQ)); - flopr #(1) + flopenrc #(1) DTLBMissMReg(.clk(clk), .reset(reset), + .en(StartWalk | EndWalk), + .clear(EndWalk), .d(DTLBMissM), .q(DTLBMissMQ)); - flopr #(1) + flopenrc #(1) ITLBMissMReg(.clk(clk), .reset(reset), + .en(StartWalk | EndWalk), + .clear(EndWalk), .d(ITLBMissF), .q(ITLBMissFQ)); - + + assign StartWalk = WalkerState == IDLE && (DTLBMissM | ITLBMissF); + assign EndWalk = WalkerState == LEAF; + assign MMUTranslate = DTLBMissMQ | ITLBMissFQ; //assign MMUTranslate = DTLBMissM | ITLBMissF; @@ -143,21 +168,6 @@ module pagetablewalker ( assign PageTypeF = 
PageType; assign PageTypeM = PageType; - typedef enum {LEVEL0_WDV, - LEVEL0, - LEVEL1_WDV, - LEVEL1, - LEVEL2_WDV, - LEVEL2, - LEVEL3_WDV, - LEVEL3, - LEAF, - IDLE, - FAULT} statetype; - - statetype WalkerState, NextWalkerState; - - logic PRegEn; generate if (`XLEN == 32) begin From 157b1b31bf7341eec198ccacab826e77742081ef Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Wed, 30 Jun 2021 19:24:59 -0500 Subject: [PATCH 18/38] Icache ITLB interlock fix. --- wally-pipelined/src/cache/ICacheCntrl.sv | 26 ++++++++++++++++++++---- wally-pipelined/src/cache/icache.sv | 21 +++++++++++-------- 2 files changed, 34 insertions(+), 13 deletions(-) diff --git a/wally-pipelined/src/cache/ICacheCntrl.sv b/wally-pipelined/src/cache/ICacheCntrl.sv index f290f0ad..bc5c30b3 100644 --- a/wally-pipelined/src/cache/ICacheCntrl.sv +++ b/wally-pipelined/src/cache/ICacheCntrl.sv @@ -40,8 +40,8 @@ module ICacheCntrl #(parameter BLOCKLEN = 256) ( input logic [31:0] ICacheMemReadData, input logic ICacheMemReadValid, // The address at which we want to search the cache memory - output logic [`PA_BITS-1:0] PCTagF, - output logic [`PA_BITS-1:0] PCNextIndexF, + output logic [`PA_BITS-1:0] PCTagF, + output logic [`PA_BITS-1:0] PCNextIndexF, output logic ICacheReadEn, // Load data into the cache output logic ICacheMemWriteEnable, @@ -56,13 +56,15 @@ module ICacheCntrl #(parameter BLOCKLEN = 256) ( // Outputs to pipeline control stuff output logic ICacheStallF, EndFetchState, + input logic ITLBMissF, + input logic ITLBWriteF, // Signals to/from ahblite interface // A read containing the requested data input logic [`XLEN-1:0] InstrInF, input logic InstrAckF, // The read we request from main memory - output logic [`PA_BITS-1:0] InstrPAdrF, + output logic [`PA_BITS-1:0] InstrPAdrF, output logic InstrReadF ); @@ -109,6 +111,10 @@ module ICacheCntrl #(parameter BLOCKLEN = 256) ( localparam STATE_INVALIDATE = 18; // *** not sure if invalidate or evict? invalidate by cache block or address? 
+ localparam STATE_TLB_MISS = 19; + localparam STATE_TLB_MISS_DONE = 20; + + localparam AHBByteLength = `XLEN / 8; localparam AHBOFFETWIDTH = $clog2(AHBByteLength); @@ -209,7 +215,9 @@ module ICacheCntrl #(parameter BLOCKLEN = 256) ( STATE_READY: begin PCMux = 2'b00; ICacheReadEn = 1'b1; - if (hit & ~spill) begin + if (ITLBMissF) begin + NextState = STATE_TLB_MISS; + end else if (hit & ~spill) begin SavePC = 1'b1; ICacheStallF = 1'b0; NextState = STATE_READY; @@ -363,6 +371,16 @@ module ICacheCntrl #(parameter BLOCKLEN = 256) ( ICacheStallF = 1'b0; NextState = STATE_READY; end + STATE_TLB_MISS: begin + if (ITLBWriteF) begin + NextState = STATE_TLB_MISS_DONE; + end else begin + NextState = STATE_TLB_MISS; + end + end + STATE_TLB_MISS_DONE : begin + NextState = STATE_READY; + end default: begin PCMux = 2'b01; NextState = STATE_READY; diff --git a/wally-pipelined/src/cache/icache.sv b/wally-pipelined/src/cache/icache.sv index abf828fc..89b2ff9e 100644 --- a/wally-pipelined/src/cache/icache.sv +++ b/wally-pipelined/src/cache/icache.sv @@ -28,24 +28,27 @@ module icache ( // Basic pipeline stuff - input logic clk, reset, - input logic StallF, StallD, - input logic FlushD, + input logic clk, reset, + input logic StallF, StallD, + input logic FlushD, input logic [`PA_BITS-1:0] PCNextF, input logic [`PA_BITS-1:0] PCPF, // Data read in from the ebu unit - input logic [`XLEN-1:0] InstrInF, - input logic InstrAckF, + input logic [`XLEN-1:0] InstrInF, + input logic InstrAckF, // Read requested from the ebu unit output logic [`PA_BITS-1:0] InstrPAdrF, - output logic InstrReadF, + output logic InstrReadF, // High if the instruction currently in the fetch stage is compressed - output logic CompressedF, + output logic CompressedF, // High if the icache is requesting a stall - output logic ICacheStallF, + output logic ICacheStallF, + input logic ITLBMissF, + input logic ITLBWriteF, + // The raw (not decompressed) instruction that was requested // If this instruction is compressed, 
upper 16 bits may be the next 16 bits or may be zeros - output logic [31:0] FinalInstrRawF + output logic [31:0] FinalInstrRawF ); // Configuration parameters From 88a18496cf6dd7c4d5298561b59b5f053bd67185 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Thu, 1 Jul 2021 12:49:09 -0500 Subject: [PATCH 19/38] Got some stores working in virtual memory. --- wally-pipelined/regression/wave.do | 28 ++++++----- wally-pipelined/src/lsu/lsu.sv | 32 ++++++++++--- wally-pipelined/src/lsu/lsuArb.sv | 4 +- wally-pipelined/src/mmu/pagetablewalker.sv | 55 +++++++++++++++++----- 4 files changed, 84 insertions(+), 35 deletions(-) diff --git a/wally-pipelined/regression/wave.do b/wally-pipelined/regression/wave.do index 4669e48c..4058b4f0 100644 --- a/wally-pipelined/regression/wave.do +++ b/wally-pipelined/regression/wave.do @@ -26,7 +26,7 @@ add wave -noupdate -group HDU -expand -group hazards /testbench/dut/hart/hzu/Ret add wave -noupdate -group HDU -expand -group hazards /testbench/dut/hart/hzu/TrapM add wave -noupdate -group HDU -expand -group hazards /testbench/dut/hart/hzu/LoadStallD add wave -noupdate -group HDU -expand -group hazards /testbench/dut/hart/hzu/ICacheStallF -add wave -noupdate -group HDU -expand -group hazards /testbench/dut/hart/DataStall +add wave -noupdate -group HDU -expand -group hazards /testbench/dut/hart/hzu/DCacheStall add wave -noupdate -group HDU -expand -group hazards /testbench/dut/hart/MulDivStallD add wave -noupdate -group HDU -group Flush -color Yellow /testbench/dut/hart/hzu/FlushF add wave -noupdate -group HDU -group Flush -color Yellow /testbench/dut/hart/FlushD @@ -212,7 +212,7 @@ add wave -noupdate -group icache -expand -group pc /testbench/dut/hart/ifu/icach add wave -noupdate -group AHB -expand -group read /testbench/dut/hart/ebu/HRDATA add wave -noupdate -group AHB -expand -group read /testbench/dut/hart/ebu/HRDATAMasked add wave -noupdate -group AHB -expand -group read /testbench/dut/hart/ebu/HRDATANext -add wave -noupdate -group AHB 
/testbench/dut/hart/ebu/BusState +add wave -noupdate -group AHB -color Gold /testbench/dut/hart/ebu/BusState add wave -noupdate -group AHB /testbench/dut/hart/ebu/ProposedNextBusState add wave -noupdate -group AHB /testbench/dut/hart/ebu/NextBusState add wave -noupdate -group AHB /testbench/dut/hart/ebu/DSquashBusAccessM @@ -306,15 +306,16 @@ add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/T add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/ValidPTE add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/LeafPTE add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/MMUStall -add wave -noupdate -expand -group ptwalker -expand -group {fsm outputs} /testbench/dut/hart/pagetablewalker/TranslationPAdr -add wave -noupdate -expand -group ptwalker -expand -group {fsm outputs} /testbench/dut/hart/pagetablewalker/PageTableEntry -add wave -noupdate -expand -group ptwalker -expand -group {fsm outputs} /testbench/dut/hart/pagetablewalker/PageType -add wave -noupdate -expand -group ptwalker -expand -group {fsm outputs} /testbench/dut/hart/pagetablewalker/ITLBWriteF -add wave -noupdate -expand -group ptwalker -expand -group {fsm outputs} /testbench/dut/hart/pagetablewalker/DTLBWriteM -add wave -noupdate -expand -group ptwalker -expand -group {fsm outputs} /testbench/dut/hart/pagetablewalker/WalkerInstrPageFaultF -add wave -noupdate -expand -group ptwalker -expand -group {fsm outputs} /testbench/dut/hart/pagetablewalker/WalkerLoadPageFaultM -add wave -noupdate -expand -group ptwalker -expand -group {fsm outputs} /testbench/dut/hart/pagetablewalker/WalkerStorePageFaultM -add wave -noupdate -expand -group ptwalker -expand -group {fsm outputs} /testbench/dut/hart/pagetablewalker/MMUStall +add wave -noupdate -expand -group ptwalker -group {fsm outputs} /testbench/dut/hart/pagetablewalker/TranslationPAdr +add wave -noupdate -expand -group ptwalker -group {fsm outputs} 
/testbench/dut/hart/pagetablewalker/PageTableEntry +add wave -noupdate -expand -group ptwalker -group {fsm outputs} /testbench/dut/hart/pagetablewalker/PageType +add wave -noupdate -expand -group ptwalker -group {fsm outputs} /testbench/dut/hart/pagetablewalker/ITLBWriteF +add wave -noupdate -expand -group ptwalker -group {fsm outputs} /testbench/dut/hart/pagetablewalker/DTLBWriteM +add wave -noupdate -expand -group ptwalker -group {fsm outputs} /testbench/dut/hart/pagetablewalker/WalkerInstrPageFaultF +add wave -noupdate -expand -group ptwalker -group {fsm outputs} /testbench/dut/hart/pagetablewalker/WalkerLoadPageFaultM +add wave -noupdate -expand -group ptwalker -group {fsm outputs} /testbench/dut/hart/pagetablewalker/WalkerStorePageFaultM +add wave -noupdate -expand -group ptwalker -group {fsm outputs} /testbench/dut/hart/pagetablewalker/MMUStall +add wave -noupdate -expand -group ptwalker -group {fsm outputs} /testbench/dut/hart/pagetablewalker/EndWalk add wave -noupdate -expand -group {LSU ARB} -color Gold /testbench/dut/hart/arbiter/CurrState add wave -noupdate -expand -group {LSU ARB} /testbench/dut/hart/arbiter/SelPTW add wave -noupdate -expand -group {LSU ARB} -expand -group hptw /testbench/dut/hart/arbiter/HPTWTranslate @@ -349,9 +350,10 @@ add wave -noupdate -group uart -expand -group outputs /testbench/dut/uncore/genb add wave -noupdate -group uart -expand -group outputs /testbench/dut/uncore/genblk4/uart/TXRDYb add wave -noupdate -group uart -expand -group outputs /testbench/dut/uncore/genblk4/uart/RXRDYb add wave -noupdate -expand -group dtlb /testbench/dut/hart/lsu/dmmu/TLBMiss +add wave -noupdate -expand -group dtlb /testbench/dut/hart/lsu/dmmu/tlb/TLBWrite add wave -noupdate -expand -group itlb /testbench/dut/hart/ifu/ITLBMissF TreeUpdate [SetDefaultTree] -WaveRestoreCursors {{Cursor 5} {11172515 ns} 0} {{Cursor 8} {3207 ns} 0} +WaveRestoreCursors {{Cursor 5} {11172515 ns} 0} {{Cursor 8} {3377 ns} 0} quietly wave cursor active 2 configure wave 
-namecolwidth 250 configure wave -valuecolwidth 189 @@ -367,4 +369,4 @@ configure wave -griddelta 40 configure wave -timeline 0 configure wave -timelineunits ns update -WaveRestoreZoom {2930 ns} {3454 ns} +WaveRestoreZoom {3091 ns} {3683 ns} diff --git a/wally-pipelined/src/lsu/lsu.sv b/wally-pipelined/src/lsu/lsu.sv index cd37ce94..709b9a24 100644 --- a/wally-pipelined/src/lsu/lsu.sv +++ b/wally-pipelined/src/lsu/lsu.sv @@ -111,7 +111,9 @@ module lsu ( STATE_FETCH_AMO_1, STATE_FETCH_AMO_2, STATE_STALLED, - STATE_TLB_MISS} statetype; + STATE_PTW_READY, + STATE_PTW_FETCH, + STATE_PTW_DONE} statetype; statetype CurrState, NextState; @@ -160,8 +162,8 @@ module lsu ( // Changed DataMisalignedM to a larger combination of trap sources // NonBusTrapM is anything that the bus doesn't contribute to producing // By contrast, using TrapM results in circular logic errors - assign MemReadM = MemRWM[1] & ~NonBusTrapM & CurrState != STATE_STALLED; - assign MemWriteM = MemRWM[0] & ~NonBusTrapM && ~SquashSCM & CurrState != STATE_STALLED; + assign MemReadM = MemRWM[1] & ~NonBusTrapM & ~DTLBMissM & CurrState != STATE_STALLED; + assign MemWriteM = MemRWM[0] & ~NonBusTrapM & ~DTLBMissM & ~SquashSCM & CurrState != STATE_STALLED; assign AtomicMaskedM = CurrState != STATE_STALLED ? 
AtomicM : 2'b00 ; assign MemAccessM = MemReadM | MemWriteM; @@ -222,7 +224,7 @@ module lsu ( case (CurrState) STATE_READY: if (DTLBMissM) begin - NextState = STATE_READY; + NextState = STATE_PTW_READY; DataStall = 1'b0; end else if (AtomicMaskedM[1]) begin NextState = STATE_FETCH_AMO_1; // *** should be some misalign check @@ -273,13 +275,29 @@ module lsu ( NextState = STATE_STALLED; end end - STATE_TLB_MISS: begin + STATE_PTW_READY: begin + DataStall = 1'b0; if (DTLBWriteM) begin - NextState = STATE_READY; + NextState = STATE_PTW_DONE; + end else if (MemReadM & ~DataMisalignedM) begin + NextState = STATE_PTW_FETCH; end else begin - NextState = STATE_TLB_MISS; + NextState = STATE_PTW_READY; end end + STATE_PTW_FETCH : begin + DataStall = 1'b1; + if (MemAckW & ~DTLBWriteM) begin + NextState = STATE_PTW_READY; + end else if (MemAckW & DTLBWriteM) begin + NextState = STATE_PTW_DONE; + end else begin + NextState = STATE_PTW_FETCH; + end + end + STATE_PTW_DONE: begin + NextState = STATE_READY; + end default: begin DataStall = 1'b0; NextState = STATE_READY; diff --git a/wally-pipelined/src/lsu/lsuArb.sv b/wally-pipelined/src/lsu/lsuArb.sv index dce509f7..bf925704 100644 --- a/wally-pipelined/src/lsu/lsuArb.sv +++ b/wally-pipelined/src/lsu/lsuArb.sv @@ -121,7 +121,7 @@ module lsuArb // multiplex the outputs to LSU assign DisableTranslation = SelPTW; // change names between SelPTW would be confusing in DTLB. - assign SelPTW = (CurrState == StatePTWActive) || (CurrState == StateReady && HPTWTranslate); + assign SelPTW = (CurrState == StatePTWActive && HPTWTranslate) || (CurrState == StateReady && HPTWTranslate); assign MemRWMtoLSU = SelPTW ? {HPTWRead, 1'b0} : MemRWM; generate @@ -158,6 +158,6 @@ module lsuArb .q(HPTWStall)); -----/\----- EXCLUDED -----/\----- */ - assign DCacheStall = SelPTW ? 1'b0 : DataStall; // *** this is probably going to change. + assign DCacheStall = SelPTW ? 1'b1 : DataStall; // *** this is probably going to change. 
endmodule diff --git a/wally-pipelined/src/mmu/pagetablewalker.sv b/wally-pipelined/src/mmu/pagetablewalker.sv index e917e83c..160dccc3 100644 --- a/wally-pipelined/src/mmu/pagetablewalker.sv +++ b/wally-pipelined/src/mmu/pagetablewalker.sv @@ -148,9 +148,13 @@ module pagetablewalker ( assign StartWalk = WalkerState == IDLE && (DTLBMissM | ITLBMissF); - assign EndWalk = WalkerState == LEAF; + assign EndWalk = (WalkerState == LEVEL0 && ValidPTE && LeafPTE && ~AccessAlert) || + (WalkerState == LEVEL1 && ValidPTE && LeafPTE && ~AccessAlert) || + (WalkerState == LEVEL2 && ValidPTE && LeafPTE && ~AccessAlert) || + (WalkerState == LEVEL3 && ValidPTE && LeafPTE && ~AccessAlert) || + (WalkerState == FAULT); - assign MMUTranslate = DTLBMissMQ | ITLBMissFQ; + assign MMUTranslate = (DTLBMissMQ | ITLBMissFQ) & ~EndWalk; //assign MMUTranslate = DTLBMissM | ITLBMissF; // unswizzle PTE bits @@ -304,6 +308,10 @@ module pagetablewalker ( TranslationPAdr = '0; HPTWRead = 1'b0; MMUStall = 1'b1; + PageTableEntry = '0; + PageType = '0; + DTLBWriteM = '0; + ITLBWriteF = '0; WalkerInstrPageFaultF = 1'b0; WalkerLoadPageFaultM = 1'b0; @@ -343,7 +351,13 @@ module pagetablewalker ( // access bit. The following commented line of code is // supposed to perform that check. However, it is untested. if (ValidPTE && LeafPTE && ~BadTerapage) begin - NextWalkerState = LEAF; + NextWalkerState = IDLE; + PageTableEntry = CurrentPTE; + PageType = (WalkerState == LEVEL3) ? 2'b11 : + ((WalkerState == LEVEL2) ? 2'b10 : + ((WalkerState == LEVEL1) ? 2'b01 : 2'b00)); + DTLBWriteM = DTLBMissMQ; + ITLBWriteF = ~DTLBMissMQ; // Prefer data over instructions end // else if (ValidPTE && LeafPTE) NextWalkerState = LEAF; // *** Once the above line is properly tested, delete this line. else if (ValidPTE && ~LeafPTE) begin @@ -376,7 +390,13 @@ module pagetablewalker ( // access bit. The following commented line of code is // supposed to perform that check. However, it is untested. 
if (ValidPTE && LeafPTE && ~BadGigapage) begin - NextWalkerState = LEAF; + NextWalkerState = IDLE; + PageTableEntry = CurrentPTE; + PageType = (WalkerState == LEVEL3) ? 2'b11 : + ((WalkerState == LEVEL2) ? 2'b10 : + ((WalkerState == LEVEL1) ? 2'b01 : 2'b00)); + DTLBWriteM = DTLBMissMQ; + ITLBWriteF = ~DTLBMissMQ; // Prefer data over instructions end // else if (ValidPTE && LeafPTE) NextWalkerState = LEAF; // *** Once the above line is properly tested, delete this line. else if (ValidPTE && ~LeafPTE) begin @@ -409,7 +429,14 @@ module pagetablewalker ( // access bit. The following commented line of code is // supposed to perform that check. However, it is untested. if (ValidPTE && LeafPTE && ~BadMegapage) begin - NextWalkerState = LEAF; + NextWalkerState = IDLE; + PageTableEntry = CurrentPTE; + PageType = (WalkerState == LEVEL3) ? 2'b11 : + ((WalkerState == LEVEL2) ? 2'b10 : + ((WalkerState == LEVEL1) ? 2'b01 : 2'b00)); + DTLBWriteM = DTLBMissMQ; + ITLBWriteF = ~DTLBMissMQ; // Prefer data over instructions + end // else if (ValidPTE && LeafPTE) NextWalkerState = LEAF; // *** Once the above line is properly tested, delete this line. else if (ValidPTE && ~LeafPTE) begin @@ -437,7 +464,14 @@ module pagetablewalker ( LEVEL0: begin if (ValidPTE && LeafPTE && ~AccessAlert) begin - NextWalkerState = LEAF; + NextWalkerState = IDLE; + PageTableEntry = CurrentPTE; + PageType = (WalkerState == LEVEL3) ? 2'b11 : + ((WalkerState == LEVEL2) ? 2'b10 : + ((WalkerState == LEVEL1) ? 
2'b01 : 2'b00)); + DTLBWriteM = DTLBMissMQ; + ITLBWriteF = ~DTLBMissMQ; // Prefer data over instructions + end else begin NextWalkerState = FAULT; WalkerInstrPageFaultF = ~DTLBMissMQ; @@ -485,11 +519,12 @@ module pagetablewalker ( always_comb begin // default values //TranslationPAdr = '0; +/* -----\/----- EXCLUDED -----\/----- PageTableEntry = '0; PageType = '0; DTLBWriteM = '0; ITLBWriteF = '0; -/* -----\/----- EXCLUDED -----\/----- + WalkerInstrPageFaultF = '0; WalkerLoadPageFaultM = '0; WalkerStorePageFaultM = '0; @@ -533,12 +568,6 @@ module pagetablewalker ( LEAF: begin // Keep physical address alive to prevent HADDR dropping to 0 //TranslationPAdr = {CurrentPPN, VPN0, 3'b000}; - PageTableEntry = CurrentPTE; - PageType = (WalkerState == LEVEL3) ? 2'b11 : - ((WalkerState == LEVEL2) ? 2'b10 : - ((WalkerState == LEVEL1) ? 2'b01 : 2'b00)); - DTLBWriteM = DTLBMissMQ; - ITLBWriteF = ~DTLBMissMQ; // Prefer data over instructions end FAULT: begin // Keep physical address alive to prevent HADDR dropping to 0 From ec21126474be1d38574ad22f72c9fe0dc811605f Mon Sep 17 00:00:00 2001 From: Teo Ene Date: Thu, 1 Jul 2021 13:32:42 -0500 Subject: [PATCH 20/38] Flow updated for 90nm --- .gitmodules | 3 - sky130/sky130_osu_sc_t12 | 1 - wally-pipelined/src/generic/lzd.sv~ | 195 ---------------------------- 3 files changed, 199 deletions(-) delete mode 160000 sky130/sky130_osu_sc_t12 delete mode 100755 wally-pipelined/src/generic/lzd.sv~ diff --git a/.gitmodules b/.gitmodules index 65e1e71c..e69de29b 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +0,0 @@ -[submodule "sky130/sky130_osu_sc_t12"] - path = sky130/sky130_osu_sc_t12 - url = https://foss-eda-tools.googlesource.com/skywater-pdk/libs/sky130_osu_sc_t12/ diff --git a/sky130/sky130_osu_sc_t12 b/sky130/sky130_osu_sc_t12 deleted file mode 160000 index f60f2d03..00000000 --- a/sky130/sky130_osu_sc_t12 +++ /dev/null @@ -1 +0,0 @@ -Subproject commit f60f2d0395053c4df362a97d7e2099721b6face6 diff --git 
a/wally-pipelined/src/generic/lzd.sv~ b/wally-pipelined/src/generic/lzd.sv~ deleted file mode 100755 index bfffe5e5..00000000 --- a/wally-pipelined/src/generic/lzd.sv~ +++ /dev/null @@ -1,195 +0,0 @@ -/////////////////////////////////////////// -// lzd.sv -// -// Written: James.Stine@okstate.edu 1 February 2021 -// Modified: -// -// Purpose: Integer Divide instructions -// -// A component of the Wally configurable RISC-V project. -// -// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University -// -// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, -// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software -// is furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT -// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -/////////////////////////////////////////// - -`include "wally-config.vh" -/* verilator lint_off DECLFILENAME */ - -// Original idea came from V. G. Oklobdzija, "An algorithmic and novel -// design of a leading zero detector circuit: comparison with logic -// synthesis," in IEEE Transactions on Very Large Scale Integration -// (VLSI) Systems, vol. 2, no. 1, pp. 124-128, March 1994, doi: -// 10.1109/92.273153. 
- -// Modified to be more hierarchical - -module lz2 (P, V, B); - - input logic [1:0] B; - - output logic P; - output logic V; - - assign V = B[0] | B[1]; - assign P = B[0] & ~B[1]; - -endmodule // lz2 - -module lzd_hier #(parameter WIDTH=8) - (input logic [WIDTH-1:0] B, - output logic [$clog2(WIDTH)-1:0] ZP, - output logic ZV); - - if (WIDTH == 128) - lz128 lzd127 (ZP, ZV, B); - else if (WIDTH == 64) - lz64 lzd64 (ZP, ZV, B); - else if (WIDTH == 32) - lz32 lzd32 (ZP, ZV, B); - else if (WIDTH == 16) - lz16 lzd16 (ZP, ZV, B); - else if (WIDTH == 8) - lz8 lzd8 (ZP, ZV, B); - else if (WIDTH == 4) - lz4 lzd4 (ZP, ZV, B); - -endmodule // lzd_hier - -module lz4 (ZP, ZV, B); - - input logic [3:0] B; - - logic ZPa; - logic ZPb; - logic ZVa; - logic ZVb; - - output logic [1:0] ZP; - output logic ZV; - - lz2 l1(ZPa, ZVa, B[1:0]); - lz2 l2(ZPb, ZVb, B[3:2]); - - assign ZP[0:0] = ZVb ? ZPb : ZPa; - assign ZP[1] = ~ZVb; - assign ZV = ZVa | ZVb; - -endmodule - -module lz8 (ZP, ZV, B); - - input logic [7:0] B; - - logic [1:0] ZPa; - logic [1:0] ZPb; - logic ZVa; - logic ZVb; - - output logic [2:0] ZP; - output logic ZV; - - lz4 l1(ZPa, ZVa, B[3:0]); - lz4 l2(ZPb, ZVb, B[7:4]); - - assign ZP[1:0] = ZVb ? ZPb : ZPa; - assign ZP[2] = ~ZVb; - assign ZV = ZVa | ZVb; - -endmodule - -module lz16 (ZP, ZV, B); - - input logic [15:0] B; - - logic [2:0] ZPa; - logic [2:0] ZPb; - logic ZVa; - logic ZVb; - - output logic [3:0] ZP; - output logic ZV; - - lz8 l1(ZPa, ZVa, B[7:0]); - lz8 l2(ZPb, ZVb, B[15:8]); - - assign ZP[2:0] = ZVb ? ZPb : ZPa; - assign ZP[3] = ~ZVb; - assign ZV = ZVa | ZVb; - -endmodule // lz16 - -module lz32 (ZP, ZV, B); - - input logic [31:0] B; - - logic [3:0] ZPa; - logic [3:0] ZPb; - logic ZVa; - logic ZVb; - - output logic [4:0] ZP; - output logic ZV; - - lz16 l1(ZPa, ZVa, B[15:0]); - lz16 l2(ZPb, ZVb, B[31:16]); - - assign ZP[3:0] = ZVb ? 
ZPb : ZPa; - assign ZP[4] = ~ZVb; - assign ZV = ZVa | ZVb; - -endmodule // lz32 - -module lz64 (ZP, ZV, B); - - input logic [63:0] B; - - logic [4:0] ZPa; - logic [4:0] ZPb; - logic ZVa; - logic ZVb; - - output logic [5:0] ZP; - output logic ZV; - - lz32 l1(ZPa, ZVa, B[31:0]); - lz32 l2(ZPb, ZVb, B[63:32]); - - assign ZP[4:0] = ZVb ? ZPb : ZPa; - assign ZP[5] = ~ZVb; - assign ZV = ZVa | ZVb; - -endmodule // lz64 - -module lz128 (ZP, ZV, B); - - input logic [127:0] B; - - logic [5:0] ZPa; - logic [5:0] ZPb; - logic ZVa; - logic ZVb; - - output logic [6:0] ZP; - output logic ZV; - - lz64 l1(ZPa, ZVa, B[64:0]); - lz64 l2(ZPb, ZVb, B[127:63]); - - assign ZP[5:0] = ZVb ? ZPb : ZPa; - assign ZP[6] = ~ZVb; - assign ZV = ZVa | ZVb; - -endmodule // lz128 - -/* verilator lint_on DECLFILENAME */ From 2dc349ea6fddbbb0663a3cfc259a6bcca1c13d25 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Thu, 1 Jul 2021 16:55:16 -0500 Subject: [PATCH 22/38] Fixed the wrong virtual address write into the dtlb. --- wally-pipelined/regression/wave.do | 73 ++++++++++++------- wally-pipelined/src/lsu/lsu.sv | 6 +- wally-pipelined/src/mmu/pagetablewalker.sv | 84 +++------------------- 3 files changed, 61 insertions(+), 102 deletions(-) diff --git a/wally-pipelined/regression/wave.do b/wally-pipelined/regression/wave.do index 4058b4f0..f6bf34cc 100644 --- a/wally-pipelined/regression/wave.do +++ b/wally-pipelined/regression/wave.do @@ -118,18 +118,18 @@ add wave -noupdate -group RegFile -group {write regfile mux} /testbench/dut/hart add wave -noupdate -group RegFile -group {write regfile mux} /testbench/dut/hart/ieu/dp/CSRReadValW add wave -noupdate -group RegFile -group {write regfile mux} /testbench/dut/hart/ieu/dp/ResultSrcW add wave -noupdate -group RegFile -group {write regfile mux} /testbench/dut/hart/ieu/dp/ResultW -add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/a -add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/b -add wave -noupdate -group alu 
/testbench/dut/hart/ieu/dp/alu/alucontrol -add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/result -add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/flags -add wave -noupdate -group alu -divider internals -add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/overflow -add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/carry -add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/zero -add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/neg -add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/lt -add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/ltu +add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/a +add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/b +add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/alucontrol +add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/result +add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/flags +add wave -noupdate -expand -group alu -divider internals +add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/overflow +add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/carry +add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/zero +add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/neg +add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/lt +add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/ltu add wave -noupdate -group Forward /testbench/dut/hart/ieu/fw/Rs1D add wave -noupdate -group Forward /testbench/dut/hart/ieu/fw/Rs2D add wave -noupdate -group Forward /testbench/dut/hart/ieu/fw/Rs1E @@ -241,6 +241,7 @@ add wave -noupdate -group AHB /testbench/dut/hart/ebu/HSIZED add wave -noupdate -group AHB /testbench/dut/hart/ebu/HWRITED add wave -noupdate -group AHB /testbench/dut/hart/ebu/StallW add wave -noupdate -expand -group lsu -color Gold 
/testbench/dut/hart/lsu/CurrState +add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/DisableTranslation add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/MemRWM add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/DataStall add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/MemAdrM @@ -316,16 +317,17 @@ add wave -noupdate -expand -group ptwalker -group {fsm outputs} /testbench/dut/h add wave -noupdate -expand -group ptwalker -group {fsm outputs} /testbench/dut/hart/pagetablewalker/WalkerStorePageFaultM add wave -noupdate -expand -group ptwalker -group {fsm outputs} /testbench/dut/hart/pagetablewalker/MMUStall add wave -noupdate -expand -group ptwalker -group {fsm outputs} /testbench/dut/hart/pagetablewalker/EndWalk -add wave -noupdate -expand -group {LSU ARB} -color Gold /testbench/dut/hart/arbiter/CurrState -add wave -noupdate -expand -group {LSU ARB} /testbench/dut/hart/arbiter/SelPTW -add wave -noupdate -expand -group {LSU ARB} -expand -group hptw /testbench/dut/hart/arbiter/HPTWTranslate -add wave -noupdate -expand -group {LSU ARB} -expand -group hptw /testbench/dut/hart/arbiter/HPTWRead -add wave -noupdate -expand -group {LSU ARB} -expand -group hptw /testbench/dut/hart/arbiter/HPTWPAdr -add wave -noupdate -expand -group {LSU ARB} -expand -group hptw /testbench/dut/hart/arbiter/HPTWReadPTE -add wave -noupdate -expand -group {LSU ARB} -expand -group hptw /testbench/dut/hart/arbiter/HPTWReady -add wave -noupdate -expand -group {LSU ARB} -expand -group toLSU /testbench/dut/hart/arbiter/MemAdrMtoLSU +add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/MMUPAdr +add wave -noupdate -group {LSU ARB} -color Gold /testbench/dut/hart/arbiter/CurrState +add wave -noupdate -group {LSU ARB} /testbench/dut/hart/arbiter/SelPTW +add wave -noupdate -group {LSU ARB} -expand -group hptw /testbench/dut/hart/arbiter/HPTWTranslate +add wave -noupdate -group {LSU ARB} -expand -group hptw 
/testbench/dut/hart/arbiter/HPTWRead +add wave -noupdate -group {LSU ARB} -expand -group hptw /testbench/dut/hart/arbiter/HPTWPAdr +add wave -noupdate -group {LSU ARB} -expand -group hptw /testbench/dut/hart/arbiter/HPTWReadPTE +add wave -noupdate -group {LSU ARB} -expand -group hptw /testbench/dut/hart/arbiter/HPTWReady +add wave -noupdate -group {LSU ARB} -expand -group toLSU /testbench/dut/hart/arbiter/MemAdrMtoLSU add wave -noupdate /testbench/dut/hart/lsu/DataStall -add wave -noupdate -expand -group csr /testbench/dut/hart/priv/csr/MIP_REGW +add wave -noupdate -group csr /testbench/dut/hart/priv/csr/MIP_REGW add wave -noupdate /testbench/dut/uncore/genblk2/plic/ExtIntM add wave -noupdate -group uart /testbench/dut/uncore/genblk4/uart/HCLK add wave -noupdate -group uart /testbench/dut/uncore/genblk4/uart/HRESETn @@ -351,10 +353,31 @@ add wave -noupdate -group uart -expand -group outputs /testbench/dut/uncore/genb add wave -noupdate -group uart -expand -group outputs /testbench/dut/uncore/genblk4/uart/RXRDYb add wave -noupdate -expand -group dtlb /testbench/dut/hart/lsu/dmmu/TLBMiss add wave -noupdate -expand -group dtlb /testbench/dut/hart/lsu/dmmu/tlb/TLBWrite -add wave -noupdate -expand -group itlb /testbench/dut/hart/ifu/ITLBMissF +add wave -noupdate -group itlb /testbench/dut/hart/ifu/ITLBMissF +add wave -noupdate /testbench/dut/hart/pagetablewalker/StartWalk +add wave -noupdate /testbench/dut/hart/lsu/dmmu/tlb/DisableTranslation +add wave -noupdate -group tlbread /testbench/dut/hart/lsu/dmmu/tlb/VirtualAddress +add wave -noupdate -group tlbread /testbench/dut/hart/lsu/dmmu/tlb/tlbcam/CAMHit +add wave -noupdate -group tlbread /testbench/dut/hart/lsu/dmmu/tlb/tlbcam/VPNIndex +add wave -noupdate -group tlbread /testbench/dut/hart/lsu/dmmu/tlb/tlbcam/HitPageType +add wave -noupdate -group tlbread /testbench/dut/hart/lsu/dmmu/tlb/tlbcam/VirtualPageNumber +add wave -noupdate -group tlbwrite /testbench/dut/hart/lsu/dmmu/tlb/tlbcam/TLBWrite +add wave -noupdate 
-group tlbwrite /testbench/dut/hart/lsu/dmmu/tlb/PTEWriteVal +add wave -noupdate -group tlbwrite /testbench/dut/hart/lsu/dmmu/tlb/tlbcam/WriteLines +add wave -noupdate /testbench/dut/hart/lsu/dmmu/tlb/SATP_REGW +add wave -noupdate /testbench/dut/hart/lsu/dmmu/tlb/STATUS_MXR +add wave -noupdate /testbench/dut/hart/lsu/dmmu/tlb/STATUS_SUM +add wave -noupdate /testbench/dut/hart/lsu/dmmu/tlb/PrivilegeModeW +add wave -noupdate /testbench/dut/hart/lsu/dmmu/tlb/TLBAccessType +add wave -noupdate /testbench/dut/hart/lsu/dmmu/tlb/DisableTranslation +add wave -noupdate /testbench/dut/hart/lsu/dmmu/tlb/VirtualAddress +add wave -noupdate /testbench/dut/hart/lsu/dmmu/tlb/PTEWriteVal +add wave -noupdate /testbench/dut/hart/lsu/dmmu/tlb/PageTypeWriteVal +add wave -noupdate /testbench/dut/hart/lsu/dmmu/tlb/TLBWrite +add wave -noupdate /testbench/dut/hart/lsu/dmmu/tlb/TLBFlush TreeUpdate [SetDefaultTree] -WaveRestoreCursors {{Cursor 5} {11172515 ns} 0} {{Cursor 8} {3377 ns} 0} -quietly wave cursor active 2 +WaveRestoreCursors {{Cursor 8} {3766 ns} 0} {{Cursor 3} {3377 ns} 0} {{Cursor 4} {3215 ns} 0} +quietly wave cursor active 3 configure wave -namecolwidth 250 configure wave -valuecolwidth 189 configure wave -justifyvalue left @@ -369,4 +392,4 @@ configure wave -griddelta 40 configure wave -timeline 0 configure wave -timelineunits ns update -WaveRestoreZoom {3091 ns} {3683 ns} +WaveRestoreZoom {3163 ns} {3403 ns} diff --git a/wally-pipelined/src/lsu/lsu.sv b/wally-pipelined/src/lsu/lsu.sv index 709b9a24..740d401d 100644 --- a/wally-pipelined/src/lsu/lsu.sv +++ b/wally-pipelined/src/lsu/lsu.sv @@ -225,7 +225,7 @@ module lsu ( STATE_READY: if (DTLBMissM) begin NextState = STATE_PTW_READY; - DataStall = 1'b0; + DataStall = 1'b1; end else if (AtomicMaskedM[1]) begin NextState = STATE_FETCH_AMO_1; // *** should be some misalign check DataStall = 1'b1; @@ -278,7 +278,7 @@ module lsu ( STATE_PTW_READY: begin DataStall = 1'b0; if (DTLBWriteM) begin - NextState = STATE_PTW_DONE; + 
NextState = STATE_READY; end else if (MemReadM & ~DataMisalignedM) begin NextState = STATE_PTW_FETCH; end else begin @@ -290,7 +290,7 @@ module lsu ( if (MemAckW & ~DTLBWriteM) begin NextState = STATE_PTW_READY; end else if (MemAckW & DTLBWriteM) begin - NextState = STATE_PTW_DONE; + NextState = STATE_READY; end else begin NextState = STATE_PTW_FETCH; end diff --git a/wally-pipelined/src/mmu/pagetablewalker.sv b/wally-pipelined/src/mmu/pagetablewalker.sv index 160dccc3..10519bd1 100644 --- a/wally-pipelined/src/mmu/pagetablewalker.sv +++ b/wally-pipelined/src/mmu/pagetablewalker.sv @@ -148,7 +148,8 @@ module pagetablewalker ( assign StartWalk = WalkerState == IDLE && (DTLBMissM | ITLBMissF); - assign EndWalk = (WalkerState == LEVEL0 && ValidPTE && LeafPTE && ~AccessAlert) || + assign EndWalk = WalkerState == LEAF || + //(WalkerState == LEVEL0 && ValidPTE && LeafPTE && ~AccessAlert) || (WalkerState == LEVEL1 && ValidPTE && LeafPTE && ~AccessAlert) || (WalkerState == LEVEL2 && ValidPTE && LeafPTE && ~AccessAlert) || (WalkerState == LEVEL3 && ValidPTE && LeafPTE && ~AccessAlert) || @@ -351,13 +352,14 @@ module pagetablewalker ( // access bit. The following commented line of code is // supposed to perform that check. However, it is untested. if (ValidPTE && LeafPTE && ~BadTerapage) begin - NextWalkerState = IDLE; + NextWalkerState = LEAF; PageTableEntry = CurrentPTE; PageType = (WalkerState == LEVEL3) ? 2'b11 : ((WalkerState == LEVEL2) ? 2'b10 : ((WalkerState == LEVEL1) ? 2'b01 : 2'b00)); DTLBWriteM = DTLBMissMQ; ITLBWriteF = ~DTLBMissMQ; // Prefer data over instructions + TranslationPAdr = TranslationVAdrQ; end // else if (ValidPTE && LeafPTE) NextWalkerState = LEAF; // *** Once the above line is properly tested, delete this line. else if (ValidPTE && ~LeafPTE) begin @@ -390,13 +392,14 @@ module pagetablewalker ( // access bit. The following commented line of code is // supposed to perform that check. However, it is untested. 
if (ValidPTE && LeafPTE && ~BadGigapage) begin - NextWalkerState = IDLE; + NextWalkerState = LEAF; PageTableEntry = CurrentPTE; PageType = (WalkerState == LEVEL3) ? 2'b11 : ((WalkerState == LEVEL2) ? 2'b10 : ((WalkerState == LEVEL1) ? 2'b01 : 2'b00)); DTLBWriteM = DTLBMissMQ; ITLBWriteF = ~DTLBMissMQ; // Prefer data over instructions + TranslationPAdr = TranslationVAdrQ; end // else if (ValidPTE && LeafPTE) NextWalkerState = LEAF; // *** Once the above line is properly tested, delete this line. else if (ValidPTE && ~LeafPTE) begin @@ -429,13 +432,14 @@ module pagetablewalker ( // access bit. The following commented line of code is // supposed to perform that check. However, it is untested. if (ValidPTE && LeafPTE && ~BadMegapage) begin - NextWalkerState = IDLE; + NextWalkerState = LEAF; PageTableEntry = CurrentPTE; PageType = (WalkerState == LEVEL3) ? 2'b11 : ((WalkerState == LEVEL2) ? 2'b10 : ((WalkerState == LEVEL1) ? 2'b01 : 2'b00)); DTLBWriteM = DTLBMissMQ; ITLBWriteF = ~DTLBMissMQ; // Prefer data over instructions + TranslationPAdr = TranslationVAdrQ; end // else if (ValidPTE && LeafPTE) NextWalkerState = LEAF; // *** Once the above line is properly tested, delete this line. @@ -464,14 +468,14 @@ module pagetablewalker ( LEVEL0: begin if (ValidPTE && LeafPTE && ~AccessAlert) begin - NextWalkerState = IDLE; + NextWalkerState = LEAF; PageTableEntry = CurrentPTE; PageType = (WalkerState == LEVEL3) ? 2'b11 : ((WalkerState == LEVEL2) ? 2'b10 : ((WalkerState == LEVEL1) ? 
2'b01 : 2'b00)); DTLBWriteM = DTLBMissMQ; ITLBWriteF = ~DTLBMissMQ; // Prefer data over instructions - + TranslationPAdr = TranslationVAdrQ; end else begin NextWalkerState = FAULT; WalkerInstrPageFaultF = ~DTLBMissMQ; @@ -516,74 +520,6 @@ module pagetablewalker ( assign VPN1 = TranslationVAdrQ[29:21]; assign VPN0 = TranslationVAdrQ[20:12]; - always_comb begin - // default values - //TranslationPAdr = '0; -/* -----\/----- EXCLUDED -----\/----- - PageTableEntry = '0; - PageType = '0; - DTLBWriteM = '0; - ITLBWriteF = '0; - - WalkerInstrPageFaultF = '0; - WalkerLoadPageFaultM = '0; - WalkerStorePageFaultM = '0; - -----/\----- EXCLUDED -----/\----- */ - - // The MMU defaults to stalling the processor - //MMUStall = '1; - - case (NextWalkerState) - IDLE: begin - //MMUStall = '0; - end - LEVEL3: begin - //TranslationPAdr = {BasePageTablePPN, VPN3, 3'b000}; - // *** this is a huge breaking point. if we're going through level3 every time, even when sv48 is off, - // what should translationPAdr be when level3 is just off? - end - LEVEL3_WDV: begin - //TranslationPAdr = {BasePageTablePPN, VPN3, 3'b000}; - // *** this is a huge breaking point. if we're going through level3 every time, even when sv48 is off, - // what should translationPAdr be when level3 is just off? - end - LEVEL2: begin - //TranslationPAdr = {(SvMode == `SV48) ? CurrentPPN : BasePageTablePPN, VPN2, 3'b000}; - end - LEVEL2_WDV: begin - //TranslationPAdr = {(SvMode == `SV48) ? 
CurrentPPN : BasePageTablePPN, VPN2, 3'b000}; - end - LEVEL1: begin - //TranslationPAdr = {CurrentPPN, VPN1, 3'b000}; - end - LEVEL1_WDV: begin - //TranslationPAdr = {CurrentPPN, VPN1, 3'b000}; - end - LEVEL0: begin - //TranslationPAdr = {CurrentPPN, VPN0, 3'b000}; - end - LEVEL0_WDV: begin - //TranslationPAdr = {CurrentPPN, VPN0, 3'b000}; - end - LEAF: begin - // Keep physical address alive to prevent HADDR dropping to 0 - //TranslationPAdr = {CurrentPPN, VPN0, 3'b000}; - end - FAULT: begin - // Keep physical address alive to prevent HADDR dropping to 0 - //TranslationPAdr = {CurrentPPN, VPN0, 3'b000}; -/* -----\/----- EXCLUDED -----\/----- - WalkerInstrPageFaultF = ~DTLBMissMQ; - WalkerLoadPageFaultM = DTLBMissMQ && ~MemStore; - WalkerStorePageFaultM = DTLBMissMQ && MemStore; - -----/\----- EXCLUDED -----/\----- */ - //MMUStall = '0; // Drop the stall early to enter trap handling code - end - default: begin - // nothing - end - endcase - end // Capture page table entry from ahblite flopenr #(`XLEN) ptereg(clk, reset, PRegEn, MMUReadPTE, SavedPTE); From 6916784354c9c04f335902565025bfc11c35a60b Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Thu, 1 Jul 2021 17:17:53 -0500 Subject: [PATCH 23/38] Fixed tab space issue. 
--- wally-pipelined/src/mmu/pagetablewalker.sv | 301 +++++++++++---------- 1 file changed, 151 insertions(+), 150 deletions(-) diff --git a/wally-pipelined/src/mmu/pagetablewalker.sv b/wally-pipelined/src/mmu/pagetablewalker.sv index 10519bd1..e425b367 100644 --- a/wally-pipelined/src/mmu/pagetablewalker.sv +++ b/wally-pipelined/src/mmu/pagetablewalker.sv @@ -30,90 +30,91 @@ `include "wally-config.vh" /* *** - TO-DO: - - Implement faults on accessed/dirty behavior -*/ + TO-DO: + - Implement faults on accessed/dirty behavior + */ -module pagetablewalker ( - // Control signals - input logic clk, reset, - input logic [`XLEN-1:0] SATP_REGW, +module pagetablewalker + ( + // Control signals + input logic clk, reset, + input logic [`XLEN-1:0] SATP_REGW, - // Signals from TLBs (addresses to translate) - input logic [`XLEN-1:0] PCF, MemAdrM, - input logic ITLBMissF, DTLBMissM, - input logic [1:0] MemRWM, + // Signals from TLBs (addresses to translate) + input logic [`XLEN-1:0] PCF, MemAdrM, + input logic ITLBMissF, DTLBMissM, + input logic [1:0] MemRWM, - // Outputs to the TLBs (PTEs to write) - output logic [`XLEN-1:0] PageTableEntryF, PageTableEntryM, - output logic [1:0] PageTypeF, PageTypeM, - output logic ITLBWriteF, DTLBWriteM, + // Outputs to the TLBs (PTEs to write) + output logic [`XLEN-1:0] PageTableEntryF, PageTableEntryM, + output logic [1:0] PageTypeF, PageTypeM, + output logic ITLBWriteF, DTLBWriteM, - // *** modify to send to LSU // *** KMG: These are inputs/results from the ahblite whose addresses should have already been checked, so I don't think they need to be sent through the LSU - input logic [`XLEN-1:0] MMUReadPTE, - input logic MMUReady, - input logic HPTWStall, + // *** modify to send to LSU // *** KMG: These are inputs/results from the ahblite whose addresses should have already been checked, so I don't think they need to be sent through the LSU + input logic [`XLEN-1:0] MMUReadPTE, + input logic MMUReady, + input logic HPTWStall, - // *** modify to 
send to LSU - output logic [`XLEN-1:0] MMUPAdr, - output logic MMUTranslate, // *** rename to HPTWReq - output logic HPTWRead, + // *** modify to send to LSU + output logic [`XLEN-1:0] MMUPAdr, + output logic MMUTranslate, // *** rename to HPTWReq + output logic HPTWRead, - // Stall signal - output logic MMUStall, + // Stall signal + output logic MMUStall, - // Faults - output logic WalkerInstrPageFaultF, - output logic WalkerLoadPageFaultM, - output logic WalkerStorePageFaultM -); + // Faults + output logic WalkerInstrPageFaultF, + output logic WalkerLoadPageFaultM, + output logic WalkerStorePageFaultM + ); // Internal signals // register TLBs translation miss requests - logic [`XLEN-1:0] TranslationVAdrQ; - logic ITLBMissFQ, DTLBMissMQ; + logic [`XLEN-1:0] TranslationVAdrQ; + logic ITLBMissFQ, DTLBMissMQ; - logic [`PPN_BITS-1:0] BasePageTablePPN; - logic [`XLEN-1:0] TranslationVAdr; - logic [`XLEN-1:0] SavedPTE, CurrentPTE; - logic [`PA_BITS-1:0] TranslationPAdr; - logic [`PPN_BITS-1:0] CurrentPPN; + logic [`PPN_BITS-1:0] BasePageTablePPN; + logic [`XLEN-1:0] TranslationVAdr; + logic [`XLEN-1:0] SavedPTE, CurrentPTE; + logic [`PA_BITS-1:0] TranslationPAdr; + logic [`PPN_BITS-1:0] CurrentPPN; logic [`SVMODE_BITS-1:0] SvMode; - logic MemStore; + logic MemStore; // PTE Control Bits - logic Dirty, Accessed, Global, User, - Executable, Writable, Readable, Valid; + logic Dirty, Accessed, Global, User, + Executable, Writable, Readable, Valid; // PTE descriptions - logic ValidPTE, AccessAlert, MegapageMisaligned, BadMegapage, LeafPTE; + logic ValidPTE, AccessAlert, MegapageMisaligned, BadMegapage, LeafPTE; // Outputs of walker - logic [`XLEN-1:0] PageTableEntry; - logic [1:0] PageType; - logic StartWalk; - logic EndWalk; + logic [`XLEN-1:0] PageTableEntry; + logic [1:0] PageType; + logic StartWalk; + logic EndWalk; - typedef enum {LEVEL0_WDV, - LEVEL0, - LEVEL1_WDV, - LEVEL1, - LEVEL2_WDV, - LEVEL2, - LEVEL3_WDV, - LEVEL3, - LEAF, - IDLE, - FAULT} statetype; + typedef 
enum {LEVEL0_WDV, + LEVEL0, + LEVEL1_WDV, + LEVEL1, + LEVEL2_WDV, + LEVEL2, + LEVEL3_WDV, + LEVEL3, + LEAF, + IDLE, + FAULT} statetype; statetype WalkerState, NextWalkerState; - logic PRegEn; + logic PRegEn; assign SvMode = SATP_REGW[`XLEN-1:`XLEN-`SVMODE_BITS]; @@ -145,7 +146,7 @@ module pagetablewalker ( .clear(EndWalk), .d(ITLBMissF), .q(ITLBMissFQ)); - + assign StartWalk = WalkerState == IDLE && (DTLBMissM | ITLBMissF); assign EndWalk = WalkerState == LEAF || @@ -186,23 +187,23 @@ module pagetablewalker ( always_comb begin case (WalkerState) IDLE: if (MMUTranslate) NextWalkerState = LEVEL1_WDV; - else NextWalkerState = IDLE; + else NextWalkerState = IDLE; LEVEL1_WDV: if (HPTWStall) NextWalkerState = LEVEL1_WDV; - else NextWalkerState = LEVEL1; + else NextWalkerState = LEVEL1; LEVEL1: - // *** According to the architecture, we should - // fault upon finding a superpage that is misaligned or has 0 - // access bit. The following commented line of code is - // supposed to perform that check. However, it is untested. - if (ValidPTE && LeafPTE && ~BadMegapage) NextWalkerState = LEAF; - // else if (ValidPTE && LeafPTE) NextWalkerState = LEAF; // *** Once the above line is properly tested, delete this line. - else if (ValidPTE && ~LeafPTE) NextWalkerState = LEVEL0_WDV; - else NextWalkerState = FAULT; + // *** According to the architecture, we should + // fault upon finding a superpage that is misaligned or has 0 + // access bit. The following commented line of code is + // supposed to perform that check. However, it is untested. + if (ValidPTE && LeafPTE && ~BadMegapage) NextWalkerState = LEAF; + // else if (ValidPTE && LeafPTE) NextWalkerState = LEAF; // *** Once the above line is properly tested, delete this line. 
+ else if (ValidPTE && ~LeafPTE) NextWalkerState = LEVEL0_WDV; + else NextWalkerState = FAULT; LEVEL0_WDV: if (HPTWStall) NextWalkerState = LEVEL0_WDV; - else NextWalkerState = LEVEL0; + else NextWalkerState = LEVEL0; LEVEL0: if (ValidPTE & LeafPTE & ~AccessAlert) - NextWalkerState = LEAF; - else NextWalkerState = FAULT; + NextWalkerState = LEAF; + else NextWalkerState = FAULT; LEAF: NextWalkerState = IDLE; FAULT: NextWalkerState = IDLE; // Default case should never happen, but is included for linter. @@ -218,7 +219,7 @@ module pagetablewalker ( assign VPN0 = TranslationVAdrQ[21:12]; //assign HPTWRead = (WalkerState == IDLE && MMUTranslate) || -// WalkerState == LEVEL2 || WalkerState == LEVEL1; + // WalkerState == LEVEL2 || WalkerState == LEVEL1; // Assign combinational outputs always_comb begin @@ -262,7 +263,7 @@ module pagetablewalker ( WalkerInstrPageFaultF = ~DTLBMissMQ; WalkerLoadPageFaultM = DTLBMissMQ && ~MemStore; WalkerStorePageFaultM = DTLBMissMQ && MemStore; - // MMUStall = '0; // Drop the stall early to enter trap handling code + // MMUStall = '0; // Drop the stall early to enter trap handling code end default: begin // nothing @@ -291,17 +292,17 @@ module pagetablewalker ( logic [8:0] VPN3, VPN2, VPN1, VPN0; - logic TerapageMisaligned, GigapageMisaligned, BadTerapage, BadGigapage; + logic TerapageMisaligned, GigapageMisaligned, BadTerapage, BadGigapage; flopenl #(.TYPE(statetype)) mmureg(clk, reset, 1'b1, NextWalkerState, IDLE, WalkerState); -/* -----\/----- EXCLUDED -----\/----- - assign PRegEn = (WalkerState == LEVEL1_WDV || WalkerState == LEVEL0_WDV || - WalkerState == LEVEL2_WDV || WalkerState == LEVEL3_WDV) && ~HPTWStall; - -----/\----- EXCLUDED -----/\----- */ + /* -----\/----- EXCLUDED -----\/----- + assign PRegEn = (WalkerState == LEVEL1_WDV || WalkerState == LEVEL0_WDV || + WalkerState == LEVEL2_WDV || WalkerState == LEVEL3_WDV) && ~HPTWStall; + -----/\----- EXCLUDED -----/\----- */ //assign HPTWRead = (WalkerState == IDLE && MMUTranslate) || 
WalkerState == LEVEL3 || -// WalkerState == LEVEL2 || WalkerState == LEVEL1; + // WalkerState == LEVEL2 || WalkerState == LEVEL1; always_comb begin @@ -352,152 +353,152 @@ module pagetablewalker ( // access bit. The following commented line of code is // supposed to perform that check. However, it is untested. if (ValidPTE && LeafPTE && ~BadTerapage) begin - NextWalkerState = LEAF; + NextWalkerState = LEAF; PageTableEntry = CurrentPTE; PageType = (WalkerState == LEVEL3) ? 2'b11 : ((WalkerState == LEVEL2) ? 2'b10 : ((WalkerState == LEVEL1) ? 2'b01 : 2'b00)); DTLBWriteM = DTLBMissMQ; ITLBWriteF = ~DTLBMissMQ; // Prefer data over instructions - TranslationPAdr = TranslationVAdrQ; - end + TranslationPAdr = TranslationVAdrQ; + end // else if (ValidPTE && LeafPTE) NextWalkerState = LEAF; // *** Once the above line is properly tested, delete this line. else if (ValidPTE && ~LeafPTE) begin NextWalkerState = LEVEL2_WDV; TranslationPAdr = {(SvMode == `SV48) ? CurrentPPN : BasePageTablePPN, VPN2, 3'b000}; - HPTWRead = 1'b1; - end else begin - NextWalkerState = FAULT; + HPTWRead = 1'b1; + end else begin + NextWalkerState = FAULT; WalkerInstrPageFaultF = ~DTLBMissMQ; WalkerLoadPageFaultM = DTLBMissMQ && ~MemStore; WalkerStorePageFaultM = DTLBMissMQ && MemStore; - end + end - end + end LEVEL2_WDV: begin - TranslationPAdr = {(SvMode == `SV48) ? CurrentPPN : BasePageTablePPN, VPN2, 3'b000}; - //HPTWRead = 1'b1; - if (HPTWStall) begin - NextWalkerState = LEVEL2_WDV; - end else begin - NextWalkerState = LEVEL2; - PRegEn = 1'b1; - end - end - - LEVEL2: begin - // *** According to the architecture, we should - // fault upon finding a superpage that is misaligned or has 0 - // access bit. The following commented line of code is - // supposed to perform that check. However, it is untested. + TranslationPAdr = {(SvMode == `SV48) ? 
CurrentPPN : BasePageTablePPN, VPN2, 3'b000}; + //HPTWRead = 1'b1; + if (HPTWStall) begin + NextWalkerState = LEVEL2_WDV; + end else begin + NextWalkerState = LEVEL2; + PRegEn = 1'b1; + end + end + + LEVEL2: begin + // *** According to the architecture, we should + // fault upon finding a superpage that is misaligned or has 0 + // access bit. The following commented line of code is + // supposed to perform that check. However, it is untested. if (ValidPTE && LeafPTE && ~BadGigapage) begin - NextWalkerState = LEAF; + NextWalkerState = LEAF; PageTableEntry = CurrentPTE; PageType = (WalkerState == LEVEL3) ? 2'b11 : ((WalkerState == LEVEL2) ? 2'b10 : ((WalkerState == LEVEL1) ? 2'b01 : 2'b00)); DTLBWriteM = DTLBMissMQ; ITLBWriteF = ~DTLBMissMQ; // Prefer data over instructions - TranslationPAdr = TranslationVAdrQ; - end + TranslationPAdr = TranslationVAdrQ; + end // else if (ValidPTE && LeafPTE) NextWalkerState = LEAF; // *** Once the above line is properly tested, delete this line. else if (ValidPTE && ~LeafPTE) begin NextWalkerState = LEVEL1_WDV; - TranslationPAdr = {CurrentPPN, VPN1, 3'b000}; - HPTWRead = 1'b1; - end else begin + TranslationPAdr = {CurrentPPN, VPN1, 3'b000}; + HPTWRead = 1'b1; + end else begin NextWalkerState = FAULT; WalkerInstrPageFaultF = ~DTLBMissMQ; WalkerLoadPageFaultM = DTLBMissMQ && ~MemStore; WalkerStorePageFaultM = DTLBMissMQ && MemStore; - end + end - end + end LEVEL1_WDV: begin TranslationPAdr = {CurrentPPN, VPN1, 3'b000}; - //HPTWRead = 1'b1; - if (HPTWStall) begin - NextWalkerState = LEVEL1_WDV; - end else begin + //HPTWRead = 1'b1; + if (HPTWStall) begin + NextWalkerState = LEVEL1_WDV; + end else begin NextWalkerState = LEVEL1; - PRegEn = 1'b1; - end - end + PRegEn = 1'b1; + end + end - LEVEL1: begin + LEVEL1: begin // *** According to the architecture, we should // fault upon finding a superpage that is misaligned or has 0 // access bit. The following commented line of code is // supposed to perform that check. 
However, it is untested. if (ValidPTE && LeafPTE && ~BadMegapage) begin - NextWalkerState = LEAF; + NextWalkerState = LEAF; PageTableEntry = CurrentPTE; PageType = (WalkerState == LEVEL3) ? 2'b11 : ((WalkerState == LEVEL2) ? 2'b10 : ((WalkerState == LEVEL1) ? 2'b01 : 2'b00)); DTLBWriteM = DTLBMissMQ; ITLBWriteF = ~DTLBMissMQ; // Prefer data over instructions - TranslationPAdr = TranslationVAdrQ; - - end + TranslationPAdr = TranslationVAdrQ; + + end // else if (ValidPTE && LeafPTE) NextWalkerState = LEAF; // *** Once the above line is properly tested, delete this line. else if (ValidPTE && ~LeafPTE) begin NextWalkerState = LEVEL0_WDV; TranslationPAdr = {CurrentPPN, VPN0, 3'b000}; - HPTWRead = 1'b1; - end else begin - NextWalkerState = FAULT; + HPTWRead = 1'b1; + end else begin + NextWalkerState = FAULT; WalkerInstrPageFaultF = ~DTLBMissMQ; WalkerLoadPageFaultM = DTLBMissMQ && ~MemStore; WalkerStorePageFaultM = DTLBMissMQ && MemStore; - end - end + end + end LEVEL0_WDV: begin TranslationPAdr = {CurrentPPN, VPN0, 3'b000}; - //HPTWRead = 1'b1; - if (HPTWStall) begin - NextWalkerState = LEVEL0_WDV; - end else begin + //HPTWRead = 1'b1; + if (HPTWStall) begin + NextWalkerState = LEVEL0_WDV; + end else begin NextWalkerState = LEVEL0; - PRegEn = 1'b1; - end - end + PRegEn = 1'b1; + end + end - LEVEL0: begin + LEVEL0: begin if (ValidPTE && LeafPTE && ~AccessAlert) begin - NextWalkerState = LEAF; + NextWalkerState = LEAF; PageTableEntry = CurrentPTE; PageType = (WalkerState == LEVEL3) ? 2'b11 : ((WalkerState == LEVEL2) ? 2'b10 : ((WalkerState == LEVEL1) ? 
2'b01 : 2'b00)); DTLBWriteM = DTLBMissMQ; ITLBWriteF = ~DTLBMissMQ; // Prefer data over instructions - TranslationPAdr = TranslationVAdrQ; - end else begin - NextWalkerState = FAULT; + TranslationPAdr = TranslationVAdrQ; + end else begin + NextWalkerState = FAULT; WalkerInstrPageFaultF = ~DTLBMissMQ; WalkerLoadPageFaultM = DTLBMissMQ && ~MemStore; WalkerStorePageFaultM = DTLBMissMQ && MemStore; - end - end - + end + end + LEAF: begin - NextWalkerState = IDLE; - MMUStall = 1'b0; - end + NextWalkerState = IDLE; + MMUStall = 1'b0; + end FAULT: begin - NextWalkerState = IDLE; - MMUStall = 1'b0; - end + NextWalkerState = IDLE; + MMUStall = 1'b0; + end // Default case should never happen default: begin - NextWalkerState = IDLE; - end + NextWalkerState = IDLE; + end endcase end From 61027f650c47d613176944da584b753e769c4a8f Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Thu, 1 Jul 2021 17:37:53 -0500 Subject: [PATCH 24/38] OMG. It's working! --- wally-pipelined/regression/wave.do | 103 ++++++++--------- wally-pipelined/src/cache/ICacheCntrl.sv | 124 +++++++++++---------- wally-pipelined/src/lsu/lsuArb.sv | 39 +++++-- wally-pipelined/src/mmu/pagetablewalker.sv | 17 +-- 4 files changed, 141 insertions(+), 142 deletions(-) diff --git a/wally-pipelined/regression/wave.do b/wally-pipelined/regression/wave.do index f6bf34cc..213b5cee 100644 --- a/wally-pipelined/regression/wave.do +++ b/wally-pipelined/regression/wave.do @@ -7,37 +7,37 @@ add wave -noupdate -expand -group {Execution Stage} /testbench/FunctionName/Func add wave -noupdate -expand -group {Execution Stage} /testbench/dut/hart/ifu/PCE add wave -noupdate -expand -group {Execution Stage} /testbench/InstrEName add wave -noupdate -expand -group {Execution Stage} /testbench/dut/hart/ifu/InstrE -add wave -noupdate -group HDU -group traps /testbench/dut/hart/priv/trap/InstrMisalignedFaultM -add wave -noupdate -group HDU -group traps /testbench/dut/hart/priv/trap/InstrAccessFaultM -add wave -noupdate -group HDU -group 
traps /testbench/dut/hart/priv/trap/IllegalInstrFaultM -add wave -noupdate -group HDU -group traps /testbench/dut/hart/priv/trap/BreakpointFaultM -add wave -noupdate -group HDU -group traps /testbench/dut/hart/priv/trap/LoadMisalignedFaultM -add wave -noupdate -group HDU -group traps /testbench/dut/hart/priv/trap/StoreMisalignedFaultM -add wave -noupdate -group HDU -group traps /testbench/dut/hart/priv/trap/LoadAccessFaultM -add wave -noupdate -group HDU -group traps /testbench/dut/hart/priv/trap/StoreAccessFaultM -add wave -noupdate -group HDU -group traps /testbench/dut/hart/priv/trap/EcallFaultM -add wave -noupdate -group HDU -group traps /testbench/dut/hart/priv/trap/InstrPageFaultM -add wave -noupdate -group HDU -group traps /testbench/dut/hart/priv/trap/LoadPageFaultM -add wave -noupdate -group HDU -group traps /testbench/dut/hart/priv/trap/StorePageFaultM -add wave -noupdate -group HDU -group traps /testbench/dut/hart/priv/trap/InterruptM -add wave -noupdate -group HDU -expand -group hazards /testbench/dut/hart/hzu/BPPredWrongE -add wave -noupdate -group HDU -expand -group hazards /testbench/dut/hart/hzu/CSRWritePendingDEM -add wave -noupdate -group HDU -expand -group hazards /testbench/dut/hart/hzu/RetM -add wave -noupdate -group HDU -expand -group hazards /testbench/dut/hart/hzu/TrapM -add wave -noupdate -group HDU -expand -group hazards /testbench/dut/hart/hzu/LoadStallD -add wave -noupdate -group HDU -expand -group hazards /testbench/dut/hart/hzu/ICacheStallF -add wave -noupdate -group HDU -expand -group hazards /testbench/dut/hart/hzu/DCacheStall -add wave -noupdate -group HDU -expand -group hazards /testbench/dut/hart/MulDivStallD -add wave -noupdate -group HDU -group Flush -color Yellow /testbench/dut/hart/hzu/FlushF -add wave -noupdate -group HDU -group Flush -color Yellow /testbench/dut/hart/FlushD -add wave -noupdate -group HDU -group Flush -color Yellow /testbench/dut/hart/FlushE -add wave -noupdate -group HDU -group Flush -color Yellow 
/testbench/dut/hart/FlushM -add wave -noupdate -group HDU -group Flush -color Yellow /testbench/dut/hart/FlushW -add wave -noupdate -group HDU -expand -group Stall -color Orange /testbench/dut/hart/StallF -add wave -noupdate -group HDU -expand -group Stall -color Orange /testbench/dut/hart/StallD -add wave -noupdate -group HDU -expand -group Stall -color Orange /testbench/dut/hart/StallE -add wave -noupdate -group HDU -expand -group Stall -color Orange /testbench/dut/hart/StallM -add wave -noupdate -group HDU -expand -group Stall -color Orange /testbench/dut/hart/StallW +add wave -noupdate -expand -group HDU -expand -group traps /testbench/dut/hart/priv/trap/InstrMisalignedFaultM +add wave -noupdate -expand -group HDU -expand -group traps /testbench/dut/hart/priv/trap/InstrAccessFaultM +add wave -noupdate -expand -group HDU -expand -group traps /testbench/dut/hart/priv/trap/IllegalInstrFaultM +add wave -noupdate -expand -group HDU -expand -group traps /testbench/dut/hart/priv/trap/BreakpointFaultM +add wave -noupdate -expand -group HDU -expand -group traps /testbench/dut/hart/priv/trap/LoadMisalignedFaultM +add wave -noupdate -expand -group HDU -expand -group traps /testbench/dut/hart/priv/trap/StoreMisalignedFaultM +add wave -noupdate -expand -group HDU -expand -group traps /testbench/dut/hart/priv/trap/LoadAccessFaultM +add wave -noupdate -expand -group HDU -expand -group traps /testbench/dut/hart/priv/trap/StoreAccessFaultM +add wave -noupdate -expand -group HDU -expand -group traps /testbench/dut/hart/priv/trap/EcallFaultM +add wave -noupdate -expand -group HDU -expand -group traps /testbench/dut/hart/priv/trap/InstrPageFaultM +add wave -noupdate -expand -group HDU -expand -group traps /testbench/dut/hart/priv/trap/LoadPageFaultM +add wave -noupdate -expand -group HDU -expand -group traps /testbench/dut/hart/priv/trap/StorePageFaultM +add wave -noupdate -expand -group HDU -expand -group traps /testbench/dut/hart/priv/trap/InterruptM +add wave -noupdate -expand 
-group HDU -expand -group hazards /testbench/dut/hart/hzu/BPPredWrongE +add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/hzu/CSRWritePendingDEM +add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/hzu/RetM +add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/hzu/TrapM +add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/hzu/LoadStallD +add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/hzu/ICacheStallF +add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/hzu/DCacheStall +add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/MulDivStallD +add wave -noupdate -expand -group HDU -group Flush -color Yellow /testbench/dut/hart/hzu/FlushF +add wave -noupdate -expand -group HDU -group Flush -color Yellow /testbench/dut/hart/FlushD +add wave -noupdate -expand -group HDU -group Flush -color Yellow /testbench/dut/hart/FlushE +add wave -noupdate -expand -group HDU -group Flush -color Yellow /testbench/dut/hart/FlushM +add wave -noupdate -expand -group HDU -group Flush -color Yellow /testbench/dut/hart/FlushW +add wave -noupdate -expand -group HDU -expand -group Stall -color Orange /testbench/dut/hart/StallF +add wave -noupdate -expand -group HDU -expand -group Stall -color Orange /testbench/dut/hart/StallD +add wave -noupdate -expand -group HDU -expand -group Stall -color Orange /testbench/dut/hart/StallE +add wave -noupdate -expand -group HDU -expand -group Stall -color Orange /testbench/dut/hart/StallM +add wave -noupdate -expand -group HDU -expand -group Stall -color Orange /testbench/dut/hart/StallW add wave -noupdate -group Bpred -color Orange /testbench/dut/hart/ifu/bpred/bpred/Predictor/DirPredictor/GHR add wave -noupdate -group Bpred -expand -group {branch update selection inputs} /testbench/dut/hart/ifu/bpred/bpred/Predictor/DirPredictor/BPPredF add wave -noupdate 
-group Bpred -expand -group {branch update selection inputs} {/testbench/dut/hart/ifu/bpred/bpred/Predictor/DirPredictor/InstrClassE[0]} @@ -299,6 +299,8 @@ add wave -noupdate -expand -group ptwalker -color Gold /testbench/dut/hart/paget add wave -noupdate -expand -group ptwalker -color Salmon /testbench/dut/hart/pagetablewalker/HPTWStall add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/HPTWRead add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/MMUPAdr +add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/MMUStall +add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/EndWalk add wave -noupdate -expand -group ptwalker -expand -group pte /testbench/dut/hart/pagetablewalker/MMUReadPTE add wave -noupdate -expand -group ptwalker -expand -group pte /testbench/dut/hart/pagetablewalker/PRegEn add wave -noupdate -expand -group ptwalker -expand -group pte /testbench/dut/hart/pagetablewalker/CurrentPTE @@ -318,17 +320,17 @@ add wave -noupdate -expand -group ptwalker -group {fsm outputs} /testbench/dut/h add wave -noupdate -expand -group ptwalker -group {fsm outputs} /testbench/dut/hart/pagetablewalker/MMUStall add wave -noupdate -expand -group ptwalker -group {fsm outputs} /testbench/dut/hart/pagetablewalker/EndWalk add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/MMUPAdr -add wave -noupdate -group {LSU ARB} -color Gold /testbench/dut/hart/arbiter/CurrState -add wave -noupdate -group {LSU ARB} /testbench/dut/hart/arbiter/SelPTW -add wave -noupdate -group {LSU ARB} -expand -group hptw /testbench/dut/hart/arbiter/HPTWTranslate -add wave -noupdate -group {LSU ARB} -expand -group hptw /testbench/dut/hart/arbiter/HPTWRead -add wave -noupdate -group {LSU ARB} -expand -group hptw /testbench/dut/hart/arbiter/HPTWPAdr -add wave -noupdate -group {LSU ARB} -expand -group hptw /testbench/dut/hart/arbiter/HPTWReadPTE -add wave -noupdate -group {LSU 
ARB} -expand -group hptw /testbench/dut/hart/arbiter/HPTWReady -add wave -noupdate -group {LSU ARB} -expand -group toLSU /testbench/dut/hart/arbiter/MemAdrMtoLSU +add wave -noupdate -expand -group {LSU ARB} -color Gold /testbench/dut/hart/arbiter/CurrState +add wave -noupdate -expand -group {LSU ARB} -color {Medium Orchid} /testbench/dut/hart/arbiter/SelPTW +add wave -noupdate -expand -group {LSU ARB} /testbench/dut/hart/pagetablewalker/MMUStall +add wave -noupdate -expand -group {LSU ARB} -expand -group hptw /testbench/dut/hart/arbiter/HPTWTranslate +add wave -noupdate -expand -group {LSU ARB} -expand -group hptw /testbench/dut/hart/arbiter/HPTWRead +add wave -noupdate -expand -group {LSU ARB} -expand -group hptw /testbench/dut/hart/arbiter/HPTWPAdr +add wave -noupdate -expand -group {LSU ARB} -expand -group hptw /testbench/dut/hart/arbiter/HPTWReadPTE +add wave -noupdate -expand -group {LSU ARB} -expand -group hptw /testbench/dut/hart/arbiter/HPTWReady +add wave -noupdate -expand -group {LSU ARB} -group toLSU /testbench/dut/hart/arbiter/MemAdrMtoLSU add wave -noupdate /testbench/dut/hart/lsu/DataStall add wave -noupdate -group csr /testbench/dut/hart/priv/csr/MIP_REGW -add wave -noupdate /testbench/dut/uncore/genblk2/plic/ExtIntM add wave -noupdate -group uart /testbench/dut/uncore/genblk4/uart/HCLK add wave -noupdate -group uart /testbench/dut/uncore/genblk4/uart/HRESETn add wave -noupdate -group uart /testbench/dut/uncore/genblk4/uart/HSELUART @@ -351,8 +353,8 @@ add wave -noupdate -group uart -expand -group outputs /testbench/dut/uncore/genb add wave -noupdate -group uart -expand -group outputs /testbench/dut/uncore/genblk4/uart/INTR add wave -noupdate -group uart -expand -group outputs /testbench/dut/uncore/genblk4/uart/TXRDYb add wave -noupdate -group uart -expand -group outputs /testbench/dut/uncore/genblk4/uart/RXRDYb -add wave -noupdate -expand -group dtlb /testbench/dut/hart/lsu/dmmu/TLBMiss -add wave -noupdate -expand -group dtlb 
/testbench/dut/hart/lsu/dmmu/tlb/TLBWrite +add wave -noupdate -group dtlb /testbench/dut/hart/lsu/dmmu/TLBMiss +add wave -noupdate -group dtlb /testbench/dut/hart/lsu/dmmu/tlb/TLBWrite add wave -noupdate -group itlb /testbench/dut/hart/ifu/ITLBMissF add wave -noupdate /testbench/dut/hart/pagetablewalker/StartWalk add wave -noupdate /testbench/dut/hart/lsu/dmmu/tlb/DisableTranslation @@ -364,20 +366,9 @@ add wave -noupdate -group tlbread /testbench/dut/hart/lsu/dmmu/tlb/tlbcam/Virtua add wave -noupdate -group tlbwrite /testbench/dut/hart/lsu/dmmu/tlb/tlbcam/TLBWrite add wave -noupdate -group tlbwrite /testbench/dut/hart/lsu/dmmu/tlb/PTEWriteVal add wave -noupdate -group tlbwrite /testbench/dut/hart/lsu/dmmu/tlb/tlbcam/WriteLines -add wave -noupdate /testbench/dut/hart/lsu/dmmu/tlb/SATP_REGW -add wave -noupdate /testbench/dut/hart/lsu/dmmu/tlb/STATUS_MXR -add wave -noupdate /testbench/dut/hart/lsu/dmmu/tlb/STATUS_SUM -add wave -noupdate /testbench/dut/hart/lsu/dmmu/tlb/PrivilegeModeW -add wave -noupdate /testbench/dut/hart/lsu/dmmu/tlb/TLBAccessType -add wave -noupdate /testbench/dut/hart/lsu/dmmu/tlb/DisableTranslation -add wave -noupdate /testbench/dut/hart/lsu/dmmu/tlb/VirtualAddress -add wave -noupdate /testbench/dut/hart/lsu/dmmu/tlb/PTEWriteVal -add wave -noupdate /testbench/dut/hart/lsu/dmmu/tlb/PageTypeWriteVal -add wave -noupdate /testbench/dut/hart/lsu/dmmu/tlb/TLBWrite -add wave -noupdate /testbench/dut/hart/lsu/dmmu/tlb/TLBFlush TreeUpdate [SetDefaultTree] -WaveRestoreCursors {{Cursor 8} {3766 ns} 0} {{Cursor 3} {3377 ns} 0} {{Cursor 4} {3215 ns} 0} -quietly wave cursor active 3 +WaveRestoreCursors {{Cursor 8} {4545 ns} 0} {{Cursor 3} {3377 ns} 0} {{Cursor 4} {3215 ns} 0} +quietly wave cursor active 1 configure wave -namecolwidth 250 configure wave -valuecolwidth 189 configure wave -justifyvalue left @@ -392,4 +383,4 @@ configure wave -griddelta 40 configure wave -timeline 0 configure wave -timelineunits ns update -WaveRestoreZoom {3163 ns} {3403 ns} 
+WaveRestoreZoom {4209 ns} {4657 ns} diff --git a/wally-pipelined/src/cache/ICacheCntrl.sv b/wally-pipelined/src/cache/ICacheCntrl.sv index bc5c30b3..ea52130c 100644 --- a/wally-pipelined/src/cache/ICacheCntrl.sv +++ b/wally-pipelined/src/cache/ICacheCntrl.sv @@ -25,48 +25,50 @@ `include "wally-config.vh" -module ICacheCntrl #(parameter BLOCKLEN = 256) ( - // Inputs from pipeline - input logic clk, reset, - input logic StallF, StallD, - input logic FlushD, +module ICacheCntrl #(parameter BLOCKLEN = 256) + ( + // Inputs from pipeline + input logic clk, reset, + input logic StallF, StallD, + input logic FlushD, - // Input the address to read - // The upper bits of the physical pc - input logic [`PA_BITS-1:0] PCNextF, - input logic [`PA_BITS-1:0] PCPF, - // Signals to/from cache memory - // The read coming out of it - input logic [31:0] ICacheMemReadData, - input logic ICacheMemReadValid, - // The address at which we want to search the cache memory - output logic [`PA_BITS-1:0] PCTagF, - output logic [`PA_BITS-1:0] PCNextIndexF, - output logic ICacheReadEn, - // Load data into the cache - output logic ICacheMemWriteEnable, - output logic [BLOCKLEN-1:0] ICacheMemWriteData, + // Input the address to read + // The upper bits of the physical pc + input logic [`PA_BITS-1:0] PCNextF, + input logic [`PA_BITS-1:0] PCPF, + // Signals to/from cache memory + // The read coming out of it + input logic [31:0] ICacheMemReadData, + input logic ICacheMemReadValid, + // The address at which we want to search the cache memory + output logic [`PA_BITS-1:0] PCTagF, + output logic [`PA_BITS-1:0] PCNextIndexF, + output logic ICacheReadEn, + // Load data into the cache + output logic ICacheMemWriteEnable, + output logic [BLOCKLEN-1:0] ICacheMemWriteData, - // Outputs to rest of ifu - // High if the instruction in the fetch stage is compressed - output logic CompressedF, - // The instruction that was requested - // If this instruction is compressed, upper 16 bits may be the next 16 bits or 
may be zeros - output logic [31:0] FinalInstrRawF, + // Outputs to rest of ifu + // High if the instruction in the fetch stage is compressed + output logic CompressedF, + // The instruction that was requested + // If this instruction is compressed, upper 16 bits may be the next 16 bits or may be zeros + output logic [31:0] FinalInstrRawF, - // Outputs to pipeline control stuff - output logic ICacheStallF, EndFetchState, - input logic ITLBMissF, - input logic ITLBWriteF, + // Outputs to pipeline control stuff + output logic ICacheStallF, EndFetchState, + input logic ITLBMissF, + input logic ITLBWriteF, + input logic WalkerInstrPageFaultF, - // Signals to/from ahblite interface - // A read containing the requested data - input logic [`XLEN-1:0] InstrInF, - input logic InstrAckF, - // The read we request from main memory - output logic [`PA_BITS-1:0] InstrPAdrF, - output logic InstrReadF -); + // Signals to/from ahblite interface + // A read containing the requested data + input logic [`XLEN-1:0] InstrInF, + input logic InstrAckF, + // The read we request from main memory + output logic [`PA_BITS-1:0] InstrPAdrF, + output logic InstrReadF + ); // FSM states localparam STATE_READY = 0; @@ -125,39 +127,39 @@ module ICacheCntrl #(parameter BLOCKLEN = 256) ( localparam WORDSPERLINE = BLOCKLEN/`XLEN; localparam LOGWPL = $clog2(WORDSPERLINE); - localparam integer PA_WIDTH = `PA_BITS - 2; + localparam integer PA_WIDTH = `PA_BITS - 2; - logic [4:0] CurrState, NextState; - logic hit, spill; - logic SavePC; - logic [1:0] PCMux; - logic CntReset; - logic PreCntEn, CntEn; - logic spillSave; - logic UnalignedSelect; - logic FetchCountFlag; + logic [4:0] CurrState, NextState; + logic hit, spill; + logic SavePC; + logic [1:0] PCMux; + logic CntReset; + logic PreCntEn, CntEn; + logic spillSave; + logic UnalignedSelect; + logic FetchCountFlag; localparam FetchCountThreshold = WORDSPERLINE - 1; - logic [LOGWPL:0] FetchCount, NextFetchCount; + logic [LOGWPL:0] FetchCount, 
NextFetchCount; - logic [`PA_BITS-1:0] PCPreFinalF, PCPSpillF; + logic [`PA_BITS-1:0] PCPreFinalF, PCPSpillF; logic [`PA_BITS-1:OFFSETWIDTH] PCPTrunkF; - logic [15:0] SpillDataBlock0; + logic [15:0] SpillDataBlock0; localparam [31:0] NOP = 32'h13; - logic reset_q; - logic [1:0] PCMux_q; + logic reset_q; + logic [1:0] PCMux_q; - // Misaligned signals - //logic [`XLEN:0] MisalignedInstrRawF; - //logic MisalignedStall; - // Cache fault signals - //logic FaultStall; + // Misaligned signals + //logic [`XLEN:0] MisalignedInstrRawF; + //logic MisalignedStall; + // Cache fault signals + //logic FaultStall; // on spill we want to get the first 2 bytes of the next cache block. // the spill only occurs if the PCPF mod BlockByteLength == -2. Therefore we can @@ -181,7 +183,7 @@ module ICacheCntrl #(parameter BLOCKLEN = 256) ( // truncate the offset from PCPF for memory address generation assign PCPTrunkF = PCTagF[`PA_BITS-1:OFFSETWIDTH]; - // Detect if the instruction is compressed + // Detect if the instruction is compressed assign CompressedF = FinalInstrRawF[1:0] != 2'b11; @@ -372,7 +374,7 @@ module ICacheCntrl #(parameter BLOCKLEN = 256) ( NextState = STATE_READY; end STATE_TLB_MISS: begin - if (ITLBWriteF) begin + if (ITLBWriteF | WalkerInstrPageFaultF) begin NextState = STATE_TLB_MISS_DONE; end else begin NextState = STATE_TLB_MISS; @@ -425,7 +427,7 @@ module ICacheCntrl #(parameter BLOCKLEN = 256) ( // store read data from memory interface before writing into SRAM. 
- genvar i; + genvar i; generate for (i = 0; i < WORDSPERLINE; i++) begin flopenr #(`XLEN) flop(.clk(clk), diff --git a/wally-pipelined/src/lsu/lsuArb.sv b/wally-pipelined/src/lsu/lsuArb.sv index bf925704..76d89798 100644 --- a/wally-pipelined/src/lsu/lsuArb.sv +++ b/wally-pipelined/src/lsu/lsuArb.sv @@ -101,23 +101,38 @@ module lsuArb always_comb begin case(CurrState) StateReady: -/* -----\/----- EXCLUDED -----\/----- - if (HPTWTranslate & DataStall) NextState = StatePTWPending; - else - -----/\----- EXCLUDED -----/\----- */ - if (HPTWTranslate) NextState = StatePTWActive; - else NextState = StateReady; - StatePTWPending: - if (HPTWTranslate & ~DataStall) NextState = StatePTWActive; - else if (HPTWTranslate & DataStall) NextState = StatePTWPending; - else NextState = StateReady; + if (HPTWTranslate) NextState = StatePTWActive; + else NextState = StateReady; StatePTWActive: - if (HPTWTranslate) NextState = StatePTWActive; - else NextState = StateReady; + if (HPTWTranslate) NextState = StatePTWActive; + else NextState = StateReady; + default: NextState = StateReady; + endcase + end + +/* -----\/----- EXCLUDED -----\/----- + + always_comb begin + case(CurrState) + StateReady: + /-* -----\/----- EXCLUDED -----\/----- + if (HPTWTranslate & DataStall) NextState = StatePTWPending; + else + -----/\----- EXCLUDED -----/\----- *-/ + if (HPTWTranslate) NextState = StatePTWActive; + else NextState = StateReady; + StatePTWPending: + if (HPTWTranslate & ~DataStall) NextState = StatePTWActive; + else if (HPTWTranslate & DataStall) NextState = StatePTWPending; + else NextState = StateReady; + StatePTWActive: + if (HPTWTranslate) NextState = StatePTWActive; + else NextState = StateReady; default: NextState = StateReady; endcase end + -----/\----- EXCLUDED -----/\----- */ // multiplex the outputs to LSU assign DisableTranslation = SelPTW; // change names between SelPTW would be confusing in DTLB. 
diff --git a/wally-pipelined/src/mmu/pagetablewalker.sv b/wally-pipelined/src/mmu/pagetablewalker.sv index e425b367..3670069b 100644 --- a/wally-pipelined/src/mmu/pagetablewalker.sv +++ b/wally-pipelined/src/mmu/pagetablewalker.sv @@ -369,9 +369,6 @@ module pagetablewalker HPTWRead = 1'b1; end else begin NextWalkerState = FAULT; - WalkerInstrPageFaultF = ~DTLBMissMQ; - WalkerLoadPageFaultM = DTLBMissMQ && ~MemStore; - WalkerStorePageFaultM = DTLBMissMQ && MemStore; end end @@ -409,9 +406,6 @@ module pagetablewalker HPTWRead = 1'b1; end else begin NextWalkerState = FAULT; - WalkerInstrPageFaultF = ~DTLBMissMQ; - WalkerLoadPageFaultM = DTLBMissMQ && ~MemStore; - WalkerStorePageFaultM = DTLBMissMQ && MemStore; end end @@ -450,9 +444,6 @@ module pagetablewalker HPTWRead = 1'b1; end else begin NextWalkerState = FAULT; - WalkerInstrPageFaultF = ~DTLBMissMQ; - WalkerLoadPageFaultM = DTLBMissMQ && ~MemStore; - WalkerStorePageFaultM = DTLBMissMQ && MemStore; end end @@ -479,9 +470,6 @@ module pagetablewalker TranslationPAdr = TranslationVAdrQ; end else begin NextWalkerState = FAULT; - WalkerInstrPageFaultF = ~DTLBMissMQ; - WalkerLoadPageFaultM = DTLBMissMQ && ~MemStore; - WalkerStorePageFaultM = DTLBMissMQ && MemStore; end end @@ -492,7 +480,10 @@ module pagetablewalker FAULT: begin NextWalkerState = IDLE; - MMUStall = 1'b0; + WalkerInstrPageFaultF = ~DTLBMissMQ; + WalkerLoadPageFaultM = DTLBMissMQ && ~MemStore; + WalkerStorePageFaultM = DTLBMissMQ && MemStore; + MMUStall = 1'b0; end // Default case should never happen From 118dfa9cece4db860ef52f6a23e7132647241556 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Thu, 1 Jul 2021 17:59:55 -0500 Subject: [PATCH 25/38] added page table walker fault exit for icache. 
--- wally-pipelined/src/cache/icache.sv | 1 + wally-pipelined/src/ifu/ifu.sv | 84 ++++----- wally-pipelined/src/mmu/pagetablewalker.sv | 168 ++++++++++-------- .../src/wally/wallypipelinedhart.sv | 8 +- 4 files changed, 144 insertions(+), 117 deletions(-) diff --git a/wally-pipelined/src/cache/icache.sv b/wally-pipelined/src/cache/icache.sv index 89b2ff9e..943ab1b8 100644 --- a/wally-pipelined/src/cache/icache.sv +++ b/wally-pipelined/src/cache/icache.sv @@ -45,6 +45,7 @@ module icache output logic ICacheStallF, input logic ITLBMissF, input logic ITLBWriteF, + input logic WalkerInstrPageFaultF, // The raw (not decompressed) instruction that was requested // If this instruction is compressed, upper 16 bits may be the next 16 bits or may be zeros diff --git a/wally-pipelined/src/ifu/ifu.sv b/wally-pipelined/src/ifu/ifu.sv index 6cf6220f..23d44608 100644 --- a/wally-pipelined/src/ifu/ifu.sv +++ b/wally-pipelined/src/ifu/ifu.sv @@ -27,62 +27,64 @@ `include "wally-config.vh" module ifu ( - input logic clk, reset, - input logic StallF, StallD, StallE, StallM, StallW, - input logic FlushF, FlushD, FlushE, FlushM, FlushW, + input logic clk, reset, + input logic StallF, StallD, StallE, StallM, StallW, + input logic FlushF, FlushD, FlushE, FlushM, FlushW, // Fetch - input logic [`XLEN-1:0] InstrInF, - input logic InstrAckF, - output logic [`XLEN-1:0] PCF, + input logic [`XLEN-1:0] InstrInF, + input logic InstrAckF, + output logic [`XLEN-1:0] PCF, output logic [`PA_BITS-1:0] InstrPAdrF, - output logic InstrReadF, - output logic ICacheStallF, + output logic InstrReadF, + output logic ICacheStallF, // Decode - output logic [`XLEN-1:0] PCD, + output logic [`XLEN-1:0] PCD, // Execute - output logic [`XLEN-1:0] PCLinkE, - input logic PCSrcE, - input logic [`XLEN-1:0] PCTargetE, - output logic [`XLEN-1:0] PCE, - output logic BPPredWrongE, + output logic [`XLEN-1:0] PCLinkE, + input logic PCSrcE, + input logic [`XLEN-1:0] PCTargetE, + output logic [`XLEN-1:0] PCE, + output logic 
BPPredWrongE, // Mem - input logic RetM, TrapM, - input logic [`XLEN-1:0] PrivilegedNextPCM, - output logic [31:0] InstrD, InstrE, InstrM, InstrW, - output logic [`XLEN-1:0] PCM, - output logic [4:0] InstrClassM, - output logic BPPredDirWrongM, - output logic BTBPredPCWrongM, - output logic RASPredPCWrongM, - output logic BPPredClassNonCFIWrongM, + input logic RetM, TrapM, + input logic [`XLEN-1:0] PrivilegedNextPCM, + output logic [31:0] InstrD, InstrE, InstrM, InstrW, + output logic [`XLEN-1:0] PCM, + output logic [4:0] InstrClassM, + output logic BPPredDirWrongM, + output logic BTBPredPCWrongM, + output logic RASPredPCWrongM, + output logic BPPredClassNonCFIWrongM, // Writeback // output logic [`XLEN-1:0] PCLinkW, // Faults - input logic IllegalBaseInstrFaultD, - output logic ITLBInstrPageFaultF, - output logic IllegalIEUInstrFaultD, - output logic InstrMisalignedFaultM, - output logic [`XLEN-1:0] InstrMisalignedAdrM, + input logic IllegalBaseInstrFaultD, + output logic ITLBInstrPageFaultF, + output logic IllegalIEUInstrFaultD, + output logic InstrMisalignedFaultM, + output logic [`XLEN-1:0] InstrMisalignedAdrM, // mmu management - input logic [1:0] PrivilegeModeW, - input logic [`XLEN-1:0] PageTableEntryF, - input logic [1:0] PageTypeF, - input logic [`XLEN-1:0] SATP_REGW, - input logic STATUS_MXR, STATUS_SUM, - input logic ITLBWriteF, ITLBFlushF, - output logic ITLBMissF, ITLBHitF, + input logic [1:0] PrivilegeModeW, + input logic [`XLEN-1:0] PageTableEntryF, + input logic [1:0] PageTypeF, + input logic [`XLEN-1:0] SATP_REGW, + input logic STATUS_MXR, STATUS_SUM, + input logic ITLBWriteF, ITLBFlushF, + input logic WalkerInstrPageFaultF, + + output logic ITLBMissF, ITLBHitF, // pmp/pma (inside mmu) signals. 
*** temporarily from AHB bus but eventually replace with internal versions pre H // input logic [31:0] HADDR, // input logic [2:0] HSIZE, // input logic HWRITE, - input logic [63:0] PMPCFG01_REGW, PMPCFG23_REGW, // *** all of these come from the privileged unit, so they're gonna have to come over into ifu and dmem - input var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0], + input logic [63:0] PMPCFG01_REGW, PMPCFG23_REGW, // *** all of these come from the privileged unit, so they're gonna have to come over into ifu and dmem + input var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0], - output logic PMPInstrAccessFaultF, PMAInstrAccessFaultF, - output logic ISquashBusAccessF + output logic PMPInstrAccessFaultF, PMAInstrAccessFaultF, + output logic ISquashBusAccessF // output logic [5:0] IHSELRegionsF ); @@ -105,7 +107,6 @@ module ifu ( // if you're allowed to parameterize outputs/ inputs existence, these are an easy delete. logic [`PA_BITS-1:0] PCPFmmu, PCNextFPhys; // used to either truncate or expand PCPF and PCNextF into `PA_BITS width. -; generate if (`XLEN==32) begin @@ -152,7 +153,8 @@ module ifu ( icache icache(.*, .PCNextF(PCNextFPhys), - .PCPF(PCPFmmu)); + .PCPF(PCPFmmu), + .WalkerInstrPageFaultF(WalkerInstrPageFaultF)); flopenl #(32) AlignedInstrRawDFlop(clk, reset | reset_q, ~StallD, FlushD ? 
nop : FinalInstrRawF, nop, InstrRawD); diff --git a/wally-pipelined/src/mmu/pagetablewalker.sv b/wally-pipelined/src/mmu/pagetablewalker.sv index 3670069b..a8f9500f 100644 --- a/wally-pipelined/src/mmu/pagetablewalker.sv +++ b/wally-pipelined/src/mmu/pagetablewalker.sv @@ -181,31 +181,106 @@ module pagetablewalker flopenl #(.TYPE(statetype)) mmureg(clk, reset, 1'b1, NextWalkerState, IDLE, WalkerState); +/* -----\/----- EXCLUDED -----\/----- assign PRegEn = (WalkerState == LEVEL1_WDV || WalkerState == LEVEL0_WDV) && ~HPTWStall; + -----/\----- EXCLUDED -----/\----- */ // State transition logic always_comb begin + PRegEn = 1'b0; + TranslationPAdr = '0; + HPTWRead = 1'b0; + MMUStall = 1'b1; + PageTableEntry = '0; + PageType = '0; + DTLBWriteM = '0; + ITLBWriteF = '0; + + WalkerInstrPageFaultF = 1'b0; + WalkerLoadPageFaultM = 1'b0; + WalkerStorePageFaultM = 1'b0; + case (WalkerState) - IDLE: if (MMUTranslate) NextWalkerState = LEVEL1_WDV; - else NextWalkerState = IDLE; - LEVEL1_WDV: if (HPTWStall) NextWalkerState = LEVEL1_WDV; - else NextWalkerState = LEVEL1; - LEVEL1: + IDLE: begin + if (MMUTranslate && SvMode == `SV32) begin // *** Added SvMode + NextWalkerState = LEVEL1_WDV; + TranslationPAdr = {BasePageTablePPN, VPN1, 2'b00}; + HPTWRead = 1'b1; + end else begin + NextWalkerState = IDLE; + TranslationPAdr = '0; + MMUStall = 1'b0; + end + end + + LEVEL1_WDV: begin + TranslationPAdr = {BasePageTablePPN, VPN1, 2'b00}; + if (HPTWStall) begin + NextWalkerState = LEVEL1_WDV; + end else begin + NextWalkerState = LEVEL1; + PRegEn = 1'b1; + end + end + + LEVEL1: begin // *** According to the architecture, we should // fault upon finding a superpage that is misaligned or has 0 // access bit. The following commented line of code is // supposed to perform that check. However, it is untested. - if (ValidPTE && LeafPTE && ~BadMegapage) NextWalkerState = LEAF; - // else if (ValidPTE && LeafPTE) NextWalkerState = LEAF; // *** Once the above line is properly tested, delete this line. 
- else if (ValidPTE && ~LeafPTE) NextWalkerState = LEVEL0_WDV; - else NextWalkerState = FAULT; - LEVEL0_WDV: if (HPTWStall) NextWalkerState = LEVEL0_WDV; - else NextWalkerState = LEVEL0; - LEVEL0: if (ValidPTE & LeafPTE & ~AccessAlert) - NextWalkerState = LEAF; - else NextWalkerState = FAULT; - LEAF: NextWalkerState = IDLE; - FAULT: NextWalkerState = IDLE; + if (ValidPTE && LeafPTE && ~BadMegapage) begin + NextWalkerState = LEAF; + PageTableEntry = CurrentPTE; + PageType = (WalkerState == LEVEL1) ? 2'b01 : 2'b00; // *** not sure about this mux? + DTLBWriteM = DTLBMissMQ; + ITLBWriteF = ~DTLBMissMQ; // Prefer data over instructions + TranslationPAdr = TranslationVAdrQ[`PA_BITS-1:0]; + end + // else if (ValidPTE && LeafPTE) NextWalkerState = LEAF; // *** Once the above line is properly tested, delete this line. + else if (ValidPTE && ~LeafPTE) begin + NextWalkerState = LEVEL0_WDV; + TranslationPAdr = {CurrentPPN, VPN0, 2'b00}; + HPTWRead = 1'b1; + end else begin + NextWalkerState = FAULT; + end + end + + LEVEL0_WDV: begin + TranslationPAdr = {CurrentPPN, VPN0, 2'b00}; + if (HPTWStall) begin + NextWalkerState = LEVEL0_WDV; + end else begin + NextWalkerState = LEVEL0; + PRegEn = 1'b1; + end + end + + LEVEL0: begin + if (ValidPTE & LeafPTE & ~AccessAlert) begin + NextWalkerState = LEAF; + PageTableEntry = CurrentPTE; + PageType = (WalkerState == LEVEL1) ? 2'b01 : 2'b00; + DTLBWriteM = DTLBMissMQ; + ITLBWriteF = ~DTLBMissMQ; // Prefer data over instructions + TranslationPAdr = TranslationVAdrQ[`PA_BITS-1:0]; + end else begin + NextWalkerState = FAULT; + end + end + + LEAF: begin + NextWalkerState = IDLE; + MMUStall = 1'b0; + end + FAULT: begin + NextWalkerState = IDLE; + WalkerInstrPageFaultF = ~DTLBMissMQ; + WalkerLoadPageFaultM = DTLBMissMQ && ~MemStore; + WalkerStorePageFaultM = DTLBMissMQ && MemStore; + MMUStall = 1'b0; + end + // Default case should never happen, but is included for linter. 
default: NextWalkerState = IDLE; endcase @@ -221,55 +296,6 @@ module pagetablewalker //assign HPTWRead = (WalkerState == IDLE && MMUTranslate) || // WalkerState == LEVEL2 || WalkerState == LEVEL1; - // Assign combinational outputs - always_comb begin - // default values - //TranslationPAdr = '0; - PageTableEntry = '0; - PageType ='0; - DTLBWriteM = '0; - ITLBWriteF = '0; - WalkerInstrPageFaultF = '0; - WalkerLoadPageFaultM = '0; - WalkerStorePageFaultM = '0; - //MMUStall = '1; - - case (NextWalkerState) - IDLE: begin - //MMUStall = '0; - end - LEVEL1: begin - //TranslationPAdr = {BasePageTablePPN, VPN1, 2'b00}; - end - LEVEL1_WDV: begin - //TranslationPAdr = {BasePageTablePPN, VPN1, 2'b00}; - end - LEVEL0: begin - //TranslationPAdr = {CurrentPPN, VPN0, 2'b00}; - end - LEVEL0_WDV: begin - //TranslationPAdr = {CurrentPPN, VPN0, 2'b00}; - end - LEAF: begin - // Keep physical address alive to prevent HADDR dropping to 0 - //TranslationPAdr = {CurrentPPN, VPN0, 2'b00}; - PageTableEntry = CurrentPTE; - PageType = (WalkerState == LEVEL1) ? 2'b01 : 2'b00; - DTLBWriteM = DTLBMissMQ; - ITLBWriteF = ~DTLBMissMQ; // Prefer data over instructions - end - FAULT: begin - //TranslationPAdr = {CurrentPPN, VPN0, 2'b00}; - WalkerInstrPageFaultF = ~DTLBMissMQ; - WalkerLoadPageFaultM = DTLBMissMQ && ~MemStore; - WalkerStorePageFaultM = DTLBMissMQ && MemStore; - // MMUStall = '0; // Drop the stall early to enter trap handling code - end - default: begin - // nothing - end - endcase - end // Capture page table entry from data cache // *** may need to delay reading this value until the next clock cycle. @@ -338,7 +364,6 @@ module pagetablewalker LEVEL3_WDV: begin TranslationPAdr = {BasePageTablePPN, VPN3, 3'b000}; - //HPTWRead = 1'b1; if (HPTWStall) begin NextWalkerState = LEVEL3_WDV; end else begin @@ -355,12 +380,12 @@ module pagetablewalker if (ValidPTE && LeafPTE && ~BadTerapage) begin NextWalkerState = LEAF; PageTableEntry = CurrentPTE; - PageType = (WalkerState == LEVEL3) ? 
2'b11 : + PageType = (WalkerState == LEVEL3) ? 2'b11 : // *** not sure about this mux? ((WalkerState == LEVEL2) ? 2'b10 : ((WalkerState == LEVEL1) ? 2'b01 : 2'b00)); DTLBWriteM = DTLBMissMQ; ITLBWriteF = ~DTLBMissMQ; // Prefer data over instructions - TranslationPAdr = TranslationVAdrQ; + TranslationPAdr = TranslationVAdrQ[`PA_BITS-1:0]; end // else if (ValidPTE && LeafPTE) NextWalkerState = LEAF; // *** Once the above line is properly tested, delete this line. else if (ValidPTE && ~LeafPTE) begin @@ -397,7 +422,7 @@ module pagetablewalker ((WalkerState == LEVEL1) ? 2'b01 : 2'b00)); DTLBWriteM = DTLBMissMQ; ITLBWriteF = ~DTLBMissMQ; // Prefer data over instructions - TranslationPAdr = TranslationVAdrQ; + TranslationPAdr = TranslationVAdrQ[`PA_BITS-1:0]; end // else if (ValidPTE && LeafPTE) NextWalkerState = LEAF; // *** Once the above line is properly tested, delete this line. else if (ValidPTE && ~LeafPTE) begin @@ -434,7 +459,7 @@ module pagetablewalker ((WalkerState == LEVEL1) ? 2'b01 : 2'b00)); DTLBWriteM = DTLBMissMQ; ITLBWriteF = ~DTLBMissMQ; // Prefer data over instructions - TranslationPAdr = TranslationVAdrQ; + TranslationPAdr = TranslationVAdrQ[`PA_BITS-1:0]; end // else if (ValidPTE && LeafPTE) NextWalkerState = LEAF; // *** Once the above line is properly tested, delete this line. @@ -449,7 +474,6 @@ module pagetablewalker LEVEL0_WDV: begin TranslationPAdr = {CurrentPPN, VPN0, 3'b000}; - //HPTWRead = 1'b1; if (HPTWStall) begin NextWalkerState = LEVEL0_WDV; end else begin @@ -467,7 +491,7 @@ module pagetablewalker ((WalkerState == LEVEL1) ? 
2'b01 : 2'b00)); DTLBWriteM = DTLBMissMQ; ITLBWriteF = ~DTLBMissMQ; // Prefer data over instructions - TranslationPAdr = TranslationVAdrQ; + TranslationPAdr = TranslationVAdrQ[`PA_BITS-1:0]; end else begin NextWalkerState = FAULT; end diff --git a/wally-pipelined/src/wally/wallypipelinedhart.sv b/wally-pipelined/src/wally/wallypipelinedhart.sv index edcb7203..d59ec313 100644 --- a/wally-pipelined/src/wally/wallypipelinedhart.sv +++ b/wally-pipelined/src/wally/wallypipelinedhart.sv @@ -176,11 +176,11 @@ module wallypipelinedhart logic StallWtoLSU; logic StallWfromLSU; logic [2:0] Funct3MfromLSU; + - - - - ifu ifu(.InstrInF(InstrRData), .*); // instruction fetch unit: PC, branch prediction, instruction cache + ifu ifu(.InstrInF(InstrRData), + .WalkerInstrPageFaultF(WalkerInstrPageFaultF), + .*); // instruction fetch unit: PC, branch prediction, instruction cache ieu ieu(.*); // integer execution unit: integer register file, datapath and controller From d1a366472f44d0143589033c63e0b562bd27f6c9 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Thu, 1 Jul 2021 18:04:43 -0500 Subject: [PATCH 26/38] reverted change to the imperas tests order. Accidentally committed change which placed the virtual memory tests first.
--- wally-pipelined/testbench/testbench-imperas.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wally-pipelined/testbench/testbench-imperas.sv b/wally-pipelined/testbench/testbench-imperas.sv index 95ae9343..1bbe6124 100644 --- a/wally-pipelined/testbench/testbench-imperas.sv +++ b/wally-pipelined/testbench/testbench-imperas.sv @@ -538,9 +538,9 @@ string tests32f[] = '{ else tests = {tests, tests64iNOc}; if (`M_SUPPORTED) tests = {tests, tests64m}; if (`A_SUPPORTED) tests = {tests, tests64a}; + if (`MEM_VIRTMEM) tests = {tests, tests64mmu}; if (`D_SUPPORTED) tests = {tests64d, tests}; if (`F_SUPPORTED) tests = {tests64f, tests}; - if (`MEM_VIRTMEM) tests = {tests64mmu, tests}; end //tests = {tests64a, tests}; end else begin // RV32 From c85e0df1ff159908c1cbe43544c77c038986142c Mon Sep 17 00:00:00 2001 From: David Harris Date: Fri, 2 Jul 2021 11:04:13 -0400 Subject: [PATCH 27/38] Optimized PMP checker logic and added support for configurable number of PMP registers --- wally-pipelined/src/ifu/ifu.sv | 2 +- wally-pipelined/src/lsu/lsu.sv | 2 +- wally-pipelined/src/mmu/mmu.sv | 4 +- wally-pipelined/src/mmu/pmpchecker.sv | 79 ++++++++++-------- wally-pipelined/src/privileged/csr.sv | 2 +- wally-pipelined/src/privileged/csrm.sv | 80 +++++++++---------- wally-pipelined/src/privileged/privileged.sv | 2 +- .../src/wally/wallypipelinedhart.sv | 2 +- 8 files changed, 89 insertions(+), 84 deletions(-) diff --git a/wally-pipelined/src/ifu/ifu.sv b/wally-pipelined/src/ifu/ifu.sv index afae5ff4..b08a1503 100644 --- a/wally-pipelined/src/ifu/ifu.sv +++ b/wally-pipelined/src/ifu/ifu.sv @@ -79,7 +79,7 @@ module ifu ( input logic [2:0] HSIZE, HBURST, input logic HWRITE, input logic ExecuteAccessF, //read, write, and atomic access are all set to zero because this mmu is onlt working with instructinos in the F stage. 
- input logic [63:0] PMPCFG01_REGW, PMPCFG23_REGW, // *** all of these come from the privileged unit, so they're gonna have to come over into ifu and dmem + input var logic [63:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES/8-1:0], input var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0], output logic PMPInstrAccessFaultF, PMAInstrAccessFaultF, diff --git a/wally-pipelined/src/lsu/lsu.sv b/wally-pipelined/src/lsu/lsu.sv index ffa79adf..8c9de2ff 100644 --- a/wally-pipelined/src/lsu/lsu.sv +++ b/wally-pipelined/src/lsu/lsu.sv @@ -70,7 +70,7 @@ module lsu ( input logic [2:0] HSIZE, HBURST, input logic HWRITE, input logic AtomicAccessM, WriteAccessM, ReadAccessM, // execute access is hardwired to zero in this mmu because we're only working with data in the M stage. - input logic [63:0] PMPCFG01_REGW, PMPCFG23_REGW, // *** all of these come from the privileged unit, so thwyre gonna have to come over into ifu and dmem + input var logic [63:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES/8-1:0], input var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0], // *** this one especially has a large note attached to it in pmpchecker. 
output logic PMALoadAccessFaultM, PMAStoreAccessFaultM, diff --git a/wally-pipelined/src/mmu/mmu.sv b/wally-pipelined/src/mmu/mmu.sv index ff315f12..32309baa 100644 --- a/wally-pipelined/src/mmu/mmu.sv +++ b/wally-pipelined/src/mmu/mmu.sv @@ -70,8 +70,8 @@ module mmu #(parameter ENTRY_BITS = 3, input logic [2:0] HSIZE, HBURST, input logic HWRITE, input logic AtomicAccessM, ExecuteAccessF, WriteAccessM, ReadAccessM, - input logic [63:0] PMPCFG01_REGW, PMPCFG23_REGW, // *** all of these come from the privileged unit, so thwyre gonna have to come over into ifu and dmem - input var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0], + input var logic [63:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES/8-1:0], + input var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0], output logic SquashBusAccess, // *** send to privileged unit output logic PMPInstrAccessFaultF, PMPLoadAccessFaultM, PMPStoreAccessFaultM, diff --git a/wally-pipelined/src/mmu/pmpchecker.sv b/wally-pipelined/src/mmu/pmpchecker.sv index f88d56fa..5344249c 100644 --- a/wally-pipelined/src/mmu/pmpchecker.sv +++ b/wally-pipelined/src/mmu/pmpchecker.sv @@ -35,7 +35,6 @@ module pmpchecker ( input logic [1:0] PrivilegeModeW, - input logic [63:0] PMPCFG01_REGW, PMPCFG23_REGW, // *** ModelSim has a switch -svinputport which controls whether input ports // are nets (wires) or vars by default. The default setting of this switch is @@ -48,6 +47,7 @@ module pmpchecker ( // boundary. It would be better to store the PMP address registers in a module // somewhere in the CSR hierarchy and do PMP checking _within_ that module, so // we don't have to pass around 16 whole registers. 
+ input var logic [63:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES/8-1:0], input var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0], input logic ExecuteAccessF, WriteAccessM, ReadAccessM, @@ -60,29 +60,23 @@ module pmpchecker ( ); // Bit i is high when the address falls in PMP region i - logic [15:0] Regions; - logic [3:0] MatchedRegion; - logic Match, EnforcePMP; + logic [`PMP_ENTRIES-1:0] Regions, FirstMatch; + //logic [3:0] MatchedRegion; + logic EnforcePMP; - logic [7:0] PMPCFG [15:0]; + logic [7:0] PMPCFG [`PMP_ENTRIES-1:0]; // Bit i is high when the address is greater than or equal to PMPADR[i] // Used for determining whether TOR PMP regions match - logic [15:0] AboveRegion; + logic [`PMP_ENTRIES-1:0] AboveRegion; // Bit i is high if PMP register i is non-null - logic [15:0] ActiveRegion; + logic [`PMP_ENTRIES-1:0] ActiveRegion; - logic L_Bit, X_Bit, W_Bit, R_Bit; - logic InvalidExecute, InvalidWrite, InvalidRead; + logic [`PMP_ENTRIES-1:0] L_Bits, X_Bits, W_Bits, R_Bits; + //logic InvalidExecute, InvalidWrite, InvalidRead; - // *** extend to optionally 64 configurations - - assign {PMPCFG[15], PMPCFG[14], PMPCFG[13], PMPCFG[12], - PMPCFG[11], PMPCFG[10], PMPCFG[9], PMPCFG[8]} = PMPCFG23_REGW; - - assign {PMPCFG[7], PMPCFG[6], PMPCFG[5], PMPCFG[4], - PMPCFG[3], PMPCFG[2], PMPCFG[1], PMPCFG[0]} = PMPCFG01_REGW; + genvar i,j; pmpadrdec pmpadrdec(.HADDR(HADDR), .AdrMode(PMPCFG[0][4:3]), .CurrentPMPAdr(PMPADDR_ARRAY_REGW[0]), @@ -92,7 +86,6 @@ module pmpchecker ( assign ActiveRegion[0] = |PMPCFG[0][4:3]; generate // *** only for PMP_ENTRIES > 0 - genvar i; for (i = 1; i < `PMP_ENTRIES; i++) begin pmpadrdec pmpadrdec(.HADDR(HADDR), .AdrMode(PMPCFG[i][4:3]), .CurrentPMPAdr(PMPADDR_ARRAY_REGW[i]), @@ -104,12 +97,34 @@ module pmpchecker ( end endgenerate - assign Match = |Regions; + //assign Match = |Regions; - // Only enforce PMP checking for S and U modes when at least one PMP is active - assign EnforcePMP = |ActiveRegion; - - // *** extend to up to 64, fold bit 
extraction to avoid need for binary encoding of region + // verilator lint_off UNOPTFLAT + logic [`PMP_ENTRIES-1:0] NoLowerMatch; +// assign NoLowerMatch[0] = 1; + generate + // verilator lint_off WIDTH + for (j=0; j<`PMP_ENTRIES; j = j+8) begin + assign {PMPCFG[j+7], PMPCFG[j+6], PMPCFG[j+5], PMPCFG[j+4], + PMPCFG[j+3], PMPCFG[j+2], PMPCFG[j+1], PMPCFG[j]} = PMPCFG_ARRAY_REGW[j/8]; + end + // verilator lint_on WIDTH + for (i=0; i<`PMP_ENTRIES; i++) begin + if (i==0) begin + assign FirstMatch[i] = Regions[i]; + assign NoLowerMatch[i] = ~Regions[i]; + end else begin + assign FirstMatch[i] = Regions[i] & NoLowerMatch[i]; + assign NoLowerMatch[i] = NoLowerMatch[i-1] & ~Regions[i]; + end + assign L_Bits[i] = PMPCFG[i][7] & FirstMatch[i]; + assign X_Bits[i] = PMPCFG[i][2] & FirstMatch[i]; + assign W_Bits[i] = PMPCFG[i][1] & FirstMatch[i]; + assign R_Bits[i] = PMPCFG[i][0] & FirstMatch[i]; + end + // verilator lint_on UNOPTFLAT + endgenerate +/* // *** extend to up to 64, fold bit extraction to avoid need for binary encoding of region always_comb casez (Regions) 16'b???????????????1: MatchedRegion = 0; @@ -134,22 +149,18 @@ module pmpchecker ( assign L_Bit = PMPCFG[MatchedRegion][7] && Match; assign X_Bit = PMPCFG[MatchedRegion][2] && Match; assign W_Bit = PMPCFG[MatchedRegion][1] && Match; - assign R_Bit = PMPCFG[MatchedRegion][0] && Match; + assign R_Bit = PMPCFG[MatchedRegion][0] && Match; assign InvalidExecute = ExecuteAccessF && ~X_Bit; assign InvalidWrite = WriteAccessM && ~W_Bit; - assign InvalidRead = ReadAccessM && ~R_Bit; + assign InvalidRead = ReadAccessM && ~R_Bit;*/ - // *** don't cause faults when there are no PMPs - assign PMPInstrAccessFaultF = (PrivilegeModeW == `M_MODE) ? - Match && L_Bit && InvalidExecute : - EnforcePMP && InvalidExecute; - assign PMPStoreAccessFaultM = (PrivilegeModeW == `M_MODE) ? - Match && L_Bit && InvalidWrite : - EnforcePMP && InvalidWrite; - assign PMPLoadAccessFaultM = (PrivilegeModeW == `M_MODE) ? 
- Match && L_Bit && InvalidRead : - EnforcePMP && InvalidRead; + // Only enforce PMP checking for S and U modes when at least one PMP is active or in Machine mode when L bit is set in selected region + assign EnforcePMP = (PrivilegeModeW == `M_MODE) ? |L_Bits : |ActiveRegion; + + assign PMPInstrAccessFaultF = EnforcePMP && ExecuteAccessF && ~|X_Bits; + assign PMPStoreAccessFaultM = EnforcePMP && WriteAccessM && ~|W_Bits; + assign PMPLoadAccessFaultM = EnforcePMP && ReadAccessM && ~|R_Bits; assign PMPSquashBusAccess = PMPInstrAccessFaultF || PMPLoadAccessFaultM || PMPStoreAccessFaultM; diff --git a/wally-pipelined/src/privileged/csr.sv b/wally-pipelined/src/privileged/csr.sv index 213bcde3..dfac5571 100644 --- a/wally-pipelined/src/privileged/csr.sv +++ b/wally-pipelined/src/privileged/csr.sv @@ -60,7 +60,7 @@ module csr #(parameter output logic STATUS_MIE, STATUS_SIE, output logic STATUS_MXR, STATUS_SUM, output logic STATUS_MPRV, - output logic [63:0] PMPCFG01_REGW, PMPCFG23_REGW, + output var logic [63:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES/8-1:0], output var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0], input logic [4:0] SetFflagsM, output logic [2:0] FRM_REGW, diff --git a/wally-pipelined/src/privileged/csrm.sv b/wally-pipelined/src/privileged/csrm.sv index 33b903a8..f30ebb4f 100644 --- a/wally-pipelined/src/privileged/csrm.sv +++ b/wally-pipelined/src/privileged/csrm.sv @@ -48,25 +48,9 @@ module csrm #(parameter MTVAL = 12'h343, MIP = 12'h344, PMPCFG0 = 12'h3A0, - PMPCFG1 = 12'h3A1, - PMPCFG2 = 12'h3A2, - PMPCFG3 = 12'h3A3, + // .. up to 15 more at consecutive addresses PMPADDR0 = 12'h3B0, - PMPADDR1 = 12'h3B1, - PMPADDR2 = 12'h3B2, - PMPADDR3 = 12'h3B3, - PMPADDR4 = 12'h3B4, - PMPADDR5 = 12'h3B5, - PMPADDR6 = 12'h3B6, - PMPADDR7 = 12'h3B7, - PMPADDR8 = 12'h3B8, - PMPADDR9 = 12'h3B9, - PMPADDR10 = 12'h3BA, - PMPADDR11 = 12'h3BB, - PMPADDR12 = 12'h3BC, - PMPADDR13 = 12'h3BD, - PMPADDR14 = 12'h3BE, - PMPADDR15 = 12'h3BF, + // ... 
up to 63 more at consecutive addresses TSELECT = 12'h7A0, TDATA1 = 12'h7A1, TDATA2 = 12'h7A2, @@ -90,7 +74,7 @@ module csrm #(parameter output logic [31:0] MCOUNTEREN_REGW, MCOUNTINHIBIT_REGW, output logic [`XLEN-1:0] MEDELEG_REGW, MIDELEG_REGW, // 64-bit registers in RV64, or two 32-bit registers in RV32 - output logic [63:0] PMPCFG01_REGW, PMPCFG23_REGW, + output var logic [63:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES/8-1:0], output var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0], input logic [11:0] MIP_REGW, MIE_REGW, output logic WriteMSTATUSM, @@ -103,8 +87,8 @@ module csrm #(parameter logic WriteMTVECM, WriteMEDELEGM, WriteMIDELEGM; logic WriteMSCRATCHM, WriteMEPCM, WriteMCAUSEM, WriteMTVALM; logic WriteMCOUNTERENM, WriteMCOUNTINHIBITM; - logic WritePMPCFG0M, WritePMPCFG2M; - logic WritePMPADDRM [15:0]; + logic [`PMP_ENTRIES/8-1:0] WritePMPCFGM, WritePMPCFGHM ; + logic [`PMP_ENTRIES-1:0] WritePMPADDRM ; localparam MISA_26 = (`MISA) & 32'h03ffffff; @@ -120,7 +104,7 @@ module csrm #(parameter assign WriteMEPCM = MTrapM | (CSRMWriteM && (CSRAdrM == MEPC)) && ~StallW; assign WriteMCAUSEM = MTrapM | (CSRMWriteM && (CSRAdrM == MCAUSE)) && ~StallW; assign WriteMTVALM = MTrapM | (CSRMWriteM && (CSRAdrM == MTVAL)) && ~StallW; - assign WritePMPCFG0M = (CSRMWriteM && (CSRAdrM == PMPCFG0)) && ~StallW; +/* assign WritePMPCFG0M = (CSRMWriteM && (CSRAdrM == PMPCFG0)) && ~StallW; assign WritePMPCFG2M = (CSRMWriteM && (CSRAdrM == PMPCFG2)) && ~StallW; assign WritePMPADDRM[0] = (CSRMWriteM && (CSRAdrM == PMPADDR0)) && ~StallW; assign WritePMPADDRM[1] = (CSRMWriteM && (CSRAdrM == PMPADDR1)) && ~StallW; @@ -137,10 +121,13 @@ module csrm #(parameter assign WritePMPADDRM[12] = (CSRMWriteM && (CSRAdrM == PMPADDR12)) && ~StallW; assign WritePMPADDRM[13] = (CSRMWriteM && (CSRAdrM == PMPADDR13)) && ~StallW; assign WritePMPADDRM[14] = (CSRMWriteM && (CSRAdrM == PMPADDR14)) && ~StallW; - assign WritePMPADDRM[15] = (CSRMWriteM && (CSRAdrM == PMPADDR15)) && ~StallW; + assign 
WritePMPADDRM[15] = (CSRMWriteM && (CSRAdrM == PMPADDR15)) && ~StallW; */ assign WriteMCOUNTERENM = CSRMWriteM && (CSRAdrM == MCOUNTEREN) && ~StallW; assign WriteMCOUNTINHIBITM = CSRMWriteM && (CSRAdrM == MCOUNTINHIBIT) && ~StallW; + + + assign IllegalCSRMWriteReadonlyM = CSRMWriteM && (CSRAdrM == MVENDORID || CSRAdrM == MARCHID || CSRAdrM == MIMPID || CSRAdrM == MHARTID); // CSRs @@ -172,33 +159,39 @@ module csrm #(parameter flopenl #(32) MCOUNTINHIBITreg(clk, reset, WriteMCOUNTINHIBITM, CSRWriteValM[31:0], 32'hFFFFFFFF, MCOUNTINHIBIT_REGW); // There are PMP_ENTRIES = 0, 16, or 64 PMPADDR registers, each of which has its own flop + + // *** need to add support for locked PMPCFG and PMPADR + genvar i; generate - genvar i; - for (i = 0; i < `PMP_ENTRIES; i++) begin: pmp_flop + for(i=0; i<`PMP_ENTRIES; i++) begin + assign WritePMPADDRM[i] = (CSRMWriteM && (CSRAdrM == PMPADDR0+i)) && ~StallW; flopenr #(`XLEN) PMPADDRreg(clk, reset, WritePMPADDRM[i], CSRWriteValM, PMPADDR_ARRAY_REGW[i]); end + for (i=0; i<`PMP_ENTRIES/8; i++) begin + if (`XLEN==64) begin + assign WritePMPCFGM[i] = (CSRMWriteM && (CSRAdrM == PMPCFG0+2*i)) && ~StallW; + flopenr #(`XLEN) PMPCFGreg(clk, reset, WritePMPCFGM[i], CSRWriteValM, PMPCFG_ARRAY_REGW[i]); + end else begin + assign WritePMPCFGM[i] = (CSRMWriteM && (CSRAdrM == PMPCFG0+2*i)) && ~StallW; + assign WritePMPCFGHM[i] = (CSRMWriteM && (CSRAdrM == PMPCFG0+2*i+1)) && ~StallW; + flopenr #(`XLEN) PMPCFGreg(clk, reset, WritePMPCFGM[i], CSRWriteValM, PMPCFG_ARRAY_REGW[i][31:0]); + flopenr #(`XLEN) PMPCFGHreg(clk, reset, WritePMPCFGHM[i], CSRWriteValM, PMPCFG_ARRAY_REGW[i][63:32]); + end + end endgenerate - // PMPCFG registers are a pair of 64-bit in RV64 and four 32-bit in RV32 - generate - if (`XLEN==64) begin - flopenr #(`XLEN) PMPCFG01reg(clk, reset, WritePMPCFG0M, CSRWriteValM, PMPCFG01_REGW); - flopenr #(`XLEN) PMPCFG23reg(clk, reset, WritePMPCFG2M, CSRWriteValM, PMPCFG23_REGW); - end else begin - logic WritePMPCFG1M, WritePMPCFG3M; - assign 
WritePMPCFG1M = MTrapM | (CSRMWriteM && (CSRAdrM == PMPCFG1)); - assign WritePMPCFG3M = MTrapM | (CSRMWriteM && (CSRAdrM == PMPCFG3)); - flopenr #(`XLEN) PMPCFG0reg(clk, reset, WritePMPCFG0M, CSRWriteValM, PMPCFG01_REGW[31:0]); - flopenr #(`XLEN) PMPCFG1reg(clk, reset, WritePMPCFG1M, CSRWriteValM, PMPCFG01_REGW[63:32]); - flopenr #(`XLEN) PMPCFG2reg(clk, reset, WritePMPCFG2M, CSRWriteValM, PMPCFG23_REGW[31:0]); - flopenr #(`XLEN) PMPCFG3reg(clk, reset, WritePMPCFG3M, CSRWriteValM, PMPCFG23_REGW[63:32]); - end - endgenerate // Read machine mode CSRs + // verilator lint_off WIDTH always_comb begin IllegalCSRMAccessM = !(`S_SUPPORTED | `U_SUPPORTED & `N_SUPPORTED) && (CSRAdrM == MEDELEG || CSRAdrM == MIDELEG); // trap on DELEG register access when no S or N-mode - case (CSRAdrM) + if (CSRAdrM >= PMPADDR0 && CSRAdrM < PMPADDR0 + `PMP_ENTRIES) // reading a PMP entry + CSRMReadValM = PMPADDR_ARRAY_REGW[CSRAdrM - PMPADDR0]; + else if (CSRAdrM >= PMPCFG0 && CSRAdrM < PMPCFG0 + `PMP_ENTRIES/8) begin + if (~CSRAdrM[0]) CSRMReadValM = PMPCFG_ARRAY_REGW[CSRAdrM - PMPCFG0][`XLEN-1:0]; + else CSRMReadValM = {{(`XLEN-32){1'b0}}, PMPCFG_ARRAY_REGW[CSRAdrM - PMPCFG0][63:32]}; + end + else case (CSRAdrM) MISA_ADR: CSRMReadValM = MISA_REGW; MVENDORID: CSRMReadValM = 0; MARCHID: CSRMReadValM = 0; @@ -219,7 +212,7 @@ module csrm #(parameter MTVAL: CSRMReadValM = MTVAL_REGW; MCOUNTEREN:CSRMReadValM = {{(`XLEN-32){1'b0}}, MCOUNTEREN_REGW}; MCOUNTINHIBIT:CSRMReadValM = {{(`XLEN-32){1'b0}}, MCOUNTINHIBIT_REGW}; - PMPCFG0: CSRMReadValM = PMPCFG01_REGW[`XLEN-1:0]; +/* PMPCFG0: CSRMReadValM = PMPCFG01_REGW[`XLEN-1:0]; PMPCFG1: CSRMReadValM = {{(`XLEN-32){1'b0}}, PMPCFG01_REGW[63:32]}; PMPCFG2: CSRMReadValM = PMPCFG23_REGW[`XLEN-1:0]; PMPCFG3: CSRMReadValM = {{(`XLEN-32){1'b0}}, PMPCFG23_REGW[63:32]}; @@ -238,11 +231,12 @@ module csrm #(parameter PMPADDR12: CSRMReadValM = PMPADDR_ARRAY_REGW[12]; PMPADDR13: CSRMReadValM = PMPADDR_ARRAY_REGW[13]; PMPADDR14: CSRMReadValM = PMPADDR_ARRAY_REGW[14]; 
- PMPADDR15: CSRMReadValM = PMPADDR_ARRAY_REGW[15]; + PMPADDR15: CSRMReadValM = PMPADDR_ARRAY_REGW[15]; */ default: begin CSRMReadValM = 0; IllegalCSRMAccessM = 1; end endcase end + // verilator lint_on WIDTH endmodule diff --git a/wally-pipelined/src/privileged/privileged.sv b/wally-pipelined/src/privileged/privileged.sv index 1275cd4b..5ed8c880 100644 --- a/wally-pipelined/src/privileged/privileged.sv +++ b/wally-pipelined/src/privileged/privileged.sv @@ -68,7 +68,7 @@ module privileged ( output logic [1:0] PrivilegeModeW, output logic [`XLEN-1:0] SATP_REGW, output logic STATUS_MXR, STATUS_SUM, - output logic [63:0] PMPCFG01_REGW, PMPCFG23_REGW, + output var logic [63:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES/8-1:0], output var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0], output logic [2:0] FRM_REGW ); diff --git a/wally-pipelined/src/wally/wallypipelinedhart.sv b/wally-pipelined/src/wally/wallypipelinedhart.sv index a77c3ab0..9358417b 100644 --- a/wally-pipelined/src/wally/wallypipelinedhart.sv +++ b/wally-pipelined/src/wally/wallypipelinedhart.sv @@ -126,7 +126,7 @@ module wallypipelinedhart ( logic DSquashBusAccessM, ISquashBusAccessF; logic [5:0] DHSELRegionsM, IHSELRegionsF; var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0]; - logic [63:0] PMPCFG01_REGW, PMPCFG23_REGW; // signals being sent from privileged unit to pmp/pma in dmem and ifu. + var logic [63:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES/8-1:0]; assign HSELRegions = ExecuteAccessF ? IHSELRegionsF : DHSELRegionsM; // *** this is a pure guess on how one of these should be selected. it passes tests, but is it the right way to do this? 
// IMem stalls From 76a43eb468762e99c61d03d66e6cbd73f7778aa6 Mon Sep 17 00:00:00 2001 From: David Harris Date: Fri, 2 Jul 2021 11:05:25 -0400 Subject: [PATCH 28/38] Optimized PMP checker logic and added support for configurable number of PMP registers --- wally-pipelined/config/rv64ic/wally-config.vh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wally-pipelined/config/rv64ic/wally-config.vh b/wally-pipelined/config/rv64ic/wally-config.vh index 954e126b..a6f1c013 100644 --- a/wally-pipelined/config/rv64ic/wally-config.vh +++ b/wally-pipelined/config/rv64ic/wally-config.vh @@ -53,7 +53,7 @@ `define DTLB_ENTRY_BITS 5 // Legal number of PMP entries are 0, 16, or 64 -`define PMP_ENTRIES 16 +`define PMP_ENTRIES 64 // Address space `define RESET_VECTOR 64'h0000000080000000 From 30ff212ca871902f302fa48d9e5d32c265aa2c8a Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Fri, 2 Jul 2021 12:40:58 -0400 Subject: [PATCH 29/38] FPU update --- wally-pipelined/src/fpu/FPregfile.sv | 54 -- wally-pipelined/src/fpu/bk128.sv | 599 -------------------- wally-pipelined/src/fpu/bk13.sv | 97 ---- wally-pipelined/src/fpu/bk14.sv | 86 --- wally-pipelined/src/fpu/csa.sv | 70 --- wally-pipelined/src/fpu/divconv.sv | 18 +- wally-pipelined/src/fpu/fctrl.sv | 67 ++- wally-pipelined/src/fpu/fma2.sv | 10 +- wally-pipelined/src/fpu/fpadd_denorm.sv | 4 +- wally-pipelined/src/fpu/fpdiv.sv | 256 --------- wally-pipelined/src/fpu/fpu.sv | 369 ++++++------ wally-pipelined/src/fpu/fpuaddcvt1.sv | 4 +- wally-pipelined/src/fpu/fpuaddcvt2.sv | 14 +- wally-pipelined/src/fpu/fpuclassify.sv | 50 -- wally-pipelined/src/fpu/fpucmp1.sv | 465 --------------- wally-pipelined/src/fpu/fpucmp2.sv | 243 -------- wally-pipelined/src/fpu/fpuhazard.sv | 67 --- wally-pipelined/src/fpu/freg.sv | 515 ----------------- wally-pipelined/src/fpu/fsgn.sv | 19 +- wally-pipelined/src/fpu/ling_bk13.sv | 89 --- wally-pipelined/src/fpu/lzd_denorm.sv | 1 + wally-pipelined/src/fpu/mult_R4_64_64_cs.sv | 0 
wally-pipelined/src/fpu/rounder_denorm.sv | 6 +- wally-pipelined/src/fpu/sbtm_a4.sv | 204 ------- wally-pipelined/src/fpu/sk14.sv | 90 --- 25 files changed, 254 insertions(+), 3143 deletions(-) delete mode 100644 wally-pipelined/src/fpu/FPregfile.sv delete mode 100755 wally-pipelined/src/fpu/bk128.sv delete mode 100755 wally-pipelined/src/fpu/bk13.sv delete mode 100755 wally-pipelined/src/fpu/bk14.sv delete mode 100644 wally-pipelined/src/fpu/csa.sv delete mode 100755 wally-pipelined/src/fpu/fpdiv.sv delete mode 100644 wally-pipelined/src/fpu/fpuclassify.sv delete mode 100755 wally-pipelined/src/fpu/fpucmp1.sv delete mode 100755 wally-pipelined/src/fpu/fpucmp2.sv delete mode 100644 wally-pipelined/src/fpu/fpuhazard.sv delete mode 100755 wally-pipelined/src/fpu/freg.sv delete mode 100755 wally-pipelined/src/fpu/ling_bk13.sv mode change 100755 => 100644 wally-pipelined/src/fpu/mult_R4_64_64_cs.sv delete mode 100755 wally-pipelined/src/fpu/sbtm_a4.sv delete mode 100755 wally-pipelined/src/fpu/sk14.sv diff --git a/wally-pipelined/src/fpu/FPregfile.sv b/wally-pipelined/src/fpu/FPregfile.sv deleted file mode 100644 index 99d18bce..00000000 --- a/wally-pipelined/src/fpu/FPregfile.sv +++ /dev/null @@ -1,54 +0,0 @@ -/////////////////////////////////////////// -// regfile.sv -// -// Written: David_Harris@hmc.edu 9 January 2021 -// Modified: -// -// Purpose: 4-port register file -// -// A component of the Wally configurable RISC-V project. 
-// -// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University -// -// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, -// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software -// is furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT -// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-/////////////////////////////////////////// - -`include "wally-config.vh" - -module FPregfile ( - input logic clk, reset, - input logic we4, - input logic [ 4:0] a1, a2, a3, a4, - input logic [63:0] wd4, //KEP `XLEN-1 changed to 63 (lint warning) *** figure out if double can be suported when XLEN = 32 - output logic [63:0] rd1, rd2, rd3); - - logic [63:0] rf[31:0]; - integer i; - - // three ported register file - // read three ports combinationally (A1/RD1, A2/RD2, A3/RD3) - // write fourth port on rising edge of clock (A4/WD4/WE4) - // write occurs on falling edge of clock - - // reset is intended for simulation only, not synthesis - - always_ff @(negedge clk or posedge reset) - if (reset) for(i=0; i<32; i++) rf[i] <= 0; - else if (we4) rf[a4] <= wd4; - - assign #2 rd1 = rf[a1]; - assign #2 rd2 = rf[a2]; - assign #2 rd3 = rf[a3]; - -endmodule // regfile - diff --git a/wally-pipelined/src/fpu/bk128.sv b/wally-pipelined/src/fpu/bk128.sv deleted file mode 100755 index a302a031..00000000 --- a/wally-pipelined/src/fpu/bk128.sv +++ /dev/null @@ -1,599 +0,0 @@ -// Brent-Kung Carry-save Prefix Adder - -module bk128 (cout, sum, a, b, cin); - - input [127:0] a, b; - input cin; - - output [127:0] sum; - output cout; - - wire [128:0] p,g,t; - wire [127:0] c; - - // pre-computation - assign p={a^b,1'b0}; - assign g={a&b, cin}; - assign t[1]=p[1]; - assign t[2]=p[2]; - assign t[3]=p[3]^g[2]; - assign t[4]=p[4]; - assign t[5]=p[5]^g[4]; - assign t[6]=p[6]; - assign t[7]=p[7]^g[6]; - assign t[8]=p[8]; - assign t[9]=p[9]^g[8]; - assign t[10]=p[10]; - assign t[11]=p[11]^g[10]; - assign t[12]=p[12]; - assign t[13]=p[13]^g[12]; - assign t[14]=p[14]; - assign t[15]=p[15]^g[14]; - assign t[16]=p[16]; - assign t[17]=p[17]^g[16]; - assign t[18]=p[18]; - assign t[19]=p[19]^g[18]; - assign t[20]=p[20]; - assign t[21]=p[21]^g[20]; - assign t[22]=p[22]; - assign t[23]=p[23]^g[22]; - assign t[24]=p[24]; - assign t[25]=p[25]^g[24]; - assign t[26]=p[26]; - assign t[27]=p[27]^g[26]; - assign 
t[28]=p[28]; - assign t[29]=p[29]^g[28]; - assign t[30]=p[30]; - assign t[31]=p[31]^g[30]; - assign t[32]=p[32]; - assign t[33]=p[33]^g[32]; - assign t[34]=p[34]; - assign t[35]=p[35]^g[34]; - assign t[36]=p[36]; - assign t[37]=p[37]^g[36]; - assign t[38]=p[38]; - assign t[39]=p[39]^g[38]; - assign t[40]=p[40]; - assign t[41]=p[41]^g[40]; - assign t[42]=p[42]; - assign t[43]=p[43]^g[42]; - assign t[44]=p[44]; - assign t[45]=p[45]^g[44]; - assign t[46]=p[46]; - assign t[47]=p[47]^g[46]; - assign t[48]=p[48]; - assign t[49]=p[49]^g[48]; - assign t[50]=p[50]; - assign t[51]=p[51]^g[50]; - assign t[52]=p[52]; - assign t[53]=p[53]^g[52]; - assign t[54]=p[54]; - assign t[55]=p[55]^g[54]; - assign t[56]=p[56]; - assign t[57]=p[57]^g[56]; - assign t[58]=p[58]; - assign t[59]=p[59]^g[58]; - assign t[60]=p[60]; - assign t[61]=p[61]^g[60]; - assign t[62]=p[62]; - assign t[63]=p[63]^g[62]; - assign t[64]=p[64]; - assign t[65]=p[65]^g[64]; - assign t[66]=p[66]; - assign t[67]=p[67]^g[66]; - assign t[68]=p[68]; - assign t[69]=p[69]^g[68]; - assign t[70]=p[70]; - assign t[71]=p[71]^g[70]; - assign t[72]=p[72]; - assign t[73]=p[73]^g[72]; - assign t[74]=p[74]; - assign t[75]=p[75]^g[74]; - assign t[76]=p[76]; - assign t[77]=p[77]^g[76]; - assign t[78]=p[78]; - assign t[79]=p[79]^g[78]; - assign t[80]=p[80]; - assign t[81]=p[81]^g[80]; - assign t[82]=p[82]; - assign t[83]=p[83]^g[82]; - assign t[84]=p[84]; - assign t[85]=p[85]^g[84]; - assign t[86]=p[86]; - assign t[87]=p[87]^g[86]; - assign t[88]=p[88]; - assign t[89]=p[89]^g[88]; - assign t[90]=p[90]; - assign t[91]=p[91]^g[90]; - assign t[92]=p[92]; - assign t[93]=p[93]^g[92]; - assign t[94]=p[94]; - assign t[95]=p[95]^g[94]; - assign t[96]=p[96]; - assign t[97]=p[97]^g[96]; - assign t[98]=p[98]; - assign t[99]=p[99]^g[98]; - assign t[100]=p[100]; - assign t[101]=p[101]^g[100]; - assign t[102]=p[102]; - assign t[103]=p[103]^g[102]; - assign t[104]=p[104]; - assign t[105]=p[105]^g[104]; - assign t[106]=p[106]; - assign 
t[107]=p[107]^g[106]; - assign t[108]=p[108]; - assign t[109]=p[109]^g[108]; - assign t[110]=p[110]; - assign t[111]=p[111]^g[110]; - assign t[112]=p[112]; - assign t[113]=p[113]^g[112]; - assign t[114]=p[114]; - assign t[115]=p[115]^g[114]; - assign t[116]=p[116]; - assign t[117]=p[117]^g[116]; - assign t[118]=p[118]; - assign t[119]=p[119]^g[118]; - assign t[120]=p[120]; - assign t[121]=p[121]^g[120]; - assign t[122]=p[122]; - assign t[123]=p[123]^g[122]; - assign t[124]=p[124]; - assign t[125]=p[125]^g[124]; - assign t[126]=p[126]; - assign t[127]=p[127]^g[126]; - assign t[128]=p[128]; - - // prefix tree - brent_kung_cs128 prefix_tree(c, p[127:0], g[127:0]); - - // post-computation - assign sum=p[128:1]^c; - assign cout=g[128]|(p[128]&c[127]); - -endmodule - -module brent_kung_cs128 (c, p, g); - - input [127:0] p; - input [127:0] g; - output [128:1] c; - - - // parallel-prefix, Brent-Kung - - // Stage 1: Generates G/P pairs that span 1 bits - grey b_1_0 (G_1_0, {g[1],g[0]}, p[1]); - black b_3_2 (G_3_2, P_3_2, {g[3],g[2]}, {p[3],p[2]}); - black b_5_4 (G_5_4, P_5_4, {g[5],g[4]}, {p[5],p[4]}); - black b_7_6 (G_7_6, P_7_6, {g[7],g[6]}, {p[7],p[6]}); - black b_9_8 (G_9_8, P_9_8, {g[9],g[8]}, {p[9],p[8]}); - black b_11_10 (G_11_10, P_11_10, {g[11],g[10]}, {p[11],p[10]}); - black b_13_12 (G_13_12, P_13_12, {g[13],g[12]}, {p[13],p[12]}); - black b_15_14 (G_15_14, P_15_14, {g[15],g[14]}, {p[15],p[14]}); - - black b_17_16 (G_17_16, P_17_16, {g[17],g[16]}, {p[17],p[16]}); - black b_19_18 (G_19_18, P_19_18, {g[19],g[18]}, {p[19],p[18]}); - black b_21_20 (G_21_20, P_21_20, {g[21],g[20]}, {p[21],p[20]}); - black b_23_22 (G_23_22, P_23_22, {g[23],g[22]}, {p[23],p[22]}); - black b_25_24 (G_25_24, P_25_24, {g[25],g[24]}, {p[25],p[24]}); - black b_27_26 (G_27_26, P_27_26, {g[27],g[26]}, {p[27],p[26]}); - black b_29_28 (G_29_28, P_29_28, {g[29],g[28]}, {p[29],p[28]}); - black b_31_30 (G_31_30, P_31_30, {g[31],g[30]}, {p[31],p[30]}); - - black b_33_32 (G_33_32, P_33_32, 
{g[33],g[32]}, {p[33],p[32]}); - black b_35_34 (G_35_34, P_35_34, {g[35],g[34]}, {p[35],p[34]}); - black b_37_36 (G_37_36, P_37_36, {g[37],g[36]}, {p[37],p[36]}); - black b_39_38 (G_39_38, P_39_38, {g[39],g[38]}, {p[39],p[38]}); - black b_41_40 (G_41_40, P_41_40, {g[41],g[40]}, {p[41],p[40]}); - black b_43_42 (G_43_42, P_43_42, {g[43],g[42]}, {p[43],p[42]}); - black b_45_44 (G_45_44, P_45_44, {g[45],g[44]}, {p[45],p[44]}); - black b_47_46 (G_47_46, P_47_46, {g[47],g[46]}, {p[47],p[46]}); - - black b_49_48 (G_49_48, P_49_48, {g[49],g[48]}, {p[49],p[48]}); - black b_51_50 (G_51_50, P_51_50, {g[51],g[50]}, {p[51],p[50]}); - black b_53_52 (G_53_52, P_53_52, {g[53],g[52]}, {p[53],p[52]}); - black b_55_54 (G_55_54, P_55_54, {g[55],g[54]}, {p[55],p[54]}); - black b_57_56 (G_57_56, P_57_56, {g[57],g[56]}, {p[57],p[56]}); - black b_59_58 (G_59_58, P_59_58, {g[59],g[58]}, {p[59],p[58]}); - black b_61_60 (G_61_60, P_61_60, {g[61],g[60]}, {p[61],p[60]}); - black b_63_62 (G_63_62, P_63_62, {g[63],g[62]}, {p[63],p[62]}); - - black b_65_64 (G_65_64, P_65_64, {g[65],g[64]}, {p[65],p[64]}); - black b_67_66 (G_67_66, P_67_66, {g[67],g[66]}, {p[67],p[66]}); - black b_69_68 (G_69_68, P_69_68, {g[69],g[68]}, {p[69],p[68]}); - black b_71_70 (G_71_70, P_71_70, {g[71],g[70]}, {p[71],p[70]}); - black b_73_72 (G_73_72, P_73_72, {g[73],g[72]}, {p[73],p[72]}); - black b_75_74 (G_75_74, P_75_74, {g[75],g[74]}, {p[75],p[74]}); - black b_77_76 (G_77_76, P_77_76, {g[77],g[76]}, {p[77],p[76]}); - black b_79_78 (G_79_78, P_79_78, {g[79],g[78]}, {p[79],p[78]}); - - black b_81_80 (G_81_80, P_81_80, {g[81],g[80]}, {p[81],p[80]}); - black b_83_82 (G_83_82, P_83_82, {g[83],g[82]}, {p[83],p[82]}); - black b_85_84 (G_85_84, P_85_84, {g[85],g[84]}, {p[85],p[84]}); - black b_87_86 (G_87_86, P_87_86, {g[87],g[86]}, {p[87],p[86]}); - black b_89_88 (G_89_88, P_89_88, {g[89],g[88]}, {p[89],p[88]}); - black b_91_90 (G_91_90, P_91_90, {g[91],g[90]}, {p[91],p[90]}); - black b_93_92 (G_93_92, P_93_92, 
{g[93],g[92]}, {p[93],p[92]}); - black b_95_94 (G_95_94, P_95_94, {g[95],g[94]}, {p[95],p[94]}); - - black b_97_96 (G_97_96, P_97_96, {g[97],g[96]}, {p[97],p[96]}); - black b_99_98 (G_99_98, P_99_98, {g[99],g[98]}, {p[99],p[98]}); - black b_101_100 (G_101_100, P_101_100, {g[101],g[100]}, {p[101],p[100]}); - black b_103_102 (G_103_102, P_103_102, {g[103],g[102]}, {p[103],p[102]}); - black b_105_104 (G_105_104, P_105_104, {g[105],g[104]}, {p[105],p[104]}); - black b_107_106 (G_107_106, P_107_106, {g[107],g[106]}, {p[107],p[106]}); - black b_109_108 (G_109_108, P_109_108, {g[109],g[108]}, {p[109],p[108]}); - black b_111_110 (G_111_110, P_111_110, {g[111],g[110]}, {p[111],p[110]}); - - black b_113_112 (G_113_112, P_113_112, {g[113],g[112]}, {p[113],p[112]}); - black b_115_114 (G_115_114, P_115_114, {g[115],g[114]}, {p[115],p[114]}); - black b_117_116 (G_117_116, P_117_116, {g[117],g[116]}, {p[117],p[116]}); - black b_119_118 (G_119_118, P_119_118, {g[119],g[118]}, {p[119],p[118]}); - black b_121_120 (G_121_120, P_121_120, {g[121],g[120]}, {p[121],p[120]}); - black b_123_122 (G_123_122, P_123_122, {g[123],g[122]}, {p[123],p[122]}); - black b_125_124 (G_125_124, P_125_124, {g[125],g[124]}, {p[125],p[124]}); - black b_127_126 (G_127_126, P_127_126, {g[127],g[126]}, {p[127],p[126]}); - - - // Stage 2: Generates G/P pairs that span 2 bits - grey g_3_0 (G_3_0, {G_3_2,G_1_0}, P_3_2); - black b_7_4 (G_7_4, P_7_4, {G_7_6,G_5_4}, {P_7_6,P_5_4}); - black b_11_8 (G_11_8, P_11_8, {G_11_10,G_9_8}, {P_11_10,P_9_8}); - black b_15_12 (G_15_12, P_15_12, {G_15_14,G_13_12}, {P_15_14,P_13_12}); - black b_19_16 (G_19_16, P_19_16, {G_19_18,G_17_16}, {P_19_18,P_17_16}); - black b_23_20 (G_23_20, P_23_20, {G_23_22,G_21_20}, {P_23_22,P_21_20}); - black b_27_24 (G_27_24, P_27_24, {G_27_26,G_25_24}, {P_27_26,P_25_24}); - black b_31_28 (G_31_28, P_31_28, {G_31_30,G_29_28}, {P_31_30,P_29_28}); - - black b_35_32 (G_35_32, P_35_32, {G_35_34,G_33_32}, {P_35_34,P_33_32}); - black b_39_36 (G_39_36, 
P_39_36, {G_39_38,G_37_36}, {P_39_38,P_37_36}); - black b_43_40 (G_43_40, P_43_40, {G_43_42,G_41_40}, {P_43_42,P_41_40}); - black b_47_44 (G_47_44, P_47_44, {G_47_46,G_45_44}, {P_47_46,P_45_44}); - black b_51_48 (G_51_48, P_51_48, {G_51_50,G_49_48}, {P_51_50,P_49_48}); - black b_55_52 (G_55_52, P_55_52, {G_55_54,G_53_52}, {P_55_54,P_53_52}); - black b_59_56 (G_59_56, P_59_56, {G_59_58,G_57_56}, {P_59_58,P_57_56}); - black b_63_60 (G_63_60, P_63_60, {G_63_62,G_61_60}, {P_63_62,P_61_60}); - - black b_67_64 (G_67_64, P_67_64, {G_67_66,G_65_64}, {P_67_66,P_65_64}); - black b_71_68 (G_71_68, P_71_68, {G_71_70,G_69_68}, {P_71_70,P_69_68}); - black b_75_72 (G_75_72, P_75_72, {G_75_74,G_73_72}, {P_75_74,P_73_72}); - black b_79_76 (G_79_76, P_79_76, {G_79_78,G_77_76}, {P_79_78,P_77_76}); - black b_83_80 (G_83_80, P_83_80, {G_83_82,G_81_80}, {P_83_82,P_81_80}); - black b_87_84 (G_87_84, P_87_84, {G_87_86,G_85_84}, {P_87_86,P_85_84}); - black b_91_88 (G_91_88, P_91_88, {G_91_90,G_89_88}, {P_91_90,P_89_88}); - black b_95_92 (G_95_92, P_95_92, {G_95_94,G_93_92}, {P_95_94,P_93_92}); - - black b_99_96 (G_99_96, P_99_96, {G_99_98,G_97_96}, {P_99_98,P_97_96}); - black b_103_100 (G_103_100, P_103_100, {G_103_102,G_101_100}, {P_103_102,P_101_100}); - black b_107_104 (G_107_104, P_107_104, {G_107_106,G_105_104}, {P_107_106,P_105_104}); - black b_111_108 (G_111_108, P_111_108, {G_111_110,G_109_108}, {P_111_110,P_109_108}); - black b_115_112 (G_115_112, P_115_112, {G_115_114,G_113_112}, {P_115_114,P_113_112}); - black b_119_116 (G_119_116, P_119_116, {G_119_118,G_117_116}, {P_119_118,P_117_116}); - black b_123_120 (G_123_120, P_123_120, {G_123_122,G_121_120}, {P_123_122,P_121_120}); - black b_127_124 (G_127_124, P_127_124, {G_127_126,G_125_124}, {P_127_126,P_125_124}); - - - // Stage 3: Generates G/P pairs that span 4 bits - grey g_7_0 (G_7_0, {G_7_4,G_3_0}, P_7_4); - black b_15_8 (G_15_8, P_15_8, {G_15_12,G_11_8}, {P_15_12,P_11_8}); - black b_23_16 (G_23_16, P_23_16, {G_23_20,G_19_16}, 
{P_23_20,P_19_16}); - black b_31_24 (G_31_24, P_31_24, {G_31_28,G_27_24}, {P_31_28,P_27_24}); - black b_39_32 (G_39_32, P_39_32, {G_39_36,G_35_32}, {P_39_36,P_35_32}); - black b_47_40 (G_47_40, P_47_40, {G_47_44,G_43_40}, {P_47_44,P_43_40}); - black b_55_48 (G_55_48, P_55_48, {G_55_52,G_51_48}, {P_55_52,P_51_48}); - black b_63_56 (G_63_56, P_63_56, {G_63_60,G_59_56}, {P_63_60,P_59_56}); - - black b_71_64 (G_71_64, P_71_64, {G_71_68,G_67_64}, {P_71_68,P_67_64}); - black b_79_72 (G_79_72, P_79_72, {G_79_76,G_75_72}, {P_79_76,P_75_72}); - black b_87_80 (G_87_80, P_87_80, {G_87_84,G_83_80}, {P_87_84,P_83_80}); - black b_95_88 (G_95_88, P_95_88, {G_95_92,G_91_88}, {P_95_92,P_91_88}); - black b_103_96 (G_103_96, P_103_96, {G_103_100,G_99_96}, {P_103_100,P_99_96}); - black b_111_104 (G_111_104, P_111_104, {G_111_108,G_107_104}, {P_111_108,P_107_104}); - black b_119_112 (G_119_112, P_119_112, {G_119_116,G_115_112}, {P_119_116,P_115_112}); - black b_127_120 (G_127_120, P_127_120, {G_127_124,G_123_120}, {P_127_124,P_123_120}); - - - // Stage 4: Generates G/P pairs that span 8 bits - grey g_15_0 (G_15_0, {G_15_8,G_7_0}, P_15_8); - black b_31_16 (G_31_16, P_31_16, {G_31_24,G_23_16}, {P_31_24,P_23_16}); - black b_47_32 (G_47_32, P_47_32, {G_47_40,G_39_32}, {P_47_40,P_39_32}); - black b_63_48 (G_63_48, P_63_48, {G_63_56,G_55_48}, {P_63_56,P_55_48}); - black b_79_64 (G_79_64, P_79_64, {G_79_72,G_71_64}, {P_79_72,P_71_64}); - black b_95_80 (G_95_80, P_95_80, {G_95_88,G_87_80}, {P_95_88,P_87_80}); - black b_111_96 (G_111_96, P_111_96, {G_111_104,G_103_96}, {P_111_104,P_103_96}); - black b_127_112 (G_127_112, P_127_112, {G_127_120,G_119_112}, {P_127_120,P_119_112}); - - - // Stage 5: Generates G/P pairs that span 16 bits - grey g_31_0 (G_31_0, {G_31_16,G_15_0}, P_31_16); - black b_63_32 (G_63_32, P_63_32, {G_63_48,G_47_32}, {P_63_48,P_47_32}); - black b_95_64 (G_95_64, P_95_64, {G_95_80,G_79_64}, {P_95_80,P_79_64}); - black b_127_96 (G_127_96, P_127_96, {G_127_112,G_111_96}, 
{P_127_112,P_111_96}); - - // Stage 6: Generates G/P pairs that span 32 bits - grey g_63_0 (G_63_0, {G_63_32,G_31_0}, P_63_32); - black b_127_64 (G_127_64, P_127_64, {G_127_96,G_95_64}, {P_127_96,P_95_64}); - - // Stage 7: Generates G/P pairs that span 64 bits - grey g_127_0 (G_127_0, {G_127_64,G_63_0}, P_127_64); - - // Stage 8: Generates G/P pairs that span 32 bits - grey g_95_0 (G_95_0, {G_95_64,G_63_0}, P_95_64); - - // Stage 9: Generates G/P pairs that span 16 bits - grey g_47_0 (G_47_0, {G_47_32,G_31_0}, P_47_32); - grey g_79_0 (G_79_0, {G_79_64,G_63_0}, P_79_64); - grey g_111_0 (G_111_0, {G_111_96,G_95_0}, P_111_96); - - // Stage 10: Generates G/P pairs that span 8 bits - grey g_23_0 (G_23_0, {G_23_16,G_15_0}, P_23_16); - grey g_39_0 (G_39_0, {G_39_32,G_31_0}, P_39_32); - grey g_55_0 (G_55_0, {G_55_48,G_47_0}, P_55_48); - grey g_71_0 (G_71_0, {G_71_64,G_63_0}, P_71_64); - grey g_87_0 (G_87_0, {G_87_80,G_79_0}, P_87_80); - grey g_103_0 (G_103_0, {G_103_96,G_95_0}, P_103_96); - grey g_119_0 (G_119_0, {G_119_112,G_111_0}, P_119_112); - - // Stage 11: Generates G/P pairs that span 4 bits - grey g_11_0 (G_11_0, {G_11_8,G_7_0}, P_11_8); - grey g_19_0 (G_19_0, {G_19_16,G_15_0}, P_19_16); - grey g_27_0 (G_27_0, {G_27_24,G_23_0}, P_27_24); - grey g_35_0 (G_35_0, {G_35_32,G_31_0}, P_35_32); - grey g_43_0 (G_43_0, {G_43_40,G_39_0}, P_43_40); - grey g_51_0 (G_51_0, {G_51_48,G_47_0}, P_51_48); - grey g_59_0 (G_59_0, {G_59_56,G_55_0}, P_59_56); - grey g_67_0 (G_67_0, {G_67_64,G_63_0}, P_67_64); - grey g_75_0 (G_75_0, {G_75_72,G_71_0}, P_75_72); - grey g_83_0 (G_83_0, {G_83_80,G_79_0}, P_83_80); - grey g_91_0 (G_91_0, {G_91_88,G_87_0}, P_91_88); - grey g_99_0 (G_99_0, {G_99_96,G_95_0}, P_99_96); - grey g_107_0 (G_107_0, {G_107_104,G_103_0}, P_107_104); - grey g_115_0 (G_115_0, {G_115_112,G_111_0}, P_115_112); - grey g_123_0 (G_123_0, {G_123_120,G_119_0}, P_123_120); - - // Stage 12: Generates G/P pairs that span 2 bits - grey g_5_0 (G_5_0, {G_5_4,G_3_0}, P_5_4); - grey 
g_9_0 (G_9_0, {G_9_8,G_7_0}, P_9_8); - grey g_13_0 (G_13_0, {G_13_12,G_11_0}, P_13_12); - grey g_17_0 (G_17_0, {G_17_16,G_15_0}, P_17_16); - grey g_21_0 (G_21_0, {G_21_20,G_19_0}, P_21_20); - grey g_25_0 (G_25_0, {G_25_24,G_23_0}, P_25_24); - grey g_29_0 (G_29_0, {G_29_28,G_27_0}, P_29_28); - grey g_33_0 (G_33_0, {G_33_32,G_31_0}, P_33_32); - grey g_37_0 (G_37_0, {G_37_36,G_35_0}, P_37_36); - grey g_41_0 (G_41_0, {G_41_40,G_39_0}, P_41_40); - grey g_45_0 (G_45_0, {G_45_44,G_43_0}, P_45_44); - grey g_49_0 (G_49_0, {G_49_48,G_47_0}, P_49_48); - grey g_53_0 (G_53_0, {G_53_52,G_51_0}, P_53_52); - grey g_57_0 (G_57_0, {G_57_56,G_55_0}, P_57_56); - grey g_61_0 (G_61_0, {G_61_60,G_59_0}, P_61_60); - grey g_65_0 (G_65_0, {G_65_64,G_63_0}, P_65_64); - grey g_69_0 (G_69_0, {G_69_68,G_67_0}, P_69_68); - grey g_73_0 (G_73_0, {G_73_72,G_71_0}, P_73_72); - grey g_77_0 (G_77_0, {G_77_76,G_75_0}, P_77_76); - grey g_81_0 (G_81_0, {G_81_80,G_79_0}, P_81_80); - grey g_85_0 (G_85_0, {G_85_84,G_83_0}, P_85_84); - grey g_89_0 (G_89_0, {G_89_88,G_87_0}, P_89_88); - grey g_93_0 (G_93_0, {G_93_92,G_91_0}, P_93_92); - grey g_97_0 (G_97_0, {G_97_96,G_95_0}, P_97_96); - grey g_101_0 (G_101_0, {G_101_100,G_99_0}, P_101_100); - grey g_105_0 (G_105_0, {G_105_104,G_103_0}, P_105_104); - grey g_109_0 (G_109_0, {G_109_108,G_107_0}, P_109_108); - grey g_113_0 (G_113_0, {G_113_112,G_111_0}, P_113_112); - grey g_117_0 (G_117_0, {G_117_116,G_115_0}, P_117_116); - grey g_121_0 (G_121_0, {G_121_120,G_119_0}, P_121_120); - grey g_125_0 (G_125_0, {G_125_124,G_123_0}, P_125_124); - - // Last grey cell stage - grey g_2_0 (G_2_0, {g[2],G_1_0}, p[2]); - grey g_4_0 (G_4_0, {g[4],G_3_0}, p[4]); - grey g_6_0 (G_6_0, {g[6],G_5_0}, p[6]); - grey g_8_0 (G_8_0, {g[8],G_7_0}, p[8]); - grey g_10_0 (G_10_0, {g[10],G_9_0}, p[10]); - grey g_12_0 (G_12_0, {g[12],G_11_0}, p[12]); - grey g_14_0 (G_14_0, {g[14],G_13_0}, p[14]); - grey g_16_0 (G_16_0, {g[16],G_15_0}, p[16]); - grey g_18_0 (G_18_0, {g[18],G_17_0}, p[18]); - 
grey g_20_0 (G_20_0, {g[20],G_19_0}, p[20]); - grey g_22_0 (G_22_0, {g[22],G_21_0}, p[22]); - grey g_24_0 (G_24_0, {g[24],G_23_0}, p[24]); - grey g_26_0 (G_26_0, {g[26],G_25_0}, p[26]); - grey g_28_0 (G_28_0, {g[28],G_27_0}, p[28]); - grey g_30_0 (G_30_0, {g[30],G_29_0}, p[30]); - grey g_32_0 (G_32_0, {g[32],G_31_0}, p[32]); - grey g_34_0 (G_34_0, {g[34],G_33_0}, p[34]); - grey g_36_0 (G_36_0, {g[36],G_35_0}, p[36]); - grey g_38_0 (G_38_0, {g[38],G_37_0}, p[38]); - grey g_40_0 (G_40_0, {g[40],G_39_0}, p[40]); - grey g_42_0 (G_42_0, {g[42],G_41_0}, p[42]); - grey g_44_0 (G_44_0, {g[44],G_43_0}, p[44]); - grey g_46_0 (G_46_0, {g[46],G_45_0}, p[46]); - grey g_48_0 (G_48_0, {g[48],G_47_0}, p[48]); - grey g_50_0 (G_50_0, {g[50],G_49_0}, p[50]); - grey g_52_0 (G_52_0, {g[52],G_51_0}, p[52]); - grey g_54_0 (G_54_0, {g[54],G_53_0}, p[54]); - grey g_56_0 (G_56_0, {g[56],G_55_0}, p[56]); - grey g_58_0 (G_58_0, {g[58],G_57_0}, p[58]); - grey g_60_0 (G_60_0, {g[60],G_59_0}, p[60]); - grey g_62_0 (G_62_0, {g[62],G_61_0}, p[62]); - grey g_64_0 (G_64_0, {g[64],G_63_0}, p[64]); - grey g_66_0 (G_66_0, {g[66],G_65_0}, p[66]); - grey g_68_0 (G_68_0, {g[68],G_67_0}, p[68]); - grey g_70_0 (G_70_0, {g[70],G_69_0}, p[70]); - grey g_72_0 (G_72_0, {g[72],G_71_0}, p[72]); - grey g_74_0 (G_74_0, {g[74],G_73_0}, p[74]); - grey g_76_0 (G_76_0, {g[76],G_75_0}, p[76]); - grey g_78_0 (G_78_0, {g[78],G_77_0}, p[78]); - grey g_80_0 (G_80_0, {g[80],G_79_0}, p[80]); - grey g_82_0 (G_82_0, {g[82],G_81_0}, p[82]); - grey g_84_0 (G_84_0, {g[84],G_83_0}, p[84]); - grey g_86_0 (G_86_0, {g[86],G_85_0}, p[86]); - grey g_88_0 (G_88_0, {g[88],G_87_0}, p[88]); - grey g_90_0 (G_90_0, {g[90],G_89_0}, p[90]); - grey g_92_0 (G_92_0, {g[92],G_91_0}, p[92]); - grey g_94_0 (G_94_0, {g[94],G_93_0}, p[94]); - grey g_96_0 (G_96_0, {g[96],G_95_0}, p[96]); - grey g_98_0 (G_98_0, {g[98],G_97_0}, p[98]); - grey g_100_0 (G_100_0, {g[100],G_99_0}, p[100]); - grey g_102_0 (G_102_0, {g[102],G_101_0}, p[102]); - grey g_104_0 
(G_104_0, {g[104],G_103_0}, p[104]); - grey g_106_0 (G_106_0, {g[106],G_105_0}, p[106]); - grey g_108_0 (G_108_0, {g[108],G_107_0}, p[108]); - grey g_110_0 (G_110_0, {g[110],G_109_0}, p[110]); - grey g_112_0 (G_112_0, {g[112],G_111_0}, p[112]); - grey g_114_0 (G_114_0, {g[114],G_113_0}, p[114]); - grey g_116_0 (G_116_0, {g[116],G_115_0}, p[116]); - grey g_118_0 (G_118_0, {g[118],G_117_0}, p[118]); - grey g_120_0 (G_120_0, {g[120],G_119_0}, p[120]); - grey g_122_0 (G_122_0, {g[122],G_121_0}, p[122]); - grey g_124_0 (G_124_0, {g[124],G_123_0}, p[124]); - grey g_126_0 (G_126_0, {g[126],G_125_0}, p[126]); - - // Final Stage: Apply c_k+1=G_k_0 - assign c[1]=g[0]; - assign c[2]=G_1_0; - assign c[3]=G_2_0; - assign c[4]=G_3_0; - assign c[5]=G_4_0; - assign c[6]=G_5_0; - assign c[7]=G_6_0; - assign c[8]=G_7_0; - assign c[9]=G_8_0; - - assign c[10]=G_9_0; - assign c[11]=G_10_0; - assign c[12]=G_11_0; - assign c[13]=G_12_0; - assign c[14]=G_13_0; - assign c[15]=G_14_0; - assign c[16]=G_15_0; - assign c[17]=G_16_0; - - assign c[18]=G_17_0; - assign c[19]=G_18_0; - assign c[20]=G_19_0; - assign c[21]=G_20_0; - assign c[22]=G_21_0; - assign c[23]=G_22_0; - assign c[24]=G_23_0; - assign c[25]=G_24_0; - - assign c[26]=G_25_0; - assign c[27]=G_26_0; - assign c[28]=G_27_0; - assign c[29]=G_28_0; - assign c[30]=G_29_0; - assign c[31]=G_30_0; - assign c[32]=G_31_0; - assign c[33]=G_32_0; - - assign c[34]=G_33_0; - assign c[35]=G_34_0; - assign c[36]=G_35_0; - assign c[37]=G_36_0; - assign c[38]=G_37_0; - assign c[39]=G_38_0; - assign c[40]=G_39_0; - assign c[41]=G_40_0; - - assign c[42]=G_41_0; - assign c[43]=G_42_0; - assign c[44]=G_43_0; - assign c[45]=G_44_0; - assign c[46]=G_45_0; - assign c[47]=G_46_0; - assign c[48]=G_47_0; - assign c[49]=G_48_0; - - assign c[50]=G_49_0; - assign c[51]=G_50_0; - assign c[52]=G_51_0; - assign c[53]=G_52_0; - assign c[54]=G_53_0; - assign c[55]=G_54_0; - assign c[56]=G_55_0; - assign c[57]=G_56_0; - - assign c[58]=G_57_0; - assign c[59]=G_58_0; - 
assign c[60]=G_59_0; - assign c[61]=G_60_0; - assign c[62]=G_61_0; - assign c[63]=G_62_0; - assign c[64]=G_63_0; - assign c[65]=G_64_0; - - assign c[66]=G_65_0; - assign c[67]=G_66_0; - assign c[68]=G_67_0; - assign c[69]=G_68_0; - assign c[70]=G_69_0; - assign c[71]=G_70_0; - assign c[72]=G_71_0; - assign c[73]=G_72_0; - - assign c[74]=G_73_0; - assign c[75]=G_74_0; - assign c[76]=G_75_0; - assign c[77]=G_76_0; - assign c[78]=G_77_0; - assign c[79]=G_78_0; - assign c[80]=G_79_0; - assign c[81]=G_80_0; - - assign c[82]=G_81_0; - assign c[83]=G_82_0; - assign c[84]=G_83_0; - assign c[85]=G_84_0; - assign c[86]=G_85_0; - assign c[87]=G_86_0; - assign c[88]=G_87_0; - assign c[89]=G_88_0; - - assign c[90]=G_89_0; - assign c[91]=G_90_0; - assign c[92]=G_91_0; - assign c[93]=G_92_0; - assign c[94]=G_93_0; - assign c[95]=G_94_0; - assign c[96]=G_95_0; - assign c[97]=G_96_0; - - assign c[98]=G_97_0; - assign c[99]=G_98_0; - assign c[100]=G_99_0; - assign c[101]=G_100_0; - assign c[102]=G_101_0; - assign c[103]=G_102_0; - assign c[104]=G_103_0; - assign c[105]=G_104_0; - - assign c[106]=G_105_0; - assign c[107]=G_106_0; - assign c[108]=G_107_0; - assign c[109]=G_108_0; - assign c[110]=G_109_0; - assign c[111]=G_110_0; - assign c[112]=G_111_0; - assign c[113]=G_112_0; - - assign c[114]=G_113_0; - assign c[115]=G_114_0; - assign c[116]=G_115_0; - assign c[117]=G_116_0; - assign c[118]=G_117_0; - assign c[119]=G_118_0; - assign c[120]=G_119_0; - assign c[121]=G_120_0; - - assign c[122]=G_121_0; - assign c[123]=G_122_0; - assign c[124]=G_123_0; - assign c[125]=G_124_0; - assign c[126]=G_125_0; - assign c[127]=G_126_0; - assign c[128]=G_127_0; - -endmodule // brent_kung_cs - - diff --git a/wally-pipelined/src/fpu/bk13.sv b/wally-pipelined/src/fpu/bk13.sv deleted file mode 100755 index 84158db9..00000000 --- a/wally-pipelined/src/fpu/bk13.sv +++ /dev/null @@ -1,97 +0,0 @@ -// Brent-Kung Carry-save Prefix Adder - -module bk13 (cout, sum, a, b, cin); - input [12:0] a, b; - input 
cin; - output [12:0] sum; - output cout; - - wire [13:0] p,g,t; - wire [12:0] c; - -// pre-computation - assign p={a^b,1'b0}; - assign g={a&b, cin}; - assign t[1]=p[1]; - assign t[2]=p[2]; - assign t[3]=p[3]^g[2]; - assign t[4]=p[4]; - assign t[5]=p[5]^g[4]; - assign t[6]=p[6]; - assign t[7]=p[7]^g[6]; - assign t[8]=p[8]; - assign t[9]=p[9]^g[8]; - assign t[10]=p[10]; - assign t[11]=p[11]^g[10]; - assign t[12]=p[12]; - assign t[13]=p[13]; - -// prefix tree - brent_kung_cs13 prefix_tree(c, p[12:0], g[12:0]); - -// post-computation - assign sum=p[13:1]^c; - assign cout=g[13]|(p[13]&c[12]); - -endmodule - -module brent_kung_cs13 (c, p, g); - - input [13:0] p; - input [13:0] g; - output [13:1] c; - - - // parallel-prefix, Brent-Kung - - // Stage 1: Generates G/P pairs that span 1 bits - grey b_1_0 (G_1_0, {g[1],g[0]}, p[1]); - black b_3_2 (G_3_2, P_3_2, {g[3],g[2]}, {p[3],p[2]}); - black b_5_4 (G_5_4, P_5_4, {g[5],g[4]}, {p[5],p[4]}); - black b_7_6 (G_7_6, P_7_6, {g[7],g[6]}, {p[7],p[6]}); - black b_9_8 (G_9_8, P_9_8, {g[9],g[8]}, {p[9],p[8]}); - black b_11_10 (G_11_10, P_11_10, {g[11],g[10]}, {p[11],p[10]}); - black b_13_12 (G_13_12, P_13_12, {g[13],g[12]}, {p[13],p[12]}); - - // Stage 2: Generates G/P pairs that span 2 bits - grey g_3_0 (G_3_0, {G_3_2,G_1_0}, P_3_2); - black b_7_4 (G_7_4, P_7_4, {G_7_6,G_5_4}, {P_7_6,P_5_4}); - black b_11_8 (G_11_8, P_11_8, {G_11_10,G_9_8}, {P_11_10,P_9_8}); - - // Stage 3: Generates G/P pairs that span 4 bits - grey g_7_0 (G_7_0, {G_7_4,G_3_0}, P_7_4); - - // Stage 4: Generates G/P pairs that span 8 bits - - // Stage 5: Generates G/P pairs that span 4 bits - grey g_11_0 (G_11_0, {G_11_8,G_7_0}, P_11_8); - - // Stage 6: Generates G/P pairs that span 2 bits - grey g_5_0 (G_5_0, {G_5_4,G_3_0}, P_5_4); - grey g_9_0 (G_9_0, {G_9_8,G_7_0}, P_9_8); - - // Last grey cell stage - grey g_2_0 (G_2_0, {g[2],G_1_0}, p[2]); - grey g_4_0 (G_4_0, {g[4],G_3_0}, p[4]); - grey g_6_0 (G_6_0, {g[6],G_5_0}, p[6]); - grey g_8_0 (G_8_0, {g[8],G_7_0}, 
p[8]); - grey g_10_0 (G_10_0, {g[10],G_9_0}, p[10]); - grey g_12_0 (G_12_0, {g[12],G_11_0}, p[12]); - - // Final Stage: Apply c_k+1=G_k_0 - assign c[1]=g[0]; - assign c[2]=G_1_0; - assign c[3]=G_2_0; - assign c[4]=G_3_0; - assign c[5]=G_4_0; - assign c[6]=G_5_0; - assign c[7]=G_6_0; - assign c[8]=G_7_0; - assign c[9]=G_8_0; - - assign c[10]=G_9_0; - assign c[11]=G_10_0; - assign c[12]=G_11_0; - assign c[13]=G_12_0; - -endmodule diff --git a/wally-pipelined/src/fpu/bk14.sv b/wally-pipelined/src/fpu/bk14.sv deleted file mode 100755 index 46872167..00000000 --- a/wally-pipelined/src/fpu/bk14.sv +++ /dev/null @@ -1,86 +0,0 @@ -// Brent-Kung Prefix Adder - -module bk14 (cout, sum, a, b, cin); - input [13:0] a, b; - input cin; - output [13:0] sum; - output cout; - - wire [14:0] p,g; - wire [13:0] c; - -// pre-computation - assign p={a^b,1'b0}; - assign g={a&b, cin}; - -// prefix tree - brent_kung14 prefix_tree(c, p[13:0], g[13:0]); - -// post-computation - assign sum=p[14:1]^c; - assign cout=g[14]|(p[14]&c[13]); - -endmodule - -module brent_kung14 (c, p, g); - - input [13:0] p; - input [13:0] g; - output [14:1] c; - - - // parallel-prefix, Brent-Kung - - // Stage 1: Generates G/P pairs that span 1 bits - grey b_1_0 (G_1_0, {g[1],g[0]}, p[1]); - black b_3_2 (G_3_2, P_3_2, {g[3],g[2]}, {p[3],p[2]}); - black b_5_4 (G_5_4, P_5_4, {g[5],g[4]}, {p[5],p[4]}); - black b_7_6 (G_7_6, P_7_6, {g[7],g[6]}, {p[7],p[6]}); - black b_9_8 (G_9_8, P_9_8, {g[9],g[8]}, {p[9],p[8]}); - black b_11_10 (G_11_10, P_11_10, {g[11],g[10]}, {p[11],p[10]}); - black b_13_12 (G_13_12, P_13_12, {g[13],g[12]}, {p[13],p[12]}); - - // Stage 2: Generates G/P pairs that span 2 bits - grey g_3_0 (G_3_0, {G_3_2,G_1_0}, P_3_2); - black b_7_4 (G_7_4, P_7_4, {G_7_6,G_5_4}, {P_7_6,P_5_4}); - black b_11_8 (G_11_8, P_11_8, {G_11_10,G_9_8}, {P_11_10,P_9_8}); - - // Stage 3: Generates G/P pairs that span 4 bits - grey g_7_0 (G_7_0, {G_7_4,G_3_0}, P_7_4); - - // Stage 4: Generates G/P pairs that span 8 bits - - // Stage 
5: Generates G/P pairs that span 4 bits - grey g_11_0 (G_11_0, {G_11_8,G_7_0}, P_11_8); - - // Stage 6: Generates G/P pairs that span 2 bits - grey g_5_0 (G_5_0, {G_5_4,G_3_0}, P_5_4); - grey g_9_0 (G_9_0, {G_9_8,G_7_0}, P_9_8); - grey g_13_0 (G_13_0, {G_13_12,G_11_0}, P_13_12); - - // Last grey cell stage - grey g_2_0 (G_2_0, {g[2],G_1_0}, p[2]); - grey g_4_0 (G_4_0, {g[4],G_3_0}, p[4]); - grey g_6_0 (G_6_0, {g[6],G_5_0}, p[6]); - grey g_8_0 (G_8_0, {g[8],G_7_0}, p[8]); - grey g_10_0 (G_10_0, {g[10],G_9_0}, p[10]); - grey g_12_0 (G_12_0, {g[12],G_11_0}, p[12]); - - // Final Stage: Apply c_k+1=G_k_0 - assign c[1]=g[0]; - assign c[2]=G_1_0; - assign c[3]=G_2_0; - assign c[4]=G_3_0; - assign c[5]=G_4_0; - assign c[6]=G_5_0; - assign c[7]=G_6_0; - assign c[8]=G_7_0; - assign c[9]=G_8_0; - - assign c[10]=G_9_0; - assign c[11]=G_10_0; - assign c[12]=G_11_0; - assign c[13]=G_12_0; - assign c[14]=G_13_0; - -endmodule diff --git a/wally-pipelined/src/fpu/csa.sv b/wally-pipelined/src/fpu/csa.sv deleted file mode 100644 index 1e5682cf..00000000 --- a/wally-pipelined/src/fpu/csa.sv +++ /dev/null @@ -1,70 +0,0 @@ -module ha (C, S, A, B) ; - - input A, B; - output S, C; - - assign S = A^B; - assign C = A&B; - -endmodule // HA - -// module fa (input logic a, b, c, output logic sum, carry); - -// assign sum = a^b^c; -// assign carry = a&b|a&c|b&c; - -// endmodule // fa - -// module csa #(parameter WIDTH=8) (a, b,c, sum, carry, cout); - -// input logic [WIDTH-1:0] a, b, c; - -// output logic [WIDTH-1:0] sum, carry; -// output logic cout; - -// logic [WIDTH:0] carry_temp; -// genvar i; -// generate -// for (i=0;i fp // fmv.w.x = ???0 // fmv.w.d = ???1 diff --git a/wally-pipelined/src/fpu/fma2.sv b/wally-pipelined/src/fpu/fma2.sv index 131f9839..518b7a76 100644 --- a/wally-pipelined/src/fpu/fma2.sv +++ b/wally-pipelined/src/fpu/fma2.sv @@ -16,8 +16,8 @@ module fma2( input logic XZeroM, YZeroM, ZZeroM, // inputs are zero input logic XInfM, YInfM, ZInfM, // inputs are infinity input 
logic XNaNM, YNaNM, ZNaNM, // inputs are NaN - output logic [63:0] FmaResultM, // FMA final result - output logic [4:0] FmaFlagsM); // FMA flags {invalid, divide by zero, overflow, underflow, inexact} + output logic [63:0] FMAResM, // FMA final result + output logic [4:0] FMAFlgM); // FMA flags {invalid, divide by zero, overflow, underflow, inexact} @@ -57,7 +57,7 @@ module fma2( logic [12:0] MaxExp; // maximum value of the exponent logic [12:0] FracLen; // length of the fraction logic SigNaN; // is an input a signaling NaN - logic UnderflowFlag; // Underflow singal used in FmaFlagsM (used to avoid a circular depencency) + logic UnderflowFlag; // Underflow singal used in FMAFlgM (used to avoid a circular depencency) logic [63:0] XNaNResult, YNaNResult, ZNaNResult, InvalidResult, OverflowResult, KillProdResult, UnderflowResult; // possible results @@ -316,7 +316,7 @@ module fma2( // Combine flags // - FMA can't set the Divide by zero flag // - Don't set the underflow flag if the result was rounded up to a normal number - assign FmaFlagsM = {Invalid, 1'b0, Overflow, UnderflowFlag, Inexact}; + assign FMAFlgM = {Invalid, 1'b0, Overflow, UnderflowFlag, Inexact}; @@ -337,7 +337,7 @@ module fma2( assign InvalidResult = FmtM ? {ResultSgn, 11'h7ff, 1'b1, 51'b0} : {ResultSgn, 8'hff, 1'b1, 54'b0}; assign KillProdResult = FmtM ?{ResultSgn, Addend[62:0] - {62'b0, (Minus1&AddendStickyM)}} + {62'b0, (Plus1&AddendStickyM)} : {ResultSgn, Addend[62:32] - {30'b0, (Minus1&AddendStickyM)} + {30'b0, (Plus1&AddendStickyM)}, 32'b0}; assign UnderflowResult = FmtM ? {ResultSgn, 63'b0} + {63'b0, (CalcPlus1&(AddendStickyM|FrmM[1]))} : {{ResultSgn, 31'b0} + {31'b0, (CalcPlus1&(AddendStickyM|FrmM[1]))}, 32'b0}; - assign FmaResultM = XNaNM ? XNaNResult : + assign FMAResM = XNaNM ? XNaNResult : YNaNM ? YNaNResult : ZNaNM ? ZNaNResult : Invalid ? 
InvalidResult : // has to be before inf diff --git a/wally-pipelined/src/fpu/fpadd_denorm.sv b/wally-pipelined/src/fpu/fpadd_denorm.sv index eabfcd3a..43de3087 100755 --- a/wally-pipelined/src/fpu/fpadd_denorm.sv +++ b/wally-pipelined/src/fpu/fpadd_denorm.sv @@ -229,11 +229,11 @@ module fpadd (AS_Result, Flags, Denorm, op1, op2, rm, op_type, P, OvEn, UnEn); assign corr_sign = ~op_type[2]&~op_type[1]&op_type[0]&swap; // 64-bit Mantissa Adder/Subtractor - cla64 add1 (sum, mantissaA3, mantissaB3, sub); + cla64 add1 (sum, mantissaA3, mantissaB3, sub); //***adder // 64-bit Mantissa Subtractor - to get the two's complement of the // result when the sign from the adder/subtractor is negative. - cla_sub64 sub1 (sum_tc, mantissaB3, mantissaA3); + cla_sub64 sub1 (sum_tc, mantissaB3, mantissaA3); //***adder // Determine the correct sign of the result assign sign_corr = ((corr_sign ^ signA) & ~convert) ^ sum[63]; diff --git a/wally-pipelined/src/fpu/fpdiv.sv b/wally-pipelined/src/fpu/fpdiv.sv deleted file mode 100755 index 8c305f3e..00000000 --- a/wally-pipelined/src/fpu/fpdiv.sv +++ /dev/null @@ -1,256 +0,0 @@ -// -// File name : fpdiv -// Title : Floating-Point Divider/Square-Root -// project : FPU -// Library : fpdiv -// Author(s) : James E. Stine, Jr. -// Purpose : definition of main unit to floating-point div/sqrt -// notes : -// -// Copyright Oklahoma State University -// -// Basic Operations -// -// Step 1: Load operands, set flags, and convert SP to DP -// Step 2: Check for special inputs ( +/- Infinity, NaN) -// Step 3: Exponent Logic -// Step 4: Divide/Sqrt using Goldschmidt -// Step 5: Normalize the result.// -// Shift left until normalized. Normalized when the value to the -// left of the binrary point is 1. -// Step 6: Round the result.// -// Step 7: Put quotient/remainder onto output. 
-// - -// `timescale 1ps/1ps -module fpdiv (FDivSqrtDoneE, FDivResultM, FDivFlagsM, DivDenormM, DivInput1E, DivInput2E, FrmE, DivOpType, FmtE, DivOvEn, DivUnEn, - FDivStartE, reset, clk, FDivBusyE, HoldInputs); - - input [63:0] DivInput1E; // 1st input operand (A) - input [63:0] DivInput2E; // 2nd input operand (B) - input [2:0] FrmE; // Rounding mode - specify values - input DivOpType; // Function opcode - input FmtE; // Result Precision (0 for double, 1 for single) //***will need to swap this - input DivOvEn; // Overflow trap enabled - input DivUnEn; // Underflow trap enabled - - input FDivStartE; - input reset; - input clk; - - output [63:0] FDivResultM; // Result of operation - output [4:0] FDivFlagsM; // IEEE exception flags - output DivDenormM; // DivDenormM on input or output - output FDivSqrtDoneE; - output FDivBusyE, HoldInputs; - - supply1 vdd; - supply0 vss; - - wire [63:0] Float1; - wire [63:0] Float2; - wire [63:0] IntValue; - - wire [12:0] exp1, exp2, expF; - wire [12:0] exp_diff, bias; - wire [13:0] exp_sqrt; - wire [12:0] exp_s; - wire [12:0] exp_c; - - wire [10:0] exponent, exp_pre; - wire [63:0] Result; - wire [52:0] mantissaA; - wire [52:0] mantissaB; - wire [63:0] sum, sum_tc, sum_corr, sum_norm; - - wire [5:0] align_shift; - wire [5:0] norm_shift; - wire [2:0] sel_inv; - wire op1_Norm, op2_Norm; - wire opA_Norm, opB_Norm; - wire Invalid; - wire DenormIn, DenormIO; - wire [4:0] FlagsIn; - wire exp_gt63; - wire Sticky_out; - wire signResult, sign_corr; - wire corr_sign; - wire zeroB; - wire convert; - wire swap; - wire sub; - - wire [63:0] q1, qm1, qp1, q0, qm0, qp0; - wire [63:0] rega_out, regb_out, regc_out, regd_out; - wire [127:0] regr_out; - wire [2:0] sel_muxa, sel_muxb; - wire sel_muxr; - wire load_rega, load_regb, load_regc, load_regd, load_regr, load_regs; - - wire donev, sel_muxrv, sel_muxsv; - wire [1:0] sel_muxav, sel_muxbv; - wire load_regav, load_regbv, load_regcv; - wire load_regrv, load_regsv; - - logic exp_cout1, exp_cout2, 
exp_odd, open; - - - // Convert the input operands to their appropriate forms based on - // the orignal operands, the DivOpType , and their precision FmtE. - // Single precision inputs are converted to double precision - // and the sign of the first operand is set appropratiately based on - // if the operation is absolute value or negation. - convert_inputs_div divconv1 (Float1, Float2, DivInput1E, DivInput2E, DivOpType, FmtE); - - // Test for exceptions and return the "Invalid Operation" and - // "Denormalized" Input FDivFlagsM. The "sel_inv" is used in - // the third pipeline stage to select the result. Also, op1_Norm - // and op2_Norm are one if DivInput1E and DivInput2E are not zero or denormalized. - // sub is one if the effective operation is subtaction. - exception_div divexc1 (sel_inv, Invalid, DenormIn, op1_Norm, op2_Norm, - Float1, Float2, DivOpType); - - // Determine Sign/Mantissa - assign signResult = ((Float1[63]^Float2[63])&~DivOpType) | Float1[63]&DivOpType; - assign mantissaA = {vdd, Float1[51:0]}; - assign mantissaB = {vdd, Float2[51:0]}; - // Perform Exponent Subtraction - expA - expB + Bias - assign exp1 = {2'b0, Float1[62:52]}; - assign exp2 = {2'b0, Float2[62:52]}; - // bias : DP = 2^{11-1}-1 = 1023 - assign bias = {3'h0, 10'h3FF}; - // Divide exponent - csa #(13) csa1 (exp1, ~exp2, bias, exp_s, exp_c); - exp_add explogic1 (exp_cout1, {open, exp_diff}, - {vss, exp_s}, {vss, exp_c}, 1'b1); - // Sqrt exponent (check if exponent is odd) - assign exp_odd = Float1[52] ? vss : vdd; - exp_add explogic2 (exp_cout2, exp_sqrt, - {vss, exp1}, {4'h0, 10'h3ff}, exp_odd); - // Choose correct exponent - assign expF = DivOpType ? 
exp_sqrt[13:1] : exp_diff; - - // Main Goldschmidt/Division Routine - divconv goldy (q1, qm1, qp1, q0, qm0, qp0, - rega_out, regb_out, regc_out, regd_out, - regr_out, mantissaB, mantissaA, - sel_muxa, sel_muxb, sel_muxr, - reset, clk, - load_rega, load_regb, load_regc, load_regd, - load_regr, load_regs, FmtE, DivOpType, exp_odd); - - // FSM : control divider - fsm control (FDivSqrtDoneE, load_rega, load_regb, load_regc, load_regd, - load_regr, load_regs, sel_muxa, sel_muxb, sel_muxr, - clk, reset, FDivStartE, DivOpType, FDivBusyE, HoldInputs); - - // Round the mantissa to a 52-bit value, with the leading one - // removed. The rounding units also handles special cases and - // set the exception flags. - //***add max magnitude and swap negitive and positive infinity - rounder_div divround1 (Result, DenormIO, FlagsIn, - FrmE, FmtE, DivOvEn, DivUnEn, expF, - sel_inv, Invalid, DenormIn, signResult, - q1, qm1, qp1, q0, qm0, qp0, regr_out); - - // Store the final result and the exception flags in registers. 
- flopenr #(64) rega (clk, reset, FDivSqrtDoneE, Result, FDivResultM); - flopenr #(1) regb (clk, reset, FDivSqrtDoneE, DenormIO, DivDenormM); - flopenr #(5) regc (clk, reset, FDivSqrtDoneE, FlagsIn, FDivFlagsM); - -endmodule // fpadd - -// -// Brent-Kung Prefix Adder -// (yes, it is 14 bits as my generator is broken for 13 bits :( -// assume, synthesizer will delete stuff not needed ) -// -module exp_add (cout, sum, a, b, cin); - - input [13:0] a, b; - input cin; - - output [13:0] sum; - output cout; - - wire [14:0] p,g; - wire [13:0] c; - - // pre-computation - assign p={a^b,1'b0}; - assign g={a&b, cin}; - - // prefix tree - brent_kung prefix_tree(c, p[13:0], g[13:0]); - - // post-computation - assign sum=p[14:1]^c; - assign cout=g[14]|(p[14]&c[13]); - -endmodule // exp_add - -module brent_kung (c, p, g); - - input [13:0] p; - input [13:0] g; - output [14:1] c; - - logic G_1_0, G_3_2,G_5_4,G_7_6,G_9_8,G_11_10,G_13_12,G_3_0,G_7_4,G_11_8; - logic P_3_2,P_5_4,P_7_6,P_9_8,P_11_10,P_13_12,P_7_4,P_11_8; - logic G_7_0,G_11_0,G_5_0,G_9_0,G_13_0,G_2_0,G_4_0,G_6_0,G_8_0,G_10_0,G_12_0; - // parallel-prefix, Brent-Kung - - // Stage 1: Generates G/FmtE pairs that span 1 bits - grey b_1_0 (G_1_0, {g[1],g[0]}, p[1]); - black b_3_2 (G_3_2, P_3_2, {g[3],g[2]}, {p[3],p[2]}); - black b_5_4 (G_5_4, P_5_4, {g[5],g[4]}, {p[5],p[4]}); - black b_7_6 (G_7_6, P_7_6, {g[7],g[6]}, {p[7],p[6]}); - black b_9_8 (G_9_8, P_9_8, {g[9],g[8]}, {p[9],p[8]}); - black b_11_10 (G_11_10, P_11_10, {g[11],g[10]}, {p[11],p[10]}); - black b_13_12 (G_13_12, P_13_12, {g[13],g[12]}, {p[13],p[12]}); - - // Stage 2: Generates G/FmtE pairs that span 2 bits - grey g_3_0 (G_3_0, {G_3_2,G_1_0}, P_3_2); - black b_7_4 (G_7_4, P_7_4, {G_7_6,G_5_4}, {P_7_6,P_5_4}); - black b_11_8 (G_11_8, P_11_8, {G_11_10,G_9_8}, {P_11_10,P_9_8}); - - // Stage 3: Generates G/FmtE pairs that span 4 bits - grey g_7_0 (G_7_0, {G_7_4,G_3_0}, P_7_4); - - // Stage 4: Generates G/FmtE pairs that span 8 bits - - // Stage 5: Generates G/FmtE 
pairs that span 4 bits - grey g_11_0 (G_11_0, {G_11_8,G_7_0}, P_11_8); - - // Stage 6: Generates G/FmtE pairs that span 2 bits - grey g_5_0 (G_5_0, {G_5_4,G_3_0}, P_5_4); - grey g_9_0 (G_9_0, {G_9_8,G_7_0}, P_9_8); - grey g_13_0 (G_13_0, {G_13_12,G_11_0}, P_13_12); - - // Last grey cell stage - grey g_2_0 (G_2_0, {g[2],G_1_0}, p[2]); - grey g_4_0 (G_4_0, {g[4],G_3_0}, p[4]); - grey g_6_0 (G_6_0, {g[6],G_5_0}, p[6]); - grey g_8_0 (G_8_0, {g[8],G_7_0}, p[8]); - grey g_10_0 (G_10_0, {g[10],G_9_0}, p[10]); - grey g_12_0 (G_12_0, {g[12],G_11_0}, p[12]); - - // Final Stage: Apply c_k+1=G_k_0 - assign c[1]=g[0]; - assign c[2]=G_1_0; - assign c[3]=G_2_0; - assign c[4]=G_3_0; - assign c[5]=G_4_0; - assign c[6]=G_5_0; - assign c[7]=G_6_0; - assign c[8]=G_7_0; - assign c[9]=G_8_0; - - assign c[10]=G_9_0; - assign c[11]=G_10_0; - assign c[12]=G_11_0; - assign c[13]=G_12_0; - assign c[14]=G_13_0; - -endmodule // brent_kung - diff --git a/wally-pipelined/src/fpu/fpu.sv b/wally-pipelined/src/fpu/fpu.sv index 5c15268e..ff29dfd7 100755 --- a/wally-pipelined/src/fpu/fpu.sv +++ b/wally-pipelined/src/fpu/fpu.sv @@ -34,7 +34,7 @@ module fpu ( input logic [`XLEN-1:0] SrcAM, // Integer input being written into fpreg input logic StallE, StallM, StallW, input logic FlushE, FlushM, FlushW, - output logic FStallD, // Stall the decode stage if Div/Sqrt instruction + output logic FStallD, // Stall the decode stage output logic FWriteIntE, FWriteIntM, FWriteIntW, // Write integer register enable output logic [`XLEN-1:0] FWriteDataE, // Data to be written to memory output logic [`XLEN-1:0] FIntResM, @@ -42,48 +42,38 @@ module fpu ( output logic IllegalFPUInstrD, // Is the instruction an illegal fpu instruction output logic [4:0] SetFflagsM, // FPU flags output logic [`XLEN-1:0] FPUResultW); // FPU result - +// *** change FMA to do 16 - 32 - 64 - 128 FEXPBITS // control logic signal instantiation logic FWriteEnD, FWriteEnE, FWriteEnM, FWriteEnW; // FP register write enable - logic [2:0] FrmD, 
FrmE, FrmM, FrmW; // FP rounding mode + logic [2:0] FrmD, FrmE, FrmM; // FP rounding mode logic FmtD, FmtE, FmtM, FmtW; // FP precision 0-single 1-double logic FDivStartD, FDivStartE; // Start division logic FWriteIntD; // Write to integer register - logic FOutputInput2D, FOutputInput2E; // Put Input2 in Input1 if a store instruction - logic [1:0] FMemRWD; // Read and write enable for memory - logic [1:0] ForwardXD, ForwardXE; // Input1 forwarding mux control signal - logic [1:0] ForwardYD, ForwardYE; // Input2 forwarding mux control signal - logic [1:0] ForwardZD, ForwardZE; // Input3 forwarding mux control signal - logic SrcYUsedD; // Is input 2 used - logic SrcZUsedD; // Is input 3 used + logic [1:0] ForwardXE, ForwardYE, ForwardZE; // Input3 forwarding mux control signal logic [2:0] FResultSelD, FResultSelE, FResultSelM, FResultSelW; // Select FP result - logic [3:0] FOpCtrlD, FOpCtrlE, FOpCtrlM, FOpCtrlW; // Select which opperation to do in each component - logic [1:0] FResSelD, FResSelE, FResSelM; - logic [1:0] FIntResSelD, FIntResSelE, FIntResSelM; + logic [3:0] FOpCtrlD, FOpCtrlE, FOpCtrlM; // Select which opperation to do in each component + logic [1:0] FResSelD, FResSelE, FResSelM; + logic [1:0] FIntResSelD, FIntResSelE, FIntResSelM; logic [4:0] Adr1E, Adr2E, Adr3E; // regfile signals logic [4:0] RdE, RdM, RdW; // what adress to write to // ***Can take from ieu insted of pipelining - logic [63:0] FWDM; // Write data for FP register logic [63:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage logic [63:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage - logic [63:0] SrcXE, SrcXM, SrcXW; // Input 1 to the various units (after forwarding) logic [`XLEN-1:0] SrcXMAligned; - logic [63:0] SrcYE, SrcYM, SrcYW; // Input 2 to the various units (after forwarding) + logic [63:0] SrcXE, SrcXM; // Input 1 to the various units (after forwarding) + logic [63:0] SrcYE, SrcYM; // Input 2 to the various units (after forwarding) logic 
[63:0] SrcZE, SrcZM; // Input 3 to the various units (after forwarding) - logic [63:0] FLoadResultW, FLoadStoreResultM, FLoadStoreResultW; // Result for load, store, and move to int-reg instructions // div/sqrt signals - logic DivDenormE, DivDenormM, DivDenormW; - logic DivOvEn, DivUnEn; - logic [63:0] FDivResultE, FDivResultM, FDivResultW; - logic [4:0] FDivFlagsE, FDivFlagsM, FDivFlagsW; - logic FDivSqrtDoneE, FDivSqrtDoneM; + logic [63:0] FDivResultM, FDivResultW; + logic [4:0] FDivSqrtFlgM, FDivSqrtFlgW; + logic FDivSqrtDoneE; logic [63:0] DivInput1E, DivInput2E; logic HoldInputs; // keep forwarded inputs arround durring division // FMA signals - logic [105:0] ProdManE, ProdManM; + logic [105:0] ProdManE, ProdManM; ///*** put pipline stages in units logic [161:0] AlignedAddendE, AlignedAddendM; logic [12:0] ProdExpE, ProdExpM; logic AddendStickyE, AddendStickyM; @@ -91,93 +81,112 @@ module fpu ( logic XZeroE, YZeroE, ZZeroE, XZeroM, YZeroM, ZZeroM; logic XInfE, YInfE, ZInfE, XInfM, YInfM, ZInfM; logic XNaNE, YNaNE, ZNaNE, XNaNM, YNaNM, ZNaNM; - logic [63:0] FmaResultM, FmaResultW; - logic [4:0] FmaFlagsM, FmaFlagsW; + logic [63:0] FMAResM, FMAResW; + logic [4:0] FMAFlgM, FMAFlgW; // add/cvt signals - logic [63:0] AddSumE, AddSumTcE; - logic [3:0] AddSelInvE; - logic [10:0] AddExpPostSumE; - logic AddCorrSignE, AddOp1NormE, AddOp2NormE, AddOpANormE, AddOpBNormE, AddInvalidE; - logic AddDenormInE, AddSwapE, AddNormOvflowE, AddSignAE; - logic AddConvertE; - logic [63:0] AddFloat1E, AddFloat2E; - logic [11:0] AddExp1DenormE, AddExp2DenormE; - logic [10:0] AddExponentE; - logic [2:0] AddRmE; - logic [3:0] AddOpTypeE; - logic AddPE, AddOvEnE, AddUnEnE; - logic AddDenormM; - logic [63:0] AddSumM, AddSumTcM; - logic [3:0] AddSelInvM; - logic [10:0] AddExpPostSumM; - logic AddCorrSignM, AddOp1NormM, AddOp2NormM, AddOpANormM, AddOpBNormM, AddInvalidM; - logic AddDenormInM, AddSwapM, AddNormOvflowM, AddSignAM; - logic AddConvertM, AddSignM; - logic [63:0] AddFloat1M, 
AddFloat2M; - logic [11:0] AddExp1DenormM, AddExp2DenormM; - logic [10:0] AddExponentM; - logic [63:0] AddOp1M, AddOp2M; - logic [2:0] AddRmM; - logic [3:0] AddOpTypeM; - logic AddPM, AddOvEnM, AddUnEnM; - logic [63:0] FAddResultM, FAddResultW; - logic [4:0] FAddFlagsM, FAddFlagsW; + logic [63:0] AddSumE, AddSumM; + logic [63:0] AddSumTcE, AddSumTcM; + logic [3:0] AddSelInvE, AddSelInvM; + logic [10:0] AddExpPostSumE,AddExpPostSumM; + logic AddCorrSignE, AddCorrSignM; + logic AddOp1NormE, AddOp1NormM; + logic AddOp2NormE, AddOp2NormM; + logic AddOpANormE, AddOpANormM; + logic AddOpBNormE, AddOpBNormM; + logic AddInvalidE, AddInvalidM; + logic AddDenormInE, AddDenormInM; + logic AddSwapE, AddSwapM; + logic AddNormOvflowE, AddNormOvflowM; //***this isn't used in addcvt2 + logic AddSignAE, AddSignAM; + logic AddConvertE, AddConvertM; + logic [63:0] AddFloat1E, AddFloat2E, AddFloat1M, AddFloat2M; + logic [11:0] AddExp1DenormE, AddExp2DenormE, AddExp1DenormM, AddExp2DenormM; + logic [10:0] AddExponentE, AddExponentM; + logic [63:0] FAddResM, FAddResW; + logic [4:0] FAddFlgM, FAddFlgW; // cmp signals - logic CmpInvalidE, CmpInvalidM, CmpInvalidW; - logic [63:0] FCmpResultE, FCmpResultM, FCmpResultW; + logic CmpNVE, CmpNVM, CmpNVW; + logic [63:0] CmpResE, CmpResM, CmpResW; // fsgn signals - logic [63:0] SgnResultE, SgnResultM, SgnResultW; - logic [4:0] SgnFlagsE, SgnFlagsM, SgnFlagsW; + logic [63:0] SgnResE, SgnResM; + logic SgnNVE, SgnNVM, SgnNVW; logic [63:0] FResM, FResW; - logic FFlgM, FFlgW; + logic FFlgM, FFlgW; // instantiation of W stage regfile signals - logic [63:0] AlignedSrcAM, ForwardSrcAM, SrcAW; + logic [63:0] AlignedSrcAM; // classify signals - logic [63:0] ClassResultE, ClassResultM, ClassResultW; + logic [63:0] ClassResE, ClassResM; // 64-bit FPU result - logic [63:0] FPUResult64W, FPUResult64E; + logic [63:0] FPUResult64W; logic [4:0] FPUFlagsW; + + + + + + + //DECODE STAGE // top-level controller for FPU - fctrl ctrl (.Funct7D(InstrD[31:25]), 
.OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .*); + fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), + .FRM_REGW, .IllegalFPUInstrD, .FWriteEnD, .FDivStartD, .FResultSelD, .FOpCtrlD, .FResSelD, + .FIntResSelD, .FmtD, .FrmD, .FWriteIntD); // regfile instantiation - FPregfile fpregfile (clk, reset, FWriteEnW, + fregfile fregfile (clk, reset, FWriteEnW, InstrD[19:15], InstrD[24:20], InstrD[31:27], RdW, FPUResult64W, FRD1D, FRD2D, FRD3D); + + + + + + + + //***************** - // fpregfile D/E pipe registers + // D/E pipe registers //***************** flopenrc #(64) DEReg1(clk, reset, FlushE, ~StallE, FRD1D, FRD1E); flopenrc #(64) DEReg2(clk, reset, FlushE, ~StallE, FRD2D, FRD2E); flopenrc #(64) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E); - - //***************** - // other D/E pipe registers - //***************** - flopenrc #(1) CtrlRegE1(clk, reset, FlushE, ~StallE, FDivStartD, FDivStartE); - flopenrc #(15) CtrlRegE2(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]}, + flopenrc #(1) DECtrlRegE1(clk, reset, FlushE, ~StallE, FDivStartD, FDivStartE); + flopenrc #(15) DECtrlRegE2(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]}, {Adr1E, Adr2E, Adr3E}); - flopenrc #(22) DECtrlReg(clk, reset, FlushE, ~StallE, + flopenrc #(22) DECtrlReg3(clk, reset, FlushE, ~StallE, {FWriteEnD, FResultSelD, FResSelD, FIntResSelD, FrmD, FmtD, InstrD[11:7], FOpCtrlD, FWriteIntD}, {FWriteEnE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, RdE, FOpCtrlE, FWriteIntE}); + + + + + + + + + + + + + //EXECUTION STAGE // Hazard unit for FPU - fpuhazard hazard(.*); + fhazard fhazard(.Adr1E, .Adr2E, .Adr3E, .FWriteEnM, .FWriteEnW, .RdM, .RdW, .FResultSelM, .FStallD, + .ForwardXE, .ForwardYE, .ForwardZE); // forwarding muxs mux3 #(64) fxemux(FRD1E, FPUResult64W, FResM, ForwardXE, SrcXE); @@ -186,7 +195,9 @@ module fpu ( // first of two-stage instance of floating-point fused 
multiply-add unit - fma1 fma1 (.X(SrcXE), .Y(SrcYE), .Z(SrcZE), .FOpCtrlE(FOpCtrlE[2:0]),.*); + fma1 fma1 (.X(SrcXE), .Y(SrcYE), .Z(SrcZE), .FOpCtrlE(FOpCtrlE[2:0]), .FmtE, .ProdManE, .AlignedAddendE, + .ProdExpE, .AddendStickyE, .KillProdE, .XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE, + .XNaNE, .YNaNE, .ZNaNE ); // first and only instance of floating-point divider logic fpdivClk; @@ -204,174 +215,140 @@ module fpu ( .en(~HoldInputs), .clear(FDivSqrtDoneE), .reset(reset), .clk(clk)); - fpdiv fpdivsqrt (.DivOpType(FOpCtrlE[0]), .clk(fpdivClk), .FmtE(~FmtE), .*); + fdivsqrt fdivsqrt (.DivOpType(FOpCtrlE[0]), .clk(fpdivClk), .FmtE(~FmtE), .DivInput1E, .DivInput2E, + .FrmE, .DivOvEn(1'b1), .DivUnEn(1'b1), .FDivStartE, .FDivResultM, .FDivSqrtFlgM, + .FDivSqrtDoneE, .FDivBusyE, .HoldInputs, .reset); // first of two-stage instance of floating-point add/cvt unit - fpuaddcvt1 fpadd1 (.*); + fpuaddcvt1 fpadd1 (.SrcXE, .SrcYE, .FOpCtrlE, .FmtE, .AddFloat1E, .AddFloat2E, .AddExponentE, + .AddExpPostSumE, .AddExp1DenormE, .AddExp2DenormE, .AddSumE, .AddSumTcE, .AddSelInvE, + .AddCorrSignE, .AddSignAE, .AddOp1NormE, .AddOp2NormE, .AddOpANormE, .AddOpBNormE, .AddInvalidE, + .AddDenormInE, .AddConvertE, .AddSwapE, .AddNormOvflowE); - // first of two-stage instance of floating-point comparator - fpucmp1 fpcmp1 (SrcXE, SrcYE, FOpCtrlE[2:0], FmtE, CmpInvalidE, FCmpResultE); + // first and only instance of floating-point comparator + fcmp fcmp (SrcXE, SrcYE, FOpCtrlE[2:0], FmtE, CmpNVE, CmpResE); // first and only instance of floating-point sign converter - fpusgn fpsgn (.SgnOpCodeE(FOpCtrlE[1:0]),.*); + fsgn fsgn (.SgnOpCodeE(FOpCtrlE[1:0]), .SrcXE, .SrcYE, .SgnResE, .SgnNVE); // first and only instance of floating-point classify unit - fpuclassify fpuclass (.*); + fclassify fclassify (.SrcXE, .FmtE, .ClassResE); // output for store instructions assign FWriteDataE = FmtE ? 
SrcYE[63:64-`XLEN] : {{`XLEN-32{1'b0}}, SrcYE[63:32]}; - + //***swap to mux + + + + + + + + + + //***************** - //fpregfile D/E pipe registers + // E/M pipe registers //***************** flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, SrcXE, SrcXM); flopenrc #(64) EMFpReg2(clk, reset, FlushM, ~StallM, SrcYE, SrcYM); flopenrc #(64) EMFpReg3(clk, reset, FlushM, ~StallM, SrcZE, SrcZM); - //***************** - // fma E/M pipe registers - //***************** - flopenrc #(106) EMRegFma3(clk, reset, FlushM, ~StallM, ProdManE, ProdManM); - flopenrc #(162) EMRegFma4(clk, reset, FlushM, ~StallM, AlignedAddendE, AlignedAddendM); - flopenrc #(13) EMRegFma6(clk, reset, FlushM, ~StallM, ProdExpE, ProdExpM); - flopenrc #(1) EMRegFma7(clk, reset, FlushM, ~StallM, AddendStickyE, AddendStickyM); - flopenrc #(1) EMRegFma8(clk, reset, FlushM, ~StallM, KillProdE, KillProdM); - flopenrc #(1) EMRegFma10(clk, reset, FlushM, ~StallM, XZeroE, XZeroM); - flopenrc #(1) EMRegFma11(clk, reset, FlushM, ~StallM, YZeroE, YZeroM); - flopenrc #(1) EMRegFma12(clk, reset, FlushM, ~StallM, ZZeroE, ZZeroM); - flopenrc #(1) EMRegFma16(clk, reset, FlushM, ~StallM, XInfE, XInfM); - flopenrc #(1) EMRegFma17(clk, reset, FlushM, ~StallM, YInfE, YInfM); - flopenrc #(1) EMRegFma18(clk, reset, FlushM, ~StallM, ZInfE, ZInfM); - flopenrc #(1) EMRegFma19(clk, reset, FlushM, ~StallM, XNaNE, XNaNM); - flopenrc #(1) EMRegFma20(clk, reset, FlushM, ~StallM, YNaNE, YNaNM); - flopenrc #(1) EMRegFma21(clk, reset, FlushM, ~StallM, ZNaNE, ZNaNM); + flopenrc #(106) EMRegFma1(clk, reset, FlushM, ~StallM, ProdManE, ProdManM); + flopenrc #(162) EMRegFma2(clk, reset, FlushM, ~StallM, AlignedAddendE, AlignedAddendM); + flopenrc #(13) EMRegFma3(clk, reset, FlushM, ~StallM, ProdExpE, ProdExpM); + flopenrc #(11) EMRegFma4(clk, reset, FlushM, ~StallM, + {AddendStickyE, KillProdE, XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE}, + {AddendStickyM, KillProdM, XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, 
XNaNM, YNaNM, ZNaNM}); - //***************** - // fpadd E/M pipe registers - //***************** flopenrc #(64) EMRegAdd1(clk, reset, FlushM, ~StallM, AddSumE, AddSumM); flopenrc #(64) EMRegAdd2(clk, reset, FlushM, ~StallM, AddSumTcE, AddSumTcM); - flopenrc #(4) EMRegAdd3(clk, reset, FlushM, ~StallM, AddSelInvE, AddSelInvM); - flopenrc #(11) EMRegAdd4(clk, reset, FlushM, ~StallM, AddExpPostSumE, AddExpPostSumM); - flopenrc #(1) EMRegAdd5(clk, reset, FlushM, ~StallM, AddCorrSignE, AddCorrSignM); - flopenrc #(1) EMRegAdd6(clk, reset, FlushM, ~StallM, AddOp1NormE, AddOp1NormM); - flopenrc #(1) EMRegAdd7(clk, reset, FlushM, ~StallM, AddOp2NormE, AddOp2NormM); - flopenrc #(1) EMRegAdd8(clk, reset, FlushM, ~StallM, AddOpANormE, AddOpANormM); - flopenrc #(1) EMRegAdd9(clk, reset, FlushM, ~StallM, AddOpBNormE, AddOpBNormM); - flopenrc #(1) EMRegAdd10(clk, reset, FlushM, ~StallM, AddInvalidE, AddInvalidM); - flopenrc #(1) EMRegAdd11(clk, reset, FlushM, ~StallM, AddDenormInE, AddDenormInM); - flopenrc #(1) EMRegAdd12(clk, reset, FlushM, ~StallM, AddConvertE, AddConvertM); - flopenrc #(1) EMRegAdd13(clk, reset, FlushM, ~StallM, AddSwapE, AddSwapM); - flopenrc #(1) EMRegAdd14(clk, reset, FlushM, ~StallM, AddNormOvflowE, AddNormOvflowM); - flopenrc #(1) EMRegAdd15(clk, reset, FlushM, ~StallM, AddSignAE, AddSignAM); - flopenrc #(64) EMRegAdd16(clk, reset, FlushM, ~StallM, AddFloat1E, AddFloat1M); - flopenrc #(64) EMRegAdd17(clk, reset, FlushM, ~StallM, AddFloat2E, AddFloat2M); - flopenrc #(12) EMRegAdd18(clk, reset, FlushM, ~StallM, AddExp1DenormE, AddExp1DenormM); - flopenrc #(12) EMRegAdd19(clk, reset, FlushM, ~StallM, AddExp2DenormE, AddExp2DenormM); - flopenrc #(11) EMRegAdd20(clk, reset, FlushM, ~StallM, AddExponentE, AddExponentM); - flopenrc #(3) EMRegAdd23(clk, reset, FlushM, ~StallM, AddRmE, AddRmM); - flopenrc #(4) EMRegAdd24(clk, reset, FlushM, ~StallM, AddOpTypeE, AddOpTypeM); - flopenrc #(1) EMRegAdd25(clk, reset, FlushM, ~StallM, AddPE, AddPM); - flopenrc #(1) 
EMRegAdd26(clk, reset, FlushM, ~StallM, AddOvEnE, AddOvEnM); - flopenrc #(1) EMRegAdd27(clk, reset, FlushM, ~StallM, AddUnEnE, AddUnEnM); + flopenrc #(11) EMRegAdd3(clk, reset, FlushM, ~StallM, AddExpPostSumE, AddExpPostSumM); + flopenrc #(64) EMRegAdd4(clk, reset, FlushM, ~StallM, AddFloat1E, AddFloat1M); + flopenrc #(64) EMRegAdd5(clk, reset, FlushM, ~StallM, AddFloat2E, AddFloat2M); + flopenrc #(12) EMRegAdd6(clk, reset, FlushM, ~StallM, AddExp1DenormE, AddExp1DenormM); + flopenrc #(12) EMRegAdd7(clk, reset, FlushM, ~StallM, AddExp2DenormE, AddExp2DenormM); + flopenrc #(11) EMRegAdd8(clk, reset, FlushM, ~StallM, AddExponentE, AddExponentM); + flopenrc #(15) EMRegAdd9(clk, reset, FlushM, ~StallM, + {AddSelInvE, AddCorrSignE, AddOp1NormE, AddOp2NormE, AddOpANormE, AddOpBNormE, AddInvalidE, AddDenormInE, AddConvertE, AddSwapE, AddNormOvflowE, AddSignAE}, + {AddSelInvM, AddCorrSignM, AddOp1NormM, AddOp2NormM, AddOpANormM, AddOpBNormM, AddInvalidM, AddDenormInM, AddConvertM, AddSwapM, AddNormOvflowM, AddSignAM}); + + flopenrc #(1) EMRegCmp1(clk, reset, FlushM, ~StallM, CmpNVE, CmpNVM); + flopenrc #(64) EMRegCmp2(clk, reset, FlushM, ~StallM, CmpResE, CmpResM); - //***************** - // fpcmp E/M pipe registers - //***************** - flopenrc #(1) EMRegCmp1(clk, reset, FlushM, ~StallM, CmpInvalidE, CmpInvalidM); - flopenrc #(64) EMRegCmp3(clk, reset, FlushM, ~StallM, FCmpResultE, FCmpResultM); + flopenrc #(64) EMRegSgn1(clk, reset, FlushM, ~StallM, SgnResE, SgnResM); + flopenrc #(1) EMRegSgn2(clk, reset, FlushM, ~StallM, SgnNVE, SgnNVM); - //***************** - // fpsgn E/M pipe registers - //***************** - flopenrc #(64) EMRegSgn2(clk, reset, FlushM, ~StallM, SgnResultE, SgnResultM); - flopenrc #(5) EMRegSgn3(clk, reset, FlushM, ~StallM, SgnFlagsE, SgnFlagsM); - - //***************** - // other E/M pipe registers - //***************** flopenrc #(22) EMCtrlReg(clk, reset, FlushM, ~StallM, {FWriteEnE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, RdE, FOpCtrlE, 
FWriteIntE}, {FWriteEnM, FResultSelM, FResSelM, FIntResSelM, FrmM, FmtM, RdM, FOpCtrlM, FWriteIntM}); + + flopenrc #(64) EMRegClass(clk, reset, FlushM, ~StallM, ClassResE, ClassResM); - //***************** - // fpuclassify E/M pipe registers - //***************** - flopenrc #(64) EMRegClass(clk, reset, FlushM, ~StallM, ClassResultE, ClassResultM); - + + + + + + + //BEGIN MEMORY STAGE - mux3 #(64) FResMux(AlignedSrcAM, SgnResultM, FCmpResultM, FResSelM, FResM); - assign FFlgM = CmpInvalidM & FResSelM[1]; + mux3 #(64) FResMux(AlignedSrcAM, SgnResM, CmpResM, FResSelM, FResM); + mux3 #(1) FFlgMux(1'b0, SgnNVM, CmpNVM, FResSelM, FFlgM); + //***change to mux assign SrcXMAligned = FmtM ? SrcXM[63:64-`XLEN] : {{`XLEN-32{1'b0}}, SrcXM[63:32]}; - mux3 #(`XLEN) IntResMux(FCmpResultM[`XLEN-1:0], SrcXMAligned, ClassResultM[`XLEN-1:0], FIntResSelM, FIntResM); + mux3 #(`XLEN) IntResMux(CmpResM[`XLEN-1:0], SrcXMAligned, ClassResM[`XLEN-1:0], FIntResSelM, FIntResM); // second instance of two-stage FMA unit - fma2 fma2(.X(SrcXM), .Y(SrcYM), .Z(SrcZM), .FOpCtrlM(FOpCtrlM[2:0]), .*); + fma2 fma2(.X(SrcXM), .Y(SrcYM), .Z(SrcZM), .FOpCtrlM(FOpCtrlM[2:0]), .FrmM, .FmtM, + .ProdManM, .AlignedAddendM, .ProdExpM, .AddendStickyM, .KillProdM, + .XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, .ZInfM, .XNaNM, .YNaNM, .ZNaNM, + .FMAResM, .FMAFlgM); // second instance of two-stage floating-point add/cvt unit - fpuaddcvt2 fpadd2 (.*); + fpuaddcvt2 fpadd2 (.FrmM, .FOpCtrlM, .FmtM, .AddSumM, .AddSumTcM, .AddFloat1M, .AddFloat2M, + .AddExp1DenormM, .AddExp2DenormM, .AddExponentM, .AddExpPostSumM, .AddSelInvM, + .AddOp1NormM, .AddOp2NormM, .AddOpANormM, .AddOpBNormM, .AddInvalidM, .AddDenormInM, + .AddSignAM, .AddCorrSignM, .AddConvertM, .AddSwapM, .FAddResM, .FAddFlgM); // Align SrcA to MSB when single precicion mux2 #(64) SrcAMux({SrcAM[31:0], 32'b0}, {{64-`XLEN{1'b0}}, SrcAM}, FmtM, AlignedSrcAM); - + + + + + + + //***************** - //fpregfile M/W pipe registers + // M/W pipe registers 
//***************** - flopenrc #(64) MWFpReg1(clk, reset, FlushW, ~StallW, SrcXM, SrcXW); - flopenrc #(64) MWFpReg2(clk, reset, FlushW, ~StallW, SrcYM, SrcYW); + flopenrc #(64) MWRegFma1(clk, reset, FlushW, ~StallW, FMAResM, FMAResW); + flopenrc #(5) MWRegFma2(clk, reset, FlushW, ~StallW, FMAFlgM, FMAFlgW); - //***************** - // fma M/W pipe registers - //***************** - flopenrc #(64) MWRegFma1(clk, reset, FlushW, ~StallW, FmaResultM, FmaResultW); - flopenrc #(5) MWRegFma2(clk, reset, FlushW, ~StallW, FmaFlagsM, FmaFlagsW); - - //***************** - // fpdiv M/W pipe registers - //***************** flopenrc #(64) MWRegDiv1(clk, reset, FlushW, ~StallW, FDivResultM, FDivResultW); - flopenrc #(5) MWRegDiv2(clk, reset, FlushW, ~StallW, FDivFlagsM, FDivFlagsW); - flopenrc #(1) MWRegDiv3(clk, reset, FlushW, ~StallW, DivDenormM, DivDenormW); + flopenrc #(5) MWRegDiv2(clk, reset, FlushW, ~StallW, FDivSqrtFlgM, FDivSqrtFlgW); - //***************** - // fpadd M/W pipe registers - //***************** - flopenrc #(64) MWRegAdd1(clk, reset, FlushW, ~StallW, FAddResultM, FAddResultW); - flopenrc #(5) MWRegAdd2(clk, reset, FlushW, ~StallW, FAddFlagsM, FAddFlagsW); + flopenrc #(64) MWRegAdd1(clk, reset, FlushW, ~StallW, FAddResM, FAddResW); + flopenrc #(5) MWRegAdd2(clk, reset, FlushW, ~StallW, FAddFlgM, FAddFlgW); - //***************** - // fpcmp M/W pipe registers - //***************** - flopenrc #(1) MWRegCmp1(clk, reset, FlushW, ~StallW, CmpInvalidM, CmpInvalidW); - // flopenrc #(2) MWRegCmp2(clk, reset, FlushW, ~StallW, CmpFCCM, CmpFCCW); - flopenrc #(64) MWRegCmp3(clk, reset, FlushW, ~StallW, FCmpResultM, FCmpResultW); + flopenrc #(1) MWRegCmp1(clk, reset, FlushW, ~StallW, CmpNVM, CmpNVW); + flopenrc #(64) MWRegCmp3(clk, reset, FlushW, ~StallW, CmpResM, CmpResW); + + flopenrc #(64) MWRegClass2(clk, reset, FlushW, ~StallW, FResM, FResW); + flopenrc #(1) MWRegClass1(clk, reset, FlushW, ~StallW, FFlgM, FFlgW); - //***************** - // fpsgn M/W pipe registers - 
//***************** - flopenrc #(64) MWRegSgn1(clk, reset, FlushW, ~StallW, SgnResultM, SgnResultW); - flopenrc #(5) MWRegSgn2(clk, reset, FlushW, ~StallW, SgnFlagsM, SgnFlagsW); - - //***************** - // other M/W pipe registers - //***************** flopenrc #(11) MWCtrlReg(clk, reset, FlushW, ~StallW, {FWriteEnM, FResultSelM, RdM, FmtM, FWriteIntM}, {FWriteEnW, FResultSelW, RdW, FmtW, FWriteIntW}); - //***************** - // fpuclassify M/W pipe registers - //***************** - flopenrc #(64) MWRegClass(clk, reset, FlushW, ~StallW, ClassResultM, ClassResultW); - flopenrc #(64) MWRegClass2(clk, reset, FlushW, ~StallW, FResM, FResW); - flopenrc #(1) MWRegClass1(clk, reset, FlushW, ~StallW, FFlgM, FFlgW); @@ -385,13 +362,13 @@ module fpu ( - +//***turn into muxs always_comb begin case (FResultSelW) 3'b000 : FPUFlagsW = 5'b0; - 3'b001 : FPUFlagsW = FmaFlagsW; - 3'b010 : FPUFlagsW = FAddFlagsW; - 3'b011 : FPUFlagsW = FDivFlagsW; + 3'b001 : FPUFlagsW = FMAFlgW; + 3'b010 : FPUFlagsW = FAddFlgW; + 3'b011 : FPUFlagsW = FDivSqrtFlgW; 3'b100 : FPUFlagsW = {4'b0,FFlgW}; default : FPUFlagsW = 5'bxxxxx; endcase @@ -400,8 +377,8 @@ module fpu ( always_comb begin case (FResultSelW) 3'b000 : FPUResult64W = FmtW ? {ReadDataW, {64-`XLEN{1'b0}}} : {ReadDataW[31:0], 32'b0}; - 3'b001 : FPUResult64W = FmaResultW; - 3'b010 : FPUResult64W = FAddResultW; + 3'b001 : FPUResult64W = FMAResW; + 3'b010 : FPUResult64W = FAddResW; 3'b011 : FPUResult64W = FDivResultW; 3'b100 : FPUResult64W = FResW; default : FPUResult64W = 64'bxxxxx; @@ -415,7 +392,9 @@ module fpu ( // define offsets for LSB zero extension or truncation always_comb begin // zero extension +//***turn into mux FPUResultW = FmtW ? 
FPUResult64W[63:64-`XLEN] : {{`XLEN-32{1'b0}}, FPUResult64W[63:32]}; + //*** put into mem stage SetFflagsM = FPUFlagsW; end diff --git a/wally-pipelined/src/fpu/fpuaddcvt1.sv b/wally-pipelined/src/fpu/fpuaddcvt1.sv index 8f045dcd..1b86b198 100755 --- a/wally-pipelined/src/fpu/fpuaddcvt1.sv +++ b/wally-pipelined/src/fpu/fpuaddcvt1.sv @@ -183,11 +183,11 @@ module fpuaddcvt1 (AddSumE, AddSumTcE, AddSelInvE, AddExpPostSumE, AddCorrSignE, assign AddCorrSignE = ~FOpCtrlE[2]&~FOpCtrlE[1]&FOpCtrlE[0]&AddSwapE; // 64-bit Mantissa Adder/Subtractor - cla64 add1 (AddSumE, mantissaA3, mantissaB3, sub); + cla64 add1 (AddSumE, mantissaA3, mantissaB3, sub); //***adder // 64-bit Mantissa Subtractor - to get the two's complement of the // result when the sign from the adder/subtractor is negative. - cla_sub64 sub1 (AddSumTcE, mantissaB3, mantissaA3); + cla_sub64 sub1 (AddSumTcE, mantissaB3, mantissaA3); //***adder // Finds normal underflow result to determine whether to round final exponent down //***KEP used to be (AddSumE == 16'h0) I am unsure what it's supposed to be diff --git a/wally-pipelined/src/fpu/fpuaddcvt2.sv b/wally-pipelined/src/fpu/fpuaddcvt2.sv index 46eac200..1fe8ac65 100755 --- a/wally-pipelined/src/fpu/fpuaddcvt2.sv +++ b/wally-pipelined/src/fpu/fpuaddcvt2.sv @@ -27,7 +27,7 @@ // -module fpuaddcvt2 (FAddResultM, FAddFlagsM, AddDenormM, AddSumM, AddSumTcM, AddSelInvM, AddExpPostSumM, AddCorrSignM, AddOp1NormM, AddOp2NormM, AddOpANormM, AddOpBNormM, AddInvalidM, AddDenormInM, AddConvertM, AddSwapM, AddSignAM, AddFloat1M, AddFloat2M, AddExp1DenormM, AddExp2DenormM, AddExponentM, FrmM, FOpCtrlM, FmtM); +module fpuaddcvt2 (FAddResM, FAddFlgM, AddSumM, AddSumTcM, AddSelInvM, AddExpPostSumM, AddCorrSignM, AddOp1NormM, AddOp2NormM, AddOpANormM, AddOpBNormM, AddInvalidM, AddDenormInM, AddConvertM, AddSwapM, AddSignAM, AddFloat1M, AddFloat2M, AddExp1DenormM, AddExp2DenormM, AddExponentM, FrmM, FOpCtrlM, FmtM); input [2:0] FrmM; // Rounding mode - specify values input [3:0] 
FOpCtrlM; // Function opcode @@ -51,9 +51,9 @@ module fpuaddcvt2 (FAddResultM, FAddFlagsM, AddDenormM, AddSumM, AddSumTcM, AddS input AddSwapM; // input AddNormOvflowM; - output [63:0] FAddResultM; // Result of operation - output [4:0] FAddFlagsM; // IEEE exception flags - output AddDenormM; // AddDenormM on input or output + output [63:0] FAddResM; // Result of operation + output [4:0] FAddFlgM; // IEEE exception flags + wire AddDenormM; // AddDenormM on input or output wire P; assign P = ~FmtM | FOpCtrlM[2]; @@ -145,7 +145,7 @@ module fpuaddcvt2 (FAddResultM, FAddFlagsM, AddDenormM, AddSumM, AddSumTcM, AddS // exactly where the rounding point is. The rounding units also // handles special cases and set the exception flags. - // Changed DenormIO -> AddDenormM and FlagsIn -> FAddFlagsM in order to + // Changed DenormIO -> AddDenormM and FlagsIn -> FAddFlgM in order to // help in processor reservation station detection of load/stores. In // other words, the processor would like to know ahead of time that // if the result is an exception then don't load or store. @@ -155,8 +155,8 @@ module fpuaddcvt2 (FAddResultM, FAddFlagsM, AddDenormM, AddSumM, AddSumTcM, AddS AddNormOvflowM, normal_underflow, AddSwapM, FOpCtrlM, AddSumM); // Store the final result and the exception flags in registers. 
- assign FAddResultM = Result; - assign {AddDenormM, FAddFlagsM} = {DenormIO, FlagsIn}; + assign FAddResM = Result; + assign {AddDenormM, FAddFlgM} = {DenormIO, FlagsIn}; endmodule // fpadd diff --git a/wally-pipelined/src/fpu/fpuclassify.sv b/wally-pipelined/src/fpu/fpuclassify.sv deleted file mode 100644 index b320b2f0..00000000 --- a/wally-pipelined/src/fpu/fpuclassify.sv +++ /dev/null @@ -1,50 +0,0 @@ - -`include "wally-config.vh" - -module fpuclassify ( - input logic [63:0] SrcXE, - input logic FmtE, // 0-single 1-double - output logic [63:0] ClassResultE - ); - - logic [31:0] single; - logic [63:0] double; - logic sign; - logic infinity, NaN, zero, normal, subnormal; - logic ExpNotZero, ExpOnes, ManNotZero, ExpZero, ManZero, FirstBitMan; - - // single and double precision layouts - assign single = SrcXE[63:32]; - assign double = SrcXE; - assign sign = SrcXE[63]; - - // basic calculations for readabillity - assign ExpNotZero = FmtE ? |double[62:52] : |single[30:23]; - assign ExpZero = ~ExpNotZero; - assign ExpOnes = FmtE ? &double[62:52] : &single[30:23]; - assign ManNotZero = FmtE ? |double[51:0] : |single[22:0]; - assign ManZero = ~ManNotZero; - assign FirstBitMan = FmtE ? 
double[51] : single[22]; - - // determine the type of number - assign NaN = ExpOnes & ManNotZero; - assign infinity = ExpOnes & ManZero; - assign zero = ExpZero & ManZero; - assign subnormal= ExpZero & ManNotZero; - assign normal = ExpNotZero; - - // determine sub category and combine into the result - // bit 0 - -infinity - // bit 1 - -normal - // bit 2 - -subnormal - // bit 3 - -zero - // bit 4 - +zero - // bit 5 - +subnormal - // bit 6 - +normal - // bit 7 - +infinity - // bit 8 - signaling NaN - // bit 9 - quiet NaN - assign ClassResultE = {{54{1'b0}}, FirstBitMan&NaN, ~FirstBitMan&NaN, ~sign&infinity, ~sign&normal, - ~sign&subnormal, ~sign&zero, sign&zero, sign&subnormal, sign&normal, sign&infinity}; - -endmodule diff --git a/wally-pipelined/src/fpu/fpucmp1.sv b/wally-pipelined/src/fpu/fpucmp1.sv deleted file mode 100755 index 3a8245e6..00000000 --- a/wally-pipelined/src/fpu/fpucmp1.sv +++ /dev/null @@ -1,465 +0,0 @@ - -// -// File name : fpcomp.v -// Title : Floating-Point Comparator -// project : FPU -// Library : fpcomp -// Author(s) : James E. Stine -// Purpose : definition of main unit to floating-point comparator -// notes : -// -// Copyright Oklahoma State University -// -// Floating Point Comparator (Algorithm) -// -// 1.) Performs sign-extension if the inputs are 32-bit integers. -// 2.) Perform a magnitude comparison on the lower 63 bits of the inputs -// 3.) Check for special cases (+0=-0, unordered, and infinite values) -// and correct for sign bits -// -// This module takes 64-bits inputs op1 and op2, VSS, and VDD -// signals, and a 2-bit signal FOpCtrlE that indicates the type of -// operands being compared as indicated below. 
-// FOpCtrlE Description -// 00 double precision numbers -// 01 single precision numbers -// 10 half precision numbers -// 11 (unused) -// -// The comparator produces a 2-bit signal FCC, which -// indicates the result of the comparison: -// -// fcc decscription -// 00 A = B -// 01 A < B -// 10 A > B -// 11 A and B are unordered (i.e., A or B is NaN) -// -// It also produces an invalid operation flag, which is one -// if either of the input operands is a signaling NaN per 754 - -`include "wally-config.vh" -module fpucmp1 ( - input logic [63:0] op1, - input logic [63:0] op2, - input logic [2:0] FOpCtrlE, - input logic FmtE, - - - output logic Invalid, // Invalid Operation - // output logic [1:0] FCC, // Condition Codes - output logic [63:0] FCmpResultE); - // Perform magnitude comparison between the 63 least signficant bits - // of the input operands. Only LT and EQ are returned, since GT can - // be determined from these values. - logic [1:0] FCC; // Condition Codes - logic [7:0] w, x; - logic ANaN, BNaN; - logic Azero, Bzero; - logic LT; // magnitude op1 < magnitude op2 - logic EQ; // magnitude op1 = magnitude op2 - - magcompare64b_1 magcomp1 (w, x, {~op1[63], op1[62:0]}, {~op2[63], op2[62:0]}); - - // Determine final values based on output of magnitude comparison, - // sign bits, and special case testing. - exception_cmp_1 exc1 (ANaN, BNaN, Azero, Bzero, op1, op2, FOpCtrlE); - - // Perform magnitude comparison between the 63 least signficant bits - // of the input operands. Only LT and EQ are returned, since GT can - // be determined from these values. - magcompare64b_2 magcomp2 (LT, EQ, w, x); - - // Determine final values based on output of magnitude comparison, - // sign bits, and special case testing. 
- exception_cmp_2 exc2 (.invalid(Invalid), .fcc(FCC), .LT_mag(LT), .EQ_mag(EQ), .ANaN(ANaN), .BNaN(BNaN), .Azero(Azero), .Bzero(Bzero), .FOpCtrlE(FOpCtrlE), .A(op1), .B(op2), .*); - -endmodule // fpcomp - -// module magcompare2b (LT, GT, A, B); - -// input logic [1:0] A; -// input logic [1:0] B; - -// output logic LT; -// output logic GT; - -// // Determine if A < B using a minimized sum-of-products expression -// assign LT = ~A[1]&B[1] | ~A[1]&~A[0]&B[0] | ~A[0]&B[1]&B[0]; -// // Determine if A > B using a minimized sum-of-products expression -// assign GT = A[1]&~B[1] | A[1]&A[0]&~B[0] | A[0]&~B[1]&~B[0]; - -// endmodule // magcompare2b - -// 2-bit magnitude comparator -// This module compares two 2-bit values A and B. LT is '1' if A < B -// and GT is '1'if A > B. LT and GT are both '0' if A = B. However, -// this version actually incorporates don't cares into the equation to -// simplify the optimization - -module magcompare2c (LT, GT, A, B); - - input logic [1:0] A; - input logic [1:0] B; - - output logic LT; - output logic GT; - - assign LT = B[1] | (!A[1]&B[0]); - assign GT = A[1] | (!B[1]&A[0]); - -endmodule // magcompare2b - -// This module compares two 64-bit values A and B. LT is '1' if A < B -// and EQ is '1'if A = B. LT and GT are both '0' if A > B. -// This structure was modified so -// that it only does a strict magnitdude comparison, and only -// returns flags for less than (LT) and eqaual to (EQ). It uses a tree -// of 63 2-bit magnitude comparators, followed by one OR gates. -// -// J. E. Stine and M. J. Schulte, "A combined two's complement and -// floating-point comparator," 2005 IEEE International Symposium on -// Circuits and Systems, Kobe, 2005, pp. 89-92 Vol. 1. 
-// doi: 10.1109/ISCAS.2005.1464531 - -module magcompare64b_1 (w, x, A, B); - - input logic [63:0] A; - input logic [63:0] B; - - logic [31:0] s; - logic [31:0] t; - logic [15:0] u; - logic [15:0] v; - output logic [7:0] w; - output logic [7:0] x; - - magcompare2b mag1(s[0], t[0], A[1:0], B[1:0]); - magcompare2b mag2(s[1], t[1], A[3:2], B[3:2]); - magcompare2b mag3(s[2], t[2], A[5:4], B[5:4]); - magcompare2b mag4(s[3], t[3], A[7:6], B[7:6]); - magcompare2b mag5(s[4], t[4], A[9:8], B[9:8]); - magcompare2b mag6(s[5], t[5], A[11:10], B[11:10]); - magcompare2b mag7(s[6], t[6], A[13:12], B[13:12]); - magcompare2b mag8(s[7], t[7], A[15:14], B[15:14]); - magcompare2b mag9(s[8], t[8], A[17:16], B[17:16]); - magcompare2b magA(s[9], t[9], A[19:18], B[19:18]); - magcompare2b magB(s[10], t[10], A[21:20], B[21:20]); - magcompare2b magC(s[11], t[11], A[23:22], B[23:22]); - magcompare2b magD(s[12], t[12], A[25:24], B[25:24]); - magcompare2b magE(s[13], t[13], A[27:26], B[27:26]); - magcompare2b magF(s[14], t[14], A[29:28], B[29:28]); - magcompare2b mag10(s[15], t[15], A[31:30], B[31:30]); - magcompare2b mag11(s[16], t[16], A[33:32], B[33:32]); - magcompare2b mag12(s[17], t[17], A[35:34], B[35:34]); - magcompare2b mag13(s[18], t[18], A[37:36], B[37:36]); - magcompare2b mag14(s[19], t[19], A[39:38], B[39:38]); - magcompare2b mag15(s[20], t[20], A[41:40], B[41:40]); - magcompare2b mag16(s[21], t[21], A[43:42], B[43:42]); - magcompare2b mag17(s[22], t[22], A[45:44], B[45:44]); - magcompare2b mag18(s[23], t[23], A[47:46], B[47:46]); - magcompare2b mag19(s[24], t[24], A[49:48], B[49:48]); - magcompare2b mag1A(s[25], t[25], A[51:50], B[51:50]); - magcompare2b mag1B(s[26], t[26], A[53:52], B[53:52]); - magcompare2b mag1C(s[27], t[27], A[55:54], B[55:54]); - magcompare2b mag1D(s[28], t[28], A[57:56], B[57:56]); - magcompare2b mag1E(s[29], t[29], A[59:58], B[59:58]); - magcompare2b mag1F(s[30], t[30], A[61:60], B[61:60]); - magcompare2b mag20(s[31], t[31], A[63:62], B[63:62]); - - 
magcompare2c mag21(u[0], v[0], t[1:0], s[1:0]); - magcompare2c mag22(u[1], v[1], t[3:2], s[3:2]); - magcompare2c mag23(u[2], v[2], t[5:4], s[5:4]); - magcompare2c mag24(u[3], v[3], t[7:6], s[7:6]); - magcompare2c mag25(u[4], v[4], t[9:8], s[9:8]); - magcompare2c mag26(u[5], v[5], t[11:10], s[11:10]); - magcompare2c mag27(u[6], v[6], t[13:12], s[13:12]); - magcompare2c mag28(u[7], v[7], t[15:14], s[15:14]); - magcompare2c mag29(u[8], v[8], t[17:16], s[17:16]); - magcompare2c mag2A(u[9], v[9], t[19:18], s[19:18]); - magcompare2c mag2B(u[10], v[10], t[21:20], s[21:20]); - magcompare2c mag2C(u[11], v[11], t[23:22], s[23:22]); - magcompare2c mag2D(u[12], v[12], t[25:24], s[25:24]); - magcompare2c mag2E(u[13], v[13], t[27:26], s[27:26]); - magcompare2c mag2F(u[14], v[14], t[29:28], s[29:28]); - magcompare2c mag30(u[15], v[15], t[31:30], s[31:30]); - - magcompare2c mag31(w[0], x[0], v[1:0], u[1:0]); - magcompare2c mag32(w[1], x[1], v[3:2], u[3:2]); - magcompare2c mag33(w[2], x[2], v[5:4], u[5:4]); - magcompare2c mag34(w[3], x[3], v[7:6], u[7:6]); - magcompare2c mag35(w[4], x[4], v[9:8], u[9:8]); - magcompare2c mag36(w[5], x[5], v[11:10], u[11:10]); - magcompare2c mag37(w[6], x[6], v[13:12], u[13:12]); - magcompare2c mag38(w[7], x[7], v[15:14], u[15:14]); - -endmodule // magcompare64b - -// This module takes 64-bits inputs A and B, two magnitude comparison -// flags LT_mag and EQ_mag, and a 2-bit signal FOpCtrlE that indicates the type of -// operands being compared as indicated below. -// FOpCtrlE Description -// 00 double precision numbers -// 01 single precision numbers -// 10 half precision numbers -// 11 bfloat precision numbers -// -// The comparator produces a 2-bit signal fcc, which -// indicates the result of the comparison as follows: -// fcc decscription -// 00 A = B -// 01 A < B -// 10 A > B -// 11 A and B are unordered (i.e., A or B is NaN) -// It also produces a invalid operation flag, which is one -// if either of the input operands is a signaling NaN. 
- -module exception_cmp_1 (ANaN, BNaN, Azero, Bzero, A, B, FOpCtrlE); - - input logic [63:0] A; - input logic [63:0] B; - input logic [2:0] FOpCtrlE; - - logic dp, sp, hp; - - output logic ANaN; - output logic BNaN; - output logic Azero; - output logic Bzero; - - assign dp = !FOpCtrlE[1]&!FOpCtrlE[0]; - assign sp = !FOpCtrlE[1]&FOpCtrlE[0]; - assign hp = FOpCtrlE[1]&!FOpCtrlE[0]; - - // Test if A or B is NaN. - assign ANaN = (A[62]&A[61]&A[60]&A[59]&A[58]) & - ((sp&A[57]&A[56]&A[55]&(A[54]|A[53])) | - (dp&A[57]&A[56]&A[55]&A[54]&A[53]&A[52]&(A[51]|A[50])) | - (hp&(A[57]|A[56]))); - - assign BNaN = (B[62]&B[61]&B[60]&B[59]&B[58]) & - ((sp&B[57]&B[56]&B[55]&(B[54]|B[53])) | - (dp&B[57]&B[56]&B[55]&B[54]&B[53]&B[52]&(B[51]|B[50])) | - (hp&(B[57]|B[56]))); - - // Test if A is +0 or -0 when viewed as a floating point number (i.e, - // the 63 least siginficant bits of A are zero). - // Depending on how this synthesizes, it may work better to replace - // this with assign Azero = ~(A[62] | A[61] | ... | A[0]) - assign Azero = (A[62:0] == 63'h0); - assign Bzero = (B[62:0] == 63'h0); - -endmodule // exception_cmp -// -// File name : fpcomp.v -// Title : Floating-Point Comparator -// project : FPU -// Library : fpcomp -// Author(s) : James E. Stine -// Purpose : definition of main unit to floating-point comparator -// notes : -// -// Copyright Oklahoma State University -// -// Floating Point Comparator (Algorithm) -// -// 1.) Performs sign-extension if the inputs are 32-bit integers. -// 2.) Perform a magnitude comparison on the lower 63 bits of the inputs -// 3.) Check for special cases (+0=-0, unordered, and infinite values) -// and correct for sign bits -// -// This module takes 64-bits inputs op1 and op2, VSS, and VDD -// signals, and a 2-bit signal FOpCtrlE that indicates the type of -// operands being compared as indicated below. 
-// FOpCtrlE Description -// 00 double precision numbers -// 01 single precision numbers -// 10 half precision numbers -// 11 (unused) -// -// The comparator produces a 2-bit signal FCC, which -// indicates the result of the comparison: -// -// fcc decscription -// 00 A = B -// 01 A < B -// 10 A > B -// 11 A and B are unordered (i.e., A or B is NaN) -// -// It also produces an invalid operation flag, which is one -// if either of the input operands is a signaling NaN per 754 - - -/*module magcompare2b (LT, GT, A, B); - - input logic [1:0] A; - input logic [1:0] B; - - output logic LT; - output logic GT; - - // Determine if A < B using a minimized sum-of-products expression - assign LT = ~A[1]&B[1] | ~A[1]&~A[0]&B[0] | ~A[0]&B[1]&B[0]; - // Determine if A > B using a minimized sum-of-products expression - assign GT = A[1]&~B[1] | A[1]&A[0]&~B[0] | A[0]&~B[1]&~B[0]; - -endmodule*/ // magcompare2b - -// 2-bit magnitude comparator -// This module compares two 2-bit values A and B. LT is '1' if A < B -// and GT is '1'if A > B. LT and GT are both '0' if A = B. However, -// this version actually incorporates don't cares into the equation to -// simplify the optimization - -// module magcompare2c (LT, GT, A, B); - -// input logic [1:0] A; -// input logic [1:0] B; - -// output logic LT; -// output logic GT; - -// assign LT = B[1] | (!A[1]&B[0]); -// assign GT = A[1] | (!B[1]&A[0]); - -// endmodule // magcompare2b - -// This module compares two 64-bit values A and B. LT is '1' if A < B -// and EQ is '1'if A = B. LT and GT are both '0' if A > B. -// This structure was modified so -// that it only does a strict magnitdude comparison, and only -// returns flags for less than (LT) and eqaual to (EQ). It uses a tree -// of 63 2-bit magnitude comparators, followed by one OR gates. -// -// J. E. Stine and M. J. Schulte, "A combined two's complement and -// floating-point comparator," 2005 IEEE International Symposium on -// Circuits and Systems, Kobe, 2005, pp. 89-92 Vol. 1. 
-// doi: 10.1109/ISCAS.2005.1464531 - -module magcompare64b_2 (LT, EQ, w, x); - - input logic [7:0] w; - input logic [7:0] x; - logic [3:0] y; - logic [3:0] z; - logic [1:0] a; - logic [1:0] b; - logic GT; - - output logic LT; - output logic EQ; - - magcompare2c mag39(y[0], z[0], x[1:0], w[1:0]); - magcompare2c mag3A(y[1], z[1], x[3:2], w[3:2]); - magcompare2c mag3B(y[2], z[2], x[5:4], w[5:4]); - magcompare2c mag3C(y[3], z[3], x[7:6], w[7:6]); - - magcompare2c mag3D(a[0], b[0], z[1:0], y[1:0]); - magcompare2c mag3E(a[1], b[1], z[3:2], y[3:2]); - - magcompare2c mag3F(LT, GT, b[1:0], a[1:0]); - - assign EQ = ~(LT | GT); - -endmodule // magcompare64b - -// This module takes 64-bits inputs A and B, two magnitude comparison -// flags LT_mag and EQ_mag, and a 2-bit signal FOpCtrlE that indicates the type of -// operands being compared as indicated below. -// FOpCtrlE Description -// 00 double precision numbers -// 01 single precision numbers -// 10 half precision numbers -// 11 bfloat precision numbers -// -// The comparator produces a 2-bit signal fcc, which -// indicates the result of the comparison as follows: -// fcc decscription -// 00 A = B -// 01 A < B -// 10 A > B -// 11 A and B are unordered (i.e., A or B is NaN) -// It also produces a invalid operation flag, which is one -// if either of the input operands is a signaling NaN. 
- -module exception_cmp_2 ( - input logic [63:0] A, - input logic [63:0] B, - input logic FmtE, - input logic LT_mag, - input logic EQ_mag, - input logic [2:0] FOpCtrlE, - - output logic invalid, - output logic [1:0] fcc, - output logic [63:0] FCmpResultE, - - input logic Azero, - input logic Bzero, - input logic ANaN, - input logic BNaN); - - logic dp; - logic sp; - logic hp; - logic ASNaN; - logic BSNaN; - logic UO; - logic GT; - logic LT; - logic EQ; - logic [62:0] sixtythreezeros = 63'h0; - - assign dp = !FOpCtrlE[1]&!FOpCtrlE[0]; - assign sp = !FOpCtrlE[1]&FOpCtrlE[0]; - assign hp = FOpCtrlE[1]&!FOpCtrlE[0]; - - // Values are unordered if ((A is NaN) OR (B is NaN)) AND (a floating - // point comparison is being performed. - assign UO = (ANaN | BNaN); - - // Test if A or B is a signaling NaN. - assign ASNaN = ANaN & (sp&~A[53] | dp&~A[50] | hp&~A[56]); - assign BSNaN = BNaN & (sp&~B[53] | dp&~B[50] | hp&~B[56]); - - // If either A or B is a signaling NaN the "Invalid Operation" - // exception flag is set to one; otherwise it is zero. - assign invalid = (ASNaN | BSNaN); - - // A and B are equal if (their magnitudes are equal) AND ((their signs are - // equal) or (their magnitudes are zero AND they are floating point - // numbers)). Also, A and B are not equal if they are unordered. - assign EQ = (EQ_mag | (Azero&Bzero)) & (~UO); - - // A is less than B if (A is negative and B is posiive) OR - // (A and B are positive and the magnitude of A is less than - // the magnitude of B) or (A and B are negative integers and - // the magnitude of A is less than the magnitude of B) or - // (A and B are negative floating point numbers and - // the magnitude of A is greater than the magnitude of B). - // Also, A is not less than B if A and B are equal or unordered. - assign LT = ((~LT_mag & A[63] & B[63]) | - (LT_mag & ~(A[63] & B[63])))&~EQ&~UO; - - // A is greater than B when LT, EQ, and UO are are false. 
- assign GT = ~(LT | EQ | UO); - - // Note: it may be possible to optimize the setting of fcc - // a little more, but it is probably not worth the effort. - - // Set the bits of fcc based on LT, GT, EQ, and UO - assign fcc[0] = LT | UO; - assign fcc[1] = GT | UO; - - always_comb begin - case (FOpCtrlE[2:0]) - 3'b111: FCmpResultE = LT ? A : B;//min - 3'b101: FCmpResultE = GT ? A : B;//max - 3'b010: FCmpResultE = {63'b0, EQ};//equal - 3'b001: FCmpResultE = {63'b0, LT};//less than - 3'b011: FCmpResultE = {63'b0, LT|EQ};//less than or equal - default: FCmpResultE = 64'b0; - endcase - end - -endmodule // exception_cmp diff --git a/wally-pipelined/src/fpu/fpucmp2.sv b/wally-pipelined/src/fpu/fpucmp2.sv deleted file mode 100755 index ee14afb9..00000000 --- a/wally-pipelined/src/fpu/fpucmp2.sv +++ /dev/null @@ -1,243 +0,0 @@ -// // -// // File name : fpcomp.v -// // Title : Floating-Point Comparator -// // project : FPU -// // Library : fpcomp -// // Author(s) : James E. Stine -// // Purpose : definition of main unit to floating-point comparator -// // notes : -// // -// // Copyright Oklahoma State University -// // -// // Floating Point Comparator (Algorithm) -// // -// // 1.) Performs sign-extension if the inputs are 32-bit integers. -// // 2.) Perform a magnitude comparison on the lower 63 bits of the inputs -// // 3.) Check for special cases (+0=-0, unordered, and infinite values) -// // and correct for sign bits -// // -// // This module takes 64-bits inputs op1 and op2, VSS, and VDD -// // signals, and a 2-bit signal Sel that indicates the type of -// // operands being compared as indicated below. 
-// // Sel Description -// // 00 double precision numbers -// // 01 single precision numbers -// // 10 half precision numbers -// // 11 (unused) -// // -// // The comparator produces a 2-bit signal FCC, which -// // indicates the result of the comparison: -// // -// // fcc decscription -// // 00 A = B -// // 01 A < B -// // 10 A > B -// // 11 A and B are unordered (i.e., A or B is NaN) -// // -// // It also produces an invalid operation flag, which is one -// // if either of the input operands is a signaling NaN per 754 - -// module fpucmp2 ( -// input logic [63:0] op1, -// input logic [63:0] op2, -// input logic [1:0] Sel, -// input logic [7:0] w, x, -// input logic ANaN, BNaN, -// input logic Azero, Bzero, -// input logic [3:0] FOpCtrlM, -// input logic FmtM, - -// output logic Invalid, // Invalid Operation -// output logic [1:0] FCC, // Condition Codes -// output logic [63:0] FCmpResultM); - -// logic LT; // magnitude op1 < magnitude op2 -// logic EQ; // magnitude op1 = magnitude op2 - -// // Perform magnitude comparison between the 63 least signficant bits -// // of the input operands. Only LT and EQ are returned, since GT can -// // be determined from these values. -// magcompare64b_2 magcomp2 (LT, EQ, w, x); - -// // Determine final values based on output of magnitude comparison, -// // sign bits, and special case testing. 
-// exception_cmp_2 exc2 (.invalid(Invalid), .fcc(FCC), .LT_mag(LT), .EQ_mag(EQ), .ANaN(ANaN), .BNaN(BNaN), .Azero(Azero), .Bzero(Bzero), .Sel(Sel), .A(op1), .B(op2), .*); - - -// endmodule // fpcomp - -// /*module magcompare2b (LT, GT, A, B); - -// input logic [1:0] A; -// input logic [1:0] B; - -// output logic LT; -// output logic GT; - -// // Determine if A < B using a minimized sum-of-products expression -// assign LT = ~A[1]&B[1] | ~A[1]&~A[0]&B[0] | ~A[0]&B[1]&B[0]; -// // Determine if A > B using a minimized sum-of-products expression -// assign GT = A[1]&~B[1] | A[1]&A[0]&~B[0] | A[0]&~B[1]&~B[0]; - -// endmodule*/ // magcompare2b - -// // 2-bit magnitude comparator -// // This module compares two 2-bit values A and B. LT is '1' if A < B -// // and GT is '1'if A > B. LT and GT are both '0' if A = B. However, -// // this version actually incorporates don't cares into the equation to -// // simplify the optimization - -// // module magcompare2c (LT, GT, A, B); - -// // input logic [1:0] A; -// // input logic [1:0] B; - -// // output logic LT; -// // output logic GT; - -// // assign LT = B[1] | (!A[1]&B[0]); -// // assign GT = A[1] | (!B[1]&A[0]); - -// // endmodule // magcompare2b - -// // This module compares two 64-bit values A and B. LT is '1' if A < B -// // and EQ is '1'if A = B. LT and GT are both '0' if A > B. -// // This structure was modified so -// // that it only does a strict magnitdude comparison, and only -// // returns flags for less than (LT) and eqaual to (EQ). It uses a tree -// // of 63 2-bit magnitude comparators, followed by one OR gates. -// // -// // J. E. Stine and M. J. Schulte, "A combined two's complement and -// // floating-point comparator," 2005 IEEE International Symposium on -// // Circuits and Systems, Kobe, 2005, pp. 89-92 Vol. 1. 
-// // doi: 10.1109/ISCAS.2005.1464531 - -// module magcompare64b_2 (LT, EQ, w, x); - -// input logic [7:0] w; -// input logic [7:0] x; -// logic [3:0] y; -// logic [3:0] z; -// logic [1:0] a; -// logic [1:0] b; -// logic GT; - -// output logic LT; -// output logic EQ; - -// magcompare2c mag39(y[0], z[0], x[1:0], w[1:0]); -// magcompare2c mag3A(y[1], z[1], x[3:2], w[3:2]); -// magcompare2c mag3B(y[2], z[2], x[5:4], w[5:4]); -// magcompare2c mag3C(y[3], z[3], x[7:6], w[7:6]); - -// magcompare2c mag3D(a[0], b[0], z[1:0], y[1:0]); -// magcompare2c mag3E(a[1], b[1], z[3:2], y[3:2]); - -// magcompare2c mag3F(LT, GT, b[1:0], a[1:0]); - -// assign EQ = ~(LT | GT); - -// endmodule // magcompare64b - -// // This module takes 64-bits inputs A and B, two magnitude comparison -// // flags LT_mag and EQ_mag, and a 2-bit signal Sel that indicates the type of -// // operands being compared as indicated below. -// // Sel Description -// // 00 double precision numbers -// // 01 single precision numbers -// // 10 half precision numbers -// // 11 bfloat precision numbers -// // -// // The comparator produces a 2-bit signal fcc, which -// // indicates the result of the comparison as follows: -// // fcc decscription -// // 00 A = B -// // 01 A < B -// // 10 A > B -// // 11 A and B are unordered (i.e., A or B is NaN) -// // It also produces a invalid operation flag, which is one -// // if either of the input operands is a signaling NaN. 
- -// module exception_cmp_2 ( -// input logic [63:0] A, -// input logic [63:0] B, -// input logic FmtM, -// input logic LT_mag, -// input logic EQ_mag, -// input logic [1:0] Sel, -// input logic [3:0] FOpCtrlM, - -// output logic invalid, -// output logic [1:0] fcc, -// output logic [63:0] FCmpResultM, - -// input logic Azero, -// input logic Bzero, -// input logic ANaN, -// input logic BNaN); - -// logic dp; -// logic sp; -// logic hp; -// logic ASNaN; -// logic BSNaN; -// logic UO; -// logic GT; -// logic LT; -// logic EQ; -// logic [62:0] sixtythreezeros = 63'h0; - -// assign dp = !Sel[1]&!Sel[0]; -// assign sp = !Sel[1]&Sel[0]; -// assign hp = Sel[1]&!Sel[0]; - -// // Values are unordered if ((A is NaN) OR (B is NaN)) AND (a floating -// // point comparison is being performed. -// assign UO = (ANaN | BNaN); - -// // Test if A or B is a signaling NaN. -// assign ASNaN = ANaN & (sp&~A[53] | dp&~A[50] | hp&~A[56]); -// assign BSNaN = BNaN & (sp&~B[53] | dp&~B[50] | hp&~B[56]); - -// // If either A or B is a signaling NaN the "Invalid Operation" -// // exception flag is set to one; otherwise it is zero. -// assign invalid = (ASNaN | BSNaN); - -// // A and B are equal if (their magnitudes are equal) AND ((their signs are -// // equal) or (their magnitudes are zero AND they are floating point -// // numbers)). Also, A and B are not equal if they are unordered. -// assign EQ = (EQ_mag | (Azero&Bzero)) & (~UO); - -// // A is less than B if (A is negative and B is posiive) OR -// // (A and B are positive and the magnitude of A is less than -// // the magnitude of B) or (A and B are negative integers and -// // the magnitude of A is less than the magnitude of B) or -// // (A and B are negative floating point numbers and -// // the magnitude of A is greater than the magnitude of B). -// // Also, A is not less than B if A and B are equal or unordered. 
-// assign LT = ((~LT_mag & A[63] & B[63]) | -// (LT_mag & ~(A[63] & B[63])))&~EQ&~UO; - -// // A is greater than B when LT, EQ, and UO are are false. -// assign GT = ~(LT | EQ | UO); - -// // Note: it may be possible to optimize the setting of fcc -// // a little more, but it is probably not worth the effort. - -// // Set the bits of fcc based on LT, GT, EQ, and UO -// assign fcc[0] = LT | UO; -// assign fcc[1] = GT | UO; - -// always_comb begin -// case (FOpCtrlM[2:0]) -// 3'b111: FCmpResultM = LT ? A : B;//min -// 3'b101: FCmpResultM = GT ? A : B;//max -// 3'b010: FCmpResultM = FmtM ? {63'b0, EQ} : {31'b0, EQ, 32'b0};//equal -// 3'b001: FCmpResultM = FmtM ? {63'b0, LT} : {31'b0, LT, 32'b0};//less than -// 3'b011: FCmpResultM = FmtM ? {63'b0, LT|EQ} : {31'b0, LT|EQ, 32'b0};//less than or equal -// default: FCmpResultM = 64'b0; -// endcase -// end - - -// endmodule // exception_cmp diff --git a/wally-pipelined/src/fpu/fpuhazard.sv b/wally-pipelined/src/fpu/fpuhazard.sv deleted file mode 100644 index 4d0895a7..00000000 --- a/wally-pipelined/src/fpu/fpuhazard.sv +++ /dev/null @@ -1,67 +0,0 @@ -/////////////////////////////////////////// -// fpuhazard.sv -// -// Written: me@KatherineParry.com 19 May 2021 -// Modified: -// -// Purpose: Determine forwarding, stalls and flushes for the FPU -// -// A component of the Wally configurable RISC-V project. 
-// -// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University -// -// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, -// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software -// is furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT -// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-/////////////////////////////////////////// - -`include "wally-config.vh" - -module fpuhazard( - input logic [4:0] Adr1E, Adr2E, Adr3E, - input logic FWriteEnM, FWriteEnW, - input logic [4:0] RdM, RdW, - input logic [2:0] FResultSelM, - output logic FStallD, - output logic [1:0] ForwardXE, ForwardYE, ForwardZE -); - - - always_comb begin - // set ReadData as default - ForwardXE = 2'b00; // choose FRD1E - ForwardYE = 2'b00; // choose FRD2E - ForwardZE = 2'b00; // choose FRD3E - FStallD = 0; - - if ((Adr1E == RdM) & FWriteEnM) - // if the result will be FResM - if(FResultSelM == 3'b100) ForwardXE = 2'b10; // choose FResM - else FStallD = 1; // if the result won't be ready stall - else if ((Adr1E == RdW) & FWriteEnW) ForwardXE = 2'b01; // choose FPUResult64W - - - if ((Adr2E == RdM) & FWriteEnM) - // if the result will be FResM - if(FResultSelM == 3'b100) ForwardYE = 2'b10; // choose FResM - else FStallD = 1; // if the result won't be ready stall - else if ((Adr2E == RdW) & FWriteEnW) ForwardYE = 2'b01; // choose FPUResult64W - - - if ((Adr3E == RdM) & FWriteEnM) - // if the result will be FResM - if(FResultSelM == 3'b100) ForwardZE = 2'b10; // choose FResM - else FStallD = 1; // if the result won't be ready stall - else if ((Adr3E == RdW) & FWriteEnW) ForwardZE = 2'b01; // choose FPUResult64W - - end - -endmodule diff --git a/wally-pipelined/src/fpu/freg.sv b/wally-pipelined/src/fpu/freg.sv deleted file mode 100755 index b7e16713..00000000 --- a/wally-pipelined/src/fpu/freg.sv +++ /dev/null @@ -1,515 +0,0 @@ - -`include "wally-config.vh" -// `include "../../config/rv64icfd/wally-config.vh" //debug - -module freg1adr ( - input logic FmtW, - input logic reset, - input logic clear, - input logic clk, - input logic [4:0] rd, - input logic write, - input logic [4:0] adr1, - input logic [`XLEN-1:0] writeData, - output logic [`XLEN-1:0] readData); - - //note - not word aligning based on precision of - //operation (FmtW) - - //reg number should remain static, but it doesn't 
hurt - //to parameterize - parameter numRegs = 32; - - //intermediary signals - useful for debugging - //and easy instatiation of generated modules - logic [`XLEN-1:0] [numRegs-1:0] regInput; - logic [`XLEN-1:0] [numRegs-1:0] regOutput; - - //generate fp registers themselves - genvar i; - generate - for (i = 0; i < numRegs; i = i + 1) begin:register - - floprc #(`XLEN) freg[i](.clk(clk), .reset(reset), .clear(clear), .d(regInput[i][`XLEN-1:0]), .q(regOutput[i][`XLEN-1:0])); - end - - endgenerate - - //this could be done with: - // - //assign readData = regOutput[adr1]; - // - //but always_comb allows for finer control - - - //address decoder - //only 1 for this fp register set - //used with fpsign - //defaults to outputting zeroes - always_comb begin - case(adr1) - 5'b00000 : readData = regOutput[0]; - 5'b00001 : readData = regOutput[1]; - 5'b00010 : readData = regOutput[2]; - 5'b00011 : readData = regOutput[3]; - 5'b00100 : readData = regOutput[4]; - 5'b00101 : readData = regOutput[5]; - 5'b00110 : readData = regOutput[6]; - 5'b00111 : readData = regOutput[7]; - 5'b01000 : readData = regOutput[8]; - 5'b01001 : readData = regOutput[9]; - 5'b01010 : readData = regOutput[10]; - 5'b01011 : readData = regOutput[11]; - 5'b01100 : readData = regOutput[12]; - 5'b01101 : readData = regOutput[13]; - 5'b01110 : readData = regOutput[14]; - 5'b01111 : readData = regOutput[15]; - 5'b10000 : readData = regOutput[16]; - 5'b10001 : readData = regOutput[17]; - 5'b10010 : readData = regOutput[18]; - 5'b10011 : readData = regOutput[19]; - 5'b10100 : readData = regOutput[20]; - 5'b10101 : readData = regOutput[21]; - 5'b10110 : readData = regOutput[22]; - 5'b10111 : readData = regOutput[23]; - 5'b11000 : readData = regOutput[24]; - 5'b11001 : readData = regOutput[25]; - 5'b11010 : readData = regOutput[26]; - 5'b11011 : readData = regOutput[27]; - 5'b11100 : readData = regOutput[28]; - 5'b11101 : readData = regOutput[29]; - 5'b11110 : readData = regOutput[30]; - 5'b11111 : readData = 
regOutput[31]; - default : readData = `XLEN'h0; - endcase - end - - //destination register decoder - //only change input values on write - //defaults to undefined with invalid address - // - //note - this is an intermediary signal, so - //this is not asynch assignment. FF in flopr - //will not update data until clk pulse - always_comb begin - if(write) begin - case(rd) - 5'b00000 : regInput[0] = writeData; - 5'b00001 : regInput[1] = writeData; - 5'b00010 : regInput[2] = writeData; - 5'b00011 : regInput[3] = writeData; - 5'b00100 : regInput[4] = writeData; - 5'b00101 : regInput[5] = writeData; - 5'b00110 : regInput[6] = writeData; - 5'b00111 : regInput[7] = writeData; - 5'b01000 : regInput[8] = writeData; - 5'b01000 : regInput[9] = writeData; - 5'b01001 : regInput[10] = writeData; - 5'b01010 : regInput[11] = writeData; - 5'b01111 : regInput[12] = writeData; - 5'b01101 : regInput[13] = writeData; - 5'b01110 : regInput[14] = writeData; - 5'b01111 : regInput[15] = writeData; - 5'b10000 : regInput[16] = writeData; - 5'b10001 : regInput[17] = writeData; - 5'b10010 : regInput[18] = writeData; - 5'b10011 : regInput[19] = writeData; - 5'b10100 : regInput[20] = writeData; - 5'b10101 : regInput[21] = writeData; - 5'b10110 : regInput[22] = writeData; - 5'b10111 : regInput[23] = writeData; - 5'b11000 : regInput[24] = writeData; - 5'b11000 : regInput[25] = writeData; - 5'b11001 : regInput[26] = writeData; - 5'b11010 : regInput[27] = writeData; - 5'b11111 : regInput[28] = writeData; - 5'b11101 : regInput[29] = writeData; - 5'b11110 : regInput[30] = writeData; - 5'b11111 : regInput[31] = writeData; - default : regInput[0] = `XLEN'hx; - endcase - end - end - -endmodule - -////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//******** -//formatting separation -//******** -////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -module freg2adr ( - 
input logic FmtW, - input logic reset, - input logic clear, - input logic clk, - input logic [4:0] rd, - input logic write, - input logic [4:0] adr1, - input logic [4:0] adr2, - input logic [`XLEN-1:0] writeData, - output logic [`XLEN-1:0] readData1, - output logic [`XLEN-1:0] readData2); - - //note - not word aligning based on precision of - //operation (FmtW) - - //reg number should remain static, but it doesn't hurt - //to parameterize - parameter numRegs = 32; - - //intermediary signals - useful for debugging - //and easy instatiation of generated modules - logic [`XLEN-1:0] [numRegs-1:0] regInput; - logic [`XLEN-1:0] [numRegs-1:0] regOutput; - - //generate fp registers themselves - genvar i; - generate - for (i = 0; i < numRegs; i = i + 1) begin:register - - floprc #(`XLEN) freg[i](.clk(clk), .reset(reset), .clear(clear), .d(regInput[i][`XLEN-1:0]), .q(regOutput[i][`XLEN-1:0])); - end - - endgenerate - - //address decoder - //2 are used for this fp register set - //used with fpadd/cvt, fpdiv/sqrt, and fpcmp - //defaults to outputting zeroes - always_comb begin - - //adderss 1 decoder - case(adr1) - 5'b00000 : readData1 = regOutput[0]; - 5'b00001 : readData1 = regOutput[1]; - 5'b00010 : readData1 = regOutput[2]; - 5'b00011 : readData1 = regOutput[3]; - 5'b00100 : readData1 = regOutput[4]; - 5'b00101 : readData1 = regOutput[5]; - 5'b00110 : readData1 = regOutput[6]; - 5'b00111 : readData1 = regOutput[7]; - 5'b01000 : readData1 = regOutput[8]; - 5'b01001 : readData1 = regOutput[9]; - 5'b01010 : readData1 = regOutput[10]; - 5'b01011 : readData1 = regOutput[11]; - 5'b01100 : readData1 = regOutput[12]; - 5'b01101 : readData1 = regOutput[13]; - 5'b01110 : readData1 = regOutput[14]; - 5'b01111 : readData1 = regOutput[15]; - 5'b10000 : readData1 = regOutput[16]; - 5'b10001 : readData1 = regOutput[17]; - 5'b10010 : readData1 = regOutput[18]; - 5'b10011 : readData1 = regOutput[19]; - 5'b10100 : readData1 = regOutput[20]; - 5'b10101 : readData1 = regOutput[21]; - 5'b10110 
: readData1 = regOutput[22]; - 5'b10111 : readData1 = regOutput[23]; - 5'b11000 : readData1 = regOutput[24]; - 5'b11001 : readData1 = regOutput[25]; - 5'b11010 : readData1 = regOutput[26]; - 5'b11011 : readData1 = regOutput[27]; - 5'b11100 : readData1 = regOutput[28]; - 5'b11101 : readData1 = regOutput[29]; - 5'b11110 : readData1 = regOutput[30]; - 5'b11111 : readData1 = regOutput[31]; - default : readData1 = `XLEN'h0; - endcase - - //address 2 decoder - case(adr2) - 5'b00000 : readData2 = regOutput[0]; - 5'b00001 : readData2 = regOutput[1]; - 5'b00010 : readData2 = regOutput[2]; - 5'b00011 : readData2 = regOutput[3]; - 5'b00100 : readData2 = regOutput[4]; - 5'b00101 : readData2 = regOutput[5]; - 5'b00110 : readData2 = regOutput[6]; - 5'b00111 : readData2 = regOutput[7]; - 5'b01000 : readData2 = regOutput[8]; - 5'b01001 : readData2 = regOutput[9]; - 5'b01010 : readData2 = regOutput[10]; - 5'b01011 : readData2 = regOutput[11]; - 5'b01100 : readData2 = regOutput[12]; - 5'b01101 : readData2 = regOutput[13]; - 5'b01110 : readData2 = regOutput[14]; - 5'b01111 : readData2 = regOutput[15]; - 5'b10000 : readData2 = regOutput[16]; - 5'b10001 : readData2 = regOutput[17]; - 5'b10010 : readData2 = regOutput[18]; - 5'b10011 : readData2 = regOutput[19]; - 5'b10100 : readData2 = regOutput[20]; - 5'b10101 : readData2 = regOutput[21]; - 5'b10110 : readData2 = regOutput[22]; - 5'b10111 : readData2 = regOutput[23]; - 5'b11000 : readData2 = regOutput[24]; - 5'b11001 : readData2 = regOutput[25]; - 5'b11010 : readData2 = regOutput[26]; - 5'b11011 : readData2 = regOutput[27]; - 5'b11100 : readData2 = regOutput[28]; - 5'b11101 : readData2 = regOutput[29]; - 5'b11110 : readData2 = regOutput[30]; - 5'b11111 : readData2 = regOutput[31]; - default : readData2 = `XLEN'h0; - endcase - end - - //destination register decoder - //only change input values on write - //defaults to undefined with invalid address - // - //note - this is an intermediary signal, so - //this is not asynch assignment. 
FF in flopr - //will not update data until clk pulse - always_comb begin - if(write) begin - case(rd) - 5'b00000 : regInput[0] = writeData; - 5'b00001 : regInput[1] = writeData; - 5'b00010 : regInput[2] = writeData; - 5'b00011 : regInput[3] = writeData; - 5'b00100 : regInput[4] = writeData; - 5'b00101 : regInput[5] = writeData; - 5'b00110 : regInput[6] = writeData; - 5'b00111 : regInput[7] = writeData; - 5'b01000 : regInput[8] = writeData; - 5'b01000 : regInput[9] = writeData; - 5'b01001 : regInput[10] = writeData; - 5'b01010 : regInput[11] = writeData; - 5'b01111 : regInput[12] = writeData; - 5'b01101 : regInput[13] = writeData; - 5'b01110 : regInput[14] = writeData; - 5'b01111 : regInput[15] = writeData; - 5'b10000 : regInput[16] = writeData; - 5'b10001 : regInput[17] = writeData; - 5'b10010 : regInput[18] = writeData; - 5'b10011 : regInput[19] = writeData; - 5'b10100 : regInput[20] = writeData; - 5'b10101 : regInput[21] = writeData; - 5'b10110 : regInput[22] = writeData; - 5'b10111 : regInput[23] = writeData; - 5'b11000 : regInput[24] = writeData; - 5'b11000 : regInput[25] = writeData; - 5'b11001 : regInput[26] = writeData; - 5'b11010 : regInput[27] = writeData; - 5'b11111 : regInput[28] = writeData; - 5'b11101 : regInput[29] = writeData; - 5'b11110 : regInput[30] = writeData; - 5'b11111 : regInput[31] = writeData; - default : regInput[0] = `XLEN'hx; - endcase - end - end - -endmodule - -////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//******** -//formatting separation -//******** -///////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -module freg3adr ( - input logic FmtW, - input logic reset, - input logic clear, - input logic clk, - input logic [4:0] rd, - input logic write, - input logic [4:0] adr1, - input logic [4:0] adr2, - input logic [4:0] adr3, - input logic [`XLEN-1:0] writeData, - output logic [`XLEN-1:0] 
readData1, - output logic [`XLEN-1:0] readData2, - output logic [`XLEN-1:0] readData3); - - //note - not word aligning based on precision of - //operation (FmtW) - - //reg number should remain static, but it doesn't hurt - //to parameterize - parameter numRegs = 32; - - //intermediary signals - useful for debugging - //and easy instatiation of generated modules - logic [numRegs-1:0] [`XLEN-1:0] regInput; - logic [numRegs-1:0] [`XLEN-1:0] regOutput; - - //generate fp registers themselves - genvar i; - generate - for (i = 0; i < numRegs; i = i + 1) begin:register - - floprc #(`XLEN) freg(.clk(clk), .reset(reset), .clear(clear), .d(regInput[i][`XLEN-1:0]), .q(regOutput[i][`XLEN-1:0])); - end - - endgenerate - - //address decoder - //3 are used for this fp register set - //used exclusively for fma - //defaults to outputting zeroes - always_comb begin - - //adderss 1 decoder - case(adr1) - 5'b00000 : readData1 = regOutput[0]; - 5'b00001 : readData1 = regOutput[1]; - 5'b00010 : readData1 = regOutput[2]; - 5'b00011 : readData1 = regOutput[3]; - 5'b00100 : readData1 = regOutput[4]; - 5'b00101 : readData1 = regOutput[5]; - 5'b00110 : readData1 = regOutput[6]; - 5'b00111 : readData1 = regOutput[7]; - 5'b01000 : readData1 = regOutput[8]; - 5'b01001 : readData1 = regOutput[9]; - 5'b01010 : readData1 = regOutput[10]; - 5'b01011 : readData1 = regOutput[11]; - 5'b01100 : readData1 = regOutput[12]; - 5'b01101 : readData1 = regOutput[13]; - 5'b01110 : readData1 = regOutput[14]; - 5'b01111 : readData1 = regOutput[15]; - 5'b10000 : readData1 = regOutput[16]; - 5'b10001 : readData1 = regOutput[17]; - 5'b10010 : readData1 = regOutput[18]; - 5'b10011 : readData1 = regOutput[19]; - 5'b10100 : readData1 = regOutput[20]; - 5'b10101 : readData1 = regOutput[21]; - 5'b10110 : readData1 = regOutput[22]; - 5'b10111 : readData1 = regOutput[23]; - 5'b11000 : readData1 = regOutput[24]; - 5'b11001 : readData1 = regOutput[25]; - 5'b11010 : readData1 = regOutput[26]; - 5'b11011 : readData1 = 
regOutput[27]; - 5'b11100 : readData1 = regOutput[28]; - 5'b11101 : readData1 = regOutput[29]; - 5'b11110 : readData1 = regOutput[30]; - 5'b11111 : readData1 = regOutput[31]; - default : readData1 = `XLEN'h0; - endcase - - //address 2 decoder - case(adr2) - 5'b00000 : readData2 = regOutput[0]; - 5'b00001 : readData2 = regOutput[1]; - 5'b00010 : readData2 = regOutput[2]; - 5'b00011 : readData2 = regOutput[3]; - 5'b00100 : readData2 = regOutput[4]; - 5'b00101 : readData2 = regOutput[5]; - 5'b00110 : readData2 = regOutput[6]; - 5'b00111 : readData2 = regOutput[7]; - 5'b01000 : readData2 = regOutput[8]; - 5'b01001 : readData2 = regOutput[9]; - 5'b01010 : readData2 = regOutput[10]; - 5'b01011 : readData2 = regOutput[11]; - 5'b01100 : readData2 = regOutput[12]; - 5'b01101 : readData2 = regOutput[13]; - 5'b01110 : readData2 = regOutput[14]; - 5'b01111 : readData2 = regOutput[15]; - 5'b10000 : readData2 = regOutput[16]; - 5'b10001 : readData2 = regOutput[17]; - 5'b10010 : readData2 = regOutput[18]; - 5'b10011 : readData2 = regOutput[19]; - 5'b10100 : readData2 = regOutput[20]; - 5'b10101 : readData2 = regOutput[21]; - 5'b10110 : readData2 = regOutput[22]; - 5'b10111 : readData2 = regOutput[23]; - 5'b11000 : readData2 = regOutput[24]; - 5'b11001 : readData2 = regOutput[25]; - 5'b11010 : readData2 = regOutput[26]; - 5'b11011 : readData2 = regOutput[27]; - 5'b11100 : readData2 = regOutput[28]; - 5'b11101 : readData2 = regOutput[29]; - 5'b11110 : readData2 = regOutput[30]; - 5'b11111 : readData2 = regOutput[31]; - default : readData2 = `XLEN'h0; - endcase - - //address 3 decoder - case(adr3) - 5'b00000 : readData3 = regOutput[0]; - 5'b00001 : readData3 = regOutput[1]; - 5'b00010 : readData3 = regOutput[2]; - 5'b00011 : readData3 = regOutput[3]; - 5'b00100 : readData3 = regOutput[4]; - 5'b00101 : readData3 = regOutput[5]; - 5'b00110 : readData3 = regOutput[6]; - 5'b00111 : readData3 = regOutput[7]; - 5'b01000 : readData3 = regOutput[8]; - 5'b01001 : readData3 = regOutput[9]; - 
5'b01010 : readData3 = regOutput[10]; - 5'b01011 : readData3 = regOutput[11]; - 5'b01100 : readData3 = regOutput[12]; - 5'b01101 : readData3 = regOutput[13]; - 5'b01110 : readData3 = regOutput[14]; - 5'b01111 : readData3 = regOutput[15]; - 5'b10000 : readData3 = regOutput[16]; - 5'b10001 : readData3 = regOutput[17]; - 5'b10010 : readData3 = regOutput[18]; - 5'b10011 : readData3 = regOutput[19]; - 5'b10100 : readData3 = regOutput[20]; - 5'b10101 : readData3 = regOutput[21]; - 5'b10110 : readData3 = regOutput[22]; - 5'b10111 : readData3 = regOutput[23]; - 5'b11000 : readData3 = regOutput[24]; - 5'b11001 : readData3 = regOutput[25]; - 5'b11010 : readData3 = regOutput[26]; - 5'b11011 : readData3 = regOutput[27]; - 5'b11100 : readData3 = regOutput[28]; - 5'b11101 : readData3 = regOutput[29]; - 5'b11110 : readData3 = regOutput[30]; - 5'b11111 : readData3 = regOutput[31]; - default : readData3 = `XLEN'h0; - endcase - end - - //destination register decoder - //only change input values on write - //defaults to undefined with invalid address - // - //note - this is an intermediary signal, so - //this is not asynch assignment. 
FF in flopr - //will not update data until clk pulse - always_comb begin - if(write) begin - case(rd) - 5'b00000 : regInput[0] = writeData; - 5'b00001 : regInput[1] = writeData; - 5'b00010 : regInput[2] = writeData; - 5'b00011 : regInput[3] = writeData; - 5'b00100 : regInput[4] = writeData; - 5'b00101 : regInput[5] = writeData; - 5'b00110 : regInput[6] = writeData; - 5'b00111 : regInput[7] = writeData; - 5'b01000 : regInput[8] = writeData; - 5'b01001 : regInput[9] = writeData; - 5'b01010 : regInput[10] = writeData; - 5'b01011 : regInput[11] = writeData; - 5'b01100 : regInput[12] = writeData; - 5'b01101 : regInput[13] = writeData; - 5'b01110 : regInput[14] = writeData; - 5'b01111 : regInput[15] = writeData; - 5'b10000 : regInput[16] = writeData; - 5'b10001 : regInput[17] = writeData; - 5'b10010 : regInput[18] = writeData; - 5'b10011 : regInput[19] = writeData; - 5'b10100 : regInput[20] = writeData; - 5'b10101 : regInput[21] = writeData; - 5'b10110 : regInput[22] = writeData; - 5'b10111 : regInput[23] = writeData; - 5'b11000 : regInput[24] = writeData; - 5'b11001 : regInput[25] = writeData; - 5'b11010 : regInput[26] = writeData; - 5'b11011 : regInput[27] = writeData; - 5'b11100 : regInput[28] = writeData; - 5'b11101 : regInput[29] = writeData; - 5'b11110 : regInput[30] = writeData; - 5'b11111 : regInput[31] = writeData; - default : regInput[0] = `XLEN'hx; - endcase - end - end - -endmodule diff --git a/wally-pipelined/src/fpu/fsgn.sv b/wally-pipelined/src/fpu/fsgn.sv index 62d0e7d7..7df9386c 100755 --- a/wally-pipelined/src/fpu/fsgn.sv +++ b/wally-pipelined/src/fpu/fsgn.sv @@ -1,13 +1,12 @@ //performs the fsgnj/fsgnjn/fsgnjx RISCV instructions -module fpusgn (SgnOpCodeE, SgnResultE, SgnFlagsE, SrcXE, SrcYE); +module fsgn ( + input logic [63:0] SrcXE, SrcYE, + input logic [1:0] SgnOpCodeE, + output logic [63:0] SgnResE, + output logic SgnNVE); - input [63:0] SrcXE, SrcYE; - input [1:0] SgnOpCodeE; - output [63:0] SgnResultE; - output [4:0] SgnFlagsE; - - wire 
AonesExp; + logic AonesExp; //op code designation: // @@ -16,8 +15,8 @@ module fpusgn (SgnOpCodeE, SgnResultE, SgnFlagsE, SrcXE, SrcYE); //10 - fsgnjx - XOR sign values of SrcXE & SrcYE // - assign SgnResultE[63] = SgnOpCodeE[1] ? (SrcXE[63] ^ SrcYE[63]) : (SrcYE[63] ^ SgnOpCodeE[0]); - assign SgnResultE[62:0] = SrcXE[62:0]; + assign SgnResE[63] = SgnOpCodeE[1] ? (SrcXE[63] ^ SrcYE[63]) : (SrcYE[63] ^ SgnOpCodeE[0]); + assign SgnResE[62:0] = SrcXE[62:0]; //If the exponent is all ones, then the value is either Inf or NaN, //both of which will produce a QNaN/SNaN value of some sort. This will @@ -26,6 +25,6 @@ module fpusgn (SgnOpCodeE, SgnResultE, SgnFlagsE, SrcXE, SrcYE); //the only flag that can occur during this operation is invalid //due to changing sign on already existing NaN - assign SgnFlagsE = {AonesExp & SgnResultE[63], 1'b0, 1'b0, 1'b0, 1'b0}; + assign SgnNVE = AonesExp & SgnResE[63]; endmodule diff --git a/wally-pipelined/src/fpu/ling_bk13.sv b/wally-pipelined/src/fpu/ling_bk13.sv deleted file mode 100755 index a35c7a8f..00000000 --- a/wally-pipelined/src/fpu/ling_bk13.sv +++ /dev/null @@ -1,89 +0,0 @@ -// Brent-Kung Prefix Adder - -module ling_bk13 (cout, sum, a, b, cin); - input [12:0] a, b; - input cin; - output [12:0] sum; - output cout; - - wire [13:0] p,g; - wire [13:1] h,c; - -// pre-computation - assign p={a|b,1'b1}; - assign g={a&b, cin}; - -// prefix tree - ling_brent_kung prefix_tree(h, c, p[12:0], g[12:0]); - -// post-computation - assign h[13]=g[13]|c[13]; - assign sum=p[13:1]^h|g[13:1]&c; - assign cout=p[13]&h[13]; - -endmodule - -module ling_brent_kung (h, c, p, g); - - input [12:0] p; - input [13:0] g; - output [13:1] h; - output [13:1] c; - - - // parallel-prefix, Brent-Kung - - // Stage 1: Generates H/I pairs that span 1 bits - rgry g_1_0 (H_1_0, {g[1],g[0]}); - rblk b_3_2 (H_3_2, I_3_2, {g[3],g[2]}, {p[2],p[1]}); - rblk b_5_4 (H_5_4, I_5_4, {g[5],g[4]}, {p[4],p[3]}); - rblk b_7_6 (H_7_6, I_7_6, {g[7],g[6]}, {p[6],p[5]}); - rblk b_9_8 
(H_9_8, I_9_8, {g[9],g[8]}, {p[8],p[7]}); - rblk b_11_10 (H_11_10, I_11_10, {g[11],g[10]}, {p[10],p[9]}); - rblk b_13_12 (H_13_12, I_13_12, {g[13],g[12]}, {p[12],p[11]}); - - // Stage 2: Generates H/I pairs that span 2 bits - grey g_3_0 (H_3_0, {H_3_2,H_1_0}, I_3_2); - black b_7_4 (H_7_4, I_7_4, {H_7_6,H_5_4}, {I_7_6,I_5_4}); - black b_11_8 (H_11_8, I_11_8, {H_11_10,H_9_8}, {I_11_10,I_9_8}); - - // Stage 3: Generates H/I pairs that span 4 bits - grey g_7_0 (H_7_0, {H_7_4,H_3_0}, I_7_4); - - // Stage 4: Generates H/I pairs that span 8 bits - - // Stage 5: Generates H/I pairs that span 4 bits - grey g_11_0 (H_11_0, {H_11_8,H_7_0}, I_11_8); - - // Stage 6: Generates H/I pairs that span 2 bits - grey g_5_0 (H_5_0, {H_5_4,H_3_0}, I_5_4); - grey g_9_0 (H_9_0, {H_9_8,H_7_0}, I_9_8); - - // Last grey cell stage - grey g_2_0 (H_2_0, {g[2],H_1_0}, p[1]); - grey g_4_0 (H_4_0, {g[4],H_3_0}, p[3]); - grey g_6_0 (H_6_0, {g[6],H_5_0}, p[5]); - grey g_8_0 (H_8_0, {g[8],H_7_0}, p[7]); - grey g_10_0 (H_10_0, {g[10],H_9_0}, p[9]); - grey g_12_0 (H_12_0, {g[12],H_11_0}, p[11]); - - // Final Stage: Apply c_k+1=p_k&H_k_0 - assign c[1]=g[0]; - - assign h[1]=H_1_0; assign c[2]=p[1]&H_1_0; - assign h[2]=H_2_0; assign c[3]=p[2]&H_2_0; - assign h[3]=H_3_0; assign c[4]=p[3]&H_3_0; - assign h[4]=H_4_0; assign c[5]=p[4]&H_4_0; - assign h[5]=H_5_0; assign c[6]=p[5]&H_5_0; - assign h[6]=H_6_0; assign c[7]=p[6]&H_6_0; - assign h[7]=H_7_0; assign c[8]=p[7]&H_7_0; - assign h[8]=H_8_0; assign c[9]=p[8]&H_8_0; - - assign h[9]=H_9_0; assign c[10]=p[9]&H_9_0; - assign h[10]=H_10_0; assign c[11]=p[10]&H_10_0; - assign h[11]=H_11_0; assign c[12]=p[11]&H_11_0; - assign h[12]=H_12_0; assign c[13]=p[12]&H_12_0; - -endmodule - - diff --git a/wally-pipelined/src/fpu/lzd_denorm.sv b/wally-pipelined/src/fpu/lzd_denorm.sv index 21efbf5f..860a3381 100755 --- a/wally-pipelined/src/fpu/lzd_denorm.sv +++ b/wally-pipelined/src/fpu/lzd_denorm.sv @@ -168,3 +168,4 @@ module lz52 (ZP, ZV, B); endmodule // lz52 + diff 
--git a/wally-pipelined/src/fpu/mult_R4_64_64_cs.sv b/wally-pipelined/src/fpu/mult_R4_64_64_cs.sv old mode 100755 new mode 100644 diff --git a/wally-pipelined/src/fpu/rounder_denorm.sv b/wally-pipelined/src/fpu/rounder_denorm.sv index 70df0656..b6793594 100755 --- a/wally-pipelined/src/fpu/rounder_denorm.sv +++ b/wally-pipelined/src/fpu/rounder_denorm.sv @@ -115,11 +115,11 @@ module rounder (Result, DenormIO, Flags, rm, P, OvEn, assign B_12_overflow = {8'h0, 3'b0, normal_overflow}; assign B_12_underflow = {8'h0, 3'b0, normal_underflow}; - cla52 add1(Tmant, Cout, A[62:11], B); + cla52 add1(Tmant, Cout, A[62:11], B); //***adder - cla12 add1_exp(Texp_addone, Cout_overflow, Texp, B_12_overflow); + cla12 add1_exp(Texp_addone, Cout_overflow, Texp, B_12_overflow); //***adder - cla_sub12 sub1_exp(Texp_subone, Texp, B_12_underflow); + cla_sub12 sub1_exp(Texp_subone, Texp, B_12_underflow); //***adder // Now that rounding is done, we compute the final exponent // and test for special cases. diff --git a/wally-pipelined/src/fpu/sbtm_a4.sv b/wally-pipelined/src/fpu/sbtm_a4.sv deleted file mode 100755 index 7ffe4c61..00000000 --- a/wally-pipelined/src/fpu/sbtm_a4.sv +++ /dev/null @@ -1,204 +0,0 @@ -module sbtm_a4 (input logic [7:0] a, - output logic [13:0] y); - always_comb - case(a) - 8'b01000000: y = 14'b10110100010111; - 8'b01000001: y = 14'b10110010111111; - 8'b01000010: y = 14'b10110001101000; - 8'b01000011: y = 14'b10110000010011; - 8'b01000100: y = 14'b10101111000001; - 8'b01000101: y = 14'b10101101110000; - 8'b01000110: y = 14'b10101100100001; - 8'b01000111: y = 14'b10101011010011; - 8'b01001000: y = 14'b10101010000111; - 8'b01001001: y = 14'b10101000111101; - 8'b01001010: y = 14'b10100111110100; - 8'b01001011: y = 14'b10100110101101; - 8'b01001100: y = 14'b10100101100111; - 8'b01001101: y = 14'b10100100100010; - 8'b01001110: y = 14'b10100011011111; - 8'b01001111: y = 14'b10100010011101; - 8'b01010000: y = 14'b10100001011100; - 8'b01010001: y = 14'b10100000011100; - 
8'b01010010: y = 14'b10011111011110; - 8'b01010011: y = 14'b10011110100001; - 8'b01010100: y = 14'b10011101100100; - 8'b01010101: y = 14'b10011100101001; - 8'b01010110: y = 14'b10011011101111; - 8'b01010111: y = 14'b10011010110110; - 8'b01011000: y = 14'b10011001111110; - 8'b01011001: y = 14'b10011001000110; - 8'b01011010: y = 14'b10011000010000; - 8'b01011011: y = 14'b10010111011011; - 8'b01011100: y = 14'b10010110100110; - 8'b01011101: y = 14'b10010101110011; - 8'b01011110: y = 14'b10010101000000; - 8'b01011111: y = 14'b10010100001110; - 8'b01100000: y = 14'b10010011011100; - 8'b01100001: y = 14'b10010010101100; - 8'b01100010: y = 14'b10010001111100; - 8'b01100011: y = 14'b10010001001101; - 8'b01100100: y = 14'b10010000011111; - 8'b01100101: y = 14'b10001111110001; - 8'b01100110: y = 14'b10001111000100; - 8'b01100111: y = 14'b10001110011000; - 8'b01101000: y = 14'b10001101101100; - 8'b01101001: y = 14'b10001101000001; - 8'b01101010: y = 14'b10001100010110; - 8'b01101011: y = 14'b10001011101100; - 8'b01101100: y = 14'b10001011000011; - 8'b01101101: y = 14'b10001010011010; - 8'b01101110: y = 14'b10001001110010; - 8'b01101111: y = 14'b10001001001010; - 8'b01110000: y = 14'b10001000100011; - 8'b01110001: y = 14'b10000111111101; - 8'b01110010: y = 14'b10000111010111; - 8'b01110011: y = 14'b10000110110001; - 8'b01110100: y = 14'b10000110001100; - 8'b01110101: y = 14'b10000101100111; - 8'b01110110: y = 14'b10000101000011; - 8'b01110111: y = 14'b10000100011111; - 8'b01111000: y = 14'b10000011111100; - 8'b01111001: y = 14'b10000011011001; - 8'b01111010: y = 14'b10000010110111; - 8'b01111011: y = 14'b10000010010101; - 8'b01111100: y = 14'b10000001110011; - 8'b01111101: y = 14'b10000001010010; - 8'b01111110: y = 14'b10000000110001; - 8'b01111111: y = 14'b10000000010001; - 8'b10000000: y = 14'b01111111110001; - 8'b10000001: y = 14'b01111111010001; - 8'b10000010: y = 14'b01111110110010; - 8'b10000011: y = 14'b01111110010011; - 8'b10000100: y = 14'b01111101110101; - 
8'b10000101: y = 14'b01111101010110; - 8'b10000110: y = 14'b01111100111001; - 8'b10000111: y = 14'b01111100011011; - 8'b10001000: y = 14'b01111011111110; - 8'b10001001: y = 14'b01111011100001; - 8'b10001010: y = 14'b01111011000100; - 8'b10001011: y = 14'b01111010101000; - 8'b10001100: y = 14'b01111010001100; - 8'b10001101: y = 14'b01111001110000; - 8'b10001110: y = 14'b01111001010101; - 8'b10001111: y = 14'b01111000111010; - 8'b10010000: y = 14'b01111000011111; - 8'b10010001: y = 14'b01111000000100; - 8'b10010010: y = 14'b01110111101010; - 8'b10010011: y = 14'b01110111010000; - 8'b10010100: y = 14'b01110110110110; - 8'b10010101: y = 14'b01110110011101; - 8'b10010110: y = 14'b01110110000100; - 8'b10010111: y = 14'b01110101101011; - 8'b10011000: y = 14'b01110101010010; - 8'b10011001: y = 14'b01110100111001; - 8'b10011010: y = 14'b01110100100001; - 8'b10011011: y = 14'b01110100001001; - 8'b10011100: y = 14'b01110011110001; - 8'b10011101: y = 14'b01110011011010; - 8'b10011110: y = 14'b01110011000010; - 8'b10011111: y = 14'b01110010101011; - 8'b10100000: y = 14'b01110010010100; - 8'b10100001: y = 14'b01110001111110; - 8'b10100010: y = 14'b01110001100111; - 8'b10100011: y = 14'b01110001010001; - 8'b10100100: y = 14'b01110000111011; - 8'b10100101: y = 14'b01110000100101; - 8'b10100110: y = 14'b01110000001111; - 8'b10100111: y = 14'b01101111111010; - 8'b10101000: y = 14'b01101111100101; - 8'b10101001: y = 14'b01101111010000; - 8'b10101010: y = 14'b01101110111011; - 8'b10101011: y = 14'b01101110100110; - 8'b10101100: y = 14'b01101110010001; - 8'b10101101: y = 14'b01101101111101; - 8'b10101110: y = 14'b01101101101001; - 8'b10101111: y = 14'b01101101010101; - 8'b10110000: y = 14'b01101101000001; - 8'b10110001: y = 14'b01101100101101; - 8'b10110010: y = 14'b01101100011010; - 8'b10110011: y = 14'b01101100000110; - 8'b10110100: y = 14'b01101011110011; - 8'b10110101: y = 14'b01101011100000; - 8'b10110110: y = 14'b01101011001101; - 8'b10110111: y = 14'b01101010111010; - 
8'b10111000: y = 14'b01101010101000; - 8'b10111001: y = 14'b01101010010101; - 8'b10111010: y = 14'b01101010000011; - 8'b10111011: y = 14'b01101001110001; - 8'b10111100: y = 14'b01101001011111; - 8'b10111101: y = 14'b01101001001101; - 8'b10111110: y = 14'b01101000111100; - 8'b10111111: y = 14'b01101000101010; - 8'b11000000: y = 14'b01101000011001; - 8'b11000001: y = 14'b01101000000111; - 8'b11000010: y = 14'b01100111110110; - 8'b11000011: y = 14'b01100111100101; - 8'b11000100: y = 14'b01100111010100; - 8'b11000101: y = 14'b01100111000011; - 8'b11000110: y = 14'b01100110110011; - 8'b11000111: y = 14'b01100110100010; - 8'b11001000: y = 14'b01100110010010; - 8'b11001001: y = 14'b01100110000010; - 8'b11001010: y = 14'b01100101110010; - 8'b11001011: y = 14'b01100101100001; - 8'b11001100: y = 14'b01100101010010; - 8'b11001101: y = 14'b01100101000010; - 8'b11001110: y = 14'b01100100110010; - 8'b11001111: y = 14'b01100100100011; - 8'b11010000: y = 14'b01100100010011; - 8'b11010001: y = 14'b01100100000100; - 8'b11010010: y = 14'b01100011110101; - 8'b11010011: y = 14'b01100011100101; - 8'b11010100: y = 14'b01100011010110; - 8'b11010101: y = 14'b01100011000111; - 8'b11010110: y = 14'b01100010111001; - 8'b11010111: y = 14'b01100010101010; - 8'b11011000: y = 14'b01100010011011; - 8'b11011001: y = 14'b01100010001101; - 8'b11011010: y = 14'b01100001111110; - 8'b11011011: y = 14'b01100001110000; - 8'b11011100: y = 14'b01100001100010; - 8'b11011101: y = 14'b01100001010100; - 8'b11011110: y = 14'b01100001000110; - 8'b11011111: y = 14'b01100000111000; - 8'b11100000: y = 14'b01100000101010; - 8'b11100001: y = 14'b01100000011100; - 8'b11100010: y = 14'b01100000001111; - 8'b11100011: y = 14'b01100000000001; - 8'b11100100: y = 14'b01011111110100; - 8'b11100101: y = 14'b01011111100110; - 8'b11100110: y = 14'b01011111011001; - 8'b11100111: y = 14'b01011111001100; - 8'b11101000: y = 14'b01011110111111; - 8'b11101001: y = 14'b01011110110010; - 8'b11101010: y = 14'b01011110100101; - 
8'b11101011: y = 14'b01011110011000; - 8'b11101100: y = 14'b01011110001011; - 8'b11101101: y = 14'b01011101111110; - 8'b11101110: y = 14'b01011101110010; - 8'b11101111: y = 14'b01011101100101; - 8'b11110000: y = 14'b01011101011001; - 8'b11110001: y = 14'b01011101001100; - 8'b11110010: y = 14'b01011101000000; - 8'b11110011: y = 14'b01011100110100; - 8'b11110100: y = 14'b01011100101000; - 8'b11110101: y = 14'b01011100011100; - 8'b11110110: y = 14'b01011100010000; - 8'b11110111: y = 14'b01011100000100; - 8'b11111000: y = 14'b01011011111000; - 8'b11111001: y = 14'b01011011101100; - 8'b11111010: y = 14'b01011011100000; - 8'b11111011: y = 14'b01011011010101; - 8'b11111100: y = 14'b01011011001001; - 8'b11111101: y = 14'b01011010111101; - 8'b11111110: y = 14'b01011010110010; - 8'b11111111: y = 14'b01011010100111; - default: y = 14'bxxxxxxxxxxxxxx; - endcase // case (a) - -endmodule // sbtm_a0 - - - - \ No newline at end of file diff --git a/wally-pipelined/src/fpu/sk14.sv b/wally-pipelined/src/fpu/sk14.sv deleted file mode 100755 index 8d6aadb5..00000000 --- a/wally-pipelined/src/fpu/sk14.sv +++ /dev/null @@ -1,90 +0,0 @@ -// Sklansky Prefix Adder - -module sk14 (cout, sum, a, b, cin); - input [13:0] a, b; - input cin; - output [13:0] sum; - output cout; - - wire [14:0] p,g; - wire [13:0] c; - -// pre-computation - assign p={a^b,1'b0}; - assign g={a&b, cin}; - -// prefix tree - sklansky prefix_tree(c, p[13:0], g[13:0]); - -// post-computation - assign sum=p[14:1]^c; - assign cout=g[14]|(p[14]&c[13]); - -endmodule - -module sklansky (c, p, g); - - input [14:0] p; - input [14:0] g; - output [14:1] c; - - - // parallel-prefix, Sklansky - // Stage 1: Generates G/P pairs that span 1 bits - grey b_1_0 (G_1_0, {g[1],g[0]}, p[1]); - black b_3_2 (G_3_2, P_3_2, {g[3],g[2]}, {p[3],p[2]}); - black b_5_4 (G_5_4, P_5_4, {g[5],g[4]}, {p[5],p[4]}); - black b_7_6 (G_7_6, P_7_6, {g[7],g[6]}, {p[7],p[6]}); - black b_9_8 (G_9_8, P_9_8, {g[9],g[8]}, {p[9],p[8]}); - black b_11_10 (G_11_10, 
P_11_10, {g[11],g[10]}, {p[11],p[10]}); - black b_13_12 (G_13_12, P_13_12, {g[13],g[12]}, {p[13],p[12]}); - // Stage 2: Generates G/P pairs that span 2 bits - grey g_2_0 (G_2_0, {g[2],G_1_0}, p[2]); - grey g_3_0 (G_3_0, {G_3_2,G_1_0}, P_3_2); - black b_6_4 (G_6_4, P_6_4, {g[6],G_5_4}, {p[6],P_5_4}); - black b_7_4 (G_7_4, P_7_4, {G_7_6,G_5_4}, {P_7_6,P_5_4}); - black b_10_8 (G_10_8, P_10_8, {g[10],G_9_8}, {p[10],P_9_8}); - black b_11_8 (G_11_8, P_11_8, {G_11_10,G_9_8}, {P_11_10,P_9_8}); - black b_14_12 (G_14_12, P_14_12, {g[14],G_13_12}, {p[14],P_13_12}); - black b_15_12 (G_15_12, P_15_12, {G_15_14,G_13_12}, {P_15_14,P_13_12}); - - // Stage 3: Generates G/P pairs that span 4 bits - grey g_4_0 (G_4_0, {g[4],G_3_0}, p[4]); - grey g_5_0 (G_5_0, {G_5_4,G_3_0}, P_5_4); - grey g_6_0 (G_6_0, {G_6_4,G_3_0}, P_6_4); - grey g_7_0 (G_7_0, {G_7_4,G_3_0}, P_7_4); - black b_12_8 (G_12_8, P_12_8, {g[12],G_11_8}, {p[12],P_11_8}); - black b_13_8 (G_13_8, P_13_8, {G_13_12,G_11_8}, {P_13_12,P_11_8}); - black b_14_8 (G_14_8, P_14_8, {G_14_12,G_11_8}, {P_14_12,P_11_8}); - black b_15_8 (G_15_8, P_15_8, {G_15_12,G_11_8}, {P_15_12,P_11_8}); - - // Stage 4: Generates G/P pairs that span 8 bits - grey g_8_0 (G_8_0, {g[8],G_7_0}, p[8]); - grey g_9_0 (G_9_0, {G_9_8,G_7_0}, P_9_8); - grey g_10_0 (G_10_0, {G_10_8,G_7_0}, P_10_8); - grey g_11_0 (G_11_0, {G_11_8,G_7_0}, P_11_8); - grey g_12_0 (G_12_0, {G_12_8,G_7_0}, P_12_8); - grey g_13_0 (G_13_0, {G_13_8,G_7_0}, P_13_8); - grey g_14_0 (G_14_0, {G_14_8,G_7_0}, P_14_8); - grey g_15_0 (G_15_0, {G_15_8,G_7_0}, P_15_8); - - - // Final Stage: Apply c_k+1=G_k_0 - assign c[1]=g[0]; - assign c[2]=G_1_0; - assign c[3]=G_2_0; - assign c[4]=G_3_0; - assign c[5]=G_4_0; - assign c[6]=G_5_0; - assign c[7]=G_6_0; - assign c[8]=G_7_0; - assign c[9]=G_8_0; - - assign c[10]=G_9_0; - assign c[11]=G_10_0; - assign c[12]=G_11_0; - assign c[13]=G_12_0; - assign c[14]=G_13_0; - -endmodule - From 308c9ccaac43d50c30e90db28a558af0382feaee Mon Sep 17 00:00:00 2001 From: 
Katherine Parry Date: Fri, 2 Jul 2021 12:53:05 -0400 Subject: [PATCH 30/38] FPU update - missing files --- wally-pipelined/src/fpu/fclassify.sv | 62 ++++ wally-pipelined/src/fpu/fcmp.sv | 465 +++++++++++++++++++++++++++ wally-pipelined/src/fpu/fdivsqrt.sv | 256 +++++++++++++++ wally-pipelined/src/fpu/fhazard.sv | 67 ++++ wally-pipelined/src/fpu/fregfile.sv | 54 ++++ 5 files changed, 904 insertions(+) create mode 100644 wally-pipelined/src/fpu/fclassify.sv create mode 100755 wally-pipelined/src/fpu/fcmp.sv create mode 100755 wally-pipelined/src/fpu/fdivsqrt.sv create mode 100644 wally-pipelined/src/fpu/fhazard.sv create mode 100644 wally-pipelined/src/fpu/fregfile.sv diff --git a/wally-pipelined/src/fpu/fclassify.sv b/wally-pipelined/src/fpu/fclassify.sv new file mode 100644 index 00000000..a15edcb4 --- /dev/null +++ b/wally-pipelined/src/fpu/fclassify.sv @@ -0,0 +1,62 @@ + +`include "wally-config.vh" + +module fclassify ( + input logic [63:0] SrcXE, + input logic FmtE, // 0-Single 1-Double + output logic [63:0] ClassResE + ); + + logic [31:0] Single; + logic [63:0] Double; + logic Sgn; + logic Inf, NaN, Zero, Norm, Denorm; + logic PInf, QNaN, PZero, PNorm, PDenorm; + logic NInf, SNaN, NZero, NNorm, NDenorm; + logic MaxExp, ExpZero, ManZero, FirstBitFrac; + + // Single and Double precision layouts + assign Single = SrcXE[63:32]; + assign Double = SrcXE; + assign Sgn = SrcXE[63]; + + // basic calculations for readabillity + + assign ExpZero = FmtE ? ~|Double[62:52] : ~|Single[30:23]; + assign MaxExp = FmtE ? &Double[62:52] : &Single[30:23]; + assign ManZero = FmtE ? ~|Double[51:0] : ~|Single[22:0]; + assign FirstBitFrac = FmtE ? 
Double[51] : Single[22]; + + // determine the type of number + assign NaN = MaxExp & ~ManZero; + assign Inf = MaxExp & ManZero; + assign Zero = ExpZero & ManZero; + assign Denorm= ExpZero & ~ManZero; + assign Norm = ~ExpZero; + + // determine the sub categories + assign QNaN = FirstBitFrac&NaN; + assign SNaN = ~FirstBitFrac&NaN; + assign PInf = ~Sgn&Inf; + assign NInf = Sgn&Inf; + assign PNorm = ~Sgn&Norm; + assign NNorm = Sgn&Norm; + assign PDenorm = ~Sgn&Denorm; + assign NDenorm = Sgn&Denorm; + assign PZero = ~Sgn&Zero; + assign NZero = Sgn&Zero; + + // determine sub category and combine into the result + // bit 0 - -Inf + // bit 1 - -Norm + // bit 2 - -Denorm + // bit 3 - -Zero + // bit 4 - +Zero + // bit 5 - +Denorm + // bit 6 - +Norm + // bit 7 - +Inf + // bit 8 - signaling NaN + // bit 9 - quiet NaN + assign ClassResE = {{54{1'b0}}, QNaN, SNaN, PInf, PNorm, PDenorm, PZero, NZero, NDenorm, NNorm, NInf}; + +endmodule diff --git a/wally-pipelined/src/fpu/fcmp.sv b/wally-pipelined/src/fpu/fcmp.sv new file mode 100755 index 00000000..f47d7c9e --- /dev/null +++ b/wally-pipelined/src/fpu/fcmp.sv @@ -0,0 +1,465 @@ + +// +// File name : fpcomp.v +// Title : Floating-Point Comparator +// project : FPU +// Library : fpcomp +// Author(s) : James E. Stine +// Purpose : definition of main unit to floating-point comparator +// notes : +// +// Copyright Oklahoma State University +// +// Floating Point Comparator (Algorithm) +// +// 1.) Performs sign-extension if the inputs are 32-bit integers. +// 2.) Perform a magnitude comparison on the lower 63 bits of the inputs +// 3.) Check for special cases (+0=-0, unordered, and infinite values) +// and correct for sign bits +// +// This module takes 64-bits inputs op1 and op2, VSS, and VDD +// signals, and a 2-bit signal FOpCtrlE that indicates the type of +// operands being compared as indicated below. 
+// FOpCtrlE Description +// 00 double precision numbers +// 01 single precision numbers +// 10 half precision numbers +// 11 (unused) +// +// The comparator produces a 2-bit signal FCC, which +// indicates the result of the comparison: +// +// fcc description +// 00 A = B +// 01 A < B +// 10 A > B +// 11 A and B are unordered (i.e., A or B is NaN) +// +// It also produces an invalid operation flag, which is one +// if either of the input operands is a signaling NaN per 754 + +`include "wally-config.vh" +module fcmp ( + input logic [63:0] op1, + input logic [63:0] op2, + input logic [2:0] FOpCtrlE, + input logic FmtE, + + + output logic Invalid, // Invalid Operation + // output logic [1:0] FCC, // Condition Codes + output logic [63:0] CmpResE); + // Perform magnitude comparison between the 63 least significant bits + // of the input operands. Only LT and EQ are returned, since GT can + // be determined from these values. + logic [1:0] FCC; // Condition Codes + logic [7:0] w, x; + logic ANaN, BNaN; + logic Azero, Bzero; + logic LT; // magnitude op1 < magnitude op2 + logic EQ; // magnitude op1 = magnitude op2 + + magcompare64b_1 magcomp1 (w, x, {~op1[63], op1[62:0]}, {~op2[63], op2[62:0]}); + + // Determine final values based on output of magnitude comparison, + // sign bits, and special case testing. + exception_cmp_1 exc1 (ANaN, BNaN, Azero, Bzero, op1, op2, FOpCtrlE); + + // Perform magnitude comparison between the 63 least significant bits + // of the input operands. Only LT and EQ are returned, since GT can + // be determined from these values. + magcompare64b_2 magcomp2 (LT, EQ, w, x); + + // Determine final values based on output of magnitude comparison, + // sign bits, and special case testing. 
+ exception_cmp_2 exc2 (.invalid(Invalid), .fcc(FCC), .LT_mag(LT), .EQ_mag(EQ), .ANaN(ANaN), .BNaN(BNaN), .Azero(Azero), .Bzero(Bzero), .FOpCtrlE(FOpCtrlE), .A(op1), .B(op2), .*); + +endmodule // fpcomp + +// module magcompare2b (LT, GT, A, B); + +// input logic [1:0] A; +// input logic [1:0] B; + +// output logic LT; +// output logic GT; + +// // Determine if A < B using a minimized sum-of-products expression +// assign LT = ~A[1]&B[1] | ~A[1]&~A[0]&B[0] | ~A[0]&B[1]&B[0]; +// // Determine if A > B using a minimized sum-of-products expression +// assign GT = A[1]&~B[1] | A[1]&A[0]&~B[0] | A[0]&~B[1]&~B[0]; + +// endmodule // magcompare2b + +// 2-bit magnitude comparator +// This module compares two 2-bit values A and B. LT is '1' if A < B +// and GT is '1'if A > B. LT and GT are both '0' if A = B. However, +// this version actually incorporates don't cares into the equation to +// simplify the optimization + +module magcompare2c (LT, GT, A, B); + + input logic [1:0] A; + input logic [1:0] B; + + output logic LT; + output logic GT; + + assign LT = B[1] | (!A[1]&B[0]); + assign GT = A[1] | (!B[1]&A[0]); + +endmodule // magcompare2b + +// This module compares two 64-bit values A and B. LT is '1' if A < B +// and EQ is '1'if A = B. LT and GT are both '0' if A > B. +// This structure was modified so +// that it only does a strict magnitdude comparison, and only +// returns flags for less than (LT) and eqaual to (EQ). It uses a tree +// of 63 2-bit magnitude comparators, followed by one OR gates. +// +// J. E. Stine and M. J. Schulte, "A combined two's complement and +// floating-point comparator," 2005 IEEE International Symposium on +// Circuits and Systems, Kobe, 2005, pp. 89-92 Vol. 1. 
+// doi: 10.1109/ISCAS.2005.1464531 + +module magcompare64b_1 (w, x, A, B); + + input logic [63:0] A; + input logic [63:0] B; + + logic [31:0] s; + logic [31:0] t; + logic [15:0] u; + logic [15:0] v; + output logic [7:0] w; + output logic [7:0] x; + + magcompare2b mag1(s[0], t[0], A[1:0], B[1:0]); + magcompare2b mag2(s[1], t[1], A[3:2], B[3:2]); + magcompare2b mag3(s[2], t[2], A[5:4], B[5:4]); + magcompare2b mag4(s[3], t[3], A[7:6], B[7:6]); + magcompare2b mag5(s[4], t[4], A[9:8], B[9:8]); + magcompare2b mag6(s[5], t[5], A[11:10], B[11:10]); + magcompare2b mag7(s[6], t[6], A[13:12], B[13:12]); + magcompare2b mag8(s[7], t[7], A[15:14], B[15:14]); + magcompare2b mag9(s[8], t[8], A[17:16], B[17:16]); + magcompare2b magA(s[9], t[9], A[19:18], B[19:18]); + magcompare2b magB(s[10], t[10], A[21:20], B[21:20]); + magcompare2b magC(s[11], t[11], A[23:22], B[23:22]); + magcompare2b magD(s[12], t[12], A[25:24], B[25:24]); + magcompare2b magE(s[13], t[13], A[27:26], B[27:26]); + magcompare2b magF(s[14], t[14], A[29:28], B[29:28]); + magcompare2b mag10(s[15], t[15], A[31:30], B[31:30]); + magcompare2b mag11(s[16], t[16], A[33:32], B[33:32]); + magcompare2b mag12(s[17], t[17], A[35:34], B[35:34]); + magcompare2b mag13(s[18], t[18], A[37:36], B[37:36]); + magcompare2b mag14(s[19], t[19], A[39:38], B[39:38]); + magcompare2b mag15(s[20], t[20], A[41:40], B[41:40]); + magcompare2b mag16(s[21], t[21], A[43:42], B[43:42]); + magcompare2b mag17(s[22], t[22], A[45:44], B[45:44]); + magcompare2b mag18(s[23], t[23], A[47:46], B[47:46]); + magcompare2b mag19(s[24], t[24], A[49:48], B[49:48]); + magcompare2b mag1A(s[25], t[25], A[51:50], B[51:50]); + magcompare2b mag1B(s[26], t[26], A[53:52], B[53:52]); + magcompare2b mag1C(s[27], t[27], A[55:54], B[55:54]); + magcompare2b mag1D(s[28], t[28], A[57:56], B[57:56]); + magcompare2b mag1E(s[29], t[29], A[59:58], B[59:58]); + magcompare2b mag1F(s[30], t[30], A[61:60], B[61:60]); + magcompare2b mag20(s[31], t[31], A[63:62], B[63:62]); + + 
magcompare2c mag21(u[0], v[0], t[1:0], s[1:0]); + magcompare2c mag22(u[1], v[1], t[3:2], s[3:2]); + magcompare2c mag23(u[2], v[2], t[5:4], s[5:4]); + magcompare2c mag24(u[3], v[3], t[7:6], s[7:6]); + magcompare2c mag25(u[4], v[4], t[9:8], s[9:8]); + magcompare2c mag26(u[5], v[5], t[11:10], s[11:10]); + magcompare2c mag27(u[6], v[6], t[13:12], s[13:12]); + magcompare2c mag28(u[7], v[7], t[15:14], s[15:14]); + magcompare2c mag29(u[8], v[8], t[17:16], s[17:16]); + magcompare2c mag2A(u[9], v[9], t[19:18], s[19:18]); + magcompare2c mag2B(u[10], v[10], t[21:20], s[21:20]); + magcompare2c mag2C(u[11], v[11], t[23:22], s[23:22]); + magcompare2c mag2D(u[12], v[12], t[25:24], s[25:24]); + magcompare2c mag2E(u[13], v[13], t[27:26], s[27:26]); + magcompare2c mag2F(u[14], v[14], t[29:28], s[29:28]); + magcompare2c mag30(u[15], v[15], t[31:30], s[31:30]); + + magcompare2c mag31(w[0], x[0], v[1:0], u[1:0]); + magcompare2c mag32(w[1], x[1], v[3:2], u[3:2]); + magcompare2c mag33(w[2], x[2], v[5:4], u[5:4]); + magcompare2c mag34(w[3], x[3], v[7:6], u[7:6]); + magcompare2c mag35(w[4], x[4], v[9:8], u[9:8]); + magcompare2c mag36(w[5], x[5], v[11:10], u[11:10]); + magcompare2c mag37(w[6], x[6], v[13:12], u[13:12]); + magcompare2c mag38(w[7], x[7], v[15:14], u[15:14]); + +endmodule // magcompare64b + +// This module takes 64-bits inputs A and B, two magnitude comparison +// flags LT_mag and EQ_mag, and a 2-bit signal FOpCtrlE that indicates the type of +// operands being compared as indicated below. +// FOpCtrlE Description +// 00 double precision numbers +// 01 single precision numbers +// 10 half precision numbers +// 11 bfloat precision numbers +// +// The comparator produces a 2-bit signal fcc, which +// indicates the result of the comparison as follows: +// fcc decscription +// 00 A = B +// 01 A < B +// 10 A > B +// 11 A and B are unordered (i.e., A or B is NaN) +// It also produces a invalid operation flag, which is one +// if either of the input operands is a signaling NaN. 
+ +module exception_cmp_1 (ANaN, BNaN, Azero, Bzero, A, B, FOpCtrlE); + + input logic [63:0] A; + input logic [63:0] B; + input logic [2:0] FOpCtrlE; + + logic dp, sp, hp; + + output logic ANaN; + output logic BNaN; + output logic Azero; + output logic Bzero; + + assign dp = !FOpCtrlE[1]&!FOpCtrlE[0]; + assign sp = !FOpCtrlE[1]&FOpCtrlE[0]; + assign hp = FOpCtrlE[1]&!FOpCtrlE[0]; + + // Test if A or B is NaN. + assign ANaN = (A[62]&A[61]&A[60]&A[59]&A[58]) & + ((sp&A[57]&A[56]&A[55]&(A[54]|A[53])) | + (dp&A[57]&A[56]&A[55]&A[54]&A[53]&A[52]&(A[51]|A[50])) | + (hp&(A[57]|A[56]))); + + assign BNaN = (B[62]&B[61]&B[60]&B[59]&B[58]) & + ((sp&B[57]&B[56]&B[55]&(B[54]|B[53])) | + (dp&B[57]&B[56]&B[55]&B[54]&B[53]&B[52]&(B[51]|B[50])) | + (hp&(B[57]|B[56]))); + + // Test if A is +0 or -0 when viewed as a floating point number (i.e, + // the 63 least siginficant bits of A are zero). + // Depending on how this synthesizes, it may work better to replace + // this with assign Azero = ~(A[62] | A[61] | ... | A[0]) + assign Azero = (A[62:0] == 63'h0); + assign Bzero = (B[62:0] == 63'h0); + +endmodule // exception_cmp +// +// File name : fpcomp.v +// Title : Floating-Point Comparator +// project : FPU +// Library : fpcomp +// Author(s) : James E. Stine +// Purpose : definition of main unit to floating-point comparator +// notes : +// +// Copyright Oklahoma State University +// +// Floating Point Comparator (Algorithm) +// +// 1.) Performs sign-extension if the inputs are 32-bit integers. +// 2.) Perform a magnitude comparison on the lower 63 bits of the inputs +// 3.) Check for special cases (+0=-0, unordered, and infinite values) +// and correct for sign bits +// +// This module takes 64-bits inputs op1 and op2, VSS, and VDD +// signals, and a 2-bit signal FOpCtrlE that indicates the type of +// operands being compared as indicated below. 
+// FOpCtrlE Description +// 00 double precision numbers +// 01 single precision numbers +// 10 half precision numbers +// 11 (unused) +// +// The comparator produces a 2-bit signal FCC, which +// indicates the result of the comparison: +// +// fcc decscription +// 00 A = B +// 01 A < B +// 10 A > B +// 11 A and B are unordered (i.e., A or B is NaN) +// +// It also produces an invalid operation flag, which is one +// if either of the input operands is a signaling NaN per 754 + + +/*module magcompare2b (LT, GT, A, B); + + input logic [1:0] A; + input logic [1:0] B; + + output logic LT; + output logic GT; + + // Determine if A < B using a minimized sum-of-products expression + assign LT = ~A[1]&B[1] | ~A[1]&~A[0]&B[0] | ~A[0]&B[1]&B[0]; + // Determine if A > B using a minimized sum-of-products expression + assign GT = A[1]&~B[1] | A[1]&A[0]&~B[0] | A[0]&~B[1]&~B[0]; + +endmodule*/ // magcompare2b + +// 2-bit magnitude comparator +// This module compares two 2-bit values A and B. LT is '1' if A < B +// and GT is '1'if A > B. LT and GT are both '0' if A = B. However, +// this version actually incorporates don't cares into the equation to +// simplify the optimization + +// module magcompare2c (LT, GT, A, B); + +// input logic [1:0] A; +// input logic [1:0] B; + +// output logic LT; +// output logic GT; + +// assign LT = B[1] | (!A[1]&B[0]); +// assign GT = A[1] | (!B[1]&A[0]); + +// endmodule // magcompare2b + +// This module compares two 64-bit values A and B. LT is '1' if A < B +// and EQ is '1'if A = B. LT and GT are both '0' if A > B. +// This structure was modified so +// that it only does a strict magnitdude comparison, and only +// returns flags for less than (LT) and eqaual to (EQ). It uses a tree +// of 63 2-bit magnitude comparators, followed by one OR gates. +// +// J. E. Stine and M. J. Schulte, "A combined two's complement and +// floating-point comparator," 2005 IEEE International Symposium on +// Circuits and Systems, Kobe, 2005, pp. 89-92 Vol. 1. 
+// doi: 10.1109/ISCAS.2005.1464531 + +module magcompare64b_2 (LT, EQ, w, x); + + input logic [7:0] w; + input logic [7:0] x; + logic [3:0] y; + logic [3:0] z; + logic [1:0] a; + logic [1:0] b; + logic GT; + + output logic LT; + output logic EQ; + + magcompare2c mag39(y[0], z[0], x[1:0], w[1:0]); + magcompare2c mag3A(y[1], z[1], x[3:2], w[3:2]); + magcompare2c mag3B(y[2], z[2], x[5:4], w[5:4]); + magcompare2c mag3C(y[3], z[3], x[7:6], w[7:6]); + + magcompare2c mag3D(a[0], b[0], z[1:0], y[1:0]); + magcompare2c mag3E(a[1], b[1], z[3:2], y[3:2]); + + magcompare2c mag3F(LT, GT, b[1:0], a[1:0]); + + assign EQ = ~(LT | GT); + +endmodule // magcompare64b + +// This module takes 64-bits inputs A and B, two magnitude comparison +// flags LT_mag and EQ_mag, and a 2-bit signal FOpCtrlE that indicates the type of +// operands being compared as indicated below. +// FOpCtrlE Description +// 00 double precision numbers +// 01 single precision numbers +// 10 half precision numbers +// 11 bfloat precision numbers +// +// The comparator produces a 2-bit signal fcc, which +// indicates the result of the comparison as follows: +// fcc decscription +// 00 A = B +// 01 A < B +// 10 A > B +// 11 A and B are unordered (i.e., A or B is NaN) +// It also produces a invalid operation flag, which is one +// if either of the input operands is a signaling NaN. 
+ +module exception_cmp_2 ( + input logic [63:0] A, + input logic [63:0] B, + input logic FmtE, + input logic LT_mag, + input logic EQ_mag, + input logic [2:0] FOpCtrlE, + + output logic invalid, + output logic [1:0] fcc, + output logic [63:0] CmpResE, + + input logic Azero, + input logic Bzero, + input logic ANaN, + input logic BNaN); + + logic dp; + logic sp; + logic hp; + logic ASNaN; + logic BSNaN; + logic UO; + logic GT; + logic LT; + logic EQ; + logic [62:0] sixtythreezeros = 63'h0; + + assign dp = !FOpCtrlE[1]&!FOpCtrlE[0]; + assign sp = !FOpCtrlE[1]&FOpCtrlE[0]; + assign hp = FOpCtrlE[1]&!FOpCtrlE[0]; + + // Values are unordered if ((A is NaN) OR (B is NaN)) AND (a floating + // point comparison is being performed. + assign UO = (ANaN | BNaN); + + // Test if A or B is a signaling NaN. + assign ASNaN = ANaN & (sp&~A[53] | dp&~A[50] | hp&~A[56]); + assign BSNaN = BNaN & (sp&~B[53] | dp&~B[50] | hp&~B[56]); + + // If either A or B is a signaling NaN the "Invalid Operation" + // exception flag is set to one; otherwise it is zero. + assign invalid = (ASNaN | BSNaN); + + // A and B are equal if (their magnitudes are equal) AND ((their signs are + // equal) or (their magnitudes are zero AND they are floating point + // numbers)). Also, A and B are not equal if they are unordered. + assign EQ = (EQ_mag | (Azero&Bzero)) & (~UO); + + // A is less than B if (A is negative and B is posiive) OR + // (A and B are positive and the magnitude of A is less than + // the magnitude of B) or (A and B are negative integers and + // the magnitude of A is less than the magnitude of B) or + // (A and B are negative floating point numbers and + // the magnitude of A is greater than the magnitude of B). + // Also, A is not less than B if A and B are equal or unordered. + assign LT = ((~LT_mag & A[63] & B[63]) | + (LT_mag & ~(A[63] & B[63])))&~EQ&~UO; + + // A is greater than B when LT, EQ, and UO are are false. 
+ assign GT = ~(LT | EQ | UO); + + // Note: it may be possible to optimize the setting of fcc + // a little more, but it is probably not worth the effort. + + // Set the bits of fcc based on LT, GT, EQ, and UO + assign fcc[0] = LT | UO; + assign fcc[1] = GT | UO; + + always_comb begin + case (FOpCtrlE[2:0]) + 3'b111: CmpResE = LT ? A : B;//min + 3'b101: CmpResE = GT ? A : B;//max + 3'b010: CmpResE = {63'b0, EQ};//equal + 3'b001: CmpResE = {63'b0, LT};//less than + 3'b011: CmpResE = {63'b0, LT|EQ};//less than or equal + default: CmpResE = 64'b0; + endcase + end + +endmodule // exception_cmp diff --git a/wally-pipelined/src/fpu/fdivsqrt.sv b/wally-pipelined/src/fpu/fdivsqrt.sv new file mode 100755 index 00000000..6d8da23f --- /dev/null +++ b/wally-pipelined/src/fpu/fdivsqrt.sv @@ -0,0 +1,256 @@ +// +// File name : fpdiv +// Title : Floating-Point Divider/Square-Root +// project : FPU +// Library : fpdiv +// Author(s) : James E. Stine, Jr. +// Purpose : definition of main unit to floating-point div/sqrt +// notes : +// +// Copyright Oklahoma State University +// +// Basic Operations +// +// Step 1: Load operands, set flags, and convert SP to DP +// Step 2: Check for special inputs ( +/- Infinity, NaN) +// Step 3: Exponent Logic +// Step 4: Divide/Sqrt using Goldschmidt +// Step 5: Normalize the result.// +// Shift left until normalized. Normalized when the value to the +// left of the binary point is 1. +// Step 6: Round the result.// +// Step 7: Put quotient/remainder onto output. 
+// + +// `timescale 1ps/1ps +module fdivsqrt (FDivSqrtDoneE, FDivResultM, FDivSqrtFlgM, DivInput1E, DivInput2E, FrmE, DivOpType, FmtE, DivOvEn, DivUnEn, + FDivStartE, reset, clk, FDivBusyE, HoldInputs); + + input [63:0] DivInput1E; // 1st input operand (A) + input [63:0] DivInput2E; // 2nd input operand (B) + input [2:0] FrmE; // Rounding mode - specify values + input DivOpType; // Function opcode + input FmtE; // Result Precision (0 for double, 1 for single) //***will need to swap this + input DivOvEn; // Overflow trap enabled + input DivUnEn; // Underflow trap enabled + + input FDivStartE; + input reset; + input clk; + + output [63:0] FDivResultM; // Result of operation + output [4:0] FDivSqrtFlgM; // IEEE exception flags + output FDivSqrtDoneE; + output FDivBusyE, HoldInputs; + + supply1 vdd; + supply0 vss; + + wire [63:0] Float1; + wire [63:0] Float2; + wire [63:0] IntValue; + + wire DivDenormM; // DivDenormM on input or output + wire [12:0] exp1, exp2, expF; + wire [12:0] exp_diff, bias; + wire [13:0] exp_sqrt; + wire [12:0] exp_s; + wire [12:0] exp_c; + + wire [10:0] exponent, exp_pre; + wire [63:0] Result; + wire [52:0] mantissaA; + wire [52:0] mantissaB; + wire [63:0] sum, sum_tc, sum_corr, sum_norm; + + wire [5:0] align_shift; + wire [5:0] norm_shift; + wire [2:0] sel_inv; + wire op1_Norm, op2_Norm; + wire opA_Norm, opB_Norm; + wire Invalid; + wire DenormIn, DenormIO; + wire [4:0] FlagsIn; + wire exp_gt63; + wire Sticky_out; + wire signResult, sign_corr; + wire corr_sign; + wire zeroB; + wire convert; + wire swap; + wire sub; + + wire [63:0] q1, qm1, qp1, q0, qm0, qp0; + wire [63:0] rega_out, regb_out, regc_out, regd_out; + wire [127:0] regr_out; + wire [2:0] sel_muxa, sel_muxb; + wire sel_muxr; + wire load_rega, load_regb, load_regc, load_regd, load_regr, load_regs; + + wire donev, sel_muxrv, sel_muxsv; + wire [1:0] sel_muxav, sel_muxbv; + wire load_regav, load_regbv, load_regcv; + wire load_regrv, load_regsv; + + logic exp_cout1, exp_cout2, exp_odd, 
open; + + + // Convert the input operands to their appropriate forms based on + // the orignal operands, the DivOpType , and their precision FmtE. + // Single precision inputs are converted to double precision + // and the sign of the first operand is set appropratiately based on + // if the operation is absolute value or negation. + convert_inputs_div divconv1 (Float1, Float2, DivInput1E, DivInput2E, DivOpType, FmtE); + + // Test for exceptions and return the "Invalid Operation" and + // "Denormalized" Input FDivSqrtFlgM. The "sel_inv" is used in + // the third pipeline stage to select the result. Also, op1_Norm + // and op2_Norm are one if DivInput1E and DivInput2E are not zero or denormalized. + // sub is one if the effective operation is subtaction. + exception_div divexc1 (sel_inv, Invalid, DenormIn, op1_Norm, op2_Norm, + Float1, Float2, DivOpType); + + // Determine Sign/Mantissa + assign signResult = ((Float1[63]^Float2[63])&~DivOpType) | Float1[63]&DivOpType; + assign mantissaA = {vdd, Float1[51:0]}; + assign mantissaB = {vdd, Float2[51:0]}; + // Perform Exponent Subtraction - expA - expB + Bias + assign exp1 = {2'b0, Float1[62:52]}; + assign exp2 = {2'b0, Float2[62:52]}; + // bias : DP = 2^{11-1}-1 = 1023 + assign bias = {3'h0, 10'h3FF}; + // Divide exponent + csa #(13) csa1 (exp1, ~exp2, bias, exp_s, exp_c); //***adder + exp_add explogic1 (exp_cout1, {open, exp_diff}, //***adder? + {vss, exp_s}, {vss, exp_c}, 1'b1); + // Sqrt exponent (check if exponent is odd) + assign exp_odd = Float1[52] ? vss : vdd; + exp_add explogic2 (exp_cout2, exp_sqrt, //***adder? + {vss, exp1}, {4'h0, 10'h3ff}, exp_odd); + // Choose correct exponent + assign expF = DivOpType ? 
exp_sqrt[13:1] : exp_diff; + + // Main Goldschmidt/Division Routine + divconv goldy (q1, qm1, qp1, q0, qm0, qp0, + rega_out, regb_out, regc_out, regd_out, + regr_out, mantissaB, mantissaA, + sel_muxa, sel_muxb, sel_muxr, + reset, clk, + load_rega, load_regb, load_regc, load_regd, + load_regr, load_regs, FmtE, DivOpType, exp_odd); + + // FSM : control divider + fsm control (FDivSqrtDoneE, load_rega, load_regb, load_regc, load_regd, + load_regr, load_regs, sel_muxa, sel_muxb, sel_muxr, + clk, reset, FDivStartE, DivOpType, FDivBusyE, HoldInputs); + + // Round the mantissa to a 52-bit value, with the leading one + // removed. The rounding units also handles special cases and + // set the exception flags. + //***add max magnitude and swap negitive and positive infinity + rounder_div divround1 (Result, DenormIO, FlagsIn, + FrmE, FmtE, DivOvEn, DivUnEn, expF, + sel_inv, Invalid, DenormIn, signResult, + q1, qm1, qp1, q0, qm0, qp0, regr_out); + + // Store the final result and the exception flags in registers. 
+ flopenr #(64) rega (clk, reset, FDivSqrtDoneE, Result, FDivResultM); + flopenr #(1) regb (clk, reset, FDivSqrtDoneE, DenormIO, DivDenormM); + flopenr #(5) regc (clk, reset, FDivSqrtDoneE, FlagsIn, FDivSqrtFlgM); + +endmodule // fpadd + +// +// Brent-Kung Prefix Adder +// (yes, it is 14 bits as my generator is broken for 13 bits :( +// assume, synthesizer will delete stuff not needed ) +// +module exp_add (cout, sum, a, b, cin); + + input [13:0] a, b; + input cin; + + output [13:0] sum; + output cout; + + wire [14:0] p,g; + wire [13:0] c; + + // pre-computation + assign p={a^b,1'b0}; + assign g={a&b, cin}; + + // prefix tree + brent_kung prefix_tree(c, p[13:0], g[13:0]); + + // post-computation + assign sum=p[14:1]^c; + assign cout=g[14]|(p[14]&c[13]); + +endmodule // exp_add + +module brent_kung (c, p, g); + + input [13:0] p; + input [13:0] g; + output [14:1] c; + + logic G_1_0, G_3_2,G_5_4,G_7_6,G_9_8,G_11_10,G_13_12,G_3_0,G_7_4,G_11_8; + logic P_3_2,P_5_4,P_7_6,P_9_8,P_11_10,P_13_12,P_7_4,P_11_8; + logic G_7_0,G_11_0,G_5_0,G_9_0,G_13_0,G_2_0,G_4_0,G_6_0,G_8_0,G_10_0,G_12_0; + // parallel-prefix, Brent-Kung + + // Stage 1: Generates G/P pairs that span 1 bits + grey b_1_0 (G_1_0, {g[1],g[0]}, p[1]); + black b_3_2 (G_3_2, P_3_2, {g[3],g[2]}, {p[3],p[2]}); + black b_5_4 (G_5_4, P_5_4, {g[5],g[4]}, {p[5],p[4]}); + black b_7_6 (G_7_6, P_7_6, {g[7],g[6]}, {p[7],p[6]}); + black b_9_8 (G_9_8, P_9_8, {g[9],g[8]}, {p[9],p[8]}); + black b_11_10 (G_11_10, P_11_10, {g[11],g[10]}, {p[11],p[10]}); + black b_13_12 (G_13_12, P_13_12, {g[13],g[12]}, {p[13],p[12]}); + + // Stage 2: Generates G/P pairs that span 2 bits + grey g_3_0 (G_3_0, {G_3_2,G_1_0}, P_3_2); + black b_7_4 (G_7_4, P_7_4, {G_7_6,G_5_4}, {P_7_6,P_5_4}); + black b_11_8 (G_11_8, P_11_8, {G_11_10,G_9_8}, {P_11_10,P_9_8}); + + // Stage 3: Generates G/P pairs that span 4 bits + grey g_7_0 (G_7_0, {G_7_4,G_3_0}, P_7_4); + + // Stage 4: Generates G/P pairs that span 8 bits + + // Stage 5: Generates G/P
pairs that span 4 bits + grey g_11_0 (G_11_0, {G_11_8,G_7_0}, P_11_8); + + // Stage 6: Generates G/P pairs that span 2 bits + grey g_5_0 (G_5_0, {G_5_4,G_3_0}, P_5_4); + grey g_9_0 (G_9_0, {G_9_8,G_7_0}, P_9_8); + grey g_13_0 (G_13_0, {G_13_12,G_11_0}, P_13_12); + + // Last grey cell stage + grey g_2_0 (G_2_0, {g[2],G_1_0}, p[2]); + grey g_4_0 (G_4_0, {g[4],G_3_0}, p[4]); + grey g_6_0 (G_6_0, {g[6],G_5_0}, p[6]); + grey g_8_0 (G_8_0, {g[8],G_7_0}, p[8]); + grey g_10_0 (G_10_0, {g[10],G_9_0}, p[10]); + grey g_12_0 (G_12_0, {g[12],G_11_0}, p[12]); + + // Final Stage: Apply c_k+1=G_k_0 + assign c[1]=g[0]; + assign c[2]=G_1_0; + assign c[3]=G_2_0; + assign c[4]=G_3_0; + assign c[5]=G_4_0; + assign c[6]=G_5_0; + assign c[7]=G_6_0; + assign c[8]=G_7_0; + assign c[9]=G_8_0; + + assign c[10]=G_9_0; + assign c[11]=G_10_0; + assign c[12]=G_11_0; + assign c[13]=G_12_0; + assign c[14]=G_13_0; + +endmodule // brent_kung + diff --git a/wally-pipelined/src/fpu/fhazard.sv b/wally-pipelined/src/fpu/fhazard.sv new file mode 100644 index 00000000..53f7dde2 --- /dev/null +++ b/wally-pipelined/src/fpu/fhazard.sv @@ -0,0 +1,67 @@ +/////////////////////////////////////////// +// fhazard.sv +// +// Written: me@KatherineParry.com 19 May 2021 +// Modified: +// +// Purpose: Determine forwarding, stalls and flushes for the FPU +// +// A component of the Wally configurable RISC-V project.
+// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, +// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT +// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+/////////////////////////////////////////// + +`include "wally-config.vh" + +module fhazard( + input logic [4:0] Adr1E, Adr2E, Adr3E, + input logic FWriteEnM, FWriteEnW, + input logic [4:0] RdM, RdW, + input logic [2:0] FResultSelM, + output logic FStallD, + output logic [1:0] ForwardXE, ForwardYE, ForwardZE +); + + + always_comb begin + // set ReadData as default + ForwardXE = 2'b00; // choose FRD1E + ForwardYE = 2'b00; // choose FRD2E + ForwardZE = 2'b00; // choose FRD3E + FStallD = 0; + + if ((Adr1E == RdM) & FWriteEnM) + // if the result will be FResM + if(FResultSelM == 3'b100) ForwardXE = 2'b10; // choose FResM + else FStallD = 1; // if the result won't be ready stall + else if ((Adr1E == RdW) & FWriteEnW) ForwardXE = 2'b01; // choose FPUResult64W + + + if ((Adr2E == RdM) & FWriteEnM) + // if the result will be FResM + if(FResultSelM == 3'b100) ForwardYE = 2'b10; // choose FResM + else FStallD = 1; // if the result won't be ready stall + else if ((Adr2E == RdW) & FWriteEnW) ForwardYE = 2'b01; // choose FPUResult64W + + + if ((Adr3E == RdM) & FWriteEnM) + // if the result will be FResM + if(FResultSelM == 3'b100) ForwardZE = 2'b10; // choose FResM + else FStallD = 1; // if the result won't be ready stall + else if ((Adr3E == RdW) & FWriteEnW) ForwardZE = 2'b01; // choose FPUResult64W + + end + +endmodule diff --git a/wally-pipelined/src/fpu/fregfile.sv b/wally-pipelined/src/fpu/fregfile.sv new file mode 100644 index 00000000..78c24b3e --- /dev/null +++ b/wally-pipelined/src/fpu/fregfile.sv @@ -0,0 +1,54 @@ +/////////////////////////////////////////// +// regfile.sv +// +// Written: David_Harris@hmc.edu 9 January 2021 +// Modified: +// +// Purpose: 4-port register file +// +// A component of the Wally configurable RISC-V project. 
+// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, +// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT +// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+/////////////////////////////////////////// + +`include "wally-config.vh" + +module fregfile ( + input logic clk, reset, + input logic we4, + input logic [ 4:0] a1, a2, a3, a4, + input logic [63:0] wd4, //KEP `XLEN-1 changed to 63 (lint warning) *** figure out if double can be supported when XLEN = 32 + output logic [63:0] rd1, rd2, rd3); + + logic [63:0] rf[31:0]; + integer i; + + // register file with three read ports and one write port + // read three ports combinationally (A1/RD1, A2/RD2, A3/RD3) + // write fourth port synchronously (A4/WD4/WE4) + // write occurs on falling edge of clock + + // reset is intended for simulation only, not synthesis + + always_ff @(negedge clk or posedge reset) + if (reset) for(i=0; i<32; i++) rf[i] <= 0; + else if (we4) rf[a4] <= wd4; + + assign #2 rd1 = rf[a1]; + assign #2 rd2 = rf[a2]; + assign #2 rd3 = rf[a3]; + +endmodule // fregfile + From 8e3149517a33de84f352d85282eaaf703e65ee2e Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Fri, 2 Jul 2021 15:45:05 -0500 Subject: [PATCH 31/38] Fixed up the bit widths on the page table walker for rv32. --- wally-pipelined/src/mmu/pagetablewalker.sv | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/wally-pipelined/src/mmu/pagetablewalker.sv b/wally-pipelined/src/mmu/pagetablewalker.sv index a8f9500f..0d771479 100644 --- a/wally-pipelined/src/mmu/pagetablewalker.sv +++ b/wally-pipelined/src/mmu/pagetablewalker.sv @@ -59,7 +59,7 @@ module pagetablewalker input logic HPTWStall, // *** modify to send to LSU - output logic [`XLEN-1:0] MMUPAdr, + output logic [`XLEN-1:0] MMUPAdr, // this probably should be `PA_BITS wide output logic MMUTranslate, // *** rename to HPTWReq output logic HPTWRead, @@ -234,7 +234,7 @@ module pagetablewalker PageType = (WalkerState == LEVEL1) ? 2'b01 : 2'b00; // *** not sure about this mux?
DTLBWriteM = DTLBMissMQ; ITLBWriteF = ~DTLBMissMQ; // Prefer data over instructions - TranslationPAdr = TranslationVAdrQ[`PA_BITS-1:0]; + TranslationPAdr = {2'b00, TranslationVAdrQ[31:0]}; end // else if (ValidPTE && LeafPTE) NextWalkerState = LEAF; // *** Once the above line is properly tested, delete this line. else if (ValidPTE && ~LeafPTE) begin @@ -263,7 +263,7 @@ module pagetablewalker PageType = (WalkerState == LEVEL1) ? 2'b01 : 2'b00; DTLBWriteM = DTLBMissMQ; ITLBWriteF = ~DTLBMissMQ; // Prefer data over instructions - TranslationPAdr = TranslationVAdrQ[`PA_BITS-1:0]; + TranslationPAdr = {2'b00, TranslationVAdrQ[31:0]}; end else begin NextWalkerState = FAULT; end From cf688bd3f6c9f9b5030df2d3df874ea2f91fa036 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Fri, 2 Jul 2021 15:49:32 -0500 Subject: [PATCH 32/38] Fixed up the physical address generation for 64 bit page table walker. --- wally-pipelined/src/mmu/pagetablewalker.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wally-pipelined/src/mmu/pagetablewalker.sv b/wally-pipelined/src/mmu/pagetablewalker.sv index 0d771479..d0d2152f 100644 --- a/wally-pipelined/src/mmu/pagetablewalker.sv +++ b/wally-pipelined/src/mmu/pagetablewalker.sv @@ -546,7 +546,7 @@ module pagetablewalker // Assign outputs to ahblite // *** Currently truncate address to 32 bits. 
This must be changed if // we support larger physical address spaces - assign MMUPAdr = {{(`XLEN-32){1'b0}}, TranslationPAdr[31:0]}; + assign MMUPAdr = {{(`XLEN-`PA_BITS){1'b0}}, TranslationPAdr[`PA_BITS-1:0]}; end endgenerate From 0bd18ff662d67995f3f416a06a9569bfa8a8fbfb Mon Sep 17 00:00:00 2001 From: David Harris Date: Fri, 2 Jul 2021 17:08:13 -0400 Subject: [PATCH 33/38] Fixed PMPCFG read faults --- wally-pipelined/src/privileged/csrm.sv | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/wally-pipelined/src/privileged/csrm.sv b/wally-pipelined/src/privileged/csrm.sv index f30ebb4f..8c74b951 100644 --- a/wally-pipelined/src/privileged/csrm.sv +++ b/wally-pipelined/src/privileged/csrm.sv @@ -187,9 +187,9 @@ module csrm #(parameter (CSRAdrM == MEDELEG || CSRAdrM == MIDELEG); // trap on DELEG register access when no S or N-mode if (CSRAdrM >= PMPADDR0 && CSRAdrM < PMPADDR0 + `PMP_ENTRIES) // reading a PMP entry CSRMReadValM = PMPADDR_ARRAY_REGW[CSRAdrM - PMPADDR0]; - else if (CSRAdrM >= PMPCFG0 && CSRAdrM < PMPCFG0 + `PMP_ENTRIES/8) begin - if (~CSRAdrM[0]) CSRMReadValM = PMPCFG_ARRAY_REGW[CSRAdrM - PMPCFG0][`XLEN-1:0]; - else CSRMReadValM = {{(`XLEN-32){1'b0}}, PMPCFG_ARRAY_REGW[CSRAdrM - PMPCFG0][63:32]}; + else if (CSRAdrM >= PMPCFG0 && CSRAdrM < PMPCFG0 + `PMP_ENTRIES/4) begin + if (~CSRAdrM[0]) CSRMReadValM = PMPCFG_ARRAY_REGW[(CSRAdrM - PMPCFG0)/2][`XLEN-1:0]; + else CSRMReadValM = {{(`XLEN-32){1'b0}}, PMPCFG_ARRAY_REGW[(CSRAdrM - PMPCFG0-1)/2][63:32]}; end else case (CSRAdrM) MISA_ADR: CSRMReadValM = MISA_REGW; From 59b177beac982097afd36de8c6a5f1a67f157cd5 Mon Sep 17 00:00:00 2001 From: Ben Bracker Date: Fri, 2 Jul 2021 17:22:09 -0500 Subject: [PATCH 34/38] stop busybear from hanging --- wally-pipelined/regression/wally-busybear-batch.do | 1 + wally-pipelined/regression/wally-busybear.do | 3 ++- wally-pipelined/testbench/testbench-linux.sv | 7 +++++-- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git 
a/wally-pipelined/regression/wally-busybear-batch.do b/wally-pipelined/regression/wally-busybear-batch.do index a4a80eb7..e2817dfa 100644 --- a/wally-pipelined/regression/wally-busybear-batch.do +++ b/wally-pipelined/regression/wally-busybear-batch.do @@ -35,5 +35,6 @@ vopt work_busybear.testbench -o workopt_busybear vsim workopt_busybear -suppress 8852,12070 +run -all run -all quit diff --git a/wally-pipelined/regression/wally-busybear.do b/wally-pipelined/regression/wally-busybear.do index 11876dde..204d1c4e 100644 --- a/wally-pipelined/regression/wally-busybear.do +++ b/wally-pipelined/regression/wally-busybear.do @@ -35,9 +35,10 @@ vopt +acc work.testbench -o workopt vsim workopt -suppress 8852,12070 -do ./wave-dos/linux-waves.do #-- Run the Simulation run -all +do ./wave-dos/linux-waves.do +run -all ##quit diff --git a/wally-pipelined/testbench/testbench-linux.sv b/wally-pipelined/testbench/testbench-linux.sv index 6676d1a7..8f8a5d44 100644 --- a/wally-pipelined/testbench/testbench-linux.sv +++ b/wally-pipelined/testbench/testbench-linux.sv @@ -27,8 +27,8 @@ module testbench(); - parameter waveOnICount = 2657000; // # of instructions at which to turn on waves in graphical sim - + parameter waveOnICount = `BUSYBEAR*140000 + `BUILDROOT*2400000; // # of instructions at which to turn on waves in graphical sim + parameter stopICount = `BUSYBEAR*143898 + `BUILDROOT*0000000; // # instructions at which to halt sim completely (set to 0 to let it run as far as it can) /////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////// DUT ///////////////////////////////////// @@ -248,6 +248,9 @@ module testbench(); if (instrs == waveOnICount) begin $display("turning on waves at %0d instructions", instrs); $stop; + end else if (instrs == stopICount && stopICount != 0) begin + $display("Ending sim at %0d instructions (set stopICount to 0 to let the sim go on)", instrs); + $stop; end // Check if PCD is going to be flushed 
due to a branch or jump From d44916dacf691c33b8e171b34a1d5bfe362ba273 Mon Sep 17 00:00:00 2001 From: David Harris Date: Sat, 3 Jul 2021 02:25:31 -0400 Subject: [PATCH 35/38] Cleaned up PMA/PMP checker unused code --- wally-pipelined/src/mmu/pmachecker.sv | 4 -- wally-pipelined/src/mmu/pmpadrdec.sv | 64 ++------------------------- wally-pipelined/src/mmu/pmpchecker.sv | 45 +------------------ 3 files changed, 5 insertions(+), 108 deletions(-) diff --git a/wally-pipelined/src/mmu/pmachecker.sv b/wally-pipelined/src/mmu/pmachecker.sv index 8d31f3ae..26d8ac87 100644 --- a/wally-pipelined/src/mmu/pmachecker.sv +++ b/wally-pipelined/src/mmu/pmachecker.sv @@ -32,9 +32,6 @@ module pmachecker ( input logic [`PA_BITS-1:0] PhysicalAddress, input logic [1:0] Size, -// input logic [31:0] HADDR, -// input logic [2:0] HSIZE, -// input logic [2:0] HBURST, // *** in AHBlite, HBURST is hardwired to zero for single bursts only allowed. consider removing from this module if unused. input logic AtomicAccessM, ExecuteAccessF, WriteAccessM, ReadAccessM, // *** atomicaccessM is unused but might want to stay in for future use. 
@@ -46,7 +43,6 @@ module pmachecker ( output logic PMAStoreAccessFaultM ); - // logic BootTim, Tim, CLINT, GPIO, UART, PLIC; logic PMAAccessFault; logic AccessRW, AccessRWX, AccessRX; logic [5:0] SelRegions; diff --git a/wally-pipelined/src/mmu/pmpadrdec.sv b/wally-pipelined/src/mmu/pmpadrdec.sv index 3286368f..bdd46bef 100644 --- a/wally-pipelined/src/mmu/pmpadrdec.sv +++ b/wally-pipelined/src/mmu/pmpadrdec.sv @@ -31,7 +31,6 @@ module pmpadrdec ( input logic [`PA_BITS-1:0] PhysicalAddress, -// input logic [31:0] HADDR, // *** replace with PAdr input logic [1:0] AdrMode, input logic [`XLEN-1:0] CurrentPMPAdr, input logic AdrAtLeastPreviousPMP, @@ -46,20 +45,19 @@ module pmpadrdec ( logic TORMatch, NAMatch; logic AdrBelowCurrentPMP; logic [`PA_BITS-1:0] CurrentAdrFull; -// logic [`PA_BITS-1:0] FakePhysAdr; - // ***replace this when the true physical address from MMU is available -// assign FakePhysAdr = {{(`PA_BITS-32){1'b0}}, HADDR}; + // The two lsb of the physical address don't matter for this checking. + // The following code includes them, but hardwires the PMP checker lsbs to 00 + // and masks them later. Logic synthesis should optimize away these bottom bits. 
// Top-of-range (TOR) // Append two implicit trailing 0's to PMPAdr value assign CurrentAdrFull = {CurrentPMPAdr[`PA_BITS-3:0], 2'b00}; - assign AdrBelowCurrentPMP = PhysicalAddress < CurrentAdrFull; // *** make sure unsigned comparison works correctly + assign AdrBelowCurrentPMP = {1'b0, PhysicalAddress} < {1'b0, CurrentAdrFull}; // unsigned comparison assign AdrAtLeastCurrentPMP = ~AdrBelowCurrentPMP; assign TORMatch = AdrAtLeastPreviousPMP && AdrBelowCurrentPMP; // Naturally aligned regions - // *** should be able to optimize away bottom 2 bits // verilator lint_off UNOPTFLAT logic [`PA_BITS-1:0] Mask; @@ -76,60 +74,6 @@ module pmpadrdec ( assign NAMatch = &((PhysicalAddress ~^ CurrentAdrFull) | Mask); - /* generate - if (`XLEN == 32 || `XLEN == 64) begin // ***redo for various sizes - // priority encoder to translate address to range - // *** We'd like to replace this with a better priority encoder - // *** We should not be truncating 64 bit physical addresses to 32 bits... - // *** there is an easy combinatinoal way to do this with a cascade of AND gates O(32) rather than O(32^2) dh - always_comb - if (AdrMode == NA4) Range = (2**2) - 1; - else casez (CurrentPMPAdr[31:0]) // NAPOT regions - 32'b???????????????????????????????0: Range = (2**3) - 1; - 32'b??????????????????????????????01: Range = (2**4) - 1; - 32'b?????????????????????????????011: Range = (2**5) - 1; - 32'b????????????????????????????0111: Range = (2**6) - 1; - 32'b???????????????????????????01111: Range = (2**7) - 1; - 32'b??????????????????????????011111: Range = (2**8) - 1; - 32'b?????????????????????????0111111: Range = (2**9) - 1; - 32'b????????????????????????01111111: Range = (2**10) - 1; - 32'b???????????????????????011111111: Range = (2**11) - 1; - 32'b??????????????????????0111111111: Range = (2**12) - 1; - 32'b?????????????????????01111111111: Range = (2**13) - 1; - 32'b????????????????????011111111111: Range = (2**14) - 1; - 32'b???????????????????0111111111111: Range = (2**15) - 1; 
- 32'b??????????????????01111111111111: Range = (2**16) - 1; - 32'b?????????????????011111111111111: Range = (2**17) - 1; - 32'b????????????????0111111111111111: Range = (2**18) - 1; - 32'b???????????????01111111111111111: Range = (2**19) - 1; - 32'b??????????????011111111111111111: Range = (2**20) - 1; - 32'b?????????????0111111111111111111: Range = (2**21) - 1; - 32'b????????????01111111111111111111: Range = (2**22) - 1; - 32'b???????????011111111111111111111: Range = (2**23) - 1; - 32'b??????????0111111111111111111111: Range = (2**24) - 1; - 32'b?????????01111111111111111111111: Range = (2**25) - 1; - 32'b????????011111111111111111111111: Range = (2**26) - 1; - 32'b???????0111111111111111111111111: Range = (2**27) - 1; - 32'b??????01111111111111111111111111: Range = (2**28) - 1; - 32'b?????011111111111111111111111111: Range = (2**29) - 1; - 32'b????0111111111111111111111111111: Range = (2**30) - 1; - 32'b???01111111111111111111111111111: Range = (2**31) - 1; - 32'b??011111111111111111111111111111: Range = (2**32) - 1; - 32'b?0111111111111111111111111111111: Range = (2**33) - 1; - 32'b01111111111111111111111111111111: Range = (2**34) - 1; - 32'b11111111111111111111111111111111: Range = (2**35) - 1; - default: Range = '0; - endcase - end else begin - assign Range = '0; - end - endgenerate - - // *** Range should not be truncated... but our physical address space is - // currently only 32 bits wide. - // with a bit of combining of range selection, this could be shared with NA4Match *** - assign NAMatch = &((HADDR ~^ CurrentAdrFull) | Range[31:0]);*/ - assign Match = (AdrMode == TOR) ? TORMatch : (AdrMode == NA4 || AdrMode == NAPOT) ? 
NAMatch : 0; diff --git a/wally-pipelined/src/mmu/pmpchecker.sv b/wally-pipelined/src/mmu/pmpchecker.sv index 409e2ccb..0d45229a 100644 --- a/wally-pipelined/src/mmu/pmpchecker.sv +++ b/wally-pipelined/src/mmu/pmpchecker.sv @@ -29,12 +29,8 @@ `include "wally-config.vh" module pmpchecker ( -// input logic clk, reset, //*** it seems like clk, reset is also not needed here? input logic [`PA_BITS-1:0] PhysicalAddress, -// input logic [31:0] HADDR, - - input logic [1:0] PrivilegeModeW, - + input logic [1:0] PrivilegeModeW, // *** ModelSim has a switch -svinputport which controls whether input ports // are nets (wires) or vars by default. The default setting of this switch is @@ -43,10 +39,6 @@ module pmpchecker ( // this will be understood as a var. However, if we don't supply the `var` // keyword, the compiler warns us that it's interpreting the signal as a var, // which we might not intend. - // However, it's still bad form to pass 512 or 1024 signals across a module - // boundary. It would be better to store the PMP address registers in a module - // somewhere in the CSR hierarchy and do PMP checking _within_ that module, so - // we don't have to pass around 16 whole registers. 
input var logic [63:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES/8-1:0], input var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0], @@ -61,7 +53,6 @@ module pmpchecker ( // Bit i is high when the address falls in PMP region i logic [`PMP_ENTRIES-1:0] Regions, FirstMatch; - //logic [3:0] MatchedRegion; logic EnforcePMP; logic [7:0] PMPCFG [`PMP_ENTRIES-1:0]; @@ -74,7 +65,6 @@ module pmpchecker ( logic [`PMP_ENTRIES-1:0] ActiveRegion; logic [`PMP_ENTRIES-1:0] L_Bits, X_Bits, W_Bits, R_Bits; - //logic InvalidExecute, InvalidWrite, InvalidRead; genvar i,j; @@ -100,11 +90,8 @@ module pmpchecker ( end endgenerate - //assign Match = |Regions; - // verilator lint_off UNOPTFLAT logic [`PMP_ENTRIES-1:0] NoLowerMatch; -// assign NoLowerMatch[0] = 1; generate // verilator lint_off WIDTH for (j=0; j<`PMP_ENTRIES; j = j+8) begin @@ -127,36 +114,6 @@ module pmpchecker ( end // verilator lint_on UNOPTFLAT endgenerate -/* // *** extend to up to 64, fold bit extraction to avoid need for binary encoding of region - always_comb - casez (Regions) - 16'b???????????????1: MatchedRegion = 0; - 16'b??????????????10: MatchedRegion = 1; - 16'b?????????????100: MatchedRegion = 2; - 16'b????????????1000: MatchedRegion = 3; - 16'b???????????10000: MatchedRegion = 4; - 16'b??????????100000: MatchedRegion = 5; - 16'b?????????1000000: MatchedRegion = 6; - 16'b????????10000000: MatchedRegion = 7; - 16'b???????100000000: MatchedRegion = 8; - 16'b??????1000000000: MatchedRegion = 9; - 16'b?????10000000000: MatchedRegion = 10; - 16'b????100000000000: MatchedRegion = 11; - 16'b???1000000000000: MatchedRegion = 12; - 16'b??10000000000000: MatchedRegion = 13; - 16'b?100000000000000: MatchedRegion = 14; - 16'b1000000000000000: MatchedRegion = 15; - default: MatchedRegion = 0; // Should only occur if there is no match - endcase - - assign L_Bit = PMPCFG[MatchedRegion][7] && Match; - assign X_Bit = PMPCFG[MatchedRegion][2] && Match; - assign W_Bit = PMPCFG[MatchedRegion][1] && Match; - assign R_Bit = 
PMPCFG[MatchedRegion][0] && Match; - - assign InvalidExecute = ExecuteAccessF && ~X_Bit; - assign InvalidWrite = WriteAccessM && ~W_Bit; - assign InvalidRead = ReadAccessM && ~R_Bit;*/ // Only enforce PMP checking for S and U modes when at least one PMP is active or in Machine mode when L bit is set in selected region assign EnforcePMP = (PrivilegeModeW == `M_MODE) ? |L_Bits : |ActiveRegion; From 1fa4abf7b6c8ac60c7c491d042deba0b91890a66 Mon Sep 17 00:00:00 2001 From: David Harris Date: Sat, 3 Jul 2021 03:29:33 -0400 Subject: [PATCH 36/38] Changed IMMU ExecuteAccessF to 1 rather than InstrReadF to fix buildroot; simplified PMP checker --- wally-pipelined/src/ebu/ahblite.sv | 1 + wally-pipelined/src/ifu/ifu.sv | 2 +- wally-pipelined/src/mmu/pmpadrdec.sv | 40 ++++++++---- wally-pipelined/src/mmu/pmpchecker.sv | 87 +++++++++------------------ wally-pipelined/src/uncore/uncore.sv | 1 + 5 files changed, 61 insertions(+), 70 deletions(-) diff --git a/wally-pipelined/src/ebu/ahblite.sv b/wally-pipelined/src/ebu/ahblite.sv index 70f32bf7..84e8f3b6 100644 --- a/wally-pipelined/src/ebu/ahblite.sv +++ b/wally-pipelined/src/ebu/ahblite.sv @@ -146,6 +146,7 @@ module ahblite ( // (ProposedNextBusState == MMUTRANSLATE); // The PMA and PMP checkers can decide to squash the access + // *** this probably needs to be controlled by the caches rather than EBU dh 7/2/11 assign NextBusState = (DSquashBusAccessM || ISquashBusAccessF) ? IDLE : ProposedNextBusState; // stall signals diff --git a/wally-pipelined/src/ifu/ifu.sv b/wally-pipelined/src/ifu/ifu.sv index 04110d9a..ddfd88cc 100644 --- a/wally-pipelined/src/ifu/ifu.sv +++ b/wally-pipelined/src/ifu/ifu.sv @@ -127,7 +127,7 @@ module ifu ( .TLBMiss(ITLBMissF), .TLBHit(ITLBHitF), .TLBPageFault(ITLBInstrPageFaultF), - .ExecuteAccessF(InstrReadF), /// *** Ross Thompson this is definitely wrong. InstrReadF changed to icache read to memory. 
+ .ExecuteAccessF(1'b1), // ***dh -- this should eventually change to only true if an instruction fetch is occurring .AtomicAccessM(1'b0), .ReadAccessM(1'b0), .WriteAccessM(1'b0), diff --git a/wally-pipelined/src/mmu/pmpadrdec.sv b/wally-pipelined/src/mmu/pmpadrdec.sv index bdd46bef..50d399ae 100644 --- a/wally-pipelined/src/mmu/pmpadrdec.sv +++ b/wally-pipelined/src/mmu/pmpadrdec.sv @@ -31,20 +31,29 @@ module pmpadrdec ( input logic [`PA_BITS-1:0] PhysicalAddress, - input logic [1:0] AdrMode, - input logic [`XLEN-1:0] CurrentPMPAdr, - input logic AdrAtLeastPreviousPMP, - output logic AdrAtLeastCurrentPMP, - output logic Match + input logic [7:0] PMPCfg, + input logic [`XLEN-1:0] PMPAdr, + input logic PAgePMPAdrIn, + input logic NoLowerMatchIn, + output logic PAgePMPAdrOut, + output logic NoLowerMatchOut, + output logic Match, Active, + output logic L, X, W, R ); + localparam TOR = 2'b01; localparam NA4 = 2'b10; localparam NAPOT = 2'b11; logic TORMatch, NAMatch; - logic AdrBelowCurrentPMP; + logic PAltPMPAdr; + logic FirstMatch; logic [`PA_BITS-1:0] CurrentAdrFull; + logic [1:0] AdrMode; + + + assign AdrMode = PMPCfg[4:3]; // The two lsb of the physical address don't matter for this checking. 
// The following code includes them, but hardwires the PMP checker lsbs to 00 @@ -52,10 +61,10 @@ module pmpadrdec ( // Top-of-range (TOR) // Append two implicit trailing 0's to PMPAdr value - assign CurrentAdrFull = {CurrentPMPAdr[`PA_BITS-3:0], 2'b00}; - assign AdrBelowCurrentPMP = {1'b0, PhysicalAddress} < {1'b0, CurrentAdrFull}; // unsigned comparison - assign AdrAtLeastCurrentPMP = ~AdrBelowCurrentPMP; - assign TORMatch = AdrAtLeastPreviousPMP && AdrBelowCurrentPMP; + assign CurrentAdrFull = {PMPAdr[`PA_BITS-3:0], 2'b00}; + assign PAltPMPAdr = {1'b0, PhysicalAddress} < {1'b0, CurrentAdrFull}; // unsigned comparison + assign PAgePMPAdrOut = ~PAltPMPAdr; + assign TORMatch = PAgePMPAdrIn && PAltPMPAdr; // Naturally aligned regions @@ -68,7 +77,7 @@ module pmpadrdec ( assign Mask[1:0] = 2'b11; assign Mask[2] = (AdrMode == NAPOT); // mask has 0s in upper bis for NA4 region for (i=3; i < `PA_BITS; i=i+1) - assign Mask[i] = Mask[i-1] & CurrentPMPAdr[i-3]; // NAPOT mask: 1's indicate bits to ignore + assign Mask[i] = Mask[i-1] & PMPAdr[i-3]; // NAPOT mask: 1's indicate bits to ignore endgenerate // verilator lint_on UNOPTFLAT @@ -78,5 +87,12 @@ module pmpadrdec ( (AdrMode == NA4 || AdrMode == NAPOT) ? 
NAMatch : 0; -endmodule + assign FirstMatch = NoLowerMatchIn & Match; + assign NoLowerMatchOut = NoLowerMatchIn & ~Match; + assign L = PMPCfg[7] & FirstMatch; + assign X = PMPCfg[2] & FirstMatch; + assign W = PMPCfg[1] & FirstMatch; + assign R = PMPCfg[0] & FirstMatch; + assign Active = |PMPCfg[4:3]; + endmodule diff --git a/wally-pipelined/src/mmu/pmpchecker.sv b/wally-pipelined/src/mmu/pmpchecker.sv index 0d45229a..27c7e508 100644 --- a/wally-pipelined/src/mmu/pmpchecker.sv +++ b/wally-pipelined/src/mmu/pmpchecker.sv @@ -51,76 +51,49 @@ module pmpchecker ( output logic PMPStoreAccessFaultM ); + // verilator lint_off UNOPTFLAT + // Bit i is high when the address falls in PMP region i - logic [`PMP_ENTRIES-1:0] Regions, FirstMatch; - logic EnforcePMP; - - logic [7:0] PMPCFG [`PMP_ENTRIES-1:0]; - - // Bit i is high when the address is greater than or equal to PMPADR[i] - // Used for determining whether TOR PMP regions match - logic [`PMP_ENTRIES-1:0] AboveRegion; - - // Bit i is high if PMP register i is non-null - logic [`PMP_ENTRIES-1:0] ActiveRegion; - - logic [`PMP_ENTRIES-1:0] L_Bits, X_Bits, W_Bits, R_Bits; - + logic EnforcePMP; + logic [7:0] PMPCFG [`PMP_ENTRIES-1:0]; + logic [`PMP_ENTRIES-1:0] Match; // PMP Entry matches + logic [`PMP_ENTRIES-1:0] Active; // PMP register i is non-null + logic [`PMP_ENTRIES-1:0] L, X, W, R; // PMP matches and has flag set + logic [`PMP_ENTRIES:0] NoLowerMatch; // None of the lower PMP entries match + logic [`PMP_ENTRIES:0] PAgePMPAdr; // for TOR PMP matching, PhysicalAddress > PMPAdr[i] genvar i,j; - pmpadrdec pmpadrdec(.PhysicalAddress(PhysicalAddress), - .AdrMode(PMPCFG[0][4:3]), - .CurrentPMPAdr(PMPADDR_ARRAY_REGW[0]), - .AdrAtLeastPreviousPMP(1'b1), - .AdrAtLeastCurrentPMP(AboveRegion[0]), - .Match(Regions[0])); - - assign ActiveRegion[0] = |PMPCFG[0][4:3]; - - generate // *** only for PMP_ENTRIES > 0 - for (i = 1; i < `PMP_ENTRIES; i++) begin - pmpadrdec pmpadrdec(.PhysicalAddress(PhysicalAddress), - 
.AdrMode(PMPCFG[i][4:3]), - .CurrentPMPAdr(PMPADDR_ARRAY_REGW[i]), - .AdrAtLeastPreviousPMP(AboveRegion[i-1]), - .AdrAtLeastCurrentPMP(AboveRegion[i]), - .Match(Regions[i])); - - assign ActiveRegion[i] = |PMPCFG[i][4:3]; - end - endgenerate - - // verilator lint_off UNOPTFLAT - logic [`PMP_ENTRIES-1:0] NoLowerMatch; + assign PAgePMPAdr[0] = 1'b1; + assign NoLowerMatch[0] = 1'b1; + generate // verilator lint_off WIDTH - for (j=0; j<`PMP_ENTRIES; j = j+8) begin + for (j=0; j<`PMP_ENTRIES; j = j+8) assign {PMPCFG[j+7], PMPCFG[j+6], PMPCFG[j+5], PMPCFG[j+4], PMPCFG[j+3], PMPCFG[j+2], PMPCFG[j+1], PMPCFG[j]} = PMPCFG_ARRAY_REGW[j/8]; - end // verilator lint_on WIDTH - for (i=0; i<`PMP_ENTRIES; i++) begin - if (i==0) begin - assign FirstMatch[i] = Regions[i]; - assign NoLowerMatch[i] = ~Regions[i]; - end else begin - assign FirstMatch[i] = Regions[i] & NoLowerMatch[i]; - assign NoLowerMatch[i] = NoLowerMatch[i-1] & ~Regions[i]; - end - assign L_Bits[i] = PMPCFG[i][7] & FirstMatch[i]; - assign X_Bits[i] = PMPCFG[i][2] & FirstMatch[i]; - assign W_Bits[i] = PMPCFG[i][1] & FirstMatch[i]; - assign R_Bits[i] = PMPCFG[i][0] & FirstMatch[i]; - end + for (i=0; i<`PMP_ENTRIES; i++) + pmpadrdec pmpadrdec(.PhysicalAddress, + .PMPCfg(PMPCFG[i]), + .PMPAdr(PMPADDR_ARRAY_REGW[i]), + .PAgePMPAdrIn(PAgePMPAdr[i]), + .PAgePMPAdrOut(PAgePMPAdr[i+1]), + .NoLowerMatchIn(NoLowerMatch[i]), + .NoLowerMatchOut(NoLowerMatch[i+1]), + .Match(Match[i]), + .Active(Active[i]), + .L(L[i]), .X(X[i]), .W(W[i]), .R(R[i]) + ); + // verilator lint_on UNOPTFLAT endgenerate // Only enforce PMP checking for S and U modes when at least one PMP is active or in Machine mode when L bit is set in selected region - assign EnforcePMP = (PrivilegeModeW == `M_MODE) ? |L_Bits : |ActiveRegion; + assign EnforcePMP = (PrivilegeModeW == `M_MODE) ? 
|L : |Active; - assign PMPInstrAccessFaultF = EnforcePMP && ExecuteAccessF && ~|X_Bits; - assign PMPStoreAccessFaultM = EnforcePMP && WriteAccessM && ~|W_Bits; - assign PMPLoadAccessFaultM = EnforcePMP && ReadAccessM && ~|R_Bits; + assign PMPInstrAccessFaultF = EnforcePMP && ExecuteAccessF && ~|X; + assign PMPStoreAccessFaultM = EnforcePMP && WriteAccessM && ~|W; + assign PMPLoadAccessFaultM = EnforcePMP && ReadAccessM && ~|R; assign PMPSquashBusAccess = PMPInstrAccessFaultF | PMPLoadAccessFaultM | PMPStoreAccessFaultM; diff --git a/wally-pipelined/src/uncore/uncore.sv b/wally-pipelined/src/uncore/uncore.sv index 91aee66b..79f7a0e8 100644 --- a/wally-pipelined/src/uncore/uncore.sv +++ b/wally-pipelined/src/uncore/uncore.sv @@ -74,6 +74,7 @@ module uncore ( // Determine which region of physical memory (if any) is being accessed // Use a trimmed down portion of the PMA checker - only the address decoders + // Set access types to all 1 as don't cares because the MMU has already done access checking adrdecs adrdecs({{(`PA_BITS-32){1'b0}}, HADDR}, 1'b1, 1'b1, 1'b1, HSIZE[1:0], HSELRegions); // unswizzle HSEL signals From eff5a1b90f0ce153b48278ced705de3110e0d67a Mon Sep 17 00:00:00 2001 From: Ben Bracker Date: Sat, 3 Jul 2021 11:11:07 -0500 Subject: [PATCH 37/38] fix ICache indenting --- .../regression/wave-dos/linux-waves.do | 10 +- wally-pipelined/src/cache/ICacheCntrl.sv | 240 +++++++++--------- 2 files changed, 123 insertions(+), 127 deletions(-) diff --git a/wally-pipelined/regression/wave-dos/linux-waves.do b/wally-pipelined/regression/wave-dos/linux-waves.do index 63623891..a891c206 100644 --- a/wally-pipelined/regression/wave-dos/linux-waves.do +++ b/wally-pipelined/regression/wave-dos/linux-waves.do @@ -122,11 +122,11 @@ add wave -hex sim:/testbench/dut/hart/priv/csr/genblk1/csrn/UEPC_REGW add wave -hex sim:/testbench/dut/hart/priv/csr/genblk1/csrn/UTVEC_REGW add wave -hex sim:/testbench/dut/hart/priv/csr/genblk1/csrn/UIP_REGW add wave -hex 
sim:/testbench/dut/hart/priv/csr/genblk1/csrn/UIE_REGW -add wave -hex sim:/testbench/dut/hart/priv/csr/genblk1/csrm/PMPCFG01_REGW -add wave -hex sim:/testbench/dut/hart/priv/csr/genblk1/csrm/PMPCFG23_REGW -add wave -hex sim:/testbench/dut/hart/priv/csr/genblk1/csrm/PMPADDR_ARRAY_REGW -add wave -hex sim:/testbench/dut/hart/priv/csr/genblk1/csrm/MISA_REGW -add wave -hex sim:/testbench/dut/hart/priv/csr/genblk1/csru/FRM_REGW +#add wave -hex sim:/testbench/dut/hart/priv/csr/genblk1/csrm/PMPCFG01_REGW +#add wave -hex sim:/testbench/dut/hart/priv/csr/genblk1/csrm/PMPCFG23_REGW +#add wave -hex sim:/testbench/dut/hart/priv/csr/genblk1/csrm/PMPADDR_ARRAY_REGW +#add wave -hex sim:/testbench/dut/hart/priv/csr/genblk1/csrm/MISA_REGW +#add wave -hex sim:/testbench/dut/hart/priv/csr/genblk1/csru/FRM_REGW add wave -divider add wave -hex -r /testbench/* diff --git a/wally-pipelined/src/cache/ICacheCntrl.sv b/wally-pipelined/src/cache/ICacheCntrl.sv index ea52130c..b969437e 100644 --- a/wally-pipelined/src/cache/ICacheCntrl.sv +++ b/wally-pipelined/src/cache/ICacheCntrl.sv @@ -213,179 +213,175 @@ module ICacheCntrl #(parameter BLOCKLEN = 256) ICacheStallF = 1'b1; case (CurrState) - STATE_READY: begin - PCMux = 2'b00; - ICacheReadEn = 1'b1; - if (ITLBMissF) begin - NextState = STATE_TLB_MISS; - end else if (hit & ~spill) begin - SavePC = 1'b1; - ICacheStallF = 1'b0; - NextState = STATE_READY; - end else if (hit & spill) begin - spillSave = 1'b1; - PCMux = 2'b10; - NextState = STATE_HIT_SPILL; - end else if (~hit & ~spill) begin - CntReset = 1'b1; - NextState = STATE_MISS_FETCH_WDV; - end else if (~hit & spill) begin - CntReset = 1'b1; - PCMux = 2'b01; - NextState = STATE_MISS_SPILL_FETCH_WDV; - end else begin + PCMux = 2'b00; + ICacheReadEn = 1'b1; + if (ITLBMissF) begin + NextState = STATE_TLB_MISS; + end else if (hit & ~spill) begin + SavePC = 1'b1; + ICacheStallF = 1'b0; NextState = STATE_READY; - end + end else if (hit & spill) begin + spillSave = 1'b1; + PCMux = 2'b10; + 
NextState = STATE_HIT_SPILL; + end else if (~hit & ~spill) begin + CntReset = 1'b1; + NextState = STATE_MISS_FETCH_WDV; + end else if (~hit & spill) begin + CntReset = 1'b1; + PCMux = 2'b01; + NextState = STATE_MISS_SPILL_FETCH_WDV; + end else begin + NextState = STATE_READY; + end end - // branch 1, hit spill and 2, miss spill hit STATE_HIT_SPILL: begin - PCMux = 2'b10; - UnalignedSelect = 1'b1; - ICacheReadEn = 1'b1; - if (hit) begin + PCMux = 2'b10; + UnalignedSelect = 1'b1; + ICacheReadEn = 1'b1; + if (hit) begin NextState = STATE_HIT_SPILL_FINAL; - end else begin - CntReset = 1'b1; + end else begin + CntReset = 1'b1; NextState = STATE_HIT_SPILL_MISS_FETCH_WDV; - end + end end STATE_HIT_SPILL_MISS_FETCH_WDV: begin - PCMux = 2'b10; - //InstrReadF = 1'b1; - PreCntEn = 1'b1; - if (FetchCountFlag & InstrAckF) begin - NextState = STATE_HIT_SPILL_MISS_FETCH_DONE; - end else begin - NextState = STATE_HIT_SPILL_MISS_FETCH_WDV; - end + PCMux = 2'b10; + //InstrReadF = 1'b1; + PreCntEn = 1'b1; + if (FetchCountFlag & InstrAckF) begin + NextState = STATE_HIT_SPILL_MISS_FETCH_DONE; + end else begin + NextState = STATE_HIT_SPILL_MISS_FETCH_WDV; + end end STATE_HIT_SPILL_MISS_FETCH_DONE: begin - PCMux = 2'b10; - ICacheMemWriteEnable = 1'b1; + PCMux = 2'b10; + ICacheMemWriteEnable = 1'b1; NextState = STATE_HIT_SPILL_MERGE; end STATE_HIT_SPILL_MERGE: begin - PCMux = 2'b10; - UnalignedSelect = 1'b1; - ICacheReadEn = 1'b1; + PCMux = 2'b10; + UnalignedSelect = 1'b1; + ICacheReadEn = 1'b1; NextState = STATE_HIT_SPILL_FINAL; end STATE_HIT_SPILL_FINAL: begin - ICacheReadEn = 1'b1; - PCMux = 2'b00; - UnalignedSelect = 1'b1; - SavePC = 1'b1; - NextState = STATE_READY; - ICacheStallF = 1'b0; + ICacheReadEn = 1'b1; + PCMux = 2'b00; + UnalignedSelect = 1'b1; + SavePC = 1'b1; + NextState = STATE_READY; + ICacheStallF = 1'b0; end - // branch 3 miss no spill STATE_MISS_FETCH_WDV: begin - PCMux = 2'b01; - //InstrReadF = 1'b1; - PreCntEn = 1'b1; - if (FetchCountFlag & InstrAckF) begin - 
NextState = STATE_MISS_FETCH_DONE; - end else begin - NextState = STATE_MISS_FETCH_WDV; - end + PCMux = 2'b01; + //InstrReadF = 1'b1; + PreCntEn = 1'b1; + if (FetchCountFlag & InstrAckF) begin + NextState = STATE_MISS_FETCH_DONE; + end else begin + NextState = STATE_MISS_FETCH_WDV; + end end STATE_MISS_FETCH_DONE: begin - PCMux = 2'b01; - ICacheMemWriteEnable = 1'b1; + PCMux = 2'b01; + ICacheMemWriteEnable = 1'b1; NextState = STATE_MISS_READ; end STATE_MISS_READ: begin - PCMux = 2'b01; - ICacheReadEn = 1'b1; - NextState = STATE_READY; + PCMux = 2'b01; + ICacheReadEn = 1'b1; + NextState = STATE_READY; end - // branch 4 miss spill hit, and 5 miss spill miss STATE_MISS_SPILL_FETCH_WDV: begin - PCMux = 2'b01; - PreCntEn = 1'b1; - //InstrReadF = 1'b1; - if (FetchCountFlag & InstrAckF) begin - NextState = STATE_MISS_SPILL_FETCH_DONE; - end else begin - NextState = STATE_MISS_SPILL_FETCH_WDV; - end + PCMux = 2'b01; + PreCntEn = 1'b1; + //InstrReadF = 1'b1; + if (FetchCountFlag & InstrAckF) begin + NextState = STATE_MISS_SPILL_FETCH_DONE; + end else begin + NextState = STATE_MISS_SPILL_FETCH_WDV; + end end STATE_MISS_SPILL_FETCH_DONE: begin - PCMux = 2'b01; - ICacheMemWriteEnable = 1'b1; - NextState = STATE_MISS_SPILL_READ1; + PCMux = 2'b01; + ICacheMemWriteEnable = 1'b1; + NextState = STATE_MISS_SPILL_READ1; end STATE_MISS_SPILL_READ1: begin // always be a hit as we just wrote that cache block. - PCMux = 2'b01; // there is a 1 cycle delay after setting the address before the date arrives. - ICacheReadEn = 1'b1; - NextState = STATE_MISS_SPILL_2; + PCMux = 2'b01; // there is a 1 cycle delay after setting the address before the date arrives. + ICacheReadEn = 1'b1; + NextState = STATE_MISS_SPILL_2; end STATE_MISS_SPILL_2: begin - PCMux = 2'b10; - UnalignedSelect = 1'b1; - spillSave = 1'b1; /// *** Could pipeline these to make it clearer in the fsm. 
- ICacheReadEn = 1'b1; - NextState = STATE_MISS_SPILL_2_START; + PCMux = 2'b10; + UnalignedSelect = 1'b1; + spillSave = 1'b1; /// *** Could pipeline these to make it clearer in the fsm. + ICacheReadEn = 1'b1; + NextState = STATE_MISS_SPILL_2_START; end STATE_MISS_SPILL_2_START: begin - if (~hit) begin - CntReset = 1'b1; - NextState = STATE_MISS_SPILL_MISS_FETCH_WDV; - end else begin - NextState = STATE_READY; - ICacheReadEn = 1'b1; - PCMux = 2'b00; - UnalignedSelect = 1'b1; - SavePC = 1'b1; - ICacheStallF = 1'b0; - end + if (~hit) begin + CntReset = 1'b1; + NextState = STATE_MISS_SPILL_MISS_FETCH_WDV; + end else begin + NextState = STATE_READY; + ICacheReadEn = 1'b1; + PCMux = 2'b00; + UnalignedSelect = 1'b1; + SavePC = 1'b1; + ICacheStallF = 1'b0; + end end STATE_MISS_SPILL_MISS_FETCH_WDV: begin - PCMux = 2'b10; - PreCntEn = 1'b1; - //InstrReadF = 1'b1; - if (FetchCountFlag & InstrAckF) begin - NextState = STATE_MISS_SPILL_MISS_FETCH_DONE; - end else begin - NextState = STATE_MISS_SPILL_MISS_FETCH_WDV; - end + PCMux = 2'b10; + PreCntEn = 1'b1; + //InstrReadF = 1'b1; + if (FetchCountFlag & InstrAckF) begin + NextState = STATE_MISS_SPILL_MISS_FETCH_DONE; + end else begin + NextState = STATE_MISS_SPILL_MISS_FETCH_WDV; + end end STATE_MISS_SPILL_MISS_FETCH_DONE: begin - PCMux = 2'b10; - ICacheMemWriteEnable = 1'b1; - NextState = STATE_MISS_SPILL_MERGE; + PCMux = 2'b10; + ICacheMemWriteEnable = 1'b1; + NextState = STATE_MISS_SPILL_MERGE; end STATE_MISS_SPILL_MERGE: begin - PCMux = 2'b10; - UnalignedSelect = 1'b1; - ICacheReadEn = 1'b1; + PCMux = 2'b10; + UnalignedSelect = 1'b1; + ICacheReadEn = 1'b1; NextState = STATE_MISS_SPILL_FINAL; end STATE_MISS_SPILL_FINAL: begin - ICacheReadEn = 1'b1; - PCMux = 2'b00; - UnalignedSelect = 1'b1; - SavePC = 1'b1; - ICacheStallF = 1'b0; - NextState = STATE_READY; + ICacheReadEn = 1'b1; + PCMux = 2'b00; + UnalignedSelect = 1'b1; + SavePC = 1'b1; + ICacheStallF = 1'b0; + NextState = STATE_READY; end STATE_TLB_MISS: begin - if 
(ITLBWriteF | WalkerInstrPageFaultF) begin - NextState = STATE_TLB_MISS_DONE; - end else begin - NextState = STATE_TLB_MISS; - end + if (ITLBWriteF | WalkerInstrPageFaultF) begin + NextState = STATE_TLB_MISS_DONE; + end else begin + NextState = STATE_TLB_MISS; + end end STATE_TLB_MISS_DONE : begin - NextState = STATE_READY; + NextState = STATE_READY; end default: begin - PCMux = 2'b01; - NextState = STATE_READY; + PCMux = 2'b01; + NextState = STATE_READY; end // *** add in error handling and invalidate/evict endcase From d8facacef65568268dd26c4756f97b1fb734ea35 Mon Sep 17 00:00:00 2001 From: Ben Bracker Date: Sat, 3 Jul 2021 11:24:41 -0500 Subject: [PATCH 38/38] src/cache/ICacheCntrl.sv --- wally-pipelined/src/cache/ICacheCntrl.sv | 42 ++++++++++++------------ 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/wally-pipelined/src/cache/ICacheCntrl.sv b/wally-pipelined/src/cache/ICacheCntrl.sv index b969437e..748b3f5e 100644 --- a/wally-pipelined/src/cache/ICacheCntrl.sv +++ b/wally-pipelined/src/cache/ICacheCntrl.sv @@ -71,11 +71,11 @@ module ICacheCntrl #(parameter BLOCKLEN = 256) ); // FSM states - localparam STATE_READY = 0; - localparam STATE_HIT_SPILL = 1; // spill, block 0 hit - localparam STATE_HIT_SPILL_MISS_FETCH_WDV = 2; // block 1 miss, issue read to AHB and wait data. - localparam STATE_HIT_SPILL_MISS_FETCH_DONE = 3; // write data into SRAM/LUT - localparam STATE_HIT_SPILL_MERGE = 4; // Read block 0 of CPU access, should be able to optimize into STATE_HIT_SPILL. + localparam STATE_READY = 'h0; + localparam STATE_HIT_SPILL = 'h1; // spill, block 0 hit + localparam STATE_HIT_SPILL_MISS_FETCH_WDV = 'h2; // block 1 miss, issue read to AHB and wait data. + localparam STATE_HIT_SPILL_MISS_FETCH_DONE = 'h3; // write data into SRAM/LUT + localparam STATE_HIT_SPILL_MERGE = 'h4; // Read block 0 of CPU access, should be able to optimize into STATE_HIT_SPILL. 
// a challenge is the spill signal gets us out of the ready state and moves us to // 1 of the 2 spill branches. However the original fsm design had us return to @@ -91,30 +91,30 @@ module ICacheCntrl #(parameter BLOCKLEN = 256) // between CPU stalling and that register. // Picking option 1. - localparam STATE_HIT_SPILL_FINAL = 5; // this state replicates STATE_READY's replay of the + localparam STATE_HIT_SPILL_FINAL = 'h5; // this state replicates STATE_READY's replay of the // spill access but does nto consider spill. It also does not do another operation. - localparam STATE_MISS_FETCH_WDV = 6; // aligned miss, issue read to AHB and wait for data. - localparam STATE_MISS_FETCH_DONE = 7; // write data into SRAM/LUT - localparam STATE_MISS_READ = 8; // read block 1 from SRAM/LUT + localparam STATE_MISS_FETCH_WDV = 'h6; // aligned miss, issue read to AHB and wait for data. + localparam STATE_MISS_FETCH_DONE = 'h7; // write data into SRAM/LUT + localparam STATE_MISS_READ = 'h8; // read block 1 from SRAM/LUT - localparam STATE_MISS_SPILL_FETCH_WDV = 9; // spill, miss on block 0, issue read to AHB and wait - localparam STATE_MISS_SPILL_FETCH_DONE = 10; // write data into SRAM/LUT - localparam STATE_MISS_SPILL_READ1 = 11; // read block 0 from SRAM/LUT - localparam STATE_MISS_SPILL_2 = 12; // return to ready if hit or do second block update. - localparam STATE_MISS_SPILL_2_START = 13; // return to ready if hit or do second block update. 
- localparam STATE_MISS_SPILL_MISS_FETCH_WDV = 14; // miss on block 1, issue read to AHB and wait - localparam STATE_MISS_SPILL_MISS_FETCH_DONE = 15; // write data to SRAM/LUT - localparam STATE_MISS_SPILL_MERGE = 16; // read block 0 of CPU access, + localparam STATE_MISS_SPILL_FETCH_WDV = 'h9; // spill, miss on block 0, issue read to AHB and wait + localparam STATE_MISS_SPILL_FETCH_DONE = 'ha; // write data into SRAM/LUT + localparam STATE_MISS_SPILL_READ1 = 'hb; // read block 0 from SRAM/LUT + localparam STATE_MISS_SPILL_2 = 'hc; // return to ready if hit or do second block update. + localparam STATE_MISS_SPILL_2_START = 'hd; // return to ready if hit or do second block update. + localparam STATE_MISS_SPILL_MISS_FETCH_WDV = 'he; // miss on block 1, issue read to AHB and wait + localparam STATE_MISS_SPILL_MISS_FETCH_DONE = 'hf; // write data to SRAM/LUT + localparam STATE_MISS_SPILL_MERGE = 'h10; // read block 0 of CPU access, - localparam STATE_MISS_SPILL_FINAL = 17; // this state replicates STATE_READY's replay of the + localparam STATE_MISS_SPILL_FINAL = 'h11; // this state replicates STATE_READY's replay of the // spill access but does nto consider spill. It also does not do another operation. - localparam STATE_INVALIDATE = 18; // *** not sure if invalidate or evict? invalidate by cache block or address? - localparam STATE_TLB_MISS = 19; - localparam STATE_TLB_MISS_DONE = 20; + localparam STATE_INVALIDATE = 'h12; // *** not sure if invalidate or evict? invalidate by cache block or address? + localparam STATE_TLB_MISS = 'h13; + localparam STATE_TLB_MISS_DONE = 'h14;