From 36be692c0bde99b240164d1fea9e9fd512cd6053 Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Mon, 22 Aug 2022 17:16:12 +0000 Subject: [PATCH 01/24] sqrt passes - lint warnings remain --- pipelined/config/shared/wally-shared.vh | 2 +- pipelined/src/fpu/divsqrt.sv | 9 +++++++-- pipelined/src/fpu/qsel.sv | 20 ++++++++++---------- pipelined/src/fpu/srt.sv | 15 +++++++++++++-- pipelined/src/fpu/srtfsm.sv | 17 +++++++++++++---- 5 files changed, 44 insertions(+), 19 deletions(-) diff --git a/pipelined/config/shared/wally-shared.vh b/pipelined/config/shared/wally-shared.vh index 1f05a4f13..e40506c7a 100644 --- a/pipelined/config/shared/wally-shared.vh +++ b/pipelined/config/shared/wally-shared.vh @@ -102,7 +102,7 @@ // division constants `define RADIX 32'h2 -`define DIVCOPIES 32'h1 +`define DIVCOPIES 32'h2 `define DIVLEN ((`NF < `XLEN) ? (`XLEN) : (`NF + 3)) // `define DIVN (`NF < `XLEN ? `XLEN : `NF+1) // length of input `define DIVN (`NF < `XLEN ? `XLEN : `NF+3) // length of input diff --git a/pipelined/src/fpu/divsqrt.sv b/pipelined/src/fpu/divsqrt.sv index 7a9a2ae83..e76d86451 100644 --- a/pipelined/src/fpu/divsqrt.sv +++ b/pipelined/src/fpu/divsqrt.sv @@ -59,13 +59,18 @@ module divsqrt( logic [`DIVb:0] X; logic [`DIVN-2:0] D; // U0.N-1 logic [`DIVN-2:0] Dpreproc; + logic [`DIVb:0] LastSM; + logic [`DIVb-1:0] LastC; + logic [`DIVb:0] FirstSM; + logic [`DIVb-1:0] FirstC; logic [`DURLEN-1:0] Dur; logic NegSticky; + logic [`DIVCOPIES-1:0] qn; srtpreproc srtpreproc(.clk, .DivStart(DivStartE), .Xm(XmE), .QeM, .Xe(XeE), .Fmt(FmtE), .Ye(YeE), .Sqrt(SqrtE), .Dur, .Ym(YmE), .XZero(XZeroE), .X, .Dpreproc); - srtfsm srtfsm(.reset, .D, .XsE, .SqrtE, .SqrtM, .NextWSN, .NextWCN, .WS, .WC, .Dur, .DivBusy, .clk, .DivStart(DivStartE),.StallE, .StallM, .DivDone, .XZeroE, .YZeroE, .DivSE(DivSM), .XNaNE, .YNaNE, + srtfsm srtfsm(.reset, .qn, .LastSM, .LastC, .FirstSM, .FirstC, .D, .XsE, .SqrtE, .SqrtM, .NextWSN, .NextWCN, .WS, .WC, .Dur, .DivBusy, .clk, .DivStart(DivStartE),.StallE, 
.StallM, .DivDone, .XZeroE, .YZeroE, .DivSE(DivSM), .XNaNE, .YNaNE, .StickyWSA, .XInfE, .YInfE, .NegSticky(NegSticky), .EarlyTermShiftE(EarlyTermShiftM)); - srt srt(.clk, .D, .SqrtE, .SqrtM, .X,.Dpreproc, .NegSticky, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN, .DivStart(DivStartE), .Xe(XeE), .Ye(YeE), .XZeroE, .YZeroE, + srt srt(.clk, .qn, .D, .LastSM, .LastC, .FirstSM, .FirstC, .SqrtE, .SqrtM, .X,.Dpreproc, .NegSticky, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN, .DivStart(DivStartE), .Xe(XeE), .Ye(YeE), .XZeroE, .YZeroE, .StickyWSA, .DivBusy, .Qm(QmM)); endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/qsel.sv b/pipelined/src/fpu/qsel.sv index 897d81697..68478e89c 100644 --- a/pipelined/src/fpu/qsel.sv +++ b/pipelined/src/fpu/qsel.sv @@ -32,7 +32,7 @@ module qsel2 ( // *** eventually just change to 4 bits input logic [3:0] ps, pc, - output logic qp, qz//, qn + output logic qp, qz, qn ); logic [3:0] p, g; @@ -46,20 +46,20 @@ module qsel2 ( // *** eventually just change to 4 bits assign p = ps ^ pc; assign g = ps & pc; - assign magnitude = ~(&p[2:0]); + //assign magnitude = ~(&p[2:0]); assign cout = g[2] | (p[2] & (g[1] | p[1] & g[0])); - assign sign = p[3] ^ cout; -/* assign #1 magnitude = ~((ps[54]^pc[54]) & (ps[53]^pc[53]) & - (ps[52]^pc[52])); - assign #1 sign = (ps[55]^pc[55])^ - (ps[54] & pc[54] | ((ps[54]^pc[54]) & - (ps[53]&pc[53] | ((ps[53]^pc[53]) & - (ps[52]&pc[52]))))); */ + //assign sign = p[3] ^ cout; + assign magnitude = ~((ps[2]^pc[2]) & (ps[1]^pc[1]) & + (ps[0]^pc[0])); + assign sign = (ps[3]^pc[3])^ + (ps[2] & pc[2] | ((ps[2]^pc[2]) & + (ps[1]&pc[1] | ((ps[1]^pc[1]) & + (ps[0]&pc[0]))))); // Produce quotient = +1, 0, or -1 assign qp = magnitude & ~sign; assign qz = ~magnitude; -// assign #1 qn = magnitude & sign; + assign qn = magnitude & sign; endmodule //////////////////////////////////// diff --git a/pipelined/src/fpu/srt.sv b/pipelined/src/fpu/srt.sv index 27519c95f..636552f06 100644 --- a/pipelined/src/fpu/srt.sv +++ 
b/pipelined/src/fpu/srt.sv @@ -45,6 +45,11 @@ module srt( output logic [`DIVN-2:0] D, // U0.N-1 output logic [`DIVb+3:0] NextWSN, NextWCN, output logic [`DIVb+3:0] StickyWSA, + output logic [`DIVb:0] LastSM, + output logic [`DIVb-1:0] LastC, + output logic [`DIVb:0] FirstSM, + output logic [`DIVb-1:0] FirstC, + output logic [`DIVCOPIES-1:0] qn, output logic [`DIVb+3:0] FirstWS, FirstWC ); @@ -119,7 +124,7 @@ module srt( for(i=0; $unsigned(i)<`DIVCOPIES; i++) begin : interations divinteration divinteration(.D, .DBar, .D2, .DBar2, .SqrtM, .WS(WS[i]), .WC(WC[i]), .WSA(WSA[i]), .WCA(WCA[i]), .Q(Q[i]), .QM(QM[i]), .QNext(QNext[i]), .QMNext(QMNext[i]), - .C(C[i]), .S(S[i]), .SM(SM[i]), .SNext(SNext[i]), .SMNext(SMNext[i])); + .C(C[i]), .S(S[i]), .SM(SM[i]), .SNext(SNext[i]), .SMNext(SMNext[i]), .qn(qn[i])); if(i<(`DIVCOPIES-1)) begin if (`RADIX==2)begin assign WS[i+1] = {WSA[i][`DIVb+2:0], 1'b0}; @@ -159,6 +164,11 @@ module srt( assign FirstWS = WS[0]; assign FirstWC = WC[0]; + assign LastSM = SM[`DIVCOPIES-1]; + assign LastC = C[`DIVCOPIES-1]; + assign FirstSM = SM[0]; + assign FirstC = C[0]; + if(`RADIX==2) if (`DIVCOPIES == 1) assign StickyWSA = {WSA[0][`DIVb+2:0], 1'b0}; @@ -182,6 +192,7 @@ module divinteration ( input logic [`DIVb-1:0] C, input logic SqrtM, output logic [`DIVb:0] QNext, QMNext, + output logic qn, output logic [`DIVb:0] SNext, SMNext, output logic [`DIVb+3:0] WSA, WCA ); @@ -202,7 +213,7 @@ module divinteration ( // 0010 = -1 // 0001 = -2 if(`RADIX == 2) begin : qsel - qsel2 qsel2(WS[`DIVb+3:`DIVb], WC[`DIVb+3:`DIVb], qp, qz); + qsel2 qsel2(WS[`DIVb+3:`DIVb], WC[`DIVb+3:`DIVb], qp, qz, qn); fgen2 fgen2(.sp(qp), .sz(qz), .C, .S, .SM, .F); end else begin qsel4 qsel4(.D, .WS, .WC, .Sqrt(SqrtM), .q); diff --git a/pipelined/src/fpu/srtfsm.sv b/pipelined/src/fpu/srtfsm.sv index ebfc653d7..304a219c6 100644 --- a/pipelined/src/fpu/srtfsm.sv +++ b/pipelined/src/fpu/srtfsm.sv @@ -46,6 +46,11 @@ module srtfsm( input logic [`DIVN-2:0] D, // U0.N-1 input logic 
[`DIVb+3:0] StickyWSA, input logic [`DURLEN-1:0] Dur, + input logic [`DIVb:0] LastSM, + input logic [`DIVb:0] FirstSM, + input logic [`DIVb-1:0] LastC, + input logic [`DIVb-1:0] FirstC, + input logic [`DIVCOPIES-1:0] qn, output logic [`DURLEN-1:0] EarlyTermShiftE, output logic DivSE, output logic DivDone, @@ -67,11 +72,15 @@ module srtfsm( // this is only a problem on radix 2 (and pssibly maximally redundant 4) since minimally redundant // radix-4 division can't create a QM that continually adds 0's if (`RADIX == 2) begin - logic [`DIVb+3:0] FNext; - assign FNext = SqrtM ? 0 : {3'b1,D,{`DIVb-`DIVN+2{1'b0}}}; + logic [`DIVb+3:0] FZero, FSticky; + logic [`DIVb+3:0] LastK, FirstK; + assign LastK = ({4'b1111, LastC} & ~({4'b1111, LastC} << 1)); + assign FirstK = ({4'b1111, FirstC<<1} & ~({4'b1111, FirstC<<1} << 1)); + assign FZero = SqrtM ? {{2{LastSM[`DIVb]}}, LastSM, 2'b0} | {LastK,1'b0} : {4'b1,D,{`DIVb-`DIVN+2{1'b0}}}; + assign FSticky = SqrtM ? {FirstSM, 2'b0} | {FirstK,1'b0} : {4'b1,D,{`DIVb-`DIVN+2{1'b0}}}; // *** |... 
for continual -1 is not efficent fix - also only needed for radix-2 - assign WZero = ((NextWSN^NextWCN)=={NextWSN[`DIVb+2:0]|NextWCN[`DIVb+2:0], 1'b0})|((NextWSN+NextWCN+FNext)==0); - assign DivSE = |W&~((W+FNext)==0); //***not efficent fix == + assign WZero = ((NextWSN^NextWCN)=={NextWSN[`DIVb+2:0]|NextWCN[`DIVb+2:0], 1'b0})|(((NextWSN+NextWCN+FZero)==0)&qn[`DIVCOPIES-1]); + assign DivSE = |W&~((W+FSticky)==0); //***not efficent fix == and need the & qn end else begin assign WZero = ((NextWSN^NextWCN)=={NextWSN[`DIVb+2:0]|NextWCN[`DIVb+2:0], 1'b0}); assign DivSE = |W; From d556adde166997c07df02c116f695f697c0d661a Mon Sep 17 00:00:00 2001 From: David Harris Date: Mon, 22 Aug 2022 13:28:51 -0700 Subject: [PATCH 02/24] Simplified FPU-LSU interface to skip IEU --- pipelined/src/fpu/fpu.sv | 31 +++++++++++++------------------ pipelined/src/hazard/hazard.sv | 2 ++ pipelined/src/ieu/datapath.sv | 15 ++++----------- 3 files changed, 19 insertions(+), 29 deletions(-) diff --git a/pipelined/src/fpu/fpu.sv b/pipelined/src/fpu/fpu.sv index 3e5485acb..4da668934 100755 --- a/pipelined/src/fpu/fpu.sv +++ b/pipelined/src/fpu/fpu.sv @@ -42,10 +42,9 @@ module fpu ( input logic [1:0] STATUS_FS, // Is floating-point enabled? (From privileged unit) output logic FRegWriteM, // FP register write enable (to privileged unit) output logic FpLoadStoreM, // Fp load instruction? 
(to LSU) - output logic FStore2, // store two words into memory (to LSU) output logic FStallD, // Stall the decode stage (To HZU) output logic FWriteIntE, // integer register write enable (to IEU) - output logic [`XLEN-1:0] FWriteDataE, // Data to be written to memory (to IEU) - only used if `XLEN >`FLEN + output logic [`XLEN-1:0] FWriteDataE, // Data to be written to memory (to IEU) - only used if `XLEN >`FLEN *** delete this output logic [`FLEN-1:0] FWriteDataM, // Data to be written to memory (to IEU) - only used if `XLEN <`FLEN output logic [`XLEN-1:0] FIntResM, // data to be written to integer register (to IEU) output logic [`XLEN-1:0] FCvtIntResW, // convert result to to be written to integer register (to IEU) @@ -290,22 +289,18 @@ module fpu ( // - FP uses NaN-blocking format // - if there are any unsused bits the most significant bits are filled with 1s - if(`LLEN==`XLEN) - assign FWriteDataE = {{`XLEN-`FLEN{1'b1}}, YE}; - else begin - logic [`FLEN-1:0] WriteDataE; - if(`FPSIZES == 1) assign WriteDataE = YE; - else if(`FPSIZES == 2) assign WriteDataE = FmtE ? YE : {`FLEN/`LEN1{YE[`LEN1-1:0]}}; - else - always_comb - case(FmtE) - `Q_FMT: WriteDataE = YE; - `D_FMT: WriteDataE = {`FLEN/`D_LEN{YE[`D_LEN-1:0]}}; - `S_FMT: WriteDataE = {`FLEN/`S_LEN{YE[`S_LEN-1:0]}}; - `H_FMT: WriteDataE = {`FLEN/`H_LEN{YE[`H_LEN-1:0]}}; - endcase - flopenrc #(`FLEN) EMWriteDataReg (clk, reset, FlushM, ~StallM, WriteDataE, FWriteDataM); - end + logic [`FLEN-1:0] WriteDataE; + if(`FPSIZES == 1) assign WriteDataE = YE; + else if(`FPSIZES == 2) assign WriteDataE = FmtE ? 
YE : {`FLEN/`LEN1{YE[`LEN1-1:0]}}; + else + always_comb + case(FmtE) + `Q_FMT: WriteDataE = YE; + `D_FMT: WriteDataE = {`FLEN/`D_LEN{YE[`D_LEN-1:0]}}; + `S_FMT: WriteDataE = {`FLEN/`S_LEN{YE[`S_LEN-1:0]}}; + `H_FMT: WriteDataE = {`FLEN/`H_LEN{YE[`H_LEN-1:0]}}; + endcase + flopenrc #(`FLEN) EMWriteDataReg (clk, reset, FlushM, ~StallM, WriteDataE, FWriteDataM); // NaN Block SrcA generate diff --git a/pipelined/src/hazard/hazard.sv b/pipelined/src/hazard/hazard.sv index 3b902aed1..ac3fc9d9e 100644 --- a/pipelined/src/hazard/hazard.sv +++ b/pipelined/src/hazard/hazard.sv @@ -64,8 +64,10 @@ module hazard( assign StallFCause = CSRWriteFencePendingDEM & ~(TrapM | RetM | BPPredWrongE); // stall in decode if instruction is a load/mul/csr dependent on previous assign StallDCause = (LoadStallD | StoreStallD | MDUStallD | CSRRdStallD | FPUStallD | FStallD) & ~(TrapM | RetM | BPPredWrongE); +// assign StallECause = (DivBusyE | FDivBusyE) & ~(TrapM); // *** can we move to decode stage (KP?) assign StallECause = (DivBusyE) & ~(TrapM); // *** can we move to decode stage (KP?) // WFI terminates if any enabled interrupt is pending, even if global interrupts are disabled. 
It could also terminate with TW trap +// assign StallMCause = (wfiM & (~TrapM & ~IntPendingM)); // | FDivBusyE; assign StallMCause = (wfiM & (~TrapM & ~IntPendingM)) | FDivBusyE; assign StallWCause = LSUStallM | IFUStallF; diff --git a/pipelined/src/ieu/datapath.sv b/pipelined/src/ieu/datapath.sv index d6cabb442..71558178d 100644 --- a/pipelined/src/ieu/datapath.sv +++ b/pipelined/src/ieu/datapath.sv @@ -123,25 +123,18 @@ module datapath ( flopenrc #(`XLEN) IFResultWReg(clk, reset, FlushW, ~StallW, IFResultM, IFResultW); flopenrc #(5) RdWReg(clk, reset, FlushW, ~StallW, RdM, RdW); - // *** simplify WriteDataE in this merge // floating point interactions: fcvt, fp stores - if (`F_SUPPORTED&(`LLEN>`XLEN)) begin:fpmux + if (`F_SUPPORTED) begin:fpmux logic [`XLEN-1:0] IFCvtResultW; mux2 #(`XLEN) resultmuxM(IEUResultM, FIntResM, FWriteIntM, IFResultM); - assign WriteDataE = ForwardedSrcBE; - mux2 #(`XLEN) cvtresultmuxW(IFResultW, FCvtIntResW, ~FResSelW[1]&FResSelW[0], IFCvtResultW); - mux5 #(`XLEN) resultmuxW(IFCvtResultW, ReadDataW, CSRReadValW, MDUResultW, SCResultW, ResultSrcW, ResultW); - end else if (`F_SUPPORTED) begin:fpmux - logic [`XLEN-1:0] IFCvtResultW; - mux2 #(`XLEN) resultmuxM(IEUResultM, FIntResM, FWriteIntM, IFResultM); - mux2 #(`XLEN) writedatamux(ForwardedSrcBE, FWriteDataE, ~IllegalFPUInstrE, WriteDataE); mux2 #(`XLEN) cvtresultmuxW(IFResultW, FCvtIntResW, ~FResSelW[1]&FResSelW[0], IFCvtResultW); mux5 #(`XLEN) resultmuxW(IFCvtResultW, ReadDataW, CSRReadValW, MDUResultW, SCResultW, ResultSrcW, ResultW); end else begin:fpmux - assign IFResultM = IEUResultM; assign WriteDataE = ForwardedSrcBE; + assign IFResultM = IEUResultM; mux5 #(`XLEN) resultmuxW(IFResultW, ReadDataW, CSRReadValW, MDUResultW, SCResultW, ResultSrcW, ResultW); end - + assign WriteDataE = ForwardedSrcBE; + // handle Store Conditional result if atomic extension supported if (`A_SUPPORTED) assign SCResultW = {{(`XLEN-1){1'b0}}, SquashSCW}; else assign SCResultW = 0; From 
8444eca57c375ea75d13f8c7a1e65eda0433bc49 Mon Sep 17 00:00:00 2001 From: David Harris Date: Mon, 22 Aug 2022 13:29:20 -0700 Subject: [PATCH 03/24] Simplified FPU-LSU interface to skip IEU --- pipelined/src/lsu/lsu.sv | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pipelined/src/lsu/lsu.sv b/pipelined/src/lsu/lsu.sv index b278306a1..7b4284fad 100644 --- a/pipelined/src/lsu/lsu.sv +++ b/pipelined/src/lsu/lsu.sv @@ -58,7 +58,6 @@ module lsu ( input logic sfencevmaM, // fpu input logic [`FLEN-1:0] FWriteDataM, - input logic FStore2, input logic FpLoadStoreM, // faults output logic LoadPageFaultM, StoreAmoPageFaultM, @@ -119,7 +118,7 @@ module lsu ( // *** TO DO: Burst mode flopenrc #(`XLEN) AddressMReg(clk, reset, FlushM, ~StallM, IEUAdrE, IEUAdrM); - flopenrc #(`XLEN) WriteDataMReg(clk, reset, FlushM, ~StallM, WriteDataE, WriteDataM); + flopenrc #(`XLEN) WriteDataMReg(clk, reset, FlushM, ~StallM, WriteDataE, WriteDataM); // *** move this flop to IEU assign IEUAdrExtM = {2'b00, IEUAdrM}; assign LSUStallM = DCacheStallM | InterlockStall | BusStall; @@ -233,11 +232,14 @@ module lsu ( mux2 #(`XLEN) LsuBushwdataMux(.d0(ReadDataWordM[`XLEN-1:0]), .d1(IEUWriteDataM), .s(SelUncachedAdr), .y(LSUBusHWDATA)); + // *** Ross fix up location of mux to be here; remove from IEU datapath + // *** look over entire FPU write and read paths + // *** Why is if(CACHE_ENABLED) begin : dcache - if (`LLEN>`XLEN) - mux2 #(`LLEN) datamux({IEUWriteDataM, IEUWriteDataM}, FWriteDataM, FpLoadStoreM, FinalWriteDataM); + if (`F_SUPPORTED) + mux2 #(`LLEN) datamux({{`LLEN/`XLEN}{IEUWriteDataM}}, FWriteDataM, FpLoadStoreM, FinalWriteDataM); else - assign FinalWriteDataM = {{`LLEN-`XLEN{1'b0}}, IEUWriteDataM}; + assign FinalWriteDataM = IEUWriteDataM; cache #(.LINELEN(`DCACHE_LINELENINBITS), .NUMLINES(`DCACHE_WAYSIZEINBYTES*8/LINELEN), .NUMWAYS(`DCACHE_NUMWAYS), .LOGBWPL(LOGBWPL), .WORDLEN(`LLEN), .MUXINTERVAL(`XLEN), .DCACHE(1)) dcache( .clk, .reset, .CPUBusy, 
.SelLSUBusWord, .RW(LSURWM), .Atomic(LSUAtomicM), @@ -280,8 +282,6 @@ module lsu ( // Compute byte masks swbytemaskword #(`LLEN) swbytemask(.Size(LSUFunct3M), .Adr(LSUPAdrM[$clog2(`LLEN/8)-1:0]), .ByteMask(ByteMaskM)); - // *** fix when when fstore2 is valid. I'm not sure this is even needed if LSUFunct3M can be 3'b100 for a 16 byte write. - //assign FinalByteMaskM = FStore2 ? '1 : ByteMaskM; assign FinalByteMaskM = ByteMaskM; ///////////////////////////////////////////////////////////////////////////////////////////// From ea153e0aad51eb992d1f2d72f5b1ea4a618b8c71 Mon Sep 17 00:00:00 2001 From: David Harris Date: Mon, 22 Aug 2022 13:29:54 -0700 Subject: [PATCH 04/24] Removed FStore2 and simplified HPTW --- pipelined/src/mmu/hptw.sv | 12 ++++++------ pipelined/src/wally/wallypipelinedcore.sv | 6 ++---- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/pipelined/src/mmu/hptw.sv b/pipelined/src/mmu/hptw.sv index f96d69f0d..8967b49dd 100644 --- a/pipelined/src/mmu/hptw.sv +++ b/pipelined/src/mmu/hptw.sv @@ -48,7 +48,7 @@ module hptw output logic [1:0] PageType, // page type to TLBs (* mark_debug = "true" *) output logic ITLBWriteF, DTLBWriteM, // write TLB with new entry output logic [`PA_BITS-1:0] HPTWAdr, - output logic [1:0] HPTWRW, // HPTW requesting to read memory + output logic [1:0] HPTWRW, // HPTW requesting to write or read memory output logic [2:0] HPTWSize // 32 or 64 bit access. ); @@ -114,13 +114,15 @@ module hptw logic [`PA_BITS-1:0] HPTWWriteAdr; logic SetDirty; logic Dirty, Accessed; + logic [`XLEN-1:0] AccessedPTE; - assign NextPTE = UpdatePTE ? 
{PTE[`XLEN-1:8], (SetDirty | PTE[7]), 1'b1, PTE[5:0]} : HPTWReadPTE; + assign AccessedPTE = {PTE[`XLEN-1:8], (SetDirty | PTE[7]), 1'b1, PTE[5:0]}; // set accessed bit, conditionally set dirty bit + mux2 #(`XLEN) NextPTEMux(HPTWReadPTE, AccessedPTE, UpdatePTE, NextPTE); flopenr #(`PA_BITS) HPTWAdrWriteReg(clk, reset, SaveHPTWAdr, HPTWReadAdr, HPTWWriteAdr); + assign SaveHPTWAdr = WalkerState == L0_ADR; assign SelHPTWWriteAdr = UpdatePTE | HPTWRW[0]; mux2 #(`PA_BITS) HPTWWriteAdrMux(HPTWReadAdr, HPTWWriteAdr, SelHPTWWriteAdr, HPTWAdr); - assign {Dirty, Accessed} = PTE[7:6]; assign WriteAccess = MemRWM[0] | (|AtomicM); @@ -255,9 +257,7 @@ module hptw else NextWalkerState = LEAF; LEAF: if (DAPageFault) NextWalkerState = UPDATE_PTE; else NextWalkerState = IDLE; - // *** TODO update PTE with dirty/access. write to TLB and update memory. - // probably want to write the PTE in UPDATE_PTE then go to leaf and update TLB. - UPDATE_PTE: if(`HPTW_WRITES_SUPPORTED & DCacheStallM) NextWalkerState = UPDATE_PTE; + UPDATE_PTE: if(`HPTW_WRITES_SUPPORTED & DCacheStallM) NextWalkerState = UPDATE_PTE; else NextWalkerState = LEAF; default: begin NextWalkerState = IDLE; // should never be reached diff --git a/pipelined/src/wally/wallypipelinedcore.sv b/pipelined/src/wally/wallypipelinedcore.sv index 200789c41..78fdb3030 100644 --- a/pipelined/src/wally/wallypipelinedcore.sv +++ b/pipelined/src/wally/wallypipelinedcore.sv @@ -93,7 +93,6 @@ module wallypipelinedcore ( logic FStallD; logic FWriteIntE; logic [`XLEN-1:0] FWriteDataE; - logic FStore2; logic [`FLEN-1:0] FWriteDataM; logic [`XLEN-1:0] FIntResM; logic [`XLEN-1:0] FCvtIntResW; @@ -258,7 +257,7 @@ module wallypipelinedcore ( .CommittedM, .DCacheMiss, .DCacheAccess, .SquashSCW, .FpLoadStoreM, - .FWriteDataM, .FStore2, + .FWriteDataM, //.DataMisalignedM(DataMisalignedM), .IEUAdrE, .IEUAdrM, .WriteDataE, .ReadDataW, .FlushDCacheM, @@ -397,8 +396,7 @@ module wallypipelinedcore ( .STATUS_FS, // is floating-point enabled? 
.FRegWriteM, // FP register write enable .FpLoadStoreM, - .FStore2, - .FStallD, // Stall the decode stage + .FStallD, // Stall the decode stage .FWriteIntE, // integer register write enable .FWriteDataE, // Data to be written to memory .FWriteDataM, // Data to be written to memory From 0e489443f25d241e2b93877d866b106f706990b6 Mon Sep 17 00:00:00 2001 From: David Harris Date: Mon, 22 Aug 2022 13:43:04 -0700 Subject: [PATCH 05/24] Finished FPU-LSU interface cleanup --- pipelined/src/fpu/fpu.sv | 21 +++++++++++---------- pipelined/src/ieu/datapath.sv | 5 ++--- pipelined/src/ieu/ieu.sv | 7 +++---- pipelined/src/lsu/lsu.sv | 4 +--- 4 files changed, 17 insertions(+), 20 deletions(-) diff --git a/pipelined/src/fpu/fpu.sv b/pipelined/src/fpu/fpu.sv index 4da668934..935d6b011 100755 --- a/pipelined/src/fpu/fpu.sv +++ b/pipelined/src/fpu/fpu.sv @@ -44,8 +44,7 @@ module fpu ( output logic FpLoadStoreM, // Fp load instruction? (to LSU) output logic FStallD, // Stall the decode stage (To HZU) output logic FWriteIntE, // integer register write enable (to IEU) - output logic [`XLEN-1:0] FWriteDataE, // Data to be written to memory (to IEU) - only used if `XLEN >`FLEN *** delete this - output logic [`FLEN-1:0] FWriteDataM, // Data to be written to memory (to IEU) - only used if `XLEN <`FLEN + output logic [`FLEN-1:0] FWriteDataM, // Data to be written to memory (to LSU) output logic [`XLEN-1:0] FIntResM, // data to be written to integer register (to IEU) output logic [`XLEN-1:0] FCvtIntResW, // convert result to to be written to integer register (to IEU) output logic [1:0] FResSelW, // final result selection (to IEU) @@ -59,6 +58,9 @@ module fpu ( // - if there are any unsused bits the most significant bits are filled with 1s // single stored in a double: | 32 1s | single precision value | // - sets the underflow after rounding + + // LSU interface + logic [`FLEN-1:0] FWriteDataE; // control signals logic FRegWriteW; // FP register write enable @@ -289,18 +291,17 @@ module fpu ( 
// - FP uses NaN-blocking format // - if there are any unsused bits the most significant bits are filled with 1s - logic [`FLEN-1:0] WriteDataE; - if(`FPSIZES == 1) assign WriteDataE = YE; - else if(`FPSIZES == 2) assign WriteDataE = FmtE ? YE : {`FLEN/`LEN1{YE[`LEN1-1:0]}}; + if(`FPSIZES == 1) assign FWriteDataE = YE; + else if(`FPSIZES == 2) assign FWriteDataE = FmtE ? YE : {`FLEN/`LEN1{YE[`LEN1-1:0]}}; else always_comb case(FmtE) - `Q_FMT: WriteDataE = YE; - `D_FMT: WriteDataE = {`FLEN/`D_LEN{YE[`D_LEN-1:0]}}; - `S_FMT: WriteDataE = {`FLEN/`S_LEN{YE[`S_LEN-1:0]}}; - `H_FMT: WriteDataE = {`FLEN/`H_LEN{YE[`H_LEN-1:0]}}; + `Q_FMT: FWriteDataE = YE; + `D_FMT: FWriteDataE = {`FLEN/`D_LEN{YE[`D_LEN-1:0]}}; + `S_FMT: FWriteDataE = {`FLEN/`S_LEN{YE[`S_LEN-1:0]}}; + `H_FMT: FWriteDataE = {`FLEN/`H_LEN{YE[`H_LEN-1:0]}}; endcase - flopenrc #(`FLEN) EMWriteDataReg (clk, reset, FlushM, ~StallM, WriteDataE, FWriteDataM); + flopenrc #(`FLEN) FWriteDataMReg (clk, reset, FlushM, ~StallM, FWriteDataE, FWriteDataM); // NaN Block SrcA generate diff --git a/pipelined/src/ieu/datapath.sv b/pipelined/src/ieu/datapath.sv index 71558178d..73e9e94eb 100644 --- a/pipelined/src/ieu/datapath.sv +++ b/pipelined/src/ieu/datapath.sv @@ -45,7 +45,6 @@ module datapath ( input logic JumpE, input logic BranchSignedE, input logic IllegalFPUInstrE, - input logic [`XLEN-1:0] FWriteDataE, input logic [`XLEN-1:0] PCE, input logic [`XLEN-1:0] PCLinkE, output logic [1:0] FlagsE, @@ -56,7 +55,7 @@ module datapath ( input logic FWriteIntM, input logic [`XLEN-1:0] FIntResM, output logic [`XLEN-1:0] SrcAM, - output logic [`XLEN-1:0] WriteDataE, + output logic [`XLEN-1:0] WriteDataM, // Writeback stage signals input logic StallW, FlushW, (* mark_debug = "true" *) input logic RegWriteW, @@ -118,6 +117,7 @@ module datapath ( flopenrc #(`XLEN) SrcAMReg(clk, reset, FlushM, ~StallM, SrcAE, SrcAM); flopenrc #(`XLEN) IEUResultMReg(clk, reset, FlushM, ~StallM, IEUResultE, IEUResultM); flopenrc #(5) RdMReg(clk, reset, 
FlushM, ~StallM, RdE, RdM); + flopenrc #(`XLEN) WriteDataMReg(clk, reset, FlushM, ~StallM, ForwardedSrcBE, WriteDataM); // Writeback stage pipeline register and logic flopenrc #(`XLEN) IFResultWReg(clk, reset, FlushW, ~StallW, IFResultM, IFResultW); @@ -133,7 +133,6 @@ module datapath ( assign IFResultM = IEUResultM; mux5 #(`XLEN) resultmuxW(IFResultW, ReadDataW, CSRReadValW, MDUResultW, SCResultW, ResultSrcW, ResultW); end - assign WriteDataE = ForwardedSrcBE; // handle Store Conditional result if atomic extension supported if (`A_SUPPORTED) assign SCResultW = {{(`XLEN-1){1'b0}}, SquashSCW}; diff --git a/pipelined/src/ieu/ieu.sv b/pipelined/src/ieu/ieu.sv index b47cd1d2d..520807061 100644 --- a/pipelined/src/ieu/ieu.sv +++ b/pipelined/src/ieu/ieu.sv @@ -41,7 +41,6 @@ module ieu ( input logic [`XLEN-1:0] PCLinkE, input logic FWriteIntE, input logic IllegalFPUInstrE, - input logic [`XLEN-1:0] FWriteDataE, output logic [`XLEN-1:0] IEUAdrE, output logic MDUE, W64E, output logic [2:0] Funct3E, @@ -51,7 +50,7 @@ module ieu ( input logic SquashSCW, // from LSU output logic [1:0] MemRWM, // read/write control goes to LSU output logic [1:0] AtomicM, // atomic control goes to LSU - output logic [`XLEN-1:0] WriteDataE, // Address and write data to LSU + output logic [`XLEN-1:0] WriteDataM, // write data to LSU output logic [2:0] Funct3M, // size and signedness to LSU output logic [`XLEN-1:0] SrcAM, // to privilege and fpu @@ -107,8 +106,8 @@ module ieu ( datapath dp( .clk, .reset, .ImmSrcD, .InstrD, .StallE, .FlushE, .ForwardAE, .ForwardBE, .ALUControlE, .Funct3E, .ALUSrcAE, .ALUSrcBE, .ALUResultSrcE, .JumpE, .BranchSignedE, .IllegalFPUInstrE, - .FWriteDataE, .PCE, .PCLinkE, .FlagsE, .IEUAdrE, .ForwardedSrcAE, .ForwardedSrcBE, - .StallM, .FlushM, .FWriteIntM, .FIntResM, .SrcAM, .WriteDataE, .FResSelW, + .PCE, .PCLinkE, .FlagsE, .IEUAdrE, .ForwardedSrcAE, .ForwardedSrcBE, + .StallM, .FlushM, .FWriteIntM, .FIntResM, .SrcAM, .WriteDataM, .FResSelW, .StallW, .FlushW, .RegWriteW, 
.SquashSCW, .ResultSrcW, .ReadDataW, .FCvtIntResW, .CSRReadValW, .MDUResultW, .Rs1D, .Rs2D, .Rs1E, .Rs2E, .RdE, .RdM, .RdW); diff --git a/pipelined/src/lsu/lsu.sv b/pipelined/src/lsu/lsu.sv index 7b4284fad..121e3082f 100644 --- a/pipelined/src/lsu/lsu.sv +++ b/pipelined/src/lsu/lsu.sv @@ -50,7 +50,7 @@ module lsu ( // address and write data input logic [`XLEN-1:0] IEUAdrE, (* mark_debug = "true" *)output logic [`XLEN-1:0] IEUAdrM, - input logic [`XLEN-1:0] WriteDataE, + (* mark_debug = "true" *)input logic [`XLEN-1:0] WriteDataM, output logic [`LLEN-1:0] ReadDataW, // cpu privilege input logic [1:0] PrivilegeModeW, @@ -111,14 +111,12 @@ module lsu ( logic SelLSUBusWord; logic DataDAPageFaultM; logic [`XLEN-1:0] LSUWriteDataM; - logic [`XLEN-1:0] WriteDataM; logic [`LLEN-1:0] ReadDataM; logic [(`LLEN-1)/8:0] ByteMaskM, FinalByteMaskM; // *** TO DO: Burst mode flopenrc #(`XLEN) AddressMReg(clk, reset, FlushM, ~StallM, IEUAdrE, IEUAdrM); - flopenrc #(`XLEN) WriteDataMReg(clk, reset, FlushM, ~StallM, WriteDataE, WriteDataM); // *** move this flop to IEU assign IEUAdrExtM = {2'b00, IEUAdrM}; assign LSUStallM = DCacheStallM | InterlockStall | BusStall; From c789b5789c7de49289bd5523ee0e1d65a14d2660 Mon Sep 17 00:00:00 2001 From: David Harris Date: Mon, 22 Aug 2022 13:47:19 -0700 Subject: [PATCH 06/24] renamed GrantData to LSUGrant --- pipelined/src/ebu/ahblite.sv | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pipelined/src/ebu/ahblite.sv b/pipelined/src/ebu/ahblite.sv index 4203f0f52..0bf81f62a 100644 --- a/pipelined/src/ebu/ahblite.sv +++ b/pipelined/src/ebu/ahblite.sv @@ -84,7 +84,7 @@ module ahblite ( typedef enum logic [1:0] {IDLE, MEMREAD, MEMWRITE, INSTRREAD} statetype; statetype BusState, NextBusState; - logic GrantData; + logic LSUGrant; logic [31:0] AccessAddress; logic [2:0] ISize; @@ -132,12 +132,12 @@ module ahblite ( // bus outputs - assign #1 GrantData = (NextBusState == MEMREAD) | (NextBusState == MEMWRITE); - assign 
AccessAddress = (GrantData) ? LSUBusAdr[31:0] : IFUBusAdr[31:0]; + assign #1 LSUGrant = (NextBusState == MEMREAD) | (NextBusState == MEMWRITE); + assign AccessAddress = (LSUGrant) ? LSUBusAdr[31:0] : IFUBusAdr[31:0]; assign HADDR = AccessAddress; assign ISize = 3'b010; // 32 bit instructions for now; later improve for filling cache with full width; ignored on reads anyway - assign HSIZE = (GrantData) ? {1'b0, LSUBusSize[1:0]} : ISize; - assign HBURST = (GrantData) ? LSUBurstType : IFUBurstType; // If doing memory accesses, use LSUburst, else use Instruction burst. + assign HSIZE = (LSUGrant) ? {1'b0, LSUBusSize[1:0]} : ISize; + assign HBURST = (LSUGrant) ? LSUBurstType : IFUBurstType; // If doing memory accesses, use LSUburst, else use Instruction burst. /* Cache burst read/writes case statement (hopefully) WRAPS only have access to 4 wraps. X changes position based on HSIZE. 000: Single (SINGLE) @@ -153,7 +153,7 @@ module ahblite ( assign HPROT = 4'b0011; // not used; see Section 3.7 - assign HTRANS = (GrantData) ? LSUTransType : IFUTransType; // SEQ if not first read or write, NONSEQ if first read or write, IDLE otherwise + assign HTRANS = (LSUGrant) ? 
LSUTransType : IFUTransType; // SEQ if not first read or write, NONSEQ if first read or write, IDLE otherwise assign HMASTLOCK = 0; // no locking supported assign HWRITE = (NextBusState == MEMWRITE); // Byte mask for HWSTRB From 6cfbf95d98114343ca8c294d71c5409b116d3dab Mon Sep 17 00:00:00 2001 From: David Harris Date: Mon, 22 Aug 2022 13:47:56 -0700 Subject: [PATCH 07/24] Renamed signals for LSU - FPU interface --- pipelined/src/wally/wallypipelinedcore.sv | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/pipelined/src/wally/wallypipelinedcore.sv b/pipelined/src/wally/wallypipelinedcore.sv index 78fdb3030..f1af34294 100644 --- a/pipelined/src/wally/wallypipelinedcore.sv +++ b/pipelined/src/wally/wallypipelinedcore.sv @@ -92,7 +92,6 @@ module wallypipelinedcore ( logic [4:0] RdM, RdW; logic FStallD; logic FWriteIntE; - logic [`XLEN-1:0] FWriteDataE; logic [`FLEN-1:0] FWriteDataM; logic [`XLEN-1:0] FIntResM; logic [`XLEN-1:0] FCvtIntResW; @@ -130,7 +129,7 @@ module wallypipelinedcore ( // cpu lsu interface logic [2:0] Funct3M; logic [`XLEN-1:0] IEUAdrE; - (* mark_debug = "true" *) logic [`XLEN-1:0] WriteDataE; + (* mark_debug = "true" *) logic [`XLEN-1:0] WriteDataM; (* mark_debug = "true" *) logic [`XLEN-1:0] IEUAdrM; logic [`LLEN-1:0] ReadDataW; logic CommittedM; @@ -219,14 +218,14 @@ module wallypipelinedcore ( // Execute Stage interface .PCE, .PCLinkE, .FWriteIntE, .IllegalFPUInstrE, - .FWriteDataE, .IEUAdrE, .MDUE, .W64E, + .IEUAdrE, .MDUE, .W64E, .Funct3E, .ForwardedSrcAE, .ForwardedSrcBE, // *** these are the src outputs before the mux choosing between them and PCE to put in srcA/B // Memory stage interface .SquashSCW, // from LSU .MemRWM, // read/write control goes to LSU .AtomicM, // atomic control goes to LSU - .WriteDataE, // Write data to LSU + .WriteDataM, // Write data to LSU .Funct3M, // size and signedness to LSU .SrcAM, // to privilege and fpu .RdM, .FIntResM, .InvalidateICacheM, .FlushDCacheM, @@ -259,7 +258,7 @@ module 
wallypipelinedcore ( .FpLoadStoreM, .FWriteDataM, //.DataMisalignedM(DataMisalignedM), - .IEUAdrE, .IEUAdrM, .WriteDataE, + .IEUAdrE, .IEUAdrM, .WriteDataM, .ReadDataW, .FlushDCacheM, // connected to ahb (all stay the same) .LSUBusAdr, .LSUBusRead, .LSUBusWrite, .LSUBusAck, .LSUBusInit, @@ -398,7 +397,6 @@ module wallypipelinedcore ( .FpLoadStoreM, .FStallD, // Stall the decode stage .FWriteIntE, // integer register write enable - .FWriteDataE, // Data to be written to memory .FWriteDataM, // Data to be written to memory .FIntResM, // data to be written to integer register .FCvtIntResW, // fp -> int conversion result to be stored in int register @@ -410,7 +408,6 @@ module wallypipelinedcore ( end else begin // no F_SUPPORTED or D_SUPPORTED; tie outputs low assign FStallD = 0; assign FWriteIntE = 0; - assign FWriteDataE = 0; assign FIntResM = 0; assign FDivBusyE = 0; assign IllegalFPUInstrD = 1; From 3c91df95d9365bbf5baad67ec032981493fe0215 Mon Sep 17 00:00:00 2001 From: David Harris Date: Mon, 22 Aug 2022 13:56:46 -0700 Subject: [PATCH 08/24] Named HTRANS states in busfsm --- pipelined/src/lsu/busfsm.sv | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pipelined/src/lsu/busfsm.sv b/pipelined/src/lsu/busfsm.sv index 00c561423..88fd4025e 100644 --- a/pipelined/src/lsu/busfsm.sv +++ b/pipelined/src/lsu/busfsm.sv @@ -78,6 +78,8 @@ module busfsm #(parameter integer WordCountThreshold, STATE_BUS_UNCACHED_READ_DONE, STATE_BUS_CPU_BUSY} busstatetype; + typedef enum logic [1:0] {AHB_IDLE = 2'b00, AHB_BUSY = 2'b01, AHB_NONSEQ = 2'b10, AHB_SEQ = 2'b11} ahbtranstype; + (* mark_debug = "true" *) busstatetype BusCurrState, BusNextState; // Used to send address for address stage of AHB. @@ -154,7 +156,7 @@ module busfsm #(parameter integer WordCountThreshold, assign LSUBurstType = (UnCachedRW) ? 3'b0 : LocalBurstType; // Don't want to use burst when doing an Uncached Access. assign LSUTransComplete = (UnCachedRW) ? 
LSUBusAck : WordCountFlag & LSUBusAck; // Use SEQ if not doing first word, NONSEQ if doing the first read/write, and IDLE if finishing up. - assign LSUTransType = (|WordCount) & ~UnCachedRW ? 2'b11 : (LSUBusRead | LSUBusWrite) & (~LSUTransComplete) ? 2'b10 : 2'b00; + assign LSUTransType = (|WordCount) & ~UnCachedRW ? AHB_SEQ : (LSUBusRead | LSUBusWrite) & (~LSUTransComplete) ? AHB_NONSEQ : AHB_IDLE; // Reset if we aren't initiating a transaction or if we are finishing a transaction. assign CntReset = BusCurrState == STATE_BUS_READY & ~(DCacheFetchLine | DCacheWriteLine) | LSUTransComplete; From 16a92eaf10b00d14ff828edefcf2ab04a82d56ce Mon Sep 17 00:00:00 2001 From: David Harris Date: Tue, 23 Aug 2022 07:23:24 -0700 Subject: [PATCH 09/24] Updated testbench assertions. --- pipelined/testbench/testbench.sv | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pipelined/testbench/testbench.sv b/pipelined/testbench/testbench.sv index 3aa123fc3..55bf0be6c 100644 --- a/pipelined/testbench/testbench.sv +++ b/pipelined/testbench/testbench.sv @@ -395,9 +395,11 @@ module riscvassertions; assert (`PMP_ENTRIES == 0 | `PMP_ENTRIES==16 | `PMP_ENTRIES==64) else $error("Illegal number of PMP entries: PMP_ENTRIES must be 0, 16, or 64"); assert (`S_SUPPORTED | `VIRTMEM_SUPPORTED == 0) else $error("Virtual memory requires S mode support"); assert (`DIV_BITSPERCYCLE == 1 | `DIV_BITSPERCYCLE==2 | `DIV_BITSPERCYCLE==4) else $error("Illegal number of divider bits/cycle: DIV_BITSPERCYCLE must be 1, 2, or 4"); - assert (`F_SUPPORTED | ~`D_SUPPORTED) else $error("Can't support double (D) without supporting float (F)"); + assert (`F_SUPPORTED | ~`D_SUPPORTED) else $error("Can't support double fp (D) without supporting float (F)"); + assert (`F_SUPPORTED | ~`Q_SUPPORTED) else $error("Can't support quad fp (Q) without supporting float (F)"); + assert (`F_SUPPORTED | ~`ZFH_SUPPORTED) else $error("Can't support half-precision fp (ZFH) without supporting float (F)"); + assert 
(`DMEM == `MEM_CACHE | ~`F_SUPPORTED | `FLEN <= `XLEN) else $error("Data cache required to support FLEN > XLEN"); assert (`I_SUPPORTED ^ `E_SUPPORTED) else $error("Exactly one of I and E must be supported"); - // assert (`XLEN == 64 | ~`D_SUPPORTED) else $error("Wally does not yet support D extensions on RV32"); assert (`FLEN<=`XLEN | `DMEM == `MEM_CACHE) else $error("Wally does not support FLEN > XLEN unleses data cache is supported"); assert (`DCACHE_WAYSIZEINBYTES <= 4096 | (`DMEM != `MEM_CACHE) | `VIRTMEM_SUPPORTED == 0) else $error("DCACHE_WAYSIZEINBYTES cannot exceed 4 KiB when caches and vitual memory is enabled (to prevent aliasing)"); assert (`DCACHE_LINELENINBITS >= 128 | (`DMEM != `MEM_CACHE)) else $error("DCACHE_LINELENINBITS must be at least 128 when caches are enabled"); @@ -423,6 +425,8 @@ module riscvassertions; assert (`DCACHE_LINELENINBITS <= `XLEN*16 | (`DMEM != `MEM_CACHE)) else $error("DCACHE_LINELENINBITS must not exceed 16 words because max AHB burst size is 1"); assert (`DCACHE_LINELENINBITS % 4 == 0) else $error("DCACHE_LINELENINBITS must hold 4, 8, or 16 words"); end + + // *** DH 8/23/ endmodule From e714b7588844aac932c24572ae7053a253fff421 Mon Sep 17 00:00:00 2001 From: David Harris Date: Tue, 23 Aug 2022 07:35:47 -0700 Subject: [PATCH 10/24] LSU minor edits --- pipelined/config/rv64gc/wally-config.vh | 2 +- pipelined/src/lsu/lsu.sv | 3 +++ pipelined/testbench/testbench.sv | 2 +- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/pipelined/config/rv64gc/wally-config.vh b/pipelined/config/rv64gc/wally-config.vh index 19750d6be..9c9e3376b 100644 --- a/pipelined/config/rv64gc/wally-config.vh +++ b/pipelined/config/rv64gc/wally-config.vh @@ -86,7 +86,7 @@ // WFI Timeout Wait `define WFI_TIMEOUT_BIT 16 -// Peripheral Physiccal Addresses +// Peripheral Physical Addresses // Peripheral memory space extends from BASE to BASE+RANGE // Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits diff --git 
a/pipelined/src/lsu/lsu.sv b/pipelined/src/lsu/lsu.sv index 121e3082f..cf44fa297 100644 --- a/pipelined/src/lsu/lsu.sv +++ b/pipelined/src/lsu/lsu.sv @@ -196,6 +196,9 @@ module lsu ( logic SelUncachedAdr; assign IgnoreRequest = IgnoreRequestTLB | TrapM; + // The LSU allows both a DTIM and bus with cache. However, the PMA decoding presently + // use the same RAM_BASE addresss for both the DTIM and any RAM in the Uncore. + if (`DMEM == `MEM_TIM) begin : dtim // *** directly instantiate RAM or ROM here. Instantiate SRAM1P1RW. // Merge SimpleRAM and SRAM1p1rw into one that is good for synthesis and RAM libraries and flops diff --git a/pipelined/testbench/testbench.sv b/pipelined/testbench/testbench.sv index 55bf0be6c..ab57c0cbf 100644 --- a/pipelined/testbench/testbench.sv +++ b/pipelined/testbench/testbench.sv @@ -398,7 +398,7 @@ module riscvassertions; assert (`F_SUPPORTED | ~`D_SUPPORTED) else $error("Can't support double fp (D) without supporting float (F)"); assert (`F_SUPPORTED | ~`Q_SUPPORTED) else $error("Can't support quad fp (Q) without supporting float (F)"); assert (`F_SUPPORTED | ~`ZFH_SUPPORTED) else $error("Can't support half-precision fp (ZFH) without supporting float (F)"); - assert (`DMEM == `MEM_CACHE | ~`F_SUPPORTED | `FLEN <= `XLEN) else $error("Data cache required to support FLEN > XLEN"); + assert (`DMEM == `MEM_CACHE | ~`F_SUPPORTED | `FLEN <= `XLEN) else $error("Data cache required to support FLEN > XLEN because AHB bus width is XLEN"); assert (`I_SUPPORTED ^ `E_SUPPORTED) else $error("Exactly one of I and E must be supported"); assert (`FLEN<=`XLEN | `DMEM == `MEM_CACHE) else $error("Wally does not support FLEN > XLEN unleses data cache is supported"); assert (`DCACHE_WAYSIZEINBYTES <= 4096 | (`DMEM != `MEM_CACHE) | `VIRTMEM_SUPPORTED == 0) else $error("DCACHE_WAYSIZEINBYTES cannot exceed 4 KiB when caches and vitual memory is enabled (to prevent aliasing)"); From 7fcc8526875c31cfe7302d66c1f7d298a15bc3e5 Mon Sep 17 00:00:00 2001 From: David 
Harris Date: Tue, 23 Aug 2022 08:29:59 -0700 Subject: [PATCH 11/24] Q depends on D --- pipelined/testbench/testbench.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelined/testbench/testbench.sv b/pipelined/testbench/testbench.sv index ab57c0cbf..5df212b52 100644 --- a/pipelined/testbench/testbench.sv +++ b/pipelined/testbench/testbench.sv @@ -396,7 +396,7 @@ module riscvassertions; assert (`S_SUPPORTED | `VIRTMEM_SUPPORTED == 0) else $error("Virtual memory requires S mode support"); assert (`DIV_BITSPERCYCLE == 1 | `DIV_BITSPERCYCLE==2 | `DIV_BITSPERCYCLE==4) else $error("Illegal number of divider bits/cycle: DIV_BITSPERCYCLE must be 1, 2, or 4"); assert (`F_SUPPORTED | ~`D_SUPPORTED) else $error("Can't support double fp (D) without supporting float (F)"); - assert (`F_SUPPORTED | ~`Q_SUPPORTED) else $error("Can't support quad fp (Q) without supporting float (F)"); + assert (`D_SUPPORTED | ~`Q_SUPPORTED) else $error("Can't support quad fp (Q) without supporting double (D)"); assert (`F_SUPPORTED | ~`ZFH_SUPPORTED) else $error("Can't support half-precision fp (ZFH) without supporting float (F)"); assert (`DMEM == `MEM_CACHE | ~`F_SUPPORTED | `FLEN <= `XLEN) else $error("Data cache required to support FLEN > XLEN because AHB bus width is XLEN"); assert (`I_SUPPORTED ^ `E_SUPPORTED) else $error("Exactly one of I and E must be supported"); From 3b0758440388d8cea2b1639719b8f04fbcd70430 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Tue, 23 Aug 2022 10:34:39 -0500 Subject: [PATCH 12/24] Updated the names of the *WriteDataM inside the LSU to more meaningful names. Moved the FWriteDataMux so that the bus and dtim both get fpu stores. Modified the PMA to disallow double sized reads when XLEN=32. 
--- pipelined/src/generic/flop/simpleram.sv | 10 +++--- pipelined/src/ifu/ifu.sv | 2 +- pipelined/src/lsu/atomic.sv | 8 ++--- pipelined/src/lsu/dtim.sv | 8 ++--- pipelined/src/lsu/lsu.sv | 45 +++++++++++-------------- pipelined/src/lsu/lsuvirtmen.sv | 6 ++-- pipelined/src/lsu/subwordwrite.sv | 31 +++++++++++------ pipelined/src/mmu/adrdecs.sv | 12 +++---- 8 files changed, 63 insertions(+), 59 deletions(-) diff --git a/pipelined/src/generic/flop/simpleram.sv b/pipelined/src/generic/flop/simpleram.sv index b08021614..08c7d113b 100644 --- a/pipelined/src/generic/flop/simpleram.sv +++ b/pipelined/src/generic/flop/simpleram.sv @@ -34,15 +34,15 @@ module simpleram #(parameter BASE=0, RANGE = 65535) ( input logic clk, input logic [31:0] a, input logic we, - input logic [`XLEN/8-1:0] ByteMask, - input logic [`XLEN-1:0] wd, - output logic [`XLEN-1:0] rd + input logic [`LLEN/8-1:0] ByteMask, + input logic [`LLEN-1:0] wd, + output logic [`LLEN-1:0] rd ); localparam ADDR_WDITH = $clog2(RANGE/8); - localparam OFFSET = $clog2(`XLEN/8); + localparam OFFSET = $clog2(`LLEN/8); - bram1p1rw #(`XLEN/8, 8, ADDR_WDITH) + bram1p1rw #(`LLEN/8, 8, ADDR_WDITH) memory(.clk, .we, .bwe(ByteMask), .addr(a[ADDR_WDITH+OFFSET-1:OFFSET]), .dout(rd), .din(wd)); endmodule diff --git a/pipelined/src/ifu/ifu.sv b/pipelined/src/ifu/ifu.sv index b8e636c5f..e699bc576 100644 --- a/pipelined/src/ifu/ifu.sv +++ b/pipelined/src/ifu/ifu.sv @@ -187,7 +187,7 @@ module ifu ( if (`IMEM == `MEM_TIM) begin : irom // *** fix up dtim taking PA_BITS rather than XLEN, *** IEUAdr is a bad name. 
Probably use a ROM rather than DTIM dtim irom(.clk, .reset, .CPUBusy, .LSURWM(2'b10), .IEUAdrM({{(`XLEN-32){1'b0}}, PCPF[31:0]}), .IEUAdrE(PCNextFSpill), - .TrapM(1'b0), .FinalWriteDataM(), .ByteMaskM('0), + .TrapM(1'b0), .WriteDataM(), .ByteMaskM('0), .ReadDataWordM({{(`XLEN-32){1'b0}}, FinalInstrRawF}), .BusStall, .LSUBusWrite(), .LSUBusRead(IFUBusRead), .BusCommittedM(), .DCacheStallM(ICacheStallF), .Cacheable(CacheableF), .DCacheCommittedM(), .DCacheMiss(ICacheMiss), .DCacheAccess(ICacheAccess)); diff --git a/pipelined/src/lsu/atomic.sv b/pipelined/src/lsu/atomic.sv index 5a0753974..2c7259a19 100644 --- a/pipelined/src/lsu/atomic.sv +++ b/pipelined/src/lsu/atomic.sv @@ -34,23 +34,23 @@ module atomic ( input logic clk, input logic reset, StallW, input logic [`XLEN-1:0] ReadDataM, - input logic [`XLEN-1:0] LSUWriteDataM, + input logic [`XLEN-1:0] IMWriteDataM, input logic [`PA_BITS-1:0] LSUPAdrM, input logic [6:0] LSUFunct7M, input logic [2:0] LSUFunct3M, input logic [1:0] LSUAtomicM, input logic [1:0] PreLSURWM, input logic IgnoreRequest, - output logic [`XLEN-1:0] AMOWriteDataM, + output logic [`XLEN-1:0] IMAWriteDataM, output logic SquashSCW, output logic [1:0] LSURWM); logic [`XLEN-1:0] AMOResult; logic MemReadM; - amoalu amoalu(.srca(ReadDataM), .srcb(LSUWriteDataM), .funct(LSUFunct7M), .width(LSUFunct3M[1:0]), + amoalu amoalu(.srca(ReadDataM), .srcb(IMWriteDataM), .funct(LSUFunct7M), .width(LSUFunct3M[1:0]), .result(AMOResult)); - mux2 #(`XLEN) wdmux(LSUWriteDataM, AMOResult, LSUAtomicM[1], AMOWriteDataM); + mux2 #(`XLEN) wdmux(IMWriteDataM, AMOResult, LSUAtomicM[1], IMAWriteDataM); assign MemReadM = PreLSURWM[1] & ~IgnoreRequest; lrsc lrsc(.clk, .reset, .StallW, .MemReadM, .PreLSURWM, .LSUAtomicM, .LSUPAdrM, .SquashSCW, .LSURWM); diff --git a/pipelined/src/lsu/dtim.sv b/pipelined/src/lsu/dtim.sv index 5b4969ab8..4dcbda665 100644 --- a/pipelined/src/lsu/dtim.sv +++ b/pipelined/src/lsu/dtim.sv @@ -36,10 +36,10 @@ module dtim( input logic [`XLEN-1:0] IEUAdrM, 
input logic [`XLEN-1:0] IEUAdrE, input logic TrapM, - input logic [`XLEN-1:0] FinalWriteDataM, - input logic [`XLEN/8-1:0] ByteMaskM, + input logic [`LLEN-1:0] WriteDataM, + input logic [`LLEN/8-1:0] ByteMaskM, input logic Cacheable, - output logic [`XLEN-1:0] ReadDataWordM, + output logic [`LLEN-1:0] ReadDataWordM, output logic BusStall, output logic LSUBusWrite, output logic LSUBusRead, @@ -53,7 +53,7 @@ module dtim( .clk, .ByteMask(ByteMaskM), .a(CPUBusy | LSURWM[0] | reset ? IEUAdrM[31:0] : IEUAdrE[31:0]), // move mux out; this shouldn't be needed when stails are handled differently *** .we(LSURWM[0] & Cacheable & ~TrapM), // have to ignore write if Trap. - .wd(FinalWriteDataM), .rd(ReadDataWordM)); + .wd(WriteDataM), .rd(ReadDataWordM)); // since we have a local memory the bus connections are all disabled. // There are no peripherals supported. diff --git a/pipelined/src/lsu/lsu.sv b/pipelined/src/lsu/lsu.sv index cf44fa297..cb37e1ef3 100644 --- a/pipelined/src/lsu/lsu.sv +++ b/pipelined/src/lsu/lsu.sv @@ -110,9 +110,10 @@ module lsu ( logic BusCommittedM, DCacheCommittedM; logic SelLSUBusWord; logic DataDAPageFaultM; - logic [`XLEN-1:0] LSUWriteDataM; + logic [`XLEN-1:0] IMWriteDataM, IMAWriteDataM; + logic [`LLEN-1:0] IMAFWriteDataM; logic [`LLEN-1:0] ReadDataM; - logic [(`LLEN-1)/8:0] ByteMaskM, FinalByteMaskM; + logic [(`LLEN-1)/8:0] ByteMaskM; // *** TO DO: Burst mode @@ -131,7 +132,7 @@ module lsu ( .TrapM, .DCacheStallM, .SATP_REGW, .PCF, .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP, .PrivilegeModeW, .ReadDataM(ReadDataM[`XLEN-1:0]), .WriteDataM, .Funct3M, .LSUFunct3M, .Funct7M, .LSUFunct7M, - .IEUAdrExtM, .PTE, .LSUWriteDataM, .PageType, .PreLSURWM, .LSUAtomicM, .IEUAdrE, + .IEUAdrExtM, .PTE, .IMWriteDataM, .PageType, .PreLSURWM, .LSUAtomicM, .IEUAdrE, .LSUAdrE, .PreLSUPAdrM, .CPUBusy, .InterlockStall, .SelHPTW, .IgnoreRequestTLB); end else begin @@ -140,7 +141,7 @@ module lsu ( assign LSUAdrE = IEUAdrE[11:0]; assign PreLSUPAdrM = IEUAdrExtM; 
assign LSUFunct3M = Funct3M; assign LSUFunct7M = Funct7M; assign LSUAtomicM = AtomicM; - assign LSUWriteDataM = WriteDataM; + assign IMWriteDataM = WriteDataM; end // CommittedM tells the CPU's privilege unit the current instruction @@ -188,8 +189,7 @@ module lsu ( // Memory System // Either Data Cache or Data Tightly Integrated Memory or just bus interface ///////////////////////////////////////////////////////////////////////////////////////////// - logic [`XLEN-1:0] AMOWriteDataM, IEUWriteDataM, LittleEndianWriteDataM; - logic [`LLEN-1:0] FinalWriteDataM; + logic [`LLEN-1:0] LSUWriteDataM, LittleEndianWriteDataM; logic [`LLEN-1:0] ReadDataWordM, LittleEndianReadDataWordM; logic [`LLEN-1:0] ReadDataWordMuxM; logic IgnoreRequest; @@ -202,7 +202,7 @@ module lsu ( if (`DMEM == `MEM_TIM) begin : dtim // *** directly instantiate RAM or ROM here. Instantiate SRAM1P1RW. // Merge SimpleRAM and SRAM1p1rw into one that is good for synthesis and RAM libraries and flops - dtim dtim(.clk, .reset, .CPUBusy, .LSURWM, .IEUAdrM, .IEUAdrE, .TrapM, .FinalWriteDataM(IEUWriteDataM), //*** fix the dtim FinalWriteData + dtim dtim(.clk, .reset, .CPUBusy, .LSURWM, .IEUAdrM, .IEUAdrE, .TrapM, .WriteDataM(LSUWriteDataM), //*** fix the dtim FinalWriteData .ReadDataWordM(ReadDataWordM[`XLEN-1:0]), .BusStall, .LSUBusWrite,.LSUBusRead, .BusCommittedM, .DCacheStallM, .DCacheCommittedM, .ByteMaskM(ByteMaskM[`XLEN/8-1:0]), .Cacheable(CacheableM), .DCacheMiss, .DCacheAccess); @@ -230,23 +230,15 @@ module lsu ( mux2 #(`LLEN) UnCachedDataMux(.d0(LittleEndianReadDataWordM), .d1({{`LLEN-`XLEN{1'b0}}, DLSUBusBuffer[`XLEN-1:0]}), .s(SelUncachedAdr), .y(ReadDataWordMuxM)); - mux2 #(`XLEN) LsuBushwdataMux(.d0(ReadDataWordM[`XLEN-1:0]), .d1(IEUWriteDataM), + mux2 #(`XLEN) LsuBushwdataMux(.d0(ReadDataWordM[`XLEN-1:0]), .d1(LSUWriteDataM), .s(SelUncachedAdr), .y(LSUBusHWDATA)); - - // *** Ross fix up location of mux to be here; remove from IEU datapath - // *** look over entire FPU write and read paths - // 
*** Why is if(CACHE_ENABLED) begin : dcache - if (`F_SUPPORTED) - mux2 #(`LLEN) datamux({{`LLEN/`XLEN}{IEUWriteDataM}}, FWriteDataM, FpLoadStoreM, FinalWriteDataM); - else - assign FinalWriteDataM = IEUWriteDataM; cache #(.LINELEN(`DCACHE_LINELENINBITS), .NUMLINES(`DCACHE_WAYSIZEINBYTES*8/LINELEN), .NUMWAYS(`DCACHE_NUMWAYS), .LOGBWPL(LOGBWPL), .WORDLEN(`LLEN), .MUXINTERVAL(`XLEN), .DCACHE(1)) dcache( .clk, .reset, .CPUBusy, .SelLSUBusWord, .RW(LSURWM), .Atomic(LSUAtomicM), .FlushCache(FlushDCacheM), .NextAdr(LSUAdrE), .PAdr(LSUPAdrM), - .ByteMask(FinalByteMaskM), .WordCount, - .FinalWriteData(FinalWriteDataM), .Cacheable(CacheableM), + .ByteMask(ByteMaskM), .WordCount, + .FinalWriteData(LSUWriteDataM), .Cacheable(CacheableM), .CacheStall(DCacheStallM), .CacheMiss(DCacheMiss), .CacheAccess(DCacheAccess), .IgnoreRequestTLB, .TrapM, .CacheCommitted(DCacheCommittedM), .CacheBusAdr(DCacheBusAdr), .ReadDataWord(ReadDataWordM), @@ -266,24 +258,27 @@ module lsu ( // Atomic operations ///////////////////////////////////////////////////////////////////////////////////////////// if (`A_SUPPORTED) begin:atomic - atomic atomic(.clk, .reset, .StallW, .ReadDataM(ReadDataM[`XLEN-1:0]), .LSUWriteDataM, .LSUPAdrM, + atomic atomic(.clk, .reset, .StallW, .ReadDataM(ReadDataM[`XLEN-1:0]), .IMWriteDataM, .LSUPAdrM, .LSUFunct7M, .LSUFunct3M, .LSUAtomicM, .PreLSURWM, .IgnoreRequest, - .AMOWriteDataM, .SquashSCW, .LSURWM); + .IMAWriteDataM, .SquashSCW, .LSURWM); end else begin:lrsc - assign SquashSCW = 0; assign LSURWM = PreLSURWM; assign AMOWriteDataM = LSUWriteDataM; + assign SquashSCW = 0; assign LSURWM = PreLSURWM; assign IMAWriteDataM = IMWriteDataM; end + if (`F_SUPPORTED) + mux2 #(`LLEN) datamux({{`LLEN/`XLEN}{IMAWriteDataM}}, FWriteDataM, FpLoadStoreM, IMAFWriteDataM); + else assign IMAFWriteDataM = IMAWriteDataM; + ///////////////////////////////////////////////////////////////////////////////////////////// // Subword Accesses 
///////////////////////////////////////////////////////////////////////////////////////////// subwordread subwordread(.ReadDataWordMuxM, .LSUPAdrM(LSUPAdrM[2:0]), .FpLoadStoreM, .Funct3M(LSUFunct3M), .ReadDataM); subwordwrite subwordwrite(.LSUPAdrM(LSUPAdrM[2:0]), - .LSUFunct3M, .AMOWriteDataM, .LittleEndianWriteDataM); + .LSUFunct3M, .IMAFWriteDataM, .LittleEndianWriteDataM); // Compute byte masks swbytemaskword #(`LLEN) swbytemask(.Size(LSUFunct3M), .Adr(LSUPAdrM[$clog2(`LLEN/8)-1:0]), .ByteMask(ByteMaskM)); - assign FinalByteMaskM = ByteMaskM; ///////////////////////////////////////////////////////////////////////////////////////////// // MW Pipeline Register @@ -297,10 +292,10 @@ module lsu ( // swap the bytes when read from big-endian memory ///////////////////////////////////////////////////////////////////////////////////////////// if (`BIGENDIAN_SUPPORTED) begin:endian - bigendianswap #(`XLEN) storeswap(.BigEndianM, .a(LittleEndianWriteDataM), .y(IEUWriteDataM)); + bigendianswap #(`LLEN) storeswap(.BigEndianM, .a(LittleEndianWriteDataM), .y(LSUWriteDataM)); bigendianswap #(`LLEN) loadswap(.BigEndianM, .a(ReadDataWordM), .y(LittleEndianReadDataWordM)); end else begin - assign IEUWriteDataM = LittleEndianWriteDataM; + assign LSUWriteDataM = LittleEndianWriteDataM; assign LittleEndianReadDataWordM = ReadDataWordM; end diff --git a/pipelined/src/lsu/lsuvirtmen.sv b/pipelined/src/lsu/lsuvirtmen.sv index 748aa3df0..a2d7c6285 100644 --- a/pipelined/src/lsu/lsuvirtmen.sv +++ b/pipelined/src/lsu/lsuvirtmen.sv @@ -54,7 +54,7 @@ module lsuvirtmem( output logic [6:0] LSUFunct7M, input logic [`XLEN-1:0] IEUAdrE, output logic [`XLEN-1:0] PTE, - output logic [`XLEN-1:0] LSUWriteDataM, + output logic [`XLEN-1:0] IMWriteDataM, output logic [1:0] PageType, output logic [1:0] PreLSURWM, output logic [1:0] LSUAtomicM, @@ -112,8 +112,8 @@ module lsuvirtmem( mux2 #(12) adremux(IEUAdrE[11:0], HPTWAdr[11:0], SelHPTW, PreLSUAdrE); mux2 #(`XLEN+2) lsupadrmux(IEUAdrExtM, HPTWAdrExt, 
SelHPTWAdr, PreLSUPAdrM); if(`HPTW_WRITES_SUPPORTED) - mux2 #(`XLEN) lsuwritedatamux(WriteDataM, PTE, SelHPTW, LSUWriteDataM); - else assign LSUWriteDataM = WriteDataM; + mux2 #(`XLEN) lsuwritedatamux(WriteDataM, PTE, SelHPTW, IMWriteDataM); + else assign IMWriteDataM = WriteDataM; mux2 #(12) replaymux(PreLSUAdrE, IEUAdrExtM[11:0], SelReplayMemE, LSUAdrE); // replay cpu request after hptw. *** redudant with mux in cache. // always block interrupts when using the hardware page table walker. diff --git a/pipelined/src/lsu/subwordwrite.sv b/pipelined/src/lsu/subwordwrite.sv index d42033ef7..59546ec74 100644 --- a/pipelined/src/lsu/subwordwrite.sv +++ b/pipelined/src/lsu/subwordwrite.sv @@ -33,25 +33,34 @@ module subwordwrite ( input logic [2:0] LSUPAdrM, input logic [2:0] LSUFunct3M, - input logic [`XLEN-1:0] AMOWriteDataM, - output logic [`XLEN-1:0] LittleEndianWriteDataM); + input logic [`LLEN-1:0] IMAFWriteDataM, + output logic [`LLEN-1:0] LittleEndianWriteDataM); // Replicate data for subword writes - if (`XLEN == 64) begin:sww + if (`LLEN == 128) begin:sww + always_comb + case(LSUFunct3M[2:0]) + 2'b000: LittleEndianWriteDataM = {16{IMAFWriteDataM[7:0]}}; // sb + 2'b001: LittleEndianWriteDataM = {8{IMAFWriteDataM[15:0]}}; // sh + 2'b010: LittleEndianWriteDataM = {4{IMAFWriteDataM[31:0]}}; // sw + 2'b011: LittleEndianWriteDataM = {2{IMAFWriteDataM[63:0]}}; // sd + default: LittleEndianWriteDataM = IMAFWriteDataM; // sq + endcase + end else if (`LLEN == 64) begin:sww always_comb case(LSUFunct3M[1:0]) - 2'b00: LittleEndianWriteDataM = {8{AMOWriteDataM[7:0]}}; // sb - 2'b01: LittleEndianWriteDataM = {4{AMOWriteDataM[15:0]}}; // sh - 2'b10: LittleEndianWriteDataM = {2{AMOWriteDataM[31:0]}}; // sw - 2'b11: LittleEndianWriteDataM = AMOWriteDataM; // sw + 2'b00: LittleEndianWriteDataM = {8{IMAFWriteDataM[7:0]}}; // sb + 2'b01: LittleEndianWriteDataM = {4{IMAFWriteDataM[15:0]}}; // sh + 2'b10: LittleEndianWriteDataM = {2{IMAFWriteDataM[31:0]}}; // sw + 2'b11: 
LittleEndianWriteDataM = IMAFWriteDataM; // sd endcase end else begin:sww // 32-bit always_comb case(LSUFunct3M[1:0]) - 2'b00: LittleEndianWriteDataM = {4{AMOWriteDataM[7:0]}}; // sb - 2'b01: LittleEndianWriteDataM = {2{AMOWriteDataM[15:0]}}; // sh - 2'b10: LittleEndianWriteDataM = AMOWriteDataM; // sw - default: LittleEndianWriteDataM = AMOWriteDataM; // shouldn't happen + 2'b00: LittleEndianWriteDataM = {4{IMAFWriteDataM[7:0]}}; // sb + 2'b01: LittleEndianWriteDataM = {2{IMAFWriteDataM[15:0]}}; // sh + 2'b10: LittleEndianWriteDataM = IMAFWriteDataM; // sw + default: LittleEndianWriteDataM = IMAFWriteDataM; // shouldn't happen endcase end endmodule diff --git a/pipelined/src/mmu/adrdecs.sv b/pipelined/src/mmu/adrdecs.sv index 0104ca578..3923c2a67 100644 --- a/pipelined/src/mmu/adrdecs.sv +++ b/pipelined/src/mmu/adrdecs.sv @@ -38,17 +38,17 @@ module adrdecs ( output logic [8:0] SelRegions ); + localparam logic [3:0] SUPPORTED_SIZE = (`XLEN == 64 ? 4'b1111 : 4'b0111); // Determine which region of physical memory (if any) is being accessed - // *** eventually uncomment Access signals - adrdec ddr4dec(PhysicalAddress, `EXT_MEM_BASE, `EXT_MEM_RANGE, `EXT_MEM_SUPPORTED, AccessRWX, Size, 4'b1111, SelRegions[7]); - adrdec boottimdec(PhysicalAddress, `BOOTROM_BASE, `BOOTROM_RANGE, `BOOTROM_SUPPORTED, /*1'b1*/AccessRX, Size, 4'b1111, SelRegions[6]); - adrdec timdec(PhysicalAddress, `RAM_BASE, `RAM_RANGE, `RAM_SUPPORTED, /*1'b1*/AccessRWX, Size, 4'b1111, SelRegions[5]); + adrdec ddr4dec(PhysicalAddress, `EXT_MEM_BASE, `EXT_MEM_RANGE, `EXT_MEM_SUPPORTED, AccessRWX, Size, SUPPORTED_SIZE, SelRegions[7]); + adrdec boottimdec(PhysicalAddress, `BOOTROM_BASE, `BOOTROM_RANGE, `BOOTROM_SUPPORTED, AccessRX, Size, SUPPORTED_SIZE, SelRegions[6]); + adrdec timdec(PhysicalAddress, `RAM_BASE, `RAM_RANGE, `RAM_SUPPORTED, AccessRWX, Size, SUPPORTED_SIZE, SelRegions[5]); - adrdec clintdec(PhysicalAddress, `CLINT_BASE, `CLINT_RANGE, `CLINT_SUPPORTED, AccessRW, Size, 4'b1111, SelRegions[4]); + 
adrdec clintdec(PhysicalAddress, `CLINT_BASE, `CLINT_RANGE, `CLINT_SUPPORTED, AccessRW, Size, SUPPORTED_SIZE, SelRegions[4]); adrdec gpiodec(PhysicalAddress, `GPIO_BASE, `GPIO_RANGE, `GPIO_SUPPORTED, AccessRW, Size, 4'b0100, SelRegions[3]); adrdec uartdec(PhysicalAddress, `UART_BASE, `UART_RANGE, `UART_SUPPORTED, AccessRW, Size, 4'b0001, SelRegions[2]); adrdec plicdec(PhysicalAddress, `PLIC_BASE, `PLIC_RANGE, `PLIC_SUPPORTED, AccessRW, Size, 4'b0100, SelRegions[1]); - adrdec sdcdec(PhysicalAddress, `SDC_BASE, `SDC_RANGE, `SDC_SUPPORTED, AccessRW, Size, 4'b1100, SelRegions[0]); // *** PMA chapter says xlen only like CLINT + adrdec sdcdec(PhysicalAddress, `SDC_BASE, `SDC_RANGE, `SDC_SUPPORTED, AccessRW, Size, SUPPORTED_SIZE & 4'b1100, SelRegions[0]); assign SelRegions[8] = ~|(SelRegions[7:0]); From aa5cbab0d8e78084bc30bc9ef4f9103fa562fb63 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Tue, 23 Aug 2022 10:43:47 -0500 Subject: [PATCH 13/24] Replaced LSU data replication with 0 extension. --- pipelined/src/lsu/lsu.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelined/src/lsu/lsu.sv b/pipelined/src/lsu/lsu.sv index cb37e1ef3..9c99134fe 100644 --- a/pipelined/src/lsu/lsu.sv +++ b/pipelined/src/lsu/lsu.sv @@ -266,7 +266,7 @@ module lsu ( end if (`F_SUPPORTED) - mux2 #(`LLEN) datamux({{`LLEN/`XLEN}{IMAWriteDataM}}, FWriteDataM, FpLoadStoreM, IMAFWriteDataM); + mux2 #(`LLEN) datamux({{{`LLEN-`XLEN}{1'b0}}, IMAWriteDataM}, FWriteDataM, FpLoadStoreM, IMAFWriteDataM); else assign IMAFWriteDataM = IMAWriteDataM; ///////////////////////////////////////////////////////////////////////////////////////////// From 5efec3b1f309d79df2b475e88cf84540eb64ecf9 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Tue, 23 Aug 2022 10:46:03 -0500 Subject: [PATCH 14/24] Replaced FPU data replication on WriteData bus with 0 extension.
--- pipelined/src/fpu/fpu.sv | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/pipelined/src/fpu/fpu.sv b/pipelined/src/fpu/fpu.sv index 935d6b011..af26052eb 100755 --- a/pipelined/src/fpu/fpu.sv +++ b/pipelined/src/fpu/fpu.sv @@ -59,9 +59,6 @@ module fpu ( // single stored in a double: | 32 1s | single precision value | // - sets the underflow after rounding - // LSU interface - logic [`FLEN-1:0] FWriteDataE; - // control signals logic FRegWriteW; // FP register write enable logic [2:0] FrmM; // FP rounding mode @@ -291,17 +288,7 @@ module fpu ( // - FP uses NaN-blocking format // - if there are any unsused bits the most significant bits are filled with 1s - if(`FPSIZES == 1) assign FWriteDataE = YE; - else if(`FPSIZES == 2) assign FWriteDataE = FmtE ? YE : {`FLEN/`LEN1{YE[`LEN1-1:0]}}; - else - always_comb - case(FmtE) - `Q_FMT: FWriteDataE = YE; - `D_FMT: FWriteDataE = {`FLEN/`D_LEN{YE[`D_LEN-1:0]}}; - `S_FMT: FWriteDataE = {`FLEN/`S_LEN{YE[`S_LEN-1:0]}}; - `H_FMT: FWriteDataE = {`FLEN/`H_LEN{YE[`H_LEN-1:0]}}; - endcase - flopenrc #(`FLEN) FWriteDataMReg (clk, reset, FlushM, ~StallM, FWriteDataE, FWriteDataM); + flopenrc #(`FLEN) FWriteDataMReg (clk, reset, FlushM, ~StallM, YE, FWriteDataM); // NaN Block SrcA generate From 20ba6fd19cbbed54262fbb2bbdd917126c3bcd49 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Tue, 23 Aug 2022 11:14:53 -0500 Subject: [PATCH 15/24] Reversed order of supported sizes in adrdecs. --- pipelined/src/mmu/adrdecs.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelined/src/mmu/adrdecs.sv b/pipelined/src/mmu/adrdecs.sv index 3923c2a67..d2768033b 100644 --- a/pipelined/src/mmu/adrdecs.sv +++ b/pipelined/src/mmu/adrdecs.sv @@ -38,7 +38,7 @@ module adrdecs ( output logic [8:0] SelRegions ); - localparam logic [3:0] SUPPORTED_SIZE = (`XLEN == 64 ? 4'b1111 : 4'b0111); + localparam logic [3:0] SUPPORTED_SIZE = (`LLEN == 32 ?
4'b0111 : 4'b1111); // Determine which region of physical memory (if any) is being accessed adrdec ddr4dec(PhysicalAddress, `EXT_MEM_BASE, `EXT_MEM_RANGE, `EXT_MEM_SUPPORTED, AccessRWX, Size, SUPPORTED_SIZE, SelRegions[7]); adrdec boottimdec(PhysicalAddress, `BOOTROM_BASE, `BOOTROM_RANGE, `BOOTROM_SUPPORTED, AccessRX, Size, SUPPORTED_SIZE, SelRegions[6]); From 4e33ead4134cac5eaebe3987c34cb56ecd74d891 Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Tue, 23 Aug 2022 16:36:20 +0000 Subject: [PATCH 16/24] renamed rounding bits to L,G,R,S and fixed lint warning --- pipelined/src/fpu/flags.sv | 8 +-- pipelined/src/fpu/postprocess.sv | 12 ++--- pipelined/src/fpu/resultsign.sv | 3 +- pipelined/src/fpu/round.sv | 93 +++++++++++++++----------------- pipelined/src/fpu/srtfsm.sv | 10 ++-- 5 files changed, 59 insertions(+), 67 deletions(-) diff --git a/pipelined/src/fpu/flags.sv b/pipelined/src/fpu/flags.sv index 67fdb4935..73cc3ae35 100644 --- a/pipelined/src/fpu/flags.sv +++ b/pipelined/src/fpu/flags.sv @@ -50,7 +50,7 @@ module flags( input logic [`NE+1:0] Me, // exponent of the normalized sum input logic [1:0] CvtNegResMsbs, // the negitive integer result's most significant bits input logic FmaAs, FmaPs, // the product and modified Z signs - input logic R, UfL, S, UfPlus1, // bits used to determine rounding + input logic R, G, S, UfPlus1, // bits used to determine rounding output logic DivByZero, output logic IntInvalid, Invalid, Overflow, // flags used to select the res output logic [4:0] PostProcFlg // flags @@ -126,16 +126,16 @@ module flags( // | | | | and if the result is not exact // | | | | | and if the input isnt infinity or NaN // | | | | | | - assign Underflow = ((FullRe[`NE+1] | (FullRe == 0) | ((FullRe == 1) & (Me == 0) & ~(UfPlus1&UfL)))&(R|S))&~(InfIn|NaNIn|DivByZero|Invalid); + assign Underflow = ((FullRe[`NE+1] | (FullRe == 0) | ((FullRe == 1) & (Me == 0) & ~(UfPlus1&G)))&(R|S|G))&~(InfIn|NaNIn|DivByZero|Invalid); // Set Inexact flag if the res is 
diffrent from what would be outputed given infinite precision // - Don't set the underflow flag if an underflowed res isn't outputed - assign FpInexact = (S|Overflow|R)&~(InfIn|NaNIn|DivByZero|Invalid); + assign FpInexact = (S|G|Overflow|R)&~(InfIn|NaNIn|DivByZero|Invalid); // if the res is too small to be represented and not 0 // | and if the res is not invalid (outside the integer bounds) // | | - assign IntInexact = ((CvtCe[`NE]&~XZero)|S|R)&~IntInvalid; + assign IntInexact = ((CvtCe[`NE]&~XZero)|S|R|G)&~IntInvalid; // select the inexact flag to output assign Inexact = ToInt ? IntInexact : FpInexact; diff --git a/pipelined/src/fpu/postprocess.sv b/pipelined/src/fpu/postprocess.sv index f96101443..8039f7c37 100644 --- a/pipelined/src/fpu/postprocess.sv +++ b/pipelined/src/fpu/postprocess.sv @@ -83,15 +83,13 @@ module postprocess ( logic [`NE+1:0] Me; logic [`CORRSHIFTSZ-1:0] Mf; // corectly shifted fraction logic [`NE+1:0] FullRe; // Re with bits to determine sign and overflow - logic S; // S bit logic UfPlus1; // do you add one (for determining underflow flag) - logic R; // bits needed to determine rounding logic [$clog2(`NORMSHIFTSZ)-1:0] ShiftAmt; // normalization shift count logic [`NORMSHIFTSZ-1:0] ShiftIn; // is the sum zero logic [`NORMSHIFTSZ-1:0] Shifted; // the shifted result logic Plus1; // add one to the final result? 
logic IntInvalid, Overflow, Invalid; // flags - logic UfL; + logic G, R, S; // bits needed to determine rounding logic [`FMTBITS-1:0] OutFmt; // fma signals logic [`NE+1:0] FmaMe; // exponent of the normalized sum @@ -201,16 +199,16 @@ module postprocess ( roundsign roundsign(.FmaPs, .FmaAs, .FmaInvA, .FmaOp, .DivOp, .CvtOp, .FmaNegSum, .Sqrt, .FmaSs, .Xs, .Ys, .CvtCs, .Ms); - round round(.OutFmt, .Frm, .S, .FmaZmS, .Plus1, .PostProcSel, .CvtCe, .Qe, + round round(.OutFmt, .Frm, .FmaZmS, .Plus1, .PostProcSel, .CvtCe, .Qe, .Ms, .FmaMe, .FmaOp, .CvtOp, .CvtResDenormUf, .Mf, .ToInt, .CvtResUf, .DivS, .DivDone, - .DivOp, .UfPlus1, .FullRe, .Rf, .Re, .R, .UfL, .Me); + .DivOp, .UfPlus1, .FullRe, .Rf, .Re, .S, .R, .G, .Me); /////////////////////////////////////////////////////////////////////////////// // Sign calculation /////////////////////////////////////////////////////////////////////////////// - resultsign resultsign(.Frm, .FmaPs, .FmaAs, .FmaMe, .R, .S, + resultsign resultsign(.Frm, .FmaPs, .FmaAs, .FmaMe, .R, .S, .G, .FmaOp, .ZInf, .InfIn, .FmaSZero, .Mult, .Ms, .Ws); /////////////////////////////////////////////////////////////////////////////// @@ -220,7 +218,7 @@ module postprocess ( flags flags(.XSNaN, .YSNaN, .ZSNaN, .XInf, .YInf, .ZInf, .InfIn, .XZero, .YZero, .Xs, .Sqrt, .ToInt, .IntToFp, .Int64, .Signed, .OutFmt, .CvtCe, .NaNIn, .FmaAs, .FmaPs, .R, .IntInvalid, .DivByZero, - .UfL, .S, .UfPlus1, .CvtOp, .DivOp, .FmaOp, .FullRe, .Plus1, + .G, .S, .UfPlus1, .CvtOp, .DivOp, .FmaOp, .FullRe, .Plus1, .Me, .CvtNegResMsbs, .Invalid, .Overflow, .PostProcFlg); /////////////////////////////////////////////////////////////////////////////// diff --git a/pipelined/src/fpu/resultsign.sv b/pipelined/src/fpu/resultsign.sv index c2912ece7..8d6dbb6e9 100644 --- a/pipelined/src/fpu/resultsign.sv +++ b/pipelined/src/fpu/resultsign.sv @@ -39,6 +39,7 @@ module resultsign( input logic Mult, input logic R, input logic S, + input logic G, input logic Ms, output logic Ws ); @@ 
-60,7 +61,7 @@ module resultsign( // - if a multiply opperation is done, then use the products sign(Ps) // - if the zero sum is not exactly zero i.e. R|S use the sign of the exact result (which is the product's sign) // - if an effective addition occurs (P+A or -P+-A or P--A) then use the product's sign - assign Zeros = (FmaPs^FmaAs)&~(R|S)&~Mult ? Frm[1:0] == 2'b10 : FmaPs; + assign Zeros = (FmaPs^FmaAs)&~(R|G|S)&~Mult ? Frm[1:0] == 2'b10 : FmaPs; // is the result negitive diff --git a/pipelined/src/fpu/round.sv b/pipelined/src/fpu/round.sv index 0943413bd..d33d894a9 100644 --- a/pipelined/src/fpu/round.sv +++ b/pipelined/src/fpu/round.sv @@ -60,16 +60,14 @@ module round( output logic S, // sticky bit output logic [`NE+1:0] Me, output logic Plus1, - output logic R, UfL // bits needed to calculate rounding + output logic R, G // bits needed to calculate rounding ); - logic L; // bit used for rounding - least significant bit of the normalized sum logic UfCalcPlus1; logic NormS; // normalized sum's sticky bit - logic UfS; // sticky bit for underlow calculation logic [`NF-1:0] RoundFrac; logic FpRes, IntRes; - logic UfR; - logic FpRound, FpLSBRes, FpUfRound; + logic FpG, FpL, FpR; + logic L; // lsb of result logic CalcPlus1, FpPlus1; logic [`FLEN:0] RoundAdd; // how much to add to the result @@ -176,106 +174,101 @@ module round( // only add the Addend sticky if doing an FMA opperation // - the shifter shifts too far left when there's an underflow (shifting out all possible sticky bits) - assign UfS = FmaZmS&FmaOp | NormS | CvtResUf&CvtOp | FmaMe[`NE+1]&FmaOp | DivS&DivOp; + assign S = FmaZmS&FmaOp | NormS | CvtResUf&CvtOp | FmaMe[`NE+1]&FmaOp | DivS&DivOp; // determine round and LSB of the rounded value // - underflow round bit is used to determint the underflow flag if (`FPSIZES == 1) begin - assign FpRound = Mf[`CORRSHIFTSZ-`NF-1]; - assign FpLSBRes = Mf[`CORRSHIFTSZ-`NF]; - assign FpUfRound = Mf[`CORRSHIFTSZ-`NF-2]; + assign FpG = Mf[`CORRSHIFTSZ-`NF-1]; + assign 
FpL = Mf[`CORRSHIFTSZ-`NF]; + assign FpR = Mf[`CORRSHIFTSZ-`NF-2]; end else if (`FPSIZES == 2) begin - assign FpRound = OutFmt ? Mf[`CORRSHIFTSZ-`NF-1] : Mf[`CORRSHIFTSZ-`NF1-1]; - assign FpLSBRes = OutFmt ? Mf[`CORRSHIFTSZ-`NF] : Mf[`CORRSHIFTSZ-`NF1]; - assign FpUfRound = OutFmt ? Mf[`CORRSHIFTSZ-`NF-2] : Mf[`CORRSHIFTSZ-`NF1-2]; + assign FpG = OutFmt ? Mf[`CORRSHIFTSZ-`NF-1] : Mf[`CORRSHIFTSZ-`NF1-1]; + assign FpL = OutFmt ? Mf[`CORRSHIFTSZ-`NF] : Mf[`CORRSHIFTSZ-`NF1]; + assign FpR = OutFmt ? Mf[`CORRSHIFTSZ-`NF-2] : Mf[`CORRSHIFTSZ-`NF1-2]; end else if (`FPSIZES == 3) begin always_comb case (OutFmt) `FMT: begin - FpRound = Mf[`CORRSHIFTSZ-`NF-1]; - FpLSBRes = Mf[`CORRSHIFTSZ-`NF]; - FpUfRound = Mf[`CORRSHIFTSZ-`NF-2]; + FpG = Mf[`CORRSHIFTSZ-`NF-1]; + FpL = Mf[`CORRSHIFTSZ-`NF]; + FpR = Mf[`CORRSHIFTSZ-`NF-2]; end `FMT1: begin - FpRound = Mf[`CORRSHIFTSZ-`NF1-1]; - FpLSBRes = Mf[`CORRSHIFTSZ-`NF1]; - FpUfRound = Mf[`CORRSHIFTSZ-`NF1-2]; + FpG = Mf[`CORRSHIFTSZ-`NF1-1]; + FpL = Mf[`CORRSHIFTSZ-`NF1]; + FpR = Mf[`CORRSHIFTSZ-`NF1-2]; end `FMT2: begin - FpRound = Mf[`CORRSHIFTSZ-`NF2-1]; - FpLSBRes = Mf[`CORRSHIFTSZ-`NF2]; - FpUfRound = Mf[`CORRSHIFTSZ-`NF2-2]; + FpG = Mf[`CORRSHIFTSZ-`NF2-1]; + FpL = Mf[`CORRSHIFTSZ-`NF2]; + FpR = Mf[`CORRSHIFTSZ-`NF2-2]; end default: begin - FpRound = 1'bx; - FpLSBRes = 1'bx; - FpUfRound = 1'bx; + FpG = 1'bx; + FpL = 1'bx; + FpR = 1'bx; end endcase end else if (`FPSIZES == 4) begin always_comb case (OutFmt) 2'h3: begin - FpRound = Mf[`CORRSHIFTSZ-`Q_NF-1]; - FpLSBRes = Mf[`CORRSHIFTSZ-`Q_NF]; - FpUfRound = Mf[`CORRSHIFTSZ-`Q_NF-2]; + FpG = Mf[`CORRSHIFTSZ-`Q_NF-1]; + FpL = Mf[`CORRSHIFTSZ-`Q_NF]; + FpR = Mf[`CORRSHIFTSZ-`Q_NF-2]; end 2'h1: begin - FpRound = Mf[`CORRSHIFTSZ-`D_NF-1]; - FpLSBRes = Mf[`CORRSHIFTSZ-`D_NF]; - FpUfRound = Mf[`CORRSHIFTSZ-`D_NF-2]; + FpG = Mf[`CORRSHIFTSZ-`D_NF-1]; + FpL = Mf[`CORRSHIFTSZ-`D_NF]; + FpR = Mf[`CORRSHIFTSZ-`D_NF-2]; end 2'h0: begin - FpRound = Mf[`CORRSHIFTSZ-`S_NF-1]; - FpLSBRes = 
Mf[`CORRSHIFTSZ-`S_NF]; - FpUfRound = Mf[`CORRSHIFTSZ-`S_NF-2]; + FpG = Mf[`CORRSHIFTSZ-`S_NF-1]; + FpL = Mf[`CORRSHIFTSZ-`S_NF]; + FpR = Mf[`CORRSHIFTSZ-`S_NF-2]; end 2'h2: begin - FpRound = Mf[`CORRSHIFTSZ-`H_NF-1]; - FpLSBRes = Mf[`CORRSHIFTSZ-`H_NF]; - FpUfRound = Mf[`CORRSHIFTSZ-`H_NF-2]; + FpG = Mf[`CORRSHIFTSZ-`H_NF-1]; + FpL = Mf[`CORRSHIFTSZ-`H_NF]; + FpR = Mf[`CORRSHIFTSZ-`H_NF-2]; end endcase end - assign R = ToInt&CvtOp ? Mf[`CORRSHIFTSZ-`XLEN-1] : FpRound; - assign L = ToInt&CvtOp ? Mf[`CORRSHIFTSZ-`XLEN] : FpLSBRes; - assign UfR = ToInt&CvtOp ? Mf[`CORRSHIFTSZ-`XLEN-2] : FpUfRound; - - // used to determine underflow flag - assign UfL = FpRound; - // determine sticky - assign S = UfS | UfR; + assign G = ToInt&CvtOp ? Mf[`CORRSHIFTSZ-`XLEN-1] : FpG; + assign L = ToInt&CvtOp ? Mf[`CORRSHIFTSZ-`XLEN] : FpL; + assign R = ToInt&CvtOp ? Mf[`CORRSHIFTSZ-`XLEN-2] : FpR; always_comb begin // Determine if you add 1 case (Frm) - 3'b000: CalcPlus1 = R & (S| L);//round to nearest even + 3'b000: CalcPlus1 = G & (R|S|L);//round to nearest even 3'b001: CalcPlus1 = 0;//round to zero 3'b010: CalcPlus1 = Ms;//round down 3'b011: CalcPlus1 = ~Ms;//round up - 3'b100: CalcPlus1 = R;//round to nearest max magnitude + 3'b100: CalcPlus1 = G;//round to nearest max magnitude default: CalcPlus1 = 1'bx; endcase // Determine if you add 1 (for underflow flag) case (Frm) - 3'b000: UfCalcPlus1 = UfR & (UfS| UfL);//round to nearest even + 3'b000: UfCalcPlus1 = R & (S|G);//round to nearest even 3'b001: UfCalcPlus1 = 0;//round to zero 3'b010: UfCalcPlus1 = Ms;//round down 3'b011: UfCalcPlus1 = ~Ms;//round up - 3'b100: UfCalcPlus1 = UfR;//round to nearest max magnitude + 3'b100: UfCalcPlus1 = R;//round to nearest max magnitude default: UfCalcPlus1 = 1'bx; endcase end // If an answer is exact don't round - assign Plus1 = CalcPlus1 & (S | R); + assign Plus1 = CalcPlus1 & (S|R|G); assign FpPlus1 = Plus1&~(ToInt&CvtOp); - assign UfPlus1 = UfCalcPlus1 & S; // UfR is part of sticky + assign 
UfPlus1 = UfCalcPlus1 & (S|R); // Compute rounded result if (`FPSIZES == 1) begin diff --git a/pipelined/src/fpu/srtfsm.sv b/pipelined/src/fpu/srtfsm.sv index 304a219c6..e07f3760b 100644 --- a/pipelined/src/fpu/srtfsm.sv +++ b/pipelined/src/fpu/srtfsm.sv @@ -73,11 +73,11 @@ module srtfsm( // radix-4 division can't create a QM that continually adds 0's if (`RADIX == 2) begin logic [`DIVb+3:0] FZero, FSticky; - logic [`DIVb+3:0] LastK, FirstK; - assign LastK = ({4'b1111, LastC} & ~({4'b1111, LastC} << 1)); - assign FirstK = ({4'b1111, FirstC<<1} & ~({4'b1111, FirstC<<1} << 1)); - assign FZero = SqrtM ? {{2{LastSM[`DIVb]}}, LastSM, 2'b0} | {LastK,1'b0} : {4'b1,D,{`DIVb-`DIVN+2{1'b0}}}; - assign FSticky = SqrtM ? {FirstSM, 2'b0} | {FirstK,1'b0} : {4'b1,D,{`DIVb-`DIVN+2{1'b0}}}; + logic [`DIVb+2:0] LastK, FirstK; + assign LastK = ({3'b111, LastC} & ~({3'b111, LastC} << 1)); + assign FirstK = ({3'b111, FirstC<<1} & ~({3'b111, FirstC<<1} << 1)); + assign FZero = SqrtM ? {LastSM[`DIVb], LastSM, 2'b0} | {LastK,1'b0} : {3'b1,D,{`DIVb-`DIVN+2{1'b0}}}; + assign FSticky = SqrtM ? {FirstSM[`DIVb], FirstSM, 2'b0} | {FirstK,1'b0} : {3'b1,D,{`DIVb-`DIVN+2{1'b0}}}; // *** |... 
for continual -1 is not efficent fix - also only needed for radix-2 assign WZero = ((NextWSN^NextWCN)=={NextWSN[`DIVb+2:0]|NextWCN[`DIVb+2:0], 1'b0})|(((NextWSN+NextWCN+FZero)==0)&qn[`DIVCOPIES-1]); assign DivSE = |W&~((W+FSticky)==0); //***not efficent fix == and need the & qn From 029aecabf72f2b1785fea624098e9d2330ceafb6 Mon Sep 17 00:00:00 2001 From: David Harris Date: Tue, 23 Aug 2022 10:14:54 -0700 Subject: [PATCH 17/24] typo in srtfsm --- pipelined/src/fpu/srtfsm.sv | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pipelined/src/fpu/srtfsm.sv b/pipelined/src/fpu/srtfsm.sv index 304a219c6..a24fb526a 100644 --- a/pipelined/src/fpu/srtfsm.sv +++ b/pipelined/src/fpu/srtfsm.sv @@ -56,7 +56,7 @@ module srtfsm( output logic DivDone, output logic NegSticky, output logic DivBusy - ); +); typedef enum logic [1:0] {IDLE, BUSY, DONE} statetype; statetype state; @@ -69,7 +69,7 @@ module srtfsm( assign DivBusy = (state == BUSY); // calculate sticky bit // - there is a chance that a value is subtracted infinitly, resulting in an exact QM result - // this is only a problem on radix 2 (and pssibly maximally redundant 4) since minimally redundant + // this is only a problem on radix 2 (and possibly maximally redundant 4) since minimally redundant // radix-4 division can't create a QM that continually adds 0's if (`RADIX == 2) begin logic [`DIVb+3:0] FZero, FSticky; From c61dba61926eabf2b21bf0a74eb32adc91e7a7ac Mon Sep 17 00:00:00 2001 From: David Harris Date: Tue, 23 Aug 2022 10:23:08 -0700 Subject: [PATCH 18/24] Fixed LSU typos --- pipelined/src/lsu/lsu.sv | 2 +- pipelined/src/lsu/subwordwrite.sv | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pipelined/src/lsu/lsu.sv b/pipelined/src/lsu/lsu.sv index 9c99134fe..5c1eb2270 100644 --- a/pipelined/src/lsu/lsu.sv +++ b/pipelined/src/lsu/lsu.sv @@ -230,7 +230,7 @@ module lsu ( mux2 #(`LLEN) UnCachedDataMux(.d0(LittleEndianReadDataWordM), .d1({{`LLEN-`XLEN{1'b0}}, 
DLSUBusBuffer[`XLEN-1:0]}), .s(SelUncachedAdr), .y(ReadDataWordMuxM)); - mux2 #(`XLEN) LsuBushwdataMux(.d0(ReadDataWordM[`XLEN-1:0]), .d1(LSUWriteDataM), + mux2 #(`XLEN) LsuBushwdataMux(.d0(ReadDataWordM[`XLEN-1:0]), .d1(LSUWriteDataM[`XLEN-1:0]), .s(SelUncachedAdr), .y(LSUBusHWDATA)); if(CACHE_ENABLED) begin : dcache cache #(.LINELEN(`DCACHE_LINELENINBITS), .NUMLINES(`DCACHE_WAYSIZEINBYTES*8/LINELEN), diff --git a/pipelined/src/lsu/subwordwrite.sv b/pipelined/src/lsu/subwordwrite.sv index 59546ec74..237d1138f 100644 --- a/pipelined/src/lsu/subwordwrite.sv +++ b/pipelined/src/lsu/subwordwrite.sv @@ -40,10 +40,10 @@ module subwordwrite ( if (`LLEN == 128) begin:sww always_comb case(LSUFunct3M[2:0]) - 2'b000: LittleEndianWriteDataM = {16{IMAFWriteDataM[7:0]}}; // sb - 2'b001: LittleEndianWriteDataM = {8{IMAFWriteDataM[15:0]}}; // sh - 2'b010: LittleEndianWriteDataM = {4{IMAFWriteDataM[31:0]}}; // sw - 2'b011: LittleEndianWriteDataM = {2{IMAFWriteDataM[63:0]}}; // sd + 3'b000: LittleEndianWriteDataM = {16{IMAFWriteDataM[7:0]}}; // sb + 3'b001: LittleEndianWriteDataM = {8{IMAFWriteDataM[15:0]}}; // sh + 3'b010: LittleEndianWriteDataM = {4{IMAFWriteDataM[31:0]}}; // sw + 3'b011: LittleEndianWriteDataM = {2{IMAFWriteDataM[63:0]}}; // sd default: LittleEndianWriteDataM = IMAFWriteDataM; // sq endcase end else if (`LLEN == 64) begin:sww From f72d07adceae6f2dc6b8ae21b3930d3dcc0eb693 Mon Sep 17 00:00:00 2001 From: David Harris Date: Tue, 23 Aug 2022 11:08:02 -0700 Subject: [PATCH 19/24] Improved illegal instruction checking in FPU --- pipelined/src/fpu/fctrl.sv | 32 +++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/pipelined/src/fpu/fctrl.sv b/pipelined/src/fpu/fctrl.sv index 934aba2cd..aab6872e3 100755 --- a/pipelined/src/fpu/fctrl.sv +++ b/pipelined/src/fpu/fctrl.sv @@ -67,22 +67,40 @@ module fctrl ( logic [1:0] FResSelD; // Select one of the results that finish in the memory stage logic [2:0] FrmD, FrmE; // FP rounding mode 
logic [`FMTBITS-1:0] FmtD; // FP format - //*** will putting x for don't cares reduce area in synthisis??? + logic [1:0] Fmt; + logic SupportedFmt; + // FPU Instruction Decoder + assign Fmt = Funct7D[1:0]; + // Note: only Fmt is checked; fcvt does not check destination format + assign SupportedFmt = (Fmt == 2'b00 | (Fmt == 2'b01 & `D_SUPPORTED) | + (Fmt == 2'b10 & `ZFH_SUPPORTED) | (Fmt == 2'b11 & `Q_SUPPORTED)); always_comb if (STATUS_FS == 2'b00) // FPU instructions are illegal when FPU is disabled ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_1; + else if (OpD != 7'b0000111 & OpD != 7'b0100111 & ~SupportedFmt) + ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_1; // for anything other than loads and stores, check for supported format else case(OpD) // FRegWrite_FWriteInt_FResSel_PostProcSel_FOpCtrl_FDivStart_IllegalFPUInstr 7'b0000111: case(Funct3D) - 3'b010: ControlsD = `FCTRLW'b1_0_10_xx_0xx_0_0; // flw - 3'b011: ControlsD = `FCTRLW'b1_0_10_xx_0xx_0_0; // fld - default: ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_1; // non-implemented instruction + 3'b010: ControlsD = `FCTRLW'b1_0_10_xx_0xx_0_0; // flw + 3'b011: if (`D_SUPPORTED) ControlsD = `FCTRLW'b1_0_10_xx_0xx_0_0; // fld + else ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_1; // fld not supported + 3'b100: if (`Q_SUPPORTED) ControlsD = `FCTRLW'b1_0_10_xx_0xx_0_0; // flq + else ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_1; // flq not supported + 3'b001: if (`ZFH_SUPPORTED) ControlsD = `FCTRLW'b1_0_10_xx_0xx_0_0; // flh + else ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_1; // flh not supported + default: ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_1; // non-implemented instruction endcase 7'b0100111: case(Funct3D) - 3'b010: ControlsD = `FCTRLW'b0_0_10_xx_0xx_0_0; // fsw - 3'b011: ControlsD = `FCTRLW'b0_0_10_xx_0xx_0_0; // fsd - default: ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_1; // non-implemented instruction + 3'b010: ControlsD = `FCTRLW'b0_0_10_xx_0xx_0_0; // fsw + 3'b011: if (`D_SUPPORTED) ControlsD = `FCTRLW'b0_0_10_xx_0xx_0_0; // fsd + else ControlsD = 
`FCTRLW'b0_0_00_xx_0xx_0_1; // fsd not supported + 3'b100: if (`Q_SUPPORTED) ControlsD = `FCTRLW'b0_0_10_xx_0xx_0_0; // fsq + else ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_1; // fsq not supported + 3'b001: if (`ZFH_SUPPORTED) ControlsD = `FCTRLW'b0_0_10_xx_0xx_0_0; // fsh + else ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_1; // fsh not supported + default: ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_1; // non-implemented instruction endcase 7'b1000011: ControlsD = `FCTRLW'b1_0_01_10_000_0_0; // fmadd 7'b1000111: ControlsD = `FCTRLW'b1_0_01_10_001_0_0; // fmsub From d19fc99bf0377eebd19634eab136a0b28fead003 Mon Sep 17 00:00:00 2001 From: David Harris Date: Tue, 23 Aug 2022 11:16:36 -0700 Subject: [PATCH 20/24] Simplify IEU-FP datapath --- pipelined/src/ieu/datapath.sv | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/pipelined/src/ieu/datapath.sv b/pipelined/src/ieu/datapath.sv index 73e9e94eb..571501315 100644 --- a/pipelined/src/ieu/datapath.sv +++ b/pipelined/src/ieu/datapath.sv @@ -87,8 +87,8 @@ module datapath ( // Writeback stage signals logic [`XLEN-1:0] SCResultW; logic [`XLEN-1:0] ResultW; - logic [`XLEN-1:0] IFResultW; - + logic [`XLEN-1:0] IFResultW, IFCvtResultW; + // Decode stage assign Rs1D = InstrD[19:15]; assign Rs2D = InstrD[24:20]; @@ -123,16 +123,14 @@ module datapath ( flopenrc #(`XLEN) IFResultWReg(clk, reset, FlushW, ~StallW, IFResultM, IFResultW); flopenrc #(5) RdWReg(clk, reset, FlushW, ~StallW, RdM, RdW); - // floating point interactions: fcvt, fp stores + // floating point inputs: FIntResM comes from fclass, fcmp, fmv; FCvtIntResW comes from fcvt if (`F_SUPPORTED) begin:fpmux - logic [`XLEN-1:0] IFCvtResultW; mux2 #(`XLEN) resultmuxM(IEUResultM, FIntResM, FWriteIntM, IFResultM); mux2 #(`XLEN) cvtresultmuxW(IFResultW, FCvtIntResW, ~FResSelW[1]&FResSelW[0], IFCvtResultW); - mux5 #(`XLEN) resultmuxW(IFCvtResultW, ReadDataW, CSRReadValW, MDUResultW, SCResultW, ResultSrcW, ResultW); end else begin:fpmux - assign IFResultM = IEUResultM; 
- mux5 #(`XLEN) resultmuxW(IFResultW, ReadDataW, CSRReadValW, MDUResultW, SCResultW, ResultSrcW, ResultW); + assign IFResultM = IEUResultM; assign IFCvtResultW = IFResultW; end + mux5 #(`XLEN) resultmuxW(IFCvtResultW, ReadDataW, CSRReadValW, MDUResultW, SCResultW, ResultSrcW, ResultW); // handle Store Conditional result if atomic extension supported if (`A_SUPPORTED) assign SCResultW = {{(`XLEN-1){1'b0}}, SquashSCW}; From 05aa18fe146c1a41c31809c848433a6f0899eb71 Mon Sep 17 00:00:00 2001 From: David Harris Date: Tue, 23 Aug 2022 12:17:19 -0700 Subject: [PATCH 21/24] Cleaned up fcvt selection control to IEU and FPUIllegalInst signals --- pipelined/src/fpu/fctrl.sv | 25 +++++++++++------------ pipelined/src/fpu/fpu.sv | 13 ++++++------ pipelined/src/ieu/datapath.sv | 6 ++---- pipelined/src/ieu/ieu.sv | 8 +++----- pipelined/src/privileged/privdec.sv | 5 ++--- pipelined/src/privileged/privileged.sv | 9 +++----- pipelined/src/privileged/privpiperegs.sv | 17 ++++++++------- pipelined/src/wally/wallypipelinedcore.sv | 20 ++++++++++-------- 8 files changed, 47 insertions(+), 56 deletions(-) diff --git a/pipelined/src/fpu/fctrl.sv b/pipelined/src/fpu/fctrl.sv index aab6872e3..367983217 100755 --- a/pipelined/src/fpu/fctrl.sv +++ b/pipelined/src/fpu/fctrl.sv @@ -41,7 +41,7 @@ module fctrl ( input logic [2:0] FRM_REGW, // rounding mode from CSR input logic [1:0] STATUS_FS, // is FPU enabled? 
input logic FDivBusyE, // is the divider busy - output logic IllegalFPUInstrD, IllegalFPUInstrM, // Is the instruction an illegal fpu instruction + output logic IllegalFPUInstrM, // Is the instruction an illegal fpu instruction output logic FRegWriteM, FRegWriteW, // FP register write enable output logic [2:0] FrmM, // FP rounding mode output logic [`FMTBITS-1:0] FmtE, FmtM, // FP format @@ -52,12 +52,13 @@ module fctrl ( output logic [2:0] OpCtrlE, OpCtrlM, // Select which opperation to do in each component output logic [1:0] FResSelE, FResSelM, FResSelW, // Select one of the results that finish in the memory stage output logic [1:0] PostProcSelE, PostProcSelM, // select result in the post processing unit + output logic FCvtIntW, output logic [4:0] Adr1E, Adr2E, Adr3E // adresses of each input ); `define FCTRLW 11 logic [`FCTRLW-1:0] ControlsD; - logic IllegalFPUInstrE; + logic IllegalFPUInstrD, IllegalFPUInstrE; logic FRegWriteD; // FP register write enable logic DivStartD; // integer register write enable logic FWriteIntD; // integer register write enable @@ -257,23 +258,21 @@ module fctrl ( // 10 - xor sign // D/E pipleine register - flopenrc #(12+`FMTBITS) DECtrlReg3(clk, reset, FlushE, ~StallE, - {FRegWriteD, PostProcSelD, FResSelD, FrmD, FmtD, OpCtrlD, FWriteIntD}, - {FRegWriteE, PostProcSelE, FResSelE, FrmE, FmtE, OpCtrlE, FWriteIntE}); - flopenrc #(15) DEAdrReg(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]}, + flopenrc #(13+`FMTBITS) DECtrlReg3(clk, reset, FlushE, ~StallE, + {FRegWriteD, PostProcSelD, FResSelD, FrmD, FmtD, OpCtrlD, FWriteIntD, IllegalFPUInstrD}, + {FRegWriteE, PostProcSelE, FResSelE, FrmE, FmtE, OpCtrlE, FWriteIntE, IllegalFPUInstrE}); + flopenrc #(15) DEAdrReg(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]}, {Adr1E, Adr2E, Adr3E}); flopenrc #(1) DEDivStartReg(clk, reset, FlushE, ~StallE|FDivBusyE, DivStartD, DivStartE); - if(`FLEN>`XLEN) - flopenrc #(1) DEIllegalReg(clk, reset, 
FlushE, ~StallE, IllegalFPUInstrD, IllegalFPUInstrE); // E/M pipleine register - flopenrc #(12+int'(`FMTBITS)) EMCtrlReg (clk, reset, FlushM, ~StallM, - {FRegWriteE, FResSelE, PostProcSelE, FrmE, FmtE, OpCtrlE, FWriteIntE}, - {FRegWriteM, FResSelM, PostProcSelM, FrmM, FmtM, OpCtrlM, FWriteIntM}); - if(`FLEN>`XLEN) - flopenrc #(1) EMIllegalReg(clk, reset, FlushM, ~StallM, IllegalFPUInstrE, IllegalFPUInstrM); + flopenrc #(13+int'(`FMTBITS)) EMCtrlReg (clk, reset, FlushM, ~StallM, + {FRegWriteE, FResSelE, PostProcSelE, FrmE, FmtE, OpCtrlE, FWriteIntE, IllegalFPUInstrE}, + {FRegWriteM, FResSelM, PostProcSelM, FrmM, FmtM, OpCtrlM, FWriteIntM, IllegalFPUInstrM}); // M/W pipleine register flopenrc #(3) MWCtrlReg(clk, reset, FlushW, ~StallW, {FRegWriteM, FResSelM}, {FRegWriteW, FResSelW}); + + assign FCvtIntW = (FResSelW == 2'b01); endmodule diff --git a/pipelined/src/fpu/fpu.sv b/pipelined/src/fpu/fpu.sv index af26052eb..57fd4aa18 100755 --- a/pipelined/src/fpu/fpu.sv +++ b/pipelined/src/fpu/fpu.sv @@ -47,9 +47,9 @@ module fpu ( output logic [`FLEN-1:0] FWriteDataM, // Data to be written to memory (to LSU) output logic [`XLEN-1:0] FIntResM, // data to be written to integer register (to IEU) output logic [`XLEN-1:0] FCvtIntResW, // convert result to to be written to integer register (to IEU) - output logic [1:0] FResSelW, // final result selection (to IEU) + output logic FCvtIntW, // select FCvtIntRes (to IEU) output logic FDivBusyE, // Is the divide/sqrt unit busy (stall execute stage) (to HZU) - output logic IllegalFPUInstrD, // Is the instruction an illegal fpu instruction (to privileged unit) + output logic IllegalFPUInstrM, // Is the instruction an illegal fpu instruction (to privileged unit) output logic [4:0] SetFflagsM // FPU flags (to privileged unit) ); @@ -67,10 +67,9 @@ module fpu ( logic FWriteIntM; // Write to integer register logic [1:0] ForwardXE, ForwardYE, ForwardZE; // forwarding mux control signals logic [2:0] OpCtrlE, OpCtrlM; // Select which 
opperation to do in each component - logic [1:0] FResSelE, FResSelM; // Select one of the results that finish in the memory stage + logic [1:0] FResSelE, FResSelM, FResSelW; // Select one of the results that finish in the memory stage logic [1:0] PostProcSelE, PostProcSelM; // select result in the post processing unit logic [4:0] Adr1E, Adr2E, Adr3E; // adresses of each input - logic IllegalFPUInstrM; logic XEnE, YEnE, ZEnE; logic YEnForwardE, ZEnForwardE; @@ -147,7 +146,7 @@ module fpu ( logic [`FLEN-1:0] AlignedSrcAE; // align SrcA to the floating point format logic [`FLEN-1:0] BoxedZeroE; // Zero value for Z for multiplication, with NaN boxing if needed logic [`FLEN-1:0] BoxedOneE; // Zero value for Z for multiplication, with NaN boxing if needed - + // DECODE STAGE ////////////////////////////////////////////////////////////////////////////////////////// @@ -163,9 +162,9 @@ module fpu ( // calculate FP control signals fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .InstrD, .StallE, .StallM, .StallW, .FlushE, .FlushM, .FlushW, .FRM_REGW, .STATUS_FS, .FDivBusyE, - .reset, .clk, .IllegalFPUInstrD, .FRegWriteM, .FRegWriteW, .FrmM, .FmtE, .FmtM, .YEnForwardE, .ZEnForwardE, + .reset, .clk, .FRegWriteM, .FRegWriteW, .FrmM, .FmtE, .FmtM, .YEnForwardE, .ZEnForwardE, .DivStartE, .FWriteIntE, .FWriteIntM, .OpCtrlE, .OpCtrlM, .IllegalFPUInstrM, .XEnE, .YEnE, .ZEnE, - .FResSelE, .FResSelM, .FResSelW, .PostProcSelE, .PostProcSelM, .Adr1E, .Adr2E, .Adr3E); + .FResSelE, .FResSelM, .FResSelW, .PostProcSelE, .PostProcSelM, .FCvtIntW, .Adr1E, .Adr2E, .Adr3E); // FP register file fregfile fregfile (.clk, .reset, .we4(FRegWriteW), diff --git a/pipelined/src/ieu/datapath.sv b/pipelined/src/ieu/datapath.sv index 571501315..89ebd9b5d 100644 --- a/pipelined/src/ieu/datapath.sv +++ b/pipelined/src/ieu/datapath.sv @@ -44,7 +44,6 @@ module datapath ( input logic ALUResultSrcE, input logic JumpE, input logic BranchSignedE, - input 
logic IllegalFPUInstrE, input logic [`XLEN-1:0] PCE, input logic [`XLEN-1:0] PCLinkE, output logic [1:0] FlagsE, @@ -52,7 +51,7 @@ module datapath ( output logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // *** these are the src outputs before the mux choosing between them and PCE to put in srcA/B // Memory stage signals input logic StallM, FlushM, - input logic FWriteIntM, + input logic FWriteIntM, FCvtIntW, input logic [`XLEN-1:0] FIntResM, output logic [`XLEN-1:0] SrcAM, output logic [`XLEN-1:0] WriteDataM, @@ -62,7 +61,6 @@ module datapath ( input logic SquashSCW, input logic [2:0] ResultSrcW, input logic [`XLEN-1:0] FCvtIntResW, - input logic [1:0] FResSelW, input logic [`XLEN-1:0] ReadDataW, // input logic [`XLEN-1:0] PCLinkW, input logic [`XLEN-1:0] CSRReadValW, MDUResultW, @@ -126,7 +124,7 @@ module datapath ( // floating point inputs: FIntResM comes from fclass, fcmp, fmv; FCvtIntResW comes from fcvt if (`F_SUPPORTED) begin:fpmux mux2 #(`XLEN) resultmuxM(IEUResultM, FIntResM, FWriteIntM, IFResultM); - mux2 #(`XLEN) cvtresultmuxW(IFResultW, FCvtIntResW, ~FResSelW[1]&FResSelW[0], IFCvtResultW); + mux2 #(`XLEN) cvtresultmuxW(IFResultW, FCvtIntResW, FCvtIntW, IFCvtResultW); end else begin:fpmux assign IFResultM = IEUResultM; assign IFCvtResultW = IFResultW; end diff --git a/pipelined/src/ieu/ieu.sv b/pipelined/src/ieu/ieu.sv index 520807061..d7101b873 100644 --- a/pipelined/src/ieu/ieu.sv +++ b/pipelined/src/ieu/ieu.sv @@ -39,8 +39,7 @@ module ieu ( // Execute Stage interface input logic [`XLEN-1:0] PCE, input logic [`XLEN-1:0] PCLinkE, - input logic FWriteIntE, - input logic IllegalFPUInstrE, + input logic FWriteIntE, FCvtIntW, output logic [`XLEN-1:0] IEUAdrE, output logic MDUE, W64E, output logic [2:0] Funct3E, @@ -60,7 +59,6 @@ module ieu ( // Writeback stage input logic [`XLEN-1:0] CSRReadValW, MDUResultW, - input logic [1:0] FResSelW, input logic [`XLEN-1:0] FCvtIntResW, output logic [4:0] RdW, input logic [`XLEN-1:0] ReadDataW, @@ -105,9 +103,9 @@ 
module ieu ( datapath dp( .clk, .reset, .ImmSrcD, .InstrD, .StallE, .FlushE, .ForwardAE, .ForwardBE, - .ALUControlE, .Funct3E, .ALUSrcAE, .ALUSrcBE, .ALUResultSrcE, .JumpE, .BranchSignedE, .IllegalFPUInstrE, + .ALUControlE, .Funct3E, .ALUSrcAE, .ALUSrcBE, .ALUResultSrcE, .JumpE, .BranchSignedE, .PCE, .PCLinkE, .FlagsE, .IEUAdrE, .ForwardedSrcAE, .ForwardedSrcBE, - .StallM, .FlushM, .FWriteIntM, .FIntResM, .SrcAM, .WriteDataM, .FResSelW, + .StallM, .FlushM, .FWriteIntM, .FIntResM, .SrcAM, .WriteDataM, .FCvtIntW, .StallW, .FlushW, .RegWriteW, .SquashSCW, .ResultSrcW, .ReadDataW, .FCvtIntResW, .CSRReadValW, .MDUResultW, .Rs1D, .Rs2D, .Rs1E, .Rs2E, .RdE, .RdM, .RdW); diff --git a/pipelined/src/privileged/privdec.sv b/pipelined/src/privileged/privdec.sv index 367c9e825..a63bf9824 100644 --- a/pipelined/src/privileged/privdec.sv +++ b/pipelined/src/privileged/privdec.sv @@ -43,7 +43,7 @@ module privdec ( output logic EcallFaultM, BreakpointFaultM, output logic sretM, mretM, wfiM, sfencevmaM); - logic IllegalPrivilegedInstrM, IllegalOrDisabledFPUInstrM; + logic IllegalPrivilegedInstrM; logic WFITimeoutM; logic StallMQ; logic ebreakM, ecallM; @@ -92,7 +92,6 @@ module privdec ( // Fault on illegal instructions /////////////////////////////////////////// assign IllegalPrivilegedInstrM = PrivilegedM & ~(sretM|mretM|ecallM|ebreakM|wfiM|sfencevmaM); - assign IllegalOrDisabledFPUInstrM = IllegalFPUInstrM | (STATUS_FS == 2'b00); - assign IllegalInstrFaultM = (IllegalIEUInstrFaultM & IllegalOrDisabledFPUInstrM) | IllegalPrivilegedInstrM | IllegalCSRAccessM | + assign IllegalInstrFaultM = (IllegalIEUInstrFaultM & IllegalFPUInstrM) | IllegalPrivilegedInstrM | IllegalCSRAccessM | WFITimeoutM; endmodule diff --git a/pipelined/src/privileged/privileged.sv b/pipelined/src/privileged/privileged.sv index f9f8a99a2..029ab2e5a 100644 --- a/pipelined/src/privileged/privileged.sv +++ b/pipelined/src/privileged/privileged.sv @@ -52,7 +52,7 @@ module privileged ( input logic ICacheAccess, input 
logic PrivilegedM, input logic InstrPageFaultF, LoadPageFaultM, StoreAmoPageFaultM, - input logic InstrMisalignedFaultM, IllegalIEUInstrFaultD, IllegalFPUInstrD, + input logic InstrMisalignedFaultM, IllegalIEUInstrFaultD, IllegalFPUInstrM, input logic LoadMisalignedFaultM, input logic StoreAmoMisalignedFaultM, input logic MTimerInt, MExtInt, SExtInt, MSwInt, @@ -69,7 +69,6 @@ module privileged ( input logic StoreAmoAccessFaultM, input logic SelHPTW, - output logic IllegalFPUInstrE, output logic [1:0] PrivilegeModeW, output logic [`XLEN-1:0] SATP_REGW, output logic STATUS_MXR, STATUS_SUM, STATUS_MPRV, @@ -88,7 +87,6 @@ module privileged ( logic sretM, mretM; logic IllegalCSRAccessM; logic IllegalIEUInstrFaultM; - logic IllegalFPUInstrM; logic InstrPageFaultM; logic InstrAccessFaultM; logic IllegalInstrFaultM; @@ -148,9 +146,8 @@ module privileged ( .IllegalCSRAccessM, .BigEndianM); privpiperegs ppr(.clk, .reset, .StallD, .StallE, .StallM, .FlushD, .FlushE, .FlushM, - .InstrPageFaultF, .InstrAccessFaultF, .IllegalIEUInstrFaultD, .IllegalFPUInstrD, - .IllegalFPUInstrE, - .InstrPageFaultM, .InstrAccessFaultM, .IllegalIEUInstrFaultM, .IllegalFPUInstrM); + .InstrPageFaultF, .InstrAccessFaultF, .IllegalIEUInstrFaultD, + .InstrPageFaultM, .InstrAccessFaultM, .IllegalIEUInstrFaultM); trap trap(.reset, .InstrMisalignedFaultM, .InstrAccessFaultM, .IllegalInstrFaultM, diff --git a/pipelined/src/privileged/privpiperegs.sv b/pipelined/src/privileged/privpiperegs.sv index db1a77228..541f04132 100644 --- a/pipelined/src/privileged/privpiperegs.sv +++ b/pipelined/src/privileged/privpiperegs.sv @@ -35,10 +35,9 @@ module privpiperegs ( input logic StallD, StallE, StallM, input logic FlushD, FlushE, FlushM, input logic InstrPageFaultF, InstrAccessFaultF, - input logic IllegalIEUInstrFaultD, IllegalFPUInstrD, - output logic IllegalFPUInstrE, + input logic IllegalIEUInstrFaultD, output logic InstrPageFaultM, InstrAccessFaultM, - output logic IllegalIEUInstrFaultM, IllegalFPUInstrM + 
output logic IllegalIEUInstrFaultM ); logic InstrPageFaultD, InstrAccessFaultD; @@ -49,10 +48,10 @@ module privpiperegs ( flopenrc #(2) faultregD(clk, reset, FlushD, ~StallD, {InstrPageFaultF, InstrAccessFaultF}, {InstrPageFaultD, InstrAccessFaultD}); - flopenrc #(4) faultregE(clk, reset, FlushE, ~StallE, - {IllegalIEUInstrFaultD, InstrPageFaultD, InstrAccessFaultD, IllegalFPUInstrD}, - {IllegalIEUInstrFaultE, InstrPageFaultE, InstrAccessFaultE, IllegalFPUInstrE}); - flopenrc #(4) faultregM(clk, reset, FlushM, ~StallM, - {IllegalIEUInstrFaultE, InstrPageFaultE, InstrAccessFaultE, IllegalFPUInstrE}, - {IllegalIEUInstrFaultM, InstrPageFaultM, InstrAccessFaultM, IllegalFPUInstrM}); + flopenrc #(3) faultregE(clk, reset, FlushE, ~StallE, + {IllegalIEUInstrFaultD, InstrPageFaultD, InstrAccessFaultD}, + {IllegalIEUInstrFaultE, InstrPageFaultE, InstrAccessFaultE}); + flopenrc #(3) faultregM(clk, reset, FlushM, ~StallM, + {IllegalIEUInstrFaultE, InstrPageFaultE, InstrAccessFaultE}, + {IllegalIEUInstrFaultM, InstrPageFaultM, InstrAccessFaultM}); endmodule \ No newline at end of file diff --git a/pipelined/src/wally/wallypipelinedcore.sv b/pipelined/src/wally/wallypipelinedcore.sv index f1af34294..cd71ee419 100644 --- a/pipelined/src/wally/wallypipelinedcore.sv +++ b/pipelined/src/wally/wallypipelinedcore.sv @@ -94,9 +94,10 @@ module wallypipelinedcore ( logic FWriteIntE; logic [`FLEN-1:0] FWriteDataM; logic [`XLEN-1:0] FIntResM; - logic [`XLEN-1:0] FCvtIntResW; + logic [`XLEN-1:0] FCvtIntResW; + logic FCvtIntW; logic FDivBusyE; - logic IllegalFPUInstrD, IllegalFPUInstrE; + logic IllegalFPUInstrM; logic FRegWriteM; logic FPUStallD; logic FpLoadStoreM; @@ -217,7 +218,7 @@ module wallypipelinedcore ( .IllegalBaseInstrFaultD, // Execute Stage interface - .PCE, .PCLinkE, .FWriteIntE, .IllegalFPUInstrE, + .PCE, .PCLinkE, .FWriteIntE, .IEUAdrE, .MDUE, .W64E, .Funct3E, .ForwardedSrcAE, .ForwardedSrcBE, // *** these are the src outputs before the mux choosing between them and PCE to 
put in srcA/B @@ -235,7 +236,7 @@ module wallypipelinedcore ( .RdW, .ReadDataW(ReadDataW[`XLEN-1:0]), .InstrValidM, .FCvtIntResW, - .FResSelW, + .FCvtIntW, // hazards .StallD, .StallE, .StallM, .StallW, @@ -344,7 +345,7 @@ module wallypipelinedcore ( .RASPredPCWrongM, .BPPredClassNonCFIWrongM, .InstrClassM, .DCacheMiss, .DCacheAccess, .ICacheMiss, .ICacheAccess, .PrivilegedM, .InstrPageFaultF, .LoadPageFaultM, .StoreAmoPageFaultM, - .InstrMisalignedFaultM, .IllegalIEUInstrFaultD, .IllegalFPUInstrD, + .InstrMisalignedFaultM, .IllegalIEUInstrFaultD, .LoadMisalignedFaultM, .StoreAmoMisalignedFaultM, .MTimerInt, .MExtInt, .SExtInt, .MSwInt, .MTIME_CLINT, @@ -354,7 +355,7 @@ module wallypipelinedcore ( // *** do these need to be split up into one for dmem and one for ifu? // instead, could we only care about the instr and F pins that come from ifu and only care about the load/store and m pins that come from dmem? .InstrAccessFaultF, .LoadAccessFaultM, .StoreAmoAccessFaultM, .SelHPTW, - .IllegalFPUInstrE, + .IllegalFPUInstrM, .PrivilegeModeW, .SATP_REGW, .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP, .STATUS_FS, .PMPCFG_ARRAY_REGW, .PMPADDR_ARRAY_REGW, @@ -400,17 +401,18 @@ module wallypipelinedcore ( .FWriteDataM, // Data to be written to memory .FIntResM, // data to be written to integer register .FCvtIntResW, // fp -> int conversion result to be stored in int register - .FResSelW, // fpu result selection + .FCvtIntW, // fpu result selection .FDivBusyE, // Is the divide/sqrt unit busy (stall execute stage) - .IllegalFPUInstrD, // Is the instruction an illegal fpu instruction + .IllegalFPUInstrM, // Is the instruction an illegal fpu instruction .SetFflagsM // FPU flags (to privileged unit) ); // floating point unit end else begin // no F_SUPPORTED or D_SUPPORTED; tie outputs low assign FStallD = 0; assign FWriteIntE = 0; assign FIntResM = 0; + assign FCvtIntW = 0; assign FDivBusyE = 0; - assign IllegalFPUInstrD = 1; + assign IllegalFPUInstrM = 1; assign SetFflagsM 
= 0; end endmodule From d72068d582dbb769f134b8f56ab68c57440b4f51 Mon Sep 17 00:00:00 2001 From: David Harris Date: Tue, 23 Aug 2022 12:57:18 -0700 Subject: [PATCH 22/24] Only stall FPU to IEU on convert instructions with dependencies --- pipelined/src/fpu/fctrl.sv | 4 +++- pipelined/src/fpu/fpu.sv | 3 ++- pipelined/src/ieu/forward.sv | 4 ++-- pipelined/src/ieu/ieu.sv | 4 ++-- 4 files changed, 9 insertions(+), 6 deletions(-) diff --git a/pipelined/src/fpu/fctrl.sv b/pipelined/src/fpu/fctrl.sv index 367983217..50961f27c 100755 --- a/pipelined/src/fpu/fctrl.sv +++ b/pipelined/src/fpu/fctrl.sv @@ -48,7 +48,7 @@ module fctrl ( output logic DivStartE, // Start division or squareroot output logic XEnE, YEnE, ZEnE, output logic YEnForwardE, ZEnForwardE, - output logic FWriteIntE, FWriteIntM, // Write to integer register + output logic FWriteIntE, FCvtIntE, FWriteIntM, // Write to integer register output logic [2:0] OpCtrlE, OpCtrlM, // Select which opperation to do in each component output logic [1:0] FResSelE, FResSelM, FResSelW, // Select one of the results that finish in the memory stage output logic [1:0] PostProcSelE, PostProcSelM, // select result in the post processing unit @@ -264,6 +264,8 @@ module fctrl ( flopenrc #(15) DEAdrReg(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]}, {Adr1E, Adr2E, Adr3E}); flopenrc #(1) DEDivStartReg(clk, reset, FlushE, ~StallE|FDivBusyE, DivStartD, DivStartE); + assign FCvtIntE = (FResSelE == 2'b01); + // E/M pipleine register flopenrc #(13+int'(`FMTBITS)) EMCtrlReg (clk, reset, FlushM, ~StallM, {FRegWriteE, FResSelE, PostProcSelE, FrmE, FmtE, OpCtrlE, FWriteIntE, IllegalFPUInstrE}, diff --git a/pipelined/src/fpu/fpu.sv b/pipelined/src/fpu/fpu.sv index 57fd4aa18..fe69bb081 100755 --- a/pipelined/src/fpu/fpu.sv +++ b/pipelined/src/fpu/fpu.sv @@ -44,6 +44,7 @@ module fpu ( output logic FpLoadStoreM, // Fp load instruction? 
(to LSU) output logic FStallD, // Stall the decode stage (To HZU) output logic FWriteIntE, // integer register write enable (to IEU) + output logic FCvtIntE, // Convert to int (to IEU) output logic [`FLEN-1:0] FWriteDataM, // Data to be written to memory (to LSU) output logic [`XLEN-1:0] FIntResM, // data to be written to integer register (to IEU) output logic [`XLEN-1:0] FCvtIntResW, // convert result to to be written to integer register (to IEU) @@ -163,7 +164,7 @@ module fpu ( fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .InstrD, .StallE, .StallM, .StallW, .FlushE, .FlushM, .FlushW, .FRM_REGW, .STATUS_FS, .FDivBusyE, .reset, .clk, .FRegWriteM, .FRegWriteW, .FrmM, .FmtE, .FmtM, .YEnForwardE, .ZEnForwardE, - .DivStartE, .FWriteIntE, .FWriteIntM, .OpCtrlE, .OpCtrlM, .IllegalFPUInstrM, .XEnE, .YEnE, .ZEnE, + .DivStartE, .FWriteIntE, .FCvtIntE, .FWriteIntM, .OpCtrlE, .OpCtrlM, .IllegalFPUInstrM, .XEnE, .YEnE, .ZEnE, .FResSelE, .FResSelM, .FResSelW, .PostProcSelE, .PostProcSelM, .FCvtIntW, .Adr1E, .Adr2E, .Adr3E); // FP register file diff --git a/pipelined/src/ieu/forward.sv b/pipelined/src/ieu/forward.sv index 07bd89d3d..856fbded1 100644 --- a/pipelined/src/ieu/forward.sv +++ b/pipelined/src/ieu/forward.sv @@ -35,7 +35,7 @@ module forward( input logic [4:0] Rs1D, Rs2D, Rs1E, Rs2E, RdE, RdM, RdW, input logic MemReadE, MDUE, CSRReadE, input logic RegWriteM, RegWriteW, - input logic FWriteIntE, + input logic FCvtIntE, input logic SCE, // Forwarding controls output logic [1:0] ForwardAE, ForwardBE, @@ -58,7 +58,7 @@ module forward( // Stall on dependent operations that finish in Mem Stage and can't bypass in time assign MatchDE = (Rs1D == RdE) | (Rs2D == RdE); // Decode-stage instruction source depends on result from execute stage instruction - assign FPUStallD = 0; // FWriteIntE & MatchDE; // FPU to Integer transfers have single-cycle latency + assign FPUStallD = FCvtIntE & MatchDE; // FPU to Integer transfers 
have single-cycle latency except fcvt assign LoadStallD = (MemReadE|SCE) & MatchDE; assign MDUStallD = MDUE & MatchDE; assign CSRRdStallD = CSRReadE & MatchDE; diff --git a/pipelined/src/ieu/ieu.sv b/pipelined/src/ieu/ieu.sv index d7101b873..6258566ff 100644 --- a/pipelined/src/ieu/ieu.sv +++ b/pipelined/src/ieu/ieu.sv @@ -39,7 +39,7 @@ module ieu ( // Execute Stage interface input logic [`XLEN-1:0] PCE, input logic [`XLEN-1:0] PCLinkE, - input logic FWriteIntE, FCvtIntW, + input logic FWriteIntE, FCvtIntE, FCvtIntW, output logic [`XLEN-1:0] IEUAdrE, output logic MDUE, W64E, output logic [2:0] Funct3E, @@ -112,7 +112,7 @@ module ieu ( forward fw( .Rs1D, .Rs2D, .Rs1E, .Rs2E, .RdE, .RdM, .RdW, .MemReadE, .MDUE, .CSRReadE, .RegWriteM, .RegWriteW, - .FWriteIntE, .SCE, .ForwardAE, .ForwardBE, + .FCvtIntE, .SCE, .ForwardAE, .ForwardBE, .FPUStallD, .LoadStallD, .MDUStallD, .CSRRdStallD); endmodule From 5eebd521c558c99655dbc7feb31885a433cbeb0d Mon Sep 17 00:00:00 2001 From: David Harris Date: Tue, 23 Aug 2022 14:14:41 -0700 Subject: [PATCH 23/24] Fixed FPU-IEU forwarding stall --- pipelined/src/wally/wallypipelinedcore.sv | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pipelined/src/wally/wallypipelinedcore.sv b/pipelined/src/wally/wallypipelinedcore.sv index cd71ee419..db6da30b0 100644 --- a/pipelined/src/wally/wallypipelinedcore.sv +++ b/pipelined/src/wally/wallypipelinedcore.sv @@ -171,6 +171,7 @@ module wallypipelinedcore ( logic BreakpointFaultM, EcallFaultM; logic InstrDAPageFaultF; logic BigEndianM; + logic FCvtIntE; ifu ifu( .clk, .reset, @@ -218,7 +219,7 @@ module wallypipelinedcore ( .IllegalBaseInstrFaultD, // Execute Stage interface - .PCE, .PCLinkE, .FWriteIntE, + .PCE, .PCLinkE, .FWriteIntE, .FCvtIntE, .IEUAdrE, .MDUE, .W64E, .Funct3E, .ForwardedSrcAE, .ForwardedSrcBE, // *** these are the src outputs before the mux choosing between them and PCE to put in srcA/B @@ -397,7 +398,7 @@ module wallypipelinedcore ( .FRegWriteM, // FP 
register write enable .FpLoadStoreM, .FStallD, // Stall the decode stage - .FWriteIntE, // integer register write enable + .FWriteIntE, .FCvtIntE, // integer register write enable, conversion operation .FWriteDataM, // Data to be written to memory .FIntResM, // data to be written to integer register .FCvtIntResW, // fp -> int conversion result to be stored in int register @@ -409,6 +410,7 @@ module wallypipelinedcore ( end else begin // no F_SUPPORTED or D_SUPPORTED; tie outputs low assign FStallD = 0; assign FWriteIntE = 0; + assign FCvtIntE = 0; assign FIntResM = 0; assign FCvtIntW = 0; assign FDivBusyE = 0; From 642dc170d7453184789bc92555c3896e7e380443 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Tue, 23 Aug 2022 18:51:11 -0500 Subject: [PATCH 24/24] Found small bug in busfsm which was issuing 1 extra memory read after each cache line fetch. Does not appear to have translated to an extra read out of ahblite. --- pipelined/src/lsu/busdp.sv | 5 +++-- pipelined/src/lsu/busfsm.sv | 7 ++++--- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/pipelined/src/lsu/busdp.sv b/pipelined/src/lsu/busdp.sv index b241d75f1..d06ba89b7 100644 --- a/pipelined/src/lsu/busdp.sv +++ b/pipelined/src/lsu/busdp.sv @@ -71,14 +71,14 @@ module busdp #(parameter WORDSPERLINE, LINELEN, LOGWPL, CACHE_ENABLED) localparam integer WordCountThreshold = CACHE_ENABLED ? 
WORDSPERLINE - 1 : 0; logic [`PA_BITS-1:0] LocalLSUBusAdr; logic [LOGWPL-1:0] WordCountDelayed; - + logic BufferCaptureEn; // *** implement flops as an array if feasbile; DLSUBusBuffer might be a problem // *** better name than DLSUBusBuffer genvar index; for (index = 0; index < WORDSPERLINE; index++) begin:fetchbuffer logic [WORDSPERLINE-1:0] CaptureWord; - assign CaptureWord[index] = LSUBusAck & LSUBusRead & (index == WordCountDelayed); + assign CaptureWord[index] = BufferCaptureEn & (index == WordCountDelayed); flopen #(`XLEN) fb(.clk, .en(CaptureWord[index]), .d(LSUBusHRDATA), .q(DLSUBusBuffer[(index+1)*`XLEN-1:index*`XLEN])); end @@ -90,5 +90,6 @@ module busdp #(parameter WORDSPERLINE, LINELEN, LOGWPL, CACHE_ENABLED) busfsm #(WordCountThreshold, LOGWPL, CACHE_ENABLED) busfsm( .clk, .reset, .IgnoreRequest, .LSURWM, .DCacheFetchLine, .DCacheWriteLine, .LSUBusAck, .LSUBusInit, .CPUBusy, .CacheableM, .BusStall, .LSUBusWrite, .SelLSUBusWord, .LSUBusRead, + .BufferCaptureEn, .LSUBurstType, .LSUTransType, .LSUTransComplete, .DCacheBusAck, .BusCommittedM, .SelUncachedAdr, .WordCount, .WordCountDelayed); endmodule diff --git a/pipelined/src/lsu/busfsm.sv b/pipelined/src/lsu/busfsm.sv index 88fd4025e..7b0c63f66 100644 --- a/pipelined/src/lsu/busfsm.sv +++ b/pipelined/src/lsu/busfsm.sv @@ -55,6 +55,7 @@ module busfsm #(parameter integer WordCountThreshold, output logic DCacheBusAck, output logic BusCommittedM, output logic SelUncachedAdr, + output logic BufferCaptureEn, output logic [LOGWPL-1:0] WordCount, WordCountDelayed); @@ -167,15 +168,15 @@ module busfsm #(parameter integer WordCountThreshold, (BusCurrState == STATE_BUS_WRITE); assign UnCachedLSUBusWrite = (BusCurrState == STATE_BUS_READY & UnCachedAccess & LSURWM[0] & ~IgnoreRequest) | (BusCurrState == STATE_BUS_UNCACHED_WRITE); - assign LSUBusWrite = UnCachedLSUBusWrite | (BusCurrState == STATE_BUS_WRITE); + assign LSUBusWrite = UnCachedLSUBusWrite | (BusCurrState == STATE_BUS_WRITE & ~WordCountFlag); assign 
SelLSUBusWord = (BusCurrState == STATE_BUS_READY & UnCachedAccess & LSURWM[0]) | (BusCurrState == STATE_BUS_UNCACHED_WRITE) | (BusCurrState == STATE_BUS_WRITE); assign UnCachedLSUBusRead = (BusCurrState == STATE_BUS_READY & UnCachedAccess & LSURWM[1] & ~IgnoreRequest) | (BusCurrState == STATE_BUS_UNCACHED_READ); - assign LSUBusRead = UnCachedLSUBusRead | (BusCurrState == STATE_BUS_FETCH) | (BusCurrState == STATE_BUS_READY & DCacheFetchLine); - + assign LSUBusRead = UnCachedLSUBusRead | (BusCurrState == STATE_BUS_FETCH & ~(WordCountFlag)) | (BusCurrState == STATE_BUS_READY & DCacheFetchLine); + assign BufferCaptureEn = UnCachedLSUBusRead | BusCurrState == STATE_BUS_FETCH; // Makes bus only do uncached reads/writes when we actually do uncached reads/writes. Needed because CacheableM is 0 when flushing cache. assign UnCachedRW = UnCachedLSUBusWrite | UnCachedLSUBusRead;