diff --git a/pipelined/src/cache/cache.sv b/pipelined/src/cache/cache.sv index 6fe11f354..46cdcd6aa 100644 --- a/pipelined/src/cache/cache.sv +++ b/pipelined/src/cache/cache.sv @@ -117,13 +117,15 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTE ///////////////////////////////////////////////////////////////////////////////////////////// // Choose read address (RAdr). Normally use NextAdr, but use PAdr during stalls - // and FlushAdr when handling D$ flushes + // and FlushAdr when handling D$ flushes + // The icache must update to the newest PCNextF on flush as it is probably a trap. Trap + // sets PCNextF to XTVEC and the icache must start reading the instruction. mux3 #(SETLEN) AdrSelMux( .d0(NextAdr[SETTOP-1:OFFSETLEN]), .d1(PAdr[SETTOP-1:OFFSETLEN]), .d2(FlushAdr), - .s({SelFlush, (SelAdr | SelHPTW)}), .y(RAdr)); + .s({SelFlush, ((SelAdr | SelHPTW) & ~((DCACHE == 0) & FlushStage))}), .y(RAdr)); // Array of cache ways, along with victim, hit, dirty, and read merging logic - cacheway #(NUMLINES, LINELEN, TAGLEN, OFFSETLEN, SETLEN) + cacheway #(NUMLINES, LINELEN, TAGLEN, OFFSETLEN, SETLEN, DCACHE) CacheWays[NUMWAYS-1:0](.clk, .reset, .ce(SRAMEnable), .RAdr, .PAdr, .LineWriteData, .LineByteMask, .SetValidWay, .ClearValidWay, .SetDirtyWay, .ClearDirtyWay, .SelEvict, .VictimWay, .FlushWay, .SelFlush, .ReadDataLineWay, .HitWay, .VictimDirtyWay, .VictimTagWay, .FlushStage, diff --git a/pipelined/src/cache/cachereplacementpolicy.sv b/pipelined/src/cache/cachereplacementpolicy.sv index a791e9fcc..42f0b4413 100644 --- a/pipelined/src/cache/cachereplacementpolicy.sv +++ b/pipelined/src/cache/cachereplacementpolicy.sv @@ -46,10 +46,115 @@ module cachereplacementpolicy logic [SETLEN-1:0] RAdrD; logic LRUWriteEnD; + + localparam LOGNUMWAYS = $clog2(NUMWAYS); + localparam LEN = NUMWAYS-1; + + logic [LOGNUMWAYS-1:0] HitWayEnc; + logic [LEN-1:0] HitWayExpand; + genvar row; + + logic [NUMWAYS-2:0] cEn; + +/* -----\/----- EXCLUDED -----\/----- + // 
proposed generic solution + + binencoder #(NUMWAYS) encoder(HitWay, HitWayEnc); + + // bit duplication + // expand HitWay as HitWay[3], {{2}{HitWay[2]}}, {{4}{HitWay[1]}, {{8{HitWay[0]}}, ... + for(row = 0; row < LOGNUMWAYS; row++) begin + localparam integer DuplicationFactor = 2**(LOGNUMWAYS-row-1); + localparam integer StartIndex = NUMWAYS-2 - DuplicationFactor + 1; + localparam integer EndIndex = NUMWAYS-2 - 2 * DuplicationFactor + 2; + assign HitWayExpand[StartIndex : EndIndex] = {{DuplicationFactor}{HitWayEnc[row]}}; + end + + + genvar r, a,s; + //localparam s = NUMWAYS-2; + + + assign cEn[NUMWAYS-2] = '1; + for(s = NUMWAYS-2; s >= NUMWAYS/2; s--) begin : enables + localparam p = NUMWAYS - s; + localparam g = $clog2(p); + localparam t0 = s - g; + localparam t1 = t0 - 1; + localparam r = LOGNUMWAYS - g; + assign cEn[t0] = cEn[s] & ~HitWayEnc[r]; + assign cEn[t1] = cEn[s] & HitWayEnc[r]; + end + + mux2 #(1) LRUMuxes[NUMWAYS-2:0](LineReplacementBits, HitWayExpand, cEn, NewReplacement); + + assign VictimWay[0] = ~LineReplacementBits[2] & ~LineReplacementBits[0]; + assign VictimWay[1] = ~LineReplacementBits[2] & LineReplacementBits[0]; + assign VictimWay[2] = LineReplacementBits[2] & ~LineReplacementBits[1]; + assign VictimWay[3] = LineReplacementBits[2] & LineReplacementBits[1]; + -----/\----- EXCLUDED -----/\----- */ + + + +/* -----\/----- EXCLUDED -----\/----- +// logic [NUMWAYS/2-1:0] rawEn [LOGNUMWAYS-1:0]; + for(r = LOGNUMWAYS-1; r >= 0; r--) begin + localparam integer g = 2**(LOGNUMWAYS-r-1); + for(a = g-1; a > 0; a--) begin + localparam t0 = s - 2**(g-1); + localparam t1 = t0 - 1; + localparam s = s - 1; + assign cEn[t0] = cEn[s] & ~HitWayEnc[r]; + assign cEn[t1] = cEn[s] & HitWayEnc[r]; + end + -----/\----- EXCLUDED -----/\----- */ +/* -----\/----- EXCLUDED -----\/----- + for(a = g-1; a > 0; a--) begin + localparam t0 = s - 2**(g-1); + localparam t1 = t0 - 1; + s = s - 1; + end + end + -----/\----- EXCLUDED -----/\----- */ + +/* -----\/----- EXCLUDED 
-----\/----- + always_comb begin + for(r = LOGNUMWAYS-1; r > 0; r--) begin + localparam g = 2**(LOGNUMWAYS-r-1); + for(a = g-1; a > 0; a--) begin + localparam t0 = s - 2**(g-1); + localparam t1 = t0 - 1; + s = s - 1; + end + end + end + -----/\----- EXCLUDED -----/\----- */ + +/* -----\/----- EXCLUDED -----\/----- + + genvar row2; + logic [LOGNUMWAYS-1:0] indices [LOGNUMWAYS-1:0]; + integer jindex; + always_comb begin + rawEn[LOGNUMWAYS-1] = 1; + for(jindex = 0; jindex < LOGNUMWAYS-1; jindex++) begin + rawEn[jindex] = 0; + rawEn[jindex][~(HitWayEnc>>(jindex+1))] = 1; + + //cEn[2**(LOGNUMWAYS-jindex)-1+jindex:0] = rawEn[jindex][2**(LOGNUMWAYS-jindex)-1:0]; + + end + end + -----/\----- EXCLUDED -----/\----- */ + + + // *** high priority to clean up +/* -----\/----- EXCLUDED -----\/----- initial begin assert (NUMWAYS == 2 || NUMWAYS == 4) else $error("Only 2 or 4 ways supported"); end + -----/\----- EXCLUDED -----/\----- */ // Replacement Bits: Register file // Needs to be resettable for simulation, but could omit reset for synthesis *** diff --git a/pipelined/src/fpu/fctrl.sv b/pipelined/src/fpu/fctrl.sv index dd291ecea..661d53cba 100755 --- a/pipelined/src/fpu/fctrl.sv +++ b/pipelined/src/fpu/fctrl.sv @@ -47,7 +47,7 @@ module fctrl ( output logic FRegWriteM, FRegWriteW, // FP register write enable output logic [2:0] FrmM, // FP rounding mode output logic [`FMTBITS-1:0] FmtE, FmtM, // FP format - output logic DivStartE, // Start division or squareroot + output logic FDivStartE, IDivStartE, // Start division or squareroot output logic XEnE, YEnE, ZEnE, output logic YEnForwardE, ZEnForwardE, output logic FWriteIntE, FCvtIntE, FWriteIntM, // Write to integer register @@ -62,7 +62,7 @@ module fctrl ( logic [`FCTRLW-1:0] ControlsD; logic IllegalFPUInstrD, IllegalFPUInstrE; logic FRegWriteD; // FP register write enable - logic FDivStartD, FDivStartE, IDivStartE; // integer register write enable + logic FDivStartD; // Start FP division or squareroot logic FWriteIntD; // 
integer register write enable logic FRegWriteE; // FP register write enable logic [2:0] OpCtrlD; // Select which opperation to do in each component @@ -266,10 +266,8 @@ module fctrl ( flopenrc #(15) DEAdrReg(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]}, {Adr1E, Adr2E, Adr3E}); flopenrc #(1) DEFDivStartReg(clk, reset, FlushE, ~StallE|FDivBusyE, FDivStartD, FDivStartE); - if (`M_SUPPORTED) begin - assign IDivStartE = MDUE & Funct3E[2]; - assign DivStartE = FDivStartE | IDivStartE; // integer or floating-point division - end else assign DivStartE = FDivStartE; + if (`M_SUPPORTED) assign IDivStartE = MDUE & Funct3E[2]; + else assign IDivStartE = 0; assign FCvtIntE = (FResSelE == 2'b01); diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv index 14e7cfa99..9bb9117c7 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv @@ -40,7 +40,7 @@ module fdivsqrt( input logic XInfE, YInfE, input logic XZeroE, YZeroE, input logic XNaNE, YNaNE, - input logic DivStartE, + input logic FDivStartE, IDivStartE, input logic StallM, input logic StallE, input logic SqrtE, SqrtM, @@ -48,7 +48,7 @@ module fdivsqrt( input logic [2:0] Funct3E, Funct3M, input logic MDUE, W64E, output logic DivSM, - output logic DivBusy, + output logic FDivBusyE, output logic DivDone, output logic [`NE+1:0] QeM, output logic [`DIVb:0] QmM @@ -66,6 +66,7 @@ module fdivsqrt( logic SpecialCaseM; logic [`DIVBLEN:0] n, m; logic OTFCSwap, ALTB, BZero, As; + logic DivStartE; fdivsqrtpreproc fdivsqrtpreproc( .clk, .DivStartE, .Xm(XmE), .QeM, .Xe(XeE), .Fmt(FmtE), .Ye(YeE), @@ -74,14 +75,14 @@ module fdivsqrt( .ForwardedSrcAE, .ForwardedSrcBE, .Funct3E, .Funct3M, .MDUE, .W64E); fdivsqrtfsm fdivsqrtfsm( .clk, .reset, .FmtE, .XsE, .SqrtE, - .DivBusy, .DivStartE,.StallE, .StallM, .DivDone, .XZeroE, .YZeroE, + .FDivBusyE, .FDivStartE, .IDivStartE, .DivStartE, .StallE, .StallM, .DivDone, .XZeroE, .YZeroE, .XNaNE, 
.YNaNE, .MDUE, .n, .XInfE, .YInfE, .WZero, .SpecialCaseM); fdivsqrtiter fdivsqrtiter( .clk, .Firstun, .D, .FirstU, .FirstUM, .FirstC, .SqrtE, .SqrtM, .X,.Dpreproc, .FirstWS(WS), .FirstWC(WC), .DivStartE, .Xe(XeE), .Ye(YeE), .XZeroE, .YZeroE, .OTFCSwap, - .DivBusy); + .FDivBusyE); fdivsqrtpostproc fdivsqrtpostproc( .WS, .WC, .D, .FirstU, .FirstUM, .FirstC, .Firstun, .SqrtM, .SpecialCaseM, .RemOp(Funct3E[1]), diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtfsm.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtfsm.sv index 9222c0c4f..e33688500 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtfsm.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtfsm.sv @@ -37,7 +37,7 @@ module fdivsqrtfsm( input logic XInfE, YInfE, input logic XZeroE, YZeroE, input logic XNaNE, YNaNE, - input logic DivStartE, + input logic FDivStartE, IDivStartE, input logic XsE, input logic SqrtE, input logic StallE, @@ -45,8 +45,9 @@ module fdivsqrtfsm( input logic WZero, input logic MDUE, input logic [`DIVBLEN:0] n, + output logic DivStartE, output logic DivDone, - output logic DivBusy, + output logic FDivBusyE, output logic SpecialCaseM ); @@ -57,6 +58,15 @@ module fdivsqrtfsm( logic [`DURLEN-1:0] cycles; logic SpecialCaseE; + // *** start logic is presently in fctrl. Make it look more like integer division start logic + // DivStartE is derived from FDivStartE/IDivStartE (from fctrl), reflecting the start of floating-point and possibly integer division + assign DivStartE = (FDivStartE | IDivStartE) & (state == IDLE) & ~StallM; + assign DivDone = (state == DONE) | (WZero & (state == BUSY)); // *** used in postprocess.sv and round.sv. This doesn't seem proper. They break when removed. 
+ assign FDivBusyE = (state == BUSY & ~DivDone); // *** want to add | DivStartE but it creates comb loop + + // Divider control signals from MDU + //assign DivBusyE = (state == BUSY) | DivStartE; + // terminate immediately on special cases assign SpecialCaseE = XZeroE | (YZeroE&~SqrtE) | XInfE | YInfE | XNaNE | YNaNE | (XsE&SqrtE); flopenr #(1) SpecialCaseReg(clk, reset, ~StallM, SpecialCaseE, SpecialCaseM); // save SpecialCase for checking in fdivsqrtpostproc @@ -120,8 +130,5 @@ module fdivsqrtfsm( end end - // *** start logic is presently in fctl. Make it look more like integer division start logic - assign DivDone = (state == DONE) | (WZero & (state == BUSY)); - assign DivBusy = (state == BUSY & ~DivDone); endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtiter.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtiter.sv index 4df7a1472..a6c6c8bce 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtiter.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtiter.sv @@ -33,7 +33,7 @@ module fdivsqrtiter( input logic clk, input logic DivStartE, - input logic DivBusy, + input logic FDivBusyE, input logic [`NE-1:0] Xe, Ye, input logic XZeroE, YZeroE, input logic SqrtE, @@ -85,8 +85,8 @@ module fdivsqrtiter( // Residual WS/SC registers/initializaiton mux mux2 #(`DIVb+4) wsmux(WS[`DIVCOPIES], X, DivStartE, WSN); mux2 #(`DIVb+4) wcmux(WC[`DIVCOPIES], '0, DivStartE, WCN); - flopen #(`DIVb+4) wsflop(clk, DivStartE|DivBusy, WSN, WS[0]); - flopen #(`DIVb+4) wcflop(clk, DivStartE|DivBusy, WCN, WC[0]); + flopen #(`DIVb+4) wsflop(clk, DivStartE|FDivBusyE, WSN, WS[0]); + flopen #(`DIVb+4) wcflop(clk, DivStartE|FDivBusyE, WCN, WC[0]); // UOTFC Result U and UM registers/initialization mux // Initialize U to 1.0 and UM to 0 for square root; U to 0 and UM to -1 for division @@ -94,8 +94,8 @@ module fdivsqrtiter( assign initUM = SqrtE ? 
0 : {1'b1, {(`DIVb){1'b0}}}; mux2 #(`DIVb+1) Umux(UNext[`DIVCOPIES-1], initU, DivStartE, UMux); mux2 #(`DIVb+1) UMmux(UMNext[`DIVCOPIES-1], initUM, DivStartE, UMMux); - flopen #(`DIVb+1) UReg(clk, DivStartE|DivBusy, UMux, U[0]); - flopen #(`DIVb+1) UMReg(clk, DivStartE|DivBusy, UMMux, UM[0]); + flopen #(`DIVb+1) UReg(clk, DivStartE|FDivBusyE, UMux, U[0]); + flopen #(`DIVb+1) UMReg(clk, DivStartE|FDivBusyE, UMMux, UM[0]); // C register/initialization mux // Initialize C to -1 for sqrt and -R for division @@ -103,7 +103,7 @@ module fdivsqrtiter( assign initCUpper = SqrtE ? 2'b11 : (`RADIX == 4) ? 2'b00 : 2'b10; assign initC = {initCUpper, {`DIVb{1'b0}}}; mux2 #(`DIVb+2) Cmux(C[`DIVCOPIES], initC, DivStartE, CMux); - flopen #(`DIVb+2) cflop(clk, DivStartE|DivBusy, CMux, C[0]); + flopen #(`DIVb+2) cflop(clk, DivStartE|FDivBusyE, CMux, C[0]); // Divisior register flopen #(`DIVN-1) dflop(clk, DivStartE, Dpreproc, D); diff --git a/pipelined/src/fpu/fpu.sv b/pipelined/src/fpu/fpu.sv index dcc0db6d5..d0b4aceef 100755 --- a/pipelined/src/fpu/fpu.sv +++ b/pipelined/src/fpu/fpu.sv @@ -67,7 +67,7 @@ module fpu ( logic FRegWriteW; // FP register write enable logic [2:0] FrmM; // FP rounding mode logic [`FMTBITS-1:0] FmtE, FmtM; // FP precision 0-single 1-double - logic DivStartE; // Start division or squareroot + logic FDivStartE, IDivStartE; // Start division or squareroot logic FWriteIntM; // Write to integer register logic [1:0] ForwardXE, ForwardYE, ForwardZE; // forwarding mux control signals logic [2:0] OpCtrlE, OpCtrlM; // Select which opperation to do in each component @@ -167,7 +167,7 @@ module fpu ( .Funct3E, .MDUE, .InstrD, .StallE, .StallM, .StallW, .FlushE, .FlushM, .FlushW, .FRM_REGW, .STATUS_FS, .FDivBusyE, .reset, .clk, .FRegWriteM, .FRegWriteW, .FrmM, .FmtE, .FmtM, .YEnForwardE, .ZEnForwardE, - .DivStartE, .FWriteIntE, .FCvtIntE, .FWriteIntM, .OpCtrlE, .OpCtrlM, .IllegalFPUInstrM, .XEnE, .YEnE, .ZEnE, + .FDivStartE, .IDivStartE, .FWriteIntE, .FCvtIntE, 
.FWriteIntM, .OpCtrlE, .OpCtrlM, .IllegalFPUInstrM, .XEnE, .YEnE, .ZEnE, .FResSelE, .FResSelM, .FResSelW, .PostProcSelE, .PostProcSelM, .FCvtIntW, .Adr1E, .Adr2E, .Adr3E); // FP register file @@ -261,9 +261,9 @@ module fpu ( // - fsqrt // *** add other opperations fdivsqrt fdivsqrt(.clk, .reset, .FmtE, .XmE, .YmE, .XeE, .YeE, .SqrtE(OpCtrlE[0]), .SqrtM(OpCtrlM[0]), - .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .DivStartE(DivStartE), .XsE, + .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .FDivStartE, .IDivStartE, .XsE, .ForwardedSrcAE, .ForwardedSrcBE, .Funct3E, .Funct3M, .MDUE, .W64E, - .StallE, .StallM, .DivSM, .DivBusy(FDivBusyE), .QeM, //***change divbusyE to M signal + .StallE, .StallM, .DivSM, .FDivBusyE, .QeM, .QmM, .DivDone(DivDoneM)); // diff --git a/pipelined/src/fpu/postproc/postprocess.sv b/pipelined/src/fpu/postproc/postprocess.sv index ee18c4bcd..24365cf96 100644 --- a/pipelined/src/fpu/postproc/postprocess.sv +++ b/pipelined/src/fpu/postproc/postprocess.sv @@ -129,7 +129,7 @@ module postprocess ( assign Mult = OpCtrl[2]&~OpCtrl[1]&~OpCtrl[0]; assign CvtOp = (PostProcSel == 2'b00); assign FmaOp = (PostProcSel == 2'b10); - assign DivOp = (PostProcSel == 2'b01)&DivDone; + assign DivOp = (PostProcSel == 2'b01) & DivDone; assign Sqrt = OpCtrl[0]; // is there an input of infinity or NaN being used