diff --git a/wally-pipelined/regression/wave-dos/peripheral-waves.do b/wally-pipelined/regression/wave-dos/peripheral-waves.do index 1304b40c..a42bfbd4 100644 --- a/wally-pipelined/regression/wave-dos/peripheral-waves.do +++ b/wally-pipelined/regression/wave-dos/peripheral-waves.do @@ -9,7 +9,8 @@ add wave /testbench/clk add wave /testbench/reset add wave -divider -add wave /testbench/dut/hart/DataStall +#add wave /testbench/dut/hart/DataStall +add wave /testbench/debug add wave /testbench/dut/hart/StallF add wave /testbench/dut/hart/StallD add wave /testbench/dut/hart/StallE diff --git a/wally-pipelined/src/cache/ICacheCntrl.sv b/wally-pipelined/src/cache/ICacheCntrl.sv index 748b3f5e..e7098d75 100644 --- a/wally-pipelined/src/cache/ICacheCntrl.sv +++ b/wally-pipelined/src/cache/ICacheCntrl.sv @@ -115,8 +115,8 @@ module ICacheCntrl #(parameter BLOCKLEN = 256) localparam STATE_INVALIDATE = 'h12; // *** not sure if invalidate or evict? invalidate by cache block or address? localparam STATE_TLB_MISS = 'h13; localparam STATE_TLB_MISS_DONE = 'h14; - - + localparam STATE_INSTR_PAGE_FAULT = 'h15; + localparam AHBByteLength = `XLEN / 8; localparam AHBOFFETWIDTH = $clog2(AHBByteLength); @@ -370,13 +370,20 @@ module ICacheCntrl #(parameter BLOCKLEN = 256) NextState = STATE_READY; end STATE_TLB_MISS: begin - if (ITLBWriteF | WalkerInstrPageFaultF) begin + if (WalkerInstrPageFaultF) begin + NextState = STATE_INSTR_PAGE_FAULT; + ICacheStallF = 1'b0; + end else if (ITLBWriteF) begin NextState = STATE_TLB_MISS_DONE; end else begin NextState = STATE_TLB_MISS; end end - STATE_TLB_MISS_DONE : begin + STATE_TLB_MISS_DONE: begin + NextState = STATE_READY; + end + STATE_INSTR_PAGE_FAULT: begin + ICacheStallF = 1'b0; NextState = STATE_READY; end default: begin @@ -425,8 +432,8 @@ module ICacheCntrl #(parameter BLOCKLEN = 256) // store read data from memory interface before writing into SRAM. 
genvar i; generate - for (i = 0; i < WORDSPERLINE; i++) begin - flopenr #(`XLEN) flop(.clk(clk), + for (i = 0; i < WORDSPERLINE; i++) begin:storebuffer + flopenr #(`XLEN) sb(.clk(clk), .reset(reset), .en(InstrAckF & (i == FetchCount)), .d(InstrInF), diff --git a/wally-pipelined/src/cache/dmapped.sv b/wally-pipelined/src/cache/dmapped.sv index f40da412..42669752 100644 --- a/wally-pipelined/src/cache/dmapped.sv +++ b/wally-pipelined/src/cache/dmapped.sv @@ -106,7 +106,7 @@ module rodirectmappedmem #(parameter NUMLINES=512, parameter LINESIZE = 256, par assign DataWord = ReadLineTransformed[ReadOffset]; genvar i; generate - for (i=0; i < LINESIZE/WORDSIZE; i++) begin + for (i=0; i < LINESIZE/WORDSIZE; i++) begin:readline assign ReadLineTransformed[i] = ReadLine[(i+1)*WORDSIZE-1:i*WORDSIZE]; end endgenerate @@ -214,7 +214,7 @@ module wtdirectmappedmem #(parameter NUMLINES=512, parameter LINESIZE = 256, par assign DataWord = ReadLineTransformed[ReadOffset]; genvar i; generate - for (i=0; i < LINESIZE/WORDSIZE; i++) begin + for (i=0; i < LINESIZE/WORDSIZE; i++) begin:readline assign ReadLineTransformed[i] = ReadLine[(i+1)*WORDSIZE-1:i*WORDSIZE]; end endgenerate diff --git a/wally-pipelined/src/ebu/ahblite.sv b/wally-pipelined/src/ebu/ahblite.sv index b0c6f033..4bd079e9 100644 --- a/wally-pipelined/src/ebu/ahblite.sv +++ b/wally-pipelined/src/ebu/ahblite.sv @@ -216,11 +216,9 @@ module ahblite ( subwordread swr(.*); // Handle AMO instructions if applicable - generate + generate if (`A_SUPPORTED) begin logic [`XLEN-1:0] AMOResult; -// amoalu amoalu(.a(HRDATA), .b(WriteDataM), .funct(Funct7M), .width(MemSizeM), -// .result(AMOResult)); amoalu amoalu(.srca(HRDATAW), .srcb(WriteDataM), .funct(Funct7M), .width(MemSizeM), .result(AMOResult)); mux2 #(`XLEN) wdmux(WriteDataM, AMOResult, AtomicMaskedM[1], WriteData); diff --git a/wally-pipelined/src/fpu/fpu.sv b/wally-pipelined/src/fpu/fpu.sv index ff29dfd7..59f5e439 100755 --- a/wally-pipelined/src/fpu/fpu.sv +++ 
b/wally-pipelined/src/fpu/fpu.sv @@ -43,90 +43,93 @@ module fpu ( output logic [4:0] SetFflagsM, // FPU flags output logic [`XLEN-1:0] FPUResultW); // FPU result // *** change FMA to do 16 - 32 - 64 - 128 FEXPBITS - // control logic signal instantiation - logic FWriteEnD, FWriteEnE, FWriteEnM, FWriteEnW; // FP register write enable - logic [2:0] FrmD, FrmE, FrmM; // FP rounding mode - logic FmtD, FmtE, FmtM, FmtW; // FP precision 0-single 1-double - logic FDivStartD, FDivStartE; // Start division - logic FWriteIntD; // Write to integer register - logic [1:0] ForwardXE, ForwardYE, ForwardZE; // Input3 forwarding mux control signal - logic [2:0] FResultSelD, FResultSelE, FResultSelM, FResultSelW; // Select FP result - logic [3:0] FOpCtrlD, FOpCtrlE, FOpCtrlM; // Select which opperation to do in each component - logic [1:0] FResSelD, FResSelE, FResSelM; - logic [1:0] FIntResSelD, FIntResSelE, FIntResSelM; - logic [4:0] Adr1E, Adr2E, Adr3E; - - // regfile signals - logic [4:0] RdE, RdM, RdW; // what adress to write to // ***Can take from ieu insted of pipelining - logic [63:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage - logic [63:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage - logic [`XLEN-1:0] SrcXMAligned; - logic [63:0] SrcXE, SrcXM; // Input 1 to the various units (after forwarding) - logic [63:0] SrcYE, SrcYM; // Input 2 to the various units (after forwarding) - logic [63:0] SrcZE, SrcZM; // Input 3 to the various units (after forwarding) - - // div/sqrt signals - logic [63:0] FDivResultM, FDivResultW; - logic [4:0] FDivSqrtFlgM, FDivSqrtFlgW; - logic FDivSqrtDoneE; - logic [63:0] DivInput1E, DivInput2E; - logic HoldInputs; // keep forwarded inputs arround durring division - - // FMA signals - logic [105:0] ProdManE, ProdManM; ///*** put pipline stages in units - logic [161:0] AlignedAddendE, AlignedAddendM; - logic [12:0] ProdExpE, ProdExpM; - logic AddendStickyE, AddendStickyM; - logic KillProdE, KillProdM; - logic 
XZeroE, YZeroE, ZZeroE, XZeroM, YZeroM, ZZeroM; - logic XInfE, YInfE, ZInfE, XInfM, YInfM, ZInfM; - logic XNaNE, YNaNE, ZNaNE, XNaNM, YNaNM, ZNaNM; - logic [63:0] FMAResM, FMAResW; - logic [4:0] FMAFlgM, FMAFlgW; - // add/cvt signals - logic [63:0] AddSumE, AddSumM; - logic [63:0] AddSumTcE, AddSumTcM; - logic [3:0] AddSelInvE, AddSelInvM; - logic [10:0] AddExpPostSumE,AddExpPostSumM; - logic AddCorrSignE, AddCorrSignM; - logic AddOp1NormE, AddOp1NormM; - logic AddOp2NormE, AddOp2NormM; - logic AddOpANormE, AddOpANormM; - logic AddOpBNormE, AddOpBNormM; - logic AddInvalidE, AddInvalidM; - logic AddDenormInE, AddDenormInM; - logic AddSwapE, AddSwapM; - logic AddNormOvflowE, AddNormOvflowM; //***this isn't used in addcvt2 - logic AddSignAE, AddSignAM; - logic AddConvertE, AddConvertM; - logic [63:0] AddFloat1E, AddFloat2E, AddFloat1M, AddFloat2M; - logic [11:0] AddExp1DenormE, AddExp2DenormE, AddExp1DenormM, AddExp2DenormM; - logic [10:0] AddExponentE, AddExponentM; - logic [63:0] FAddResM, FAddResW; - logic [4:0] FAddFlgM, FAddFlgW; - - // cmp signals - logic CmpNVE, CmpNVM, CmpNVW; - logic [63:0] CmpResE, CmpResM, CmpResW; - - // fsgn signals - logic [63:0] SgnResE, SgnResM; - logic SgnNVE, SgnNVM, SgnNVW; - logic [63:0] FResM, FResW; - logic FFlgM, FFlgW; - - // instantiation of W stage regfile signals - logic [63:0] AlignedSrcAM; - - // classify signals - logic [63:0] ClassResE, ClassResM; - - // 64-bit FPU result - logic [63:0] FPUResult64W; - logic [4:0] FPUFlagsW; - - + generate + if (`F_SUPPORTED) begin + // control logic signal instantiation + logic FWriteEnD, FWriteEnE, FWriteEnM, FWriteEnW; // FP register write enable + logic [2:0] FrmD, FrmE, FrmM; // FP rounding mode + logic FmtD, FmtE, FmtM, FmtW; // FP precision 0-single 1-double + logic FDivStartD, FDivStartE; // Start division + logic FWriteIntD; // Write to integer register + logic [1:0] ForwardXE, ForwardYE, ForwardZE; // Input3 forwarding mux control signal + logic [2:0] FResultSelD, FResultSelE, 
FResultSelM, FResultSelW; // Select FP result + logic [3:0] FOpCtrlD, FOpCtrlE, FOpCtrlM; // Select which operation to do in each component + logic [1:0] FResSelD, FResSelE, FResSelM; + logic [1:0] FIntResSelD, FIntResSelE, FIntResSelM; + logic [4:0] Adr1E, Adr2E, Adr3E; + + // regfile signals + logic [4:0] RdE, RdM, RdW; // what address to write to // ***Can take from ieu instead of pipelining + logic [63:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage + logic [63:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage + logic [`XLEN-1:0] SrcXMAligned; + logic [63:0] SrcXE, SrcXM; // Input 1 to the various units (after forwarding) + logic [63:0] SrcYE, SrcYM; // Input 2 to the various units (after forwarding) + logic [63:0] SrcZE, SrcZM; // Input 3 to the various units (after forwarding) + + // div/sqrt signals + logic [63:0] FDivResultM, FDivResultW; + logic [4:0] FDivSqrtFlgM, FDivSqrtFlgW; + logic FDivSqrtDoneE; + logic [63:0] DivInput1E, DivInput2E; + logic HoldInputs; // keep forwarded inputs around during division + + // FMA signals + logic [105:0] ProdManE, ProdManM; ///*** put pipeline stages in units + logic [161:0] AlignedAddendE, AlignedAddendM; + logic [12:0] ProdExpE, ProdExpM; + logic AddendStickyE, AddendStickyM; + logic KillProdE, KillProdM; + logic XZeroE, YZeroE, ZZeroE, XZeroM, YZeroM, ZZeroM; + logic XInfE, YInfE, ZInfE, XInfM, YInfM, ZInfM; + logic XNaNE, YNaNE, ZNaNE, XNaNM, YNaNM, ZNaNM; + logic [63:0] FMAResM, FMAResW; + logic [4:0] FMAFlgM, FMAFlgW; + + // add/cvt signals + logic [63:0] AddSumE, AddSumM; + logic [63:0] AddSumTcE, AddSumTcM; + logic [3:0] AddSelInvE, AddSelInvM; + logic [10:0] AddExpPostSumE,AddExpPostSumM; + logic AddCorrSignE, AddCorrSignM; + logic AddOp1NormE, AddOp1NormM; + logic AddOp2NormE, AddOp2NormM; + logic AddOpANormE, AddOpANormM; + logic AddOpBNormE, AddOpBNormM; + logic AddInvalidE, AddInvalidM; + logic AddDenormInE, AddDenormInM; + logic AddSwapE, AddSwapM; + logic
AddNormOvflowE, AddNormOvflowM; //***this isn't used in addcvt2 + logic AddSignAE, AddSignAM; + logic AddConvertE, AddConvertM; + logic [63:0] AddFloat1E, AddFloat2E, AddFloat1M, AddFloat2M; + logic [11:0] AddExp1DenormE, AddExp2DenormE, AddExp1DenormM, AddExp2DenormM; + logic [10:0] AddExponentE, AddExponentM; + logic [63:0] FAddResM, FAddResW; + logic [4:0] FAddFlgM, FAddFlgW; + + // cmp signals + logic CmpNVE, CmpNVM, CmpNVW; + logic [63:0] CmpResE, CmpResM, CmpResW; + + // fsgn signals + logic [63:0] SgnResE, SgnResM; + logic SgnNVE, SgnNVM, SgnNVW; + logic [63:0] FResM, FResW; + logic FFlgM, FFlgW; + + // instantiation of W stage regfile signals + logic [63:0] AlignedSrcAM; + + // classify signals + logic [63:0] ClassResE, ClassResM; + + // 64-bit FPU result + logic [63:0] FPUResult64W; + logic [4:0] FPUFlagsW; + + @@ -134,189 +137,19 @@ module fpu ( - //DECODE STAGE - - - // top-level controller for FPU - fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), - .FRM_REGW, .IllegalFPUInstrD, .FWriteEnD, .FDivStartD, .FResultSelD, .FOpCtrlD, .FResSelD, - .FIntResSelD, .FmtD, .FrmD, .FWriteIntD); - - // regfile instantiation - fregfile fregfile (clk, reset, FWriteEnW, - InstrD[19:15], InstrD[24:20], InstrD[31:27], RdW, - FPUResult64W, - FRD1D, FRD2D, FRD3D); - - - - - - - - - - //***************** - // D/E pipe registers - //***************** - flopenrc #(64) DEReg1(clk, reset, FlushE, ~StallE, FRD1D, FRD1E); - flopenrc #(64) DEReg2(clk, reset, FlushE, ~StallE, FRD2D, FRD2E); - flopenrc #(64) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E); - flopenrc #(1) DECtrlRegE1(clk, reset, FlushE, ~StallE, FDivStartD, FDivStartE); - flopenrc #(15) DECtrlRegE2(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]}, - {Adr1E, Adr2E, Adr3E}); - flopenrc #(22) DECtrlReg3(clk, reset, FlushE, ~StallE, - {FWriteEnD, FResultSelD, FResSelD, FIntResSelD, FrmD, FmtD, InstrD[11:7], FOpCtrlD, FWriteIntD}, - 
{FWriteEnE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, RdE, FOpCtrlE, FWriteIntE}); - - - - - - - - - - - - - - - //EXECUTION STAGE - - // Hazard unit for FPU - fhazard fhazard(.Adr1E, .Adr2E, .Adr3E, .FWriteEnM, .FWriteEnW, .RdM, .RdW, .FResultSelM, .FStallD, - .ForwardXE, .ForwardYE, .ForwardZE); - - // forwarding muxs - mux3 #(64) fxemux(FRD1E, FPUResult64W, FResM, ForwardXE, SrcXE); - mux3 #(64) fyemux(FRD2E, FPUResult64W, FResM, ForwardYE, SrcYE); - mux3 #(64) fzemux(FRD3E, FPUResult64W, FResM, ForwardZE, SrcZE); - - - // first of two-stage instance of floating-point fused multiply-add unit - fma1 fma1 (.X(SrcXE), .Y(SrcYE), .Z(SrcZE), .FOpCtrlE(FOpCtrlE[2:0]), .FmtE, .ProdManE, .AlignedAddendE, - .ProdExpE, .AddendStickyE, .KillProdE, .XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE, - .XNaNE, .YNaNE, .ZNaNE ); - - // first and only instance of floating-point divider - logic fpdivClk; - - clockgater fpdivclkg(.E(FDivStartE), - .SE(1'b0), - .CLK(clk), - .ECLK(fpdivClk)); - - // capture the inputs for div/sqrt - flopenrc #(64) reg_input1 (.d(SrcXE), .q(DivInput1E), - .en(~HoldInputs), .clear(FDivSqrtDoneE), - .reset(reset), .clk(clk)); - flopenrc #(64) reg_input2 (.d(SrcYE), .q(DivInput2E), - .en(~HoldInputs), .clear(FDivSqrtDoneE), - .reset(reset), .clk(clk)); - - fdivsqrt fdivsqrt (.DivOpType(FOpCtrlE[0]), .clk(fpdivClk), .FmtE(~FmtE), .DivInput1E, .DivInput2E, - .FrmE, .DivOvEn(1'b1), .DivUnEn(1'b1), .FDivStartE, .FDivResultM, .FDivSqrtFlgM, - .FDivSqrtDoneE, .FDivBusyE, .HoldInputs, .reset); - - - - // first of two-stage instance of floating-point add/cvt unit - fpuaddcvt1 fpadd1 (.SrcXE, .SrcYE, .FOpCtrlE, .FmtE, .AddFloat1E, .AddFloat2E, .AddExponentE, - .AddExpPostSumE, .AddExp1DenormE, .AddExp2DenormE, .AddSumE, .AddSumTcE, .AddSelInvE, - .AddCorrSignE, .AddSignAE, .AddOp1NormE, .AddOp2NormE, .AddOpANormE, .AddOpBNormE, .AddInvalidE, - .AddDenormInE, .AddConvertE, .AddSwapE, .AddNormOvflowE); - - // first and only instance of floating-point 
comparator - fcmp fcmp (SrcXE, SrcYE, FOpCtrlE[2:0], FmtE, CmpNVE, CmpResE); - - // first and only instance of floating-point sign converter - fsgn fsgn (.SgnOpCodeE(FOpCtrlE[1:0]), .SrcXE, .SrcYE, .SgnResE, .SgnNVE); - - // first and only instance of floating-point classify unit - fclassify fclassify (.SrcXE, .FmtE, .ClassResE); - - // output for store instructions - assign FWriteDataE = FmtE ? SrcYE[63:64-`XLEN] : {{`XLEN-32{1'b0}}, SrcYE[63:32]}; - //***swap to mux - - - - - - - - - - - //***************** - // E/M pipe registers - //***************** - flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, SrcXE, SrcXM); - flopenrc #(64) EMFpReg2(clk, reset, FlushM, ~StallM, SrcYE, SrcYM); - flopenrc #(64) EMFpReg3(clk, reset, FlushM, ~StallM, SrcZE, SrcZM); - - flopenrc #(106) EMRegFma1(clk, reset, FlushM, ~StallM, ProdManE, ProdManM); - flopenrc #(162) EMRegFma2(clk, reset, FlushM, ~StallM, AlignedAddendE, AlignedAddendM); - flopenrc #(13) EMRegFma3(clk, reset, FlushM, ~StallM, ProdExpE, ProdExpM); - flopenrc #(11) EMRegFma4(clk, reset, FlushM, ~StallM, - {AddendStickyE, KillProdE, XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE}, - {AddendStickyM, KillProdM, XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM}); - - flopenrc #(64) EMRegAdd1(clk, reset, FlushM, ~StallM, AddSumE, AddSumM); - flopenrc #(64) EMRegAdd2(clk, reset, FlushM, ~StallM, AddSumTcE, AddSumTcM); - flopenrc #(11) EMRegAdd3(clk, reset, FlushM, ~StallM, AddExpPostSumE, AddExpPostSumM); - flopenrc #(64) EMRegAdd4(clk, reset, FlushM, ~StallM, AddFloat1E, AddFloat1M); - flopenrc #(64) EMRegAdd5(clk, reset, FlushM, ~StallM, AddFloat2E, AddFloat2M); - flopenrc #(12) EMRegAdd6(clk, reset, FlushM, ~StallM, AddExp1DenormE, AddExp1DenormM); - flopenrc #(12) EMRegAdd7(clk, reset, FlushM, ~StallM, AddExp2DenormE, AddExp2DenormM); - flopenrc #(11) EMRegAdd8(clk, reset, FlushM, ~StallM, AddExponentE, AddExponentM); - flopenrc #(15) EMRegAdd9(clk, reset, FlushM, ~StallM, - 
{AddSelInvE, AddCorrSignE, AddOp1NormE, AddOp2NormE, AddOpANormE, AddOpBNormE, AddInvalidE, AddDenormInE, AddConvertE, AddSwapE, AddNormOvflowE, AddSignAE}, - {AddSelInvM, AddCorrSignM, AddOp1NormM, AddOp2NormM, AddOpANormM, AddOpBNormM, AddInvalidM, AddDenormInM, AddConvertM, AddSwapM, AddNormOvflowM, AddSignAM}); - - flopenrc #(1) EMRegCmp1(clk, reset, FlushM, ~StallM, CmpNVE, CmpNVM); - flopenrc #(64) EMRegCmp2(clk, reset, FlushM, ~StallM, CmpResE, CmpResM); - - flopenrc #(64) EMRegSgn1(clk, reset, FlushM, ~StallM, SgnResE, SgnResM); - flopenrc #(1) EMRegSgn2(clk, reset, FlushM, ~StallM, SgnNVE, SgnNVM); - - flopenrc #(22) EMCtrlReg(clk, reset, FlushM, ~StallM, - {FWriteEnE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, RdE, FOpCtrlE, FWriteIntE}, - {FWriteEnM, FResultSelM, FResSelM, FIntResSelM, FrmM, FmtM, RdM, FOpCtrlM, FWriteIntM}); - - flopenrc #(64) EMRegClass(clk, reset, FlushM, ~StallM, ClassResE, ClassResM); - - - - - - - - - //BEGIN MEMORY STAGE - - mux3 #(64) FResMux(AlignedSrcAM, SgnResM, CmpResM, FResSelM, FResM); - mux3 #(1) FFlgMux(1'b0, SgnNVM, CmpNVM, FResSelM, FFlgM); - - //***change to mux - assign SrcXMAligned = FmtM ? 
SrcXM[63:64-`XLEN] : {{`XLEN-32{1'b0}}, SrcXM[63:32]}; - mux3 #(`XLEN) IntResMux(CmpResM[`XLEN-1:0], SrcXMAligned, ClassResM[`XLEN-1:0], FIntResSelM, FIntResM); - - // second instance of two-stage FMA unit - fma2 fma2(.X(SrcXM), .Y(SrcYM), .Z(SrcZM), .FOpCtrlM(FOpCtrlM[2:0]), .FrmM, .FmtM, - .ProdManM, .AlignedAddendM, .ProdExpM, .AddendStickyM, .KillProdM, - .XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, .ZInfM, .XNaNM, .YNaNM, .ZNaNM, - .FMAResM, .FMAFlgM); - - // second instance of two-stage floating-point add/cvt unit - fpuaddcvt2 fpadd2 (.FrmM, .FOpCtrlM, .FmtM, .AddSumM, .AddSumTcM, .AddFloat1M, .AddFloat2M, - .AddExp1DenormM, .AddExp2DenormM, .AddExponentM, .AddExpPostSumM, .AddSelInvM, - .AddOp1NormM, .AddOp2NormM, .AddOpANormM, .AddOpBNormM, .AddInvalidM, .AddDenormInM, - .AddSignAM, .AddCorrSignM, .AddConvertM, .AddSwapM, .FAddResM, .FAddFlgM); - - // Align SrcA to MSB when single precicion - mux2 #(64) SrcAMux({SrcAM[31:0], 32'b0}, {{64-`XLEN{1'b0}}, SrcAM}, FmtM, AlignedSrcAM); + //DECODE STAGE + + + // top-level controller for FPU + fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), + .FRM_REGW, .IllegalFPUInstrD, .FWriteEnD, .FDivStartD, .FResultSelD, .FOpCtrlD, .FResSelD, + .FIntResSelD, .FmtD, .FrmD, .FWriteIntD); + + // regfile instantiation + fregfile fregfile (clk, reset, FWriteEnW, + InstrD[19:15], InstrD[24:20], InstrD[31:27], RdW, + FPUResult64W, + FRD1D, FRD2D, FRD3D); @@ -326,77 +159,260 @@ module fpu ( + //***************** + // D/E pipe registers + //***************** + flopenrc #(64) DEReg1(clk, reset, FlushE, ~StallE, FRD1D, FRD1E); + flopenrc #(64) DEReg2(clk, reset, FlushE, ~StallE, FRD2D, FRD2E); + flopenrc #(64) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E); + flopenrc #(1) DECtrlRegE1(clk, reset, FlushE, ~StallE, FDivStartD, FDivStartE); + flopenrc #(15) DECtrlRegE2(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]}, + {Adr1E, Adr2E, Adr3E}); + flopenrc 
#(22) DECtrlReg3(clk, reset, FlushE, ~StallE, + {FWriteEnD, FResultSelD, FResSelD, FIntResSelD, FrmD, FmtD, InstrD[11:7], FOpCtrlD, FWriteIntD}, + {FWriteEnE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, RdE, FOpCtrlE, FWriteIntE}); + + + + + + + + + + + + + + + //EXECUTION STAGE + + // Hazard unit for FPU + fhazard fhazard(.Adr1E, .Adr2E, .Adr3E, .FWriteEnM, .FWriteEnW, .RdM, .RdW, .FResultSelM, .FStallD, + .ForwardXE, .ForwardYE, .ForwardZE); + + // forwarding muxs + mux3 #(64) fxemux(FRD1E, FPUResult64W, FResM, ForwardXE, SrcXE); + mux3 #(64) fyemux(FRD2E, FPUResult64W, FResM, ForwardYE, SrcYE); + mux3 #(64) fzemux(FRD3E, FPUResult64W, FResM, ForwardZE, SrcZE); + + + // first of two-stage instance of floating-point fused multiply-add unit + fma1 fma1 (.X(SrcXE), .Y(SrcYE), .Z(SrcZE), .FOpCtrlE(FOpCtrlE[2:0]), .FmtE, .ProdManE, .AlignedAddendE, + .ProdExpE, .AddendStickyE, .KillProdE, .XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE, + .XNaNE, .YNaNE, .ZNaNE ); + + // first and only instance of floating-point divider + logic fpdivClk; + + clockgater fpdivclkg(.E(FDivStartE), + .SE(1'b0), + .CLK(clk), + .ECLK(fpdivClk)); + + // capture the inputs for div/sqrt + flopenrc #(64) reg_input1 (.d(SrcXE), .q(DivInput1E), + .en(~HoldInputs), .clear(FDivSqrtDoneE), + .reset(reset), .clk(clk)); + flopenrc #(64) reg_input2 (.d(SrcYE), .q(DivInput2E), + .en(~HoldInputs), .clear(FDivSqrtDoneE), + .reset(reset), .clk(clk)); + + fdivsqrt fdivsqrt (.DivOpType(FOpCtrlE[0]), .clk(fpdivClk), .FmtE(~FmtE), .DivInput1E, .DivInput2E, + .FrmE, .DivOvEn(1'b1), .DivUnEn(1'b1), .FDivStartE, .FDivResultM, .FDivSqrtFlgM, + .FDivSqrtDoneE, .FDivBusyE, .HoldInputs, .reset); + + + + // first of two-stage instance of floating-point add/cvt unit + fpuaddcvt1 fpadd1 (.SrcXE, .SrcYE, .FOpCtrlE, .FmtE, .AddFloat1E, .AddFloat2E, .AddExponentE, + .AddExpPostSumE, .AddExp1DenormE, .AddExp2DenormE, .AddSumE, .AddSumTcE, .AddSelInvE, + .AddCorrSignE, .AddSignAE, .AddOp1NormE, .AddOp2NormE, 
.AddOpANormE, .AddOpBNormE, .AddInvalidE, + .AddDenormInE, .AddConvertE, .AddSwapE, .AddNormOvflowE); + + // first and only instance of floating-point comparator + fcmp fcmp (SrcXE, SrcYE, FOpCtrlE[2:0], FmtE, CmpNVE, CmpResE); + + // first and only instance of floating-point sign converter + fsgn fsgn (.SgnOpCodeE(FOpCtrlE[1:0]), .SrcXE, .SrcYE, .SgnResE, .SgnNVE); + + // first and only instance of floating-point classify unit + fclassify fclassify (.SrcXE, .FmtE, .ClassResE); + + // output for store instructions + assign FWriteDataE = FmtE ? SrcYE[63:64-`XLEN] : {{`XLEN-32{1'b0}}, SrcYE[63:32]}; + //***swap to mux + + + + + + + + + + + //***************** + // E/M pipe registers + //***************** + flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, SrcXE, SrcXM); + flopenrc #(64) EMFpReg2(clk, reset, FlushM, ~StallM, SrcYE, SrcYM); + flopenrc #(64) EMFpReg3(clk, reset, FlushM, ~StallM, SrcZE, SrcZM); + + flopenrc #(106) EMRegFma1(clk, reset, FlushM, ~StallM, ProdManE, ProdManM); + flopenrc #(162) EMRegFma2(clk, reset, FlushM, ~StallM, AlignedAddendE, AlignedAddendM); + flopenrc #(13) EMRegFma3(clk, reset, FlushM, ~StallM, ProdExpE, ProdExpM); + flopenrc #(11) EMRegFma4(clk, reset, FlushM, ~StallM, + {AddendStickyE, KillProdE, XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE}, + {AddendStickyM, KillProdM, XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM}); + + flopenrc #(64) EMRegAdd1(clk, reset, FlushM, ~StallM, AddSumE, AddSumM); + flopenrc #(64) EMRegAdd2(clk, reset, FlushM, ~StallM, AddSumTcE, AddSumTcM); + flopenrc #(11) EMRegAdd3(clk, reset, FlushM, ~StallM, AddExpPostSumE, AddExpPostSumM); + flopenrc #(64) EMRegAdd4(clk, reset, FlushM, ~StallM, AddFloat1E, AddFloat1M); + flopenrc #(64) EMRegAdd5(clk, reset, FlushM, ~StallM, AddFloat2E, AddFloat2M); + flopenrc #(12) EMRegAdd6(clk, reset, FlushM, ~StallM, AddExp1DenormE, AddExp1DenormM); + flopenrc #(12) EMRegAdd7(clk, reset, FlushM, ~StallM, AddExp2DenormE, 
AddExp2DenormM); + flopenrc #(11) EMRegAdd8(clk, reset, FlushM, ~StallM, AddExponentE, AddExponentM); + flopenrc #(15) EMRegAdd9(clk, reset, FlushM, ~StallM, + {AddSelInvE, AddCorrSignE, AddOp1NormE, AddOp2NormE, AddOpANormE, AddOpBNormE, AddInvalidE, AddDenormInE, AddConvertE, AddSwapE, AddNormOvflowE, AddSignAE}, + {AddSelInvM, AddCorrSignM, AddOp1NormM, AddOp2NormM, AddOpANormM, AddOpBNormM, AddInvalidM, AddDenormInM, AddConvertM, AddSwapM, AddNormOvflowM, AddSignAM}); + + flopenrc #(1) EMRegCmp1(clk, reset, FlushM, ~StallM, CmpNVE, CmpNVM); + flopenrc #(64) EMRegCmp2(clk, reset, FlushM, ~StallM, CmpResE, CmpResM); + + flopenrc #(64) EMRegSgn1(clk, reset, FlushM, ~StallM, SgnResE, SgnResM); + flopenrc #(1) EMRegSgn2(clk, reset, FlushM, ~StallM, SgnNVE, SgnNVM); + + flopenrc #(22) EMCtrlReg(clk, reset, FlushM, ~StallM, + {FWriteEnE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, RdE, FOpCtrlE, FWriteIntE}, + {FWriteEnM, FResultSelM, FResSelM, FIntResSelM, FrmM, FmtM, RdM, FOpCtrlM, FWriteIntM}); + + flopenrc #(64) EMRegClass(clk, reset, FlushM, ~StallM, ClassResE, ClassResM); + + + + + + + + + //BEGIN MEMORY STAGE + + mux3 #(64) FResMux(AlignedSrcAM, SgnResM, CmpResM, FResSelM, FResM); + mux3 #(1) FFlgMux(1'b0, SgnNVM, CmpNVM, FResSelM, FFlgM); + + //***change to mux + assign SrcXMAligned = FmtM ? 
SrcXM[63:64-`XLEN] : {{`XLEN-32{1'b0}}, SrcXM[63:32]}; + mux3 #(`XLEN) IntResMux(CmpResM[`XLEN-1:0], SrcXMAligned, ClassResM[`XLEN-1:0], FIntResSelM, FIntResM); + + // second instance of two-stage FMA unit + fma2 fma2(.X(SrcXM), .Y(SrcYM), .Z(SrcZM), .FOpCtrlM(FOpCtrlM[2:0]), .FrmM, .FmtM, + .ProdManM, .AlignedAddendM, .ProdExpM, .AddendStickyM, .KillProdM, + .XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, .ZInfM, .XNaNM, .YNaNM, .ZNaNM, + .FMAResM, .FMAFlgM); + + // second instance of two-stage floating-point add/cvt unit + fpuaddcvt2 fpadd2 (.FrmM, .FOpCtrlM, .FmtM, .AddSumM, .AddSumTcM, .AddFloat1M, .AddFloat2M, + .AddExp1DenormM, .AddExp2DenormM, .AddExponentM, .AddExpPostSumM, .AddSelInvM, + .AddOp1NormM, .AddOp2NormM, .AddOpANormM, .AddOpBNormM, .AddInvalidM, .AddDenormInM, + .AddSignAM, .AddCorrSignM, .AddConvertM, .AddSwapM, .FAddResM, .FAddFlgM); + + // Align SrcA to MSB when single precicion + mux2 #(64) SrcAMux({SrcAM[31:0], 32'b0}, {{64-`XLEN{1'b0}}, SrcAM}, FmtM, AlignedSrcAM); - //***************** - // M/W pipe registers - //***************** - flopenrc #(64) MWRegFma1(clk, reset, FlushW, ~StallW, FMAResM, FMAResW); - flopenrc #(5) MWRegFma2(clk, reset, FlushW, ~StallW, FMAFlgM, FMAFlgW); - - flopenrc #(64) MWRegDiv1(clk, reset, FlushW, ~StallW, FDivResultM, FDivResultW); - flopenrc #(5) MWRegDiv2(clk, reset, FlushW, ~StallW, FDivSqrtFlgM, FDivSqrtFlgW); - - flopenrc #(64) MWRegAdd1(clk, reset, FlushW, ~StallW, FAddResM, FAddResW); - flopenrc #(5) MWRegAdd2(clk, reset, FlushW, ~StallW, FAddFlgM, FAddFlgW); - - flopenrc #(1) MWRegCmp1(clk, reset, FlushW, ~StallW, CmpNVM, CmpNVW); - flopenrc #(64) MWRegCmp3(clk, reset, FlushW, ~StallW, CmpResM, CmpResW); - - flopenrc #(64) MWRegClass2(clk, reset, FlushW, ~StallW, FResM, FResW); - flopenrc #(1) MWRegClass1(clk, reset, FlushW, ~StallW, FFlgM, FFlgW); - - flopenrc #(11) MWCtrlReg(clk, reset, FlushW, ~StallW, - {FWriteEnM, FResultSelM, RdM, FmtM, FWriteIntM}, - {FWriteEnW, FResultSelW, RdW, FmtW, FWriteIntW}); 
- - - - - - - //######################################### - // BEGIN WRITEBACK STAGE - //######################################### -//***turn into muxs - always_comb begin - case (FResultSelW) - 3'b000 : FPUFlagsW = 5'b0; - 3'b001 : FPUFlagsW = FMAFlgW; - 3'b010 : FPUFlagsW = FAddFlgW; - 3'b011 : FPUFlagsW = FDivSqrtFlgW; - 3'b100 : FPUFlagsW = {4'b0,FFlgW}; - default : FPUFlagsW = 5'bxxxxx; - endcase - end - - always_comb begin - case (FResultSelW) - 3'b000 : FPUResult64W = FmtW ? {ReadDataW, {64-`XLEN{1'b0}}} : {ReadDataW[31:0], 32'b0}; - 3'b001 : FPUResult64W = FMAResW; - 3'b010 : FPUResult64W = FAddResW; - 3'b011 : FPUResult64W = FDivResultW; - 3'b100 : FPUResult64W = FResW; - default : FPUResult64W = 64'bxxxxx; - endcase - end - - - // interface between XLEN size datapath and double-precision sized - // floating-point results - // - // define offsets for LSB zero extension or truncation - always_comb begin - // zero extension -//***turn into mux - FPUResultW = FmtW ? FPUResult64W[63:64-`XLEN] : {{`XLEN-32{1'b0}}, FPUResult64W[63:32]}; - //*** put into mem stage - SetFflagsM = FPUFlagsW; + + + + + //***************** + // M/W pipe registers + //***************** + flopenrc #(64) MWRegFma1(clk, reset, FlushW, ~StallW, FMAResM, FMAResW); + flopenrc #(5) MWRegFma2(clk, reset, FlushW, ~StallW, FMAFlgM, FMAFlgW); + + flopenrc #(64) MWRegDiv1(clk, reset, FlushW, ~StallW, FDivResultM, FDivResultW); + flopenrc #(5) MWRegDiv2(clk, reset, FlushW, ~StallW, FDivSqrtFlgM, FDivSqrtFlgW); + + flopenrc #(64) MWRegAdd1(clk, reset, FlushW, ~StallW, FAddResM, FAddResW); + flopenrc #(5) MWRegAdd2(clk, reset, FlushW, ~StallW, FAddFlgM, FAddFlgW); + + flopenrc #(1) MWRegCmp1(clk, reset, FlushW, ~StallW, CmpNVM, CmpNVW); + flopenrc #(64) MWRegCmp3(clk, reset, FlushW, ~StallW, CmpResM, CmpResW); + + flopenrc #(64) MWRegClass2(clk, reset, FlushW, ~StallW, FResM, FResW); + flopenrc #(1) MWRegClass1(clk, reset, FlushW, ~StallW, FFlgM, FFlgW); + + flopenrc #(11) MWCtrlReg(clk, reset, 
FlushW, ~StallW, + {FWriteEnM, FResultSelM, RdM, FmtM, FWriteIntM}, + {FWriteEnW, FResultSelW, RdW, FmtW, FWriteIntW}); + + + + + + + //######################################### + // BEGIN WRITEBACK STAGE + //######################################### + + + + + + //***turn into muxs + always_comb begin + case (FResultSelW) + 3'b000 : FPUFlagsW = 5'b0; + 3'b001 : FPUFlagsW = FMAFlgW; + 3'b010 : FPUFlagsW = FAddFlgW; + 3'b011 : FPUFlagsW = FDivSqrtFlgW; + 3'b100 : FPUFlagsW = {4'b0,FFlgW}; + default : FPUFlagsW = 5'bxxxxx; + endcase + end + + always_comb begin + case (FResultSelW) + 3'b000 : FPUResult64W = FmtW ? {ReadDataW, {64-`XLEN{1'b0}}} : {ReadDataW[31:0], 32'b0}; + 3'b001 : FPUResult64W = FMAResW; + 3'b010 : FPUResult64W = FAddResW; + 3'b011 : FPUResult64W = FDivResultW; + 3'b100 : FPUResult64W = FResW; + default : FPUResult64W = 64'bxxxxx; + endcase + end + + + // interface between XLEN size datapath and double-precision sized + // floating-point results + // + // define offsets for LSB zero extension or truncation + always_comb begin + // zero extension + //***turn into mux + FPUResultW = FmtW ? 
FPUResult64W[63:64-`XLEN] : {{`XLEN-32{1'b0}}, FPUResult64W[63:32]}; + //*** put into mem stage + SetFflagsM = FPUFlagsW; + end + end else begin // no F_SUPPORTED; tie outputs low + assign FStallD = 0; + assign FWriteIntE = 0; + assign FWriteIntM = 0; + assign FWriteIntW = 0; + assign FWriteDataE = 0; + assign FIntResM = 0; + assign FDivBusyE = 0; + assign IllegalFPUInstrD = 1; + assign SetFflagsM = 0; + assign FPUResultW = 0; end + endgenerate endmodule // fpu diff --git a/wally-pipelined/src/generic/shift.sv b/wally-pipelined/src/generic/shift.sv index 88152588..70e1076d 100755 --- a/wally-pipelined/src/generic/shift.sv +++ b/wally-pipelined/src/generic/shift.sv @@ -38,13 +38,12 @@ module shift_right #(parameter WIDTH=8) assign stage[0] = A; generate - for (i=0;i<$clog2(WIDTH);i=i+1) - begin : genbit - mux2 #(WIDTH) mux_inst (stage[i], + for (i=0;i<$clog2(WIDTH);i=i+1) begin : genbit + mux2 #(WIDTH) mux_inst (stage[i], {{(WIDTH/(2**(i+1))){1'b0}}, stage[i][WIDTH-1:WIDTH/(2**(i+1))]}, Shift[$clog2(WIDTH)-i-1], stage[i+1]); - end + end endgenerate assign Z = stage[$clog2(WIDTH)]; @@ -60,13 +59,12 @@ module shift_left #(parameter WIDTH=8) assign stage[0] = A; generate - for (i=0;i<$clog2(WIDTH);i=i+1) - begin : genbit - mux2 #(WIDTH) mux_inst (stage[i], + for (i=0;i<$clog2(WIDTH);i=i+1) begin : genbit + mux2 #(WIDTH) mux_inst (stage[i], {stage[i][WIDTH-1-WIDTH/(2**(i+1)):0], {(WIDTH/(2**(i+1))){1'b0}}}, Shift[$clog2(WIDTH)-i-1], stage[i+1]); - end + end endgenerate assign Z = stage[$clog2(WIDTH)]; diff --git a/wally-pipelined/src/ieu/alu.sv b/wally-pipelined/src/ieu/alu.sv index 102fbbed..ac2c06dd 100644 --- a/wally-pipelined/src/ieu/alu.sv +++ b/wally-pipelined/src/ieu/alu.sv @@ -42,7 +42,7 @@ module alu #(parameter WIDTH=32) ( assign {carry, presum} = a + condinvb + {{(WIDTH-1){1'b0}},alucontrol[3]}; // support W-type RV64I ADDW/SUBW/ADDIW that sign-extend 32-bit result to 64 bits - generate + generate if (WIDTH==64) assign sum = w64 ? 
{{32{presum[31]}}, presum[31:0]} : presum; else diff --git a/wally-pipelined/src/ieu/datapath.sv b/wally-pipelined/src/ieu/datapath.sv index 44a40045..f041fce6 100644 --- a/wally-pipelined/src/ieu/datapath.sv +++ b/wally-pipelined/src/ieu/datapath.sv @@ -129,7 +129,7 @@ module datapath ( flopenrc #(5) RdWEg(clk, reset, FlushW, ~StallW, RdM, RdW); // handle Store Conditional result if atomic extension supported - generate + generate if (`A_SUPPORTED) assign SCResultW = SquashSCW ? {{(`XLEN-1){1'b0}}, 1'b1} : {{(`XLEN-1){1'b0}}, 1'b0}; else diff --git a/wally-pipelined/src/ifu/SRAM2P1R1W.sv b/wally-pipelined/src/ifu/SRAM2P1R1W.sv index d71f8bc4..046aacc6 100644 --- a/wally-pipelined/src/ifu/SRAM2P1R1W.sv +++ b/wally-pipelined/src/ifu/SRAM2P1R1W.sv @@ -97,11 +97,11 @@ module SRAM2P1R1W // write port generate - for (index = 0; index < Width; index = index + 1) begin + for (index = 0; index < Width; index = index + 1) begin:mem always_ff @ (posedge clk) begin - if (WEN1Q & BitWEN1[index]) begin - memory[WA1Q][index] <= WD1Q[index]; - end + if (WEN1Q & BitWEN1[index]) begin + memory[WA1Q][index] <= WD1Q[index]; + end end end endgenerate diff --git a/wally-pipelined/src/ifu/ifu.sv b/wally-pipelined/src/ifu/ifu.sv index 4fcefe85..24952edf 100644 --- a/wally-pipelined/src/ifu/ifu.sv +++ b/wally-pipelined/src/ifu/ifu.sv @@ -188,7 +188,7 @@ module ifu ( flopenl #(`XLEN) pcreg(clk, reset, ~StallF & ~ICacheStallF, PCNextF, `RESET_VECTOR, PCF); // branch and jump predictor - generate + generate if (`BPRED_ENABLED == 1) begin : bpred // I am making the port connection explicit for now as I want to see them and they will be changing. 
bpred bpred(.*, diff --git a/wally-pipelined/src/ifu/localHistoryPredictor.sv b/wally-pipelined/src/ifu/localHistoryPredictor.sv index 8aaa85c0..6c5c9478 100644 --- a/wally-pipelined/src/ifu/localHistoryPredictor.sv +++ b/wally-pipelined/src/ifu/localHistoryPredictor.sv @@ -67,7 +67,7 @@ module localHistoryPredictor genvar index; generate - for (index = 0; index < 2**m; index = index +1) begin + for (index = 0; index < 2**m; index = index +1) begin:localhist flopenr #(k) LocalHistoryRegister(.clk(clk), .reset(reset), diff --git a/wally-pipelined/src/lsu/dcache.sv b/wally-pipelined/src/lsu/dcache.sv index fec70ef4..e8dfeb5c 100644 --- a/wally-pipelined/src/lsu/dcache.sv +++ b/wally-pipelined/src/lsu/dcache.sv @@ -151,7 +151,7 @@ module dcachecontroller #(parameter LINESIZE = 256) ( genvar i; generate - for (i=0; i < WORDSPERLINE; i++) begin + for (i=0; i < WORDSPERLINE; i++) begin:sb flopenr #(`XLEN) flop(clk, reset, FetchState & (i == FetchWordNum), ReadDataW, DCacheMemWriteData[(i+1)*`XLEN-1:i*`XLEN]); end endgenerate diff --git a/wally-pipelined/src/lsu/lsu.sv b/wally-pipelined/src/lsu/lsu.sv index 8d4df6ec..a2bcf52b 100644 --- a/wally-pipelined/src/lsu/lsu.sv +++ b/wally-pipelined/src/lsu/lsu.sv @@ -64,7 +64,7 @@ module lsu ( output logic [1:0] AtomicMaskedM, input logic MemAckW, // from ahb input logic [`XLEN-1:0] HRDATAW, // from ahb - output logic [2:0] Funct3MfromLSU, + output logic [2:0] SizeFromLSU, output logic StallWfromLSU, @@ -132,7 +132,7 @@ module lsu ( logic MMUTranslate; logic HPTWRead; logic [1:0] MemRWMtoLSU; - logic [2:0] Funct3MtoLSU; + logic [2:0] SizeToLSU; logic [1:0] AtomicMtoLSU; logic [`XLEN-1:0] MemAdrMtoLSU; logic [`XLEN-1:0] WriteDataMtoLSU; @@ -204,7 +204,7 @@ module lsu ( // LSU .DisableTranslation(DisableTranslation), .MemRWMtoLSU(MemRWMtoLSU), - .Funct3MtoLSU(Funct3MtoLSU), + .SizeToLSU(SizeToLSU), .AtomicMtoLSU(AtomicMtoLSU), .MemAdrMtoLSU(MemAdrMtoLSU), .WriteDataMtoLSU(WriteDataMtoLSU), // *** ?????????????? 
@@ -220,7 +220,7 @@ module lsu ( mmu #(.TLB_ENTRIES(`DTLB_ENTRIES), .IMMU(0)) dmmu(.TLBAccessType(MemRWMtoLSU), .VirtualAddress(MemAdrMtoLSU), - .Size(Funct3MtoLSU[1:0]), + .Size(SizeToLSU[1:0]), .PTEWriteVal(PageTableEntryM), .PageTypeWriteVal(PageTypeM), .TLBWrite(DTLBWriteM), @@ -244,7 +244,7 @@ module lsu ( // Determine if an Unaligned access is taking place always_comb - case(Funct3MtoLSU[1:0]) + case(SizeToLSU[1:0]) 2'b00: DataMisalignedMfromLSU = 0; // lb, sb, lbu 2'b01: DataMisalignedMfromLSU = MemAdrMtoLSU[0]; // lh, sh, lhu 2'b10: DataMisalignedMfromLSU = MemAdrMtoLSU[1] | MemAdrMtoLSU[0]; // lw, sw, flw, fsw, lwu @@ -400,7 +400,7 @@ module lsu ( end // always_comb // *** for now just pass through size - assign Funct3MfromLSU = Funct3MtoLSU; + assign SizeFromLSU = SizeToLSU; assign StallWfromLSU = StallWtoLSU; diff --git a/wally-pipelined/src/lsu/lsuArb.sv b/wally-pipelined/src/lsu/lsuArb.sv index 3f57cabb..23e88970 100644 --- a/wally-pipelined/src/lsu/lsuArb.sv +++ b/wally-pipelined/src/lsu/lsuArb.sv @@ -54,7 +54,7 @@ module lsuArb // to LSU output logic DisableTranslation, output logic [1:0] MemRWMtoLSU, - output logic [2:0] Funct3MtoLSU, + output logic [2:0] SizeToLSU, output logic [1:0] AtomicMtoLSU, output logic [`XLEN-1:0] MemAdrMtoLSU, output logic [`XLEN-1:0] WriteDataMtoLSU, @@ -87,6 +87,7 @@ module lsuArb statetype CurrState, NextState; logic SelPTW; logic HPTWStallD; + logic [2:0] PTWSize; flopenl #(.TYPE(statetype)) StateReg(.clk(clk), @@ -138,12 +139,9 @@ module lsuArb assign MemRWMtoLSU = SelPTW ? {HPTWRead, 1'b0} : MemRWM; generate - if (`XLEN == 32) begin - assign Funct3MtoLSU = SelPTW ? 3'b010 : Funct3M; - end else begin - assign Funct3MtoLSU = SelPTW ? 3'b011 : Funct3M; - end + assign PTWSize = (`XLEN==32 ? 3'b010 : 3'b011); // 32 or 64-bit access from htpw endgenerate + mux2 #(3) sizemux(Funct3M, PTWSize, SelPTW, SizeToLSU); assign AtomicMtoLSU = SelPTW ? 2'b00 : AtomicM; assign MemAdrMtoLSU = SelPTW ? 
HPTWPAdr : MemAdrM; diff --git a/wally-pipelined/src/mmu/pmpadrdec.sv b/wally-pipelined/src/mmu/pmpadrdec.sv index 50d399ae..0a14d832 100644 --- a/wally-pipelined/src/mmu/pmpadrdec.sv +++ b/wally-pipelined/src/mmu/pmpadrdec.sv @@ -76,8 +76,9 @@ module pmpadrdec ( generate assign Mask[1:0] = 2'b11; assign Mask[2] = (AdrMode == NAPOT); // mask has 0s in upper bis for NA4 region - for (i=3; i < `PA_BITS; i=i+1) + for (i=3; i < `PA_BITS; i=i+1) begin:mask assign Mask[i] = Mask[i-1] & PMPAdr[i-3]; // NAPOT mask: 1's indicate bits to ignore + end endgenerate // verilator lint_on UNOPTFLAT diff --git a/wally-pipelined/src/mmu/pmpchecker.sv b/wally-pipelined/src/mmu/pmpchecker.sv index ee4b261d..9c7f11da 100644 --- a/wally-pipelined/src/mmu/pmpchecker.sv +++ b/wally-pipelined/src/mmu/pmpchecker.sv @@ -63,12 +63,6 @@ module pmpchecker ( // verilator lint_on UNOPTFLAT logic [`PMP_ENTRIES-1:0] PAgePMPAdr; // for TOR PMP matching, PhysicalAddress > PMPAdr[i] genvar i,j; - /* - generate // extract 8-bit chunks from PMPCFG array - for (j=0; j<`PMP_ENTRIES; j = j+8) - assign {PMPCfg[j+7], PMPCfg[j+6], PMPCfg[j+5], PMPCfg[j+4], - PMPCfg[j+3], PMPCfg[j+2], PMPCfg[j+1], PMPCfg[j]} = PMPCFG_ARRAY_REGW[j/8]; - endgenerate */ pmpadrdec pmpadrdecs[`PMP_ENTRIES-1:0]( .PhysicalAddress, @@ -80,7 +74,6 @@ module pmpchecker ( .NoLowerMatchOut(NoLowerMatch), .Match, .Active, .L, .X, .W, .R); - // Only enforce PMP checking for S and U modes when at least one PMP is active or in Machine mode when L bit is set in selected region assign EnforcePMP = (PrivilegeModeW == `M_MODE) ? 
|L : |Active; diff --git a/wally-pipelined/src/mmu/tlb.sv b/wally-pipelined/src/mmu/tlb.sv index 75021265..34400647 100644 --- a/wally-pipelined/src/mmu/tlb.sv +++ b/wally-pipelined/src/mmu/tlb.sv @@ -111,6 +111,7 @@ module tlb #(parameter TLB_ENTRIES = 8, logic [1:0] HitPageType; logic CAMHit; logic [`ASID_BITS-1:0] ASID; + logic DAFault; // Grab the sv mode from SATP and determine whether translation should occur assign SvMode = SATP_REGW[`XLEN-1:`XLEN-`SVMODE_BITS]; @@ -165,7 +166,9 @@ module tlb #(parameter TLB_ENTRIES = 8, // only execute non-user mode pages. assign ImproperPrivilege = ((EffectivePrivilegeMode == `U_MODE) && ~PTE_U) || ((EffectivePrivilegeMode == `S_MODE) && PTE_U); - assign TLBPageFault = Translate && TLBHit && (ImproperPrivilege || ~PTE_X); + // fault for software handling if access bit is off + assign DAFault = ~PTE_A; + assign TLBPageFault = Translate && TLBHit && (ImproperPrivilege || ~PTE_X || DAFault); end else begin logic ImproperPrivilege, InvalidRead, InvalidWrite; @@ -180,7 +183,9 @@ module tlb #(parameter TLB_ENTRIES = 8, // Check for write error. Writes are invalid when the page's write bit is // low. 
assign InvalidWrite = WriteAccess && ~PTE_W; - assign TLBPageFault = Translate && TLBHit && (ImproperPrivilege || InvalidRead || InvalidWrite); + // Fault for software handling if access bit is off or writing a page with dirty bit off + assign DAFault = ~PTE_A | WriteAccess & ~PTE_D; + assign TLBPageFault = Translate && TLBHit && (ImproperPrivilege || InvalidRead || InvalidWrite || DAFault); end endgenerate diff --git a/wally-pipelined/src/mmu/tlbpriority.sv b/wally-pipelined/src/mmu/tlbpriority.sv index a061f622..5096cae6 100644 --- a/wally-pipelined/src/mmu/tlbpriority.sv +++ b/wally-pipelined/src/mmu/tlbpriority.sv @@ -41,8 +41,9 @@ module tlbpriority #(parameter ENTRIES = 8) ( genvar i; generate assign nolower[0] = 1; - for (i=1; i0; j=j-1) begin - if(pendingRequestsAtMaxP[j]) intClaim = j[5:0]; + for(k=N; k>0; k=k-1) begin + if(pendingRequestsAtMaxP[k]) intClaim = k[5:0]; end end // create threshold mask - always_comb begin - threshMask[7] = ~(7==intThreshold); - threshMask[6] = ~(6==intThreshold) & threshMask[7]; - threshMask[5] = ~(5==intThreshold) & threshMask[6]; - threshMask[4] = ~(4==intThreshold) & threshMask[5]; - threshMask[3] = ~(3==intThreshold) & threshMask[4]; - threshMask[2] = ~(2==intThreshold) & threshMask[3]; - threshMask[1] = ~(1==intThreshold) & threshMask[2]; + always_comb begin + threshMask[7] = (intThreshold != 7); + threshMask[6] = (intThreshold != 6) & threshMask[7]; + threshMask[5] = (intThreshold != 5) & threshMask[6]; + threshMask[4] = (intThreshold != 4) & threshMask[5]; + threshMask[3] = (intThreshold != 3) & threshMask[4]; + threshMask[2] = (intThreshold != 2) & threshMask[3]; + threshMask[1] = (intThreshold != 1) & threshMask[2]; end // is the max priority > threshold? // *** would it be any better to first priority encode maxPriority into binary and then ">" with threshold? 
diff --git a/wally-pipelined/src/uncore/uartPC16550D.sv b/wally-pipelined/src/uncore/uartPC16550D.sv index 62c8ea00..badc4197 100644 --- a/wally-pipelined/src/uncore/uartPC16550D.sv +++ b/wally-pipelined/src/uncore/uartPC16550D.sv @@ -291,7 +291,7 @@ module uartPC16550D( // although rxfullbit looks like a combinational loop, in one bit rxfifotail == i and breaks the loop generate genvar i; - for (i=0; i<16; i++) begin + for (i=0; i<16; i++) begin:rx assign RXerrbit[i] = |rxfifo[i][10:8]; // are any of the error conditions set? if (i > 0) assign rxfullbit[i] = ((rxfifohead==i) | rxfullbit[i-1]) & (rxfifotail != i); diff --git a/wally-pipelined/src/wally/wallypipelinedhart.sv b/wally-pipelined/src/wally/wallypipelinedhart.sv index 1c44565f..f18d5af4 100644 --- a/wally-pipelined/src/wally/wallypipelinedhart.sv +++ b/wally-pipelined/src/wally/wallypipelinedhart.sv @@ -159,7 +159,7 @@ module wallypipelinedhart // IEU vs HPTW arbitration signals to send to LSU logic [1:0] MemRWMtoLSU; - logic [2:0] Funct3MtoLSU; + logic [2:0] SizeToLSU; logic [1:0] AtomicMtoLSU; logic [`XLEN-1:0] MemAdrMtoLSU; logic [`XLEN-1:0] WriteDataMtoLSU; @@ -169,7 +169,7 @@ module wallypipelinedhart logic DataMisalignedMfromLSU; logic StallWtoLSU; logic StallWfromLSU; - logic [2:0] Funct3MfromLSU; + logic [2:0] SizeFromLSU; ifu ifu(.InstrInF(InstrRData), @@ -207,7 +207,7 @@ module wallypipelinedhart .AtomicMaskedM(AtomicMaskedM), .MemAckW(MemAckW), .HRDATAW(HRDATAW), - .Funct3MfromLSU(Funct3MfromLSU), // stays the same + .SizeFromLSU(SizeFromLSU), // stays the same .StallWfromLSU(StallWfromLSU), // stays the same .DSquashBusAccessM(DSquashBusAccessM), // probalby removed after dcache implemenation? // currently not connected (but will need to be used for lsu talking to ahb. 
@@ -261,7 +261,7 @@ module wallypipelinedhart //.InstrRData(InstrF), // hook up InstrF later .ISquashBusAccessF(1'b0), // *** temporary hack to disable PMP instruction fetch checking .WriteDataM(WriteDataM), - .MemSizeM(Funct3MfromLSU[1:0]), .UnsignedLoadM(Funct3MfromLSU[2]), + .MemSizeM(SizeFromLSU[1:0]), .UnsignedLoadM(SizeFromLSU[2]), .Funct7M(InstrM[31:25]), .HRDATAW(HRDATAW), .StallW(StallWfromLSU), diff --git a/wally-pipelined/testbench/testbench-imperas.sv b/wally-pipelined/testbench/testbench-imperas.sv index 8c3e28c3..2cf37c17 100644 --- a/wally-pipelined/testbench/testbench-imperas.sv +++ b/wally-pipelined/testbench/testbench-imperas.sv @@ -514,6 +514,9 @@ string tests32f[] = '{ logic HMASTLOCK; logic HCLK, HRESETn; logic [`XLEN-1:0] PCW; + + logic [`XLEN-1:0] debug; + assign debug = dut.uncore.dtim.RAM[536872960]; flopenr #(`XLEN) PCWReg(clk, reset, ~dut.hart.ieu.dp.StallW, dut.hart.ifu.PCM, PCW); flopenr #(32) InstrWReg(clk, reset, ~dut.hart.ieu.dp.StallW, dut.hart.ifu.InstrM, InstrW); @@ -656,10 +659,7 @@ string tests32f[] = '{ // Check errors errors = (i == SIGNATURESIZE+1); // error if file is empty i = 0; - if (`XLEN == 32) - testadr = (`TIM_BASE+tests[test+1].atohex())/4; - else - testadr = (`TIM_BASE+tests[test+1].atohex())/8; + testadr = (`TIM_BASE+tests[test+1].atohex())/(`XLEN/8); /* verilator lint_off INFINITELOOP */ while (signature[i] !== 'bx) begin //$display("signature[%h] = %h", i, signature[i]); @@ -669,14 +669,16 @@ string tests32f[] = '{ // kind of hacky test for garbage right now errors = errors+1; $display(" Error on test %s result %d: adr = %h sim = %h, signature = %h", - tests[test], i, (testadr+i)*`XLEN/8, dut.uncore.dtim.RAM[testadr+i], signature[i]); + tests[test], i, (testadr+i)*(`XLEN/8), dut.uncore.dtim.RAM[testadr+i], signature[i]); $stop;//***debug end end i = i + 1; end /* verilator lint_on INFINITELOOP */ - if (errors == 0) $display("%s succeeded. 
Brilliant!!!", tests[test]); + if (errors == 0) begin + $display("%s succeeded. Brilliant!!!", tests[test]); + end else begin $display("%s failed with %d errors. :(", tests[test], errors); totalerrors = totalerrors+1;