From 8f98f3bfabf03cea00fc10e176a7c6cd119192ef Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Tue, 28 Jun 2022 21:33:31 +0000 Subject: [PATCH] added rv32 double precision stores - untested --- pipelined/src/cache/cache.sv | 13 ++++++++++--- pipelined/src/cache/cacheway.sv | 11 +++++++++-- pipelined/src/fpu/fctrl.sv | 6 +++--- pipelined/src/fpu/fpu.sv | 23 ++++++++++++++++++----- pipelined/src/ieu/datapath.sv | 10 ++++++++-- pipelined/src/ifu/ifu.sv | 2 +- pipelined/src/lsu/lsu.sv | 8 +++++--- pipelined/src/lsu/subwordread.sv | 14 +++++++------- pipelined/src/wally/wallypipelinedcore.sv | 11 ++++++++--- 9 files changed, 69 insertions(+), 29 deletions(-) diff --git a/pipelined/src/cache/cache.sv b/pipelined/src/cache/cache.sv index 2374b4938..d380bfc83 100644 --- a/pipelined/src/cache/cache.sv +++ b/pipelined/src/cache/cache.sv @@ -43,6 +43,9 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGWPL, WORDLEN, MUXINTER input logic [`PA_BITS-1:0] PAdr, // physical address input logic [(`XLEN-1)/8:0] ByteMask, input logic [`XLEN-1:0] FinalWriteData, + input logic [`FLEN-1:0] FWriteDataM, + input logic FLoad2, + input logic FpLoadStoreM, output logic CacheCommitted, output logic CacheStall, // to performance counters to cpu @@ -120,7 +123,7 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGWPL, WORDLEN, MUXINTER // Array of cache ways, along with victim, hit, dirty, and read merging logic cacheway #(NUMLINES, LINELEN, TAGLEN, OFFSETLEN, SETLEN) - CacheWays[NUMWAYS-1:0](.clk, .reset, .RAdr, .PAdr, .CacheWriteData, .ByteMask, + CacheWays[NUMWAYS-1:0](.clk, .reset, .RAdr, .PAdr, .CacheWriteData, .ByteMask, .FLoad2, .SetValidWay, .ClearValidWay, .SetDirtyWay, .ClearDirtyWay, .SelEvict, .VictimWay, .FlushWay, .SelFlush, .ReadDataLineWay, .HitWay, .VictimDirtyWay, .VictimTagWay, .Invalidate(InvalidateCacheM)); @@ -159,8 +162,12 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGWPL, WORDLEN, MUXINTER ///////////////////////////////////////////////////////////////////////////////////////////// // Write Path: Write data and address. Muxes between writes from bus and writes from CPU. ///////////////////////////////////////////////////////////////////////////////////////////// - mux2 #(LINELEN) WriteDataMux(.d0({WORDSPERLINE{FinalWriteData}}), - .d1(CacheBusWriteData), .s(SetValid), .y(CacheWriteData)); + if (`LLEN>`XLEN) + mux3 #(LINELEN) WriteDataMux(.d0({WORDSPERLINE{FinalWriteData}}), + .d1({WORDSPERLINE/2{FWriteDataM}}), .d2(CacheBusWriteData), .s({SetValid,FpLoadStoreM&~SetValid}), .y(CacheWriteData)); + else + mux2 #(LINELEN) WriteDataMux(.d0({WORDSPERLINE{FinalWriteData}}), + .d1(CacheBusWriteData), .s(SetValid), .y(CacheWriteData)); mux3 #(`PA_BITS) CacheBusAdrMux(.d0({PAdr[`PA_BITS-1:OFFSETLEN], {{OFFSETLEN}{1'b0}}}), .d1({VictimTag, PAdr[SETTOP-1:OFFSETLEN], {{OFFSETLEN}{1'b0}}}), .d2({VictimTag, FlushAdr, {{OFFSETLEN}{1'b0}}}), diff --git a/pipelined/src/cache/cacheway.sv b/pipelined/src/cache/cacheway.sv index d9a478612..ac1e26e8f 100644 --- a/pipelined/src/cache/cacheway.sv +++ b/pipelined/src/cache/cacheway.sv @@ -38,6 +38,7 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26, input logic [$clog2(NUMLINES)-1:0] RAdr, input logic [`PA_BITS-1:0] PAdr, input logic [LINELEN-1:0] CacheWriteData, + input logic FLoad2, input logic SetValidWay, input logic ClearValidWay, input logic SetDirtyWay, @@ -74,8 +75,14 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26, ///////////////////////////////////////////////////////////////////////////////////////////// // Write Enable demux ///////////////////////////////////////////////////////////////////////////////////////////// - onehotdecoder #(LOGWPL) adrdec( - .bin(PAdr[LOGWPL+LOGXLENBYTES-1:LOGXLENBYTES]), .decoded(MemPAdrDecoded)); + if(`LLEN>`XLEN)begin + logic [2**LOGWPL-1:0] MemPAdrDecodedtmp; + onehotdecoder #(LOGWPL) adrdec( + .bin(PAdr[LOGWPL+LOGXLENBYTES-1:LOGXLENBYTES]), .decoded(MemPAdrDecodedtmp)); + assign MemPAdrDecoded = MemPAdrDecodedtmp|{MemPAdrDecodedtmp[2**LOGWPL-2:0]&{2**LOGWPL-1{FLoad2}}, 1'b0}; + end else + onehotdecoder #(LOGWPL) adrdec( + .bin(PAdr[LOGWPL+LOGXLENBYTES-1:LOGXLENBYTES]), .decoded(MemPAdrDecoded)); // If writing the whole line set all write enables to 1, else only set the correct word. assign SelectedWriteWordEn = SetValidWay ? '1 : SetDirtyWay ? MemPAdrDecoded : '0; // OR-AND assign FinalByteMask = SetValidWay ? '1 : ByteMask; // OR diff --git a/pipelined/src/fpu/fctrl.sv b/pipelined/src/fpu/fctrl.sv index 60d260027..f6ed650af 100755 --- a/pipelined/src/fpu/fctrl.sv +++ b/pipelined/src/fpu/fctrl.sv @@ -33,8 +33,8 @@ module fctrl ( default: ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_1; // non-implemented instruction endcase 7'b0100111: case(Funct3D) - 3'b010: ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_0; // fsw - 3'b011: ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_0; // fsd + 3'b010: ControlsD = `FCTRLW'b0_0_10_xx_0xx_0_0; // fsw + 3'b011: ControlsD = `FCTRLW'b0_0_10_xx_0xx_0_0; // fsd default: ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_1; // non-implemented instruction endcase 7'b1000011: ControlsD = `FCTRLW'b1_0_01_10_000_0_0; // fmadd @@ -121,7 +121,7 @@ module fctrl ( assign FmtD = 0; else if (`FPSIZES == 2)begin logic [1:0] FmtTmp; - assign FmtTmp = ((Funct7D[6:3] == 4'b0100)&OpD[4]) ? Rs2D[1:0] : Funct7D[1:0]; + assign FmtTmp = ((Funct7D[6:3] == 4'b0100)&OpD[4]) ? Rs2D[1:0] : (~OpD[6]&(&OpD[2:0])) ? {~Funct3D[1], ~(Funct3D[1]^Funct3D[0])} : Funct7D[1:0]; assign FmtD = (`FMT == FmtTmp); end else if (`FPSIZES == 3|`FPSIZES == 4) diff --git a/pipelined/src/fpu/fpu.sv b/pipelined/src/fpu/fpu.sv index aba1a8f48..25b39d69b 100755 --- a/pipelined/src/fpu/fpu.sv +++ b/pipelined/src/fpu/fpu.sv @@ -41,10 +41,12 @@ module fpu ( input logic [4:0] RdM, RdW, // which FP register to write to (from IEU) input logic [1:0] STATUS_FS, // Is floating-point enabled? output logic FRegWriteM, // FP register write enable - output logic FpLoadM, // Fp load instruction? + output logic FpLoadStoreM, // Fp load instruction? + output logic FLoad2, output logic FStallD, // Stall the decode stage output logic FWriteIntE, // integer register write enables output logic [`XLEN-1:0] FWriteDataE, // Data to be written to memory + output logic [`FLEN-1:0] FWriteDataM, // Data to be written to memory output logic [`XLEN-1:0] FIntResM, // data to be written to integer register output logic [`XLEN-1:0] FCvtIntResW, // data to be written to integer register output logic [1:0] FResSelW, @@ -292,8 +294,19 @@ module fpu ( // data to be stored in memory - to IEU // - FP uses NaN-blocking format // - if there are any unsused bits the most significant bits are filled with 1s - if (`FLEN>`XLEN) assign FWriteDataE = FSrcYE[`XLEN-1:0]; - else assign FWriteDataE = {{`XLEN-`FLEN{FSrcYE[`FLEN-1]}}, FSrcYE}; + if (`LLEN==`XLEN) begin + assign FWriteDataE = FSrcYE[`XLEN-1:0]; + end else begin + logic [`FLEN-1:0] FWriteDataE; + if(`FMTBITS == 2) assign FLoad2 = FmtM == `FMT; + else assign FLoad2 = FmtM; + + if (`FPSIZES==1) assign FWriteDataE = FSrcYE; + else if (`FPSIZES==2) assign FWriteDataE = FmtE ? FSrcYE : {2{FSrcYE[`LEN1-1:0]}}; + else assign FWriteDataE = FmtE == `FMT ? FSrcYE : {2{FSrcYE[`LEN1-1:0]}}; + + flopenrc #(`FLEN) EMWriteDataReg (clk, reset, FlushM, ~StallM, FWriteDataE, FWriteDataM); + end // NaN Block SrcA generate @@ -311,7 +324,7 @@ module fpu ( assign PreNVE = CmpNVE&(FOpCtrlE[2]|FWriteIntE); // select the result that may be written to the integer register - to IEU - if (`FLEN>`XLEN) + if (`FLEN>`XLEN) assign IntSrcXE = FSrcXE[`XLEN-1:0]; else assign IntSrcXE = {{`XLEN-`FLEN{FSrcXE[`FLEN-1:0]}}, FSrcXE}; @@ -356,7 +369,7 @@ module fpu ( // ||| ||| ////////////////////////////////////////////////////////////////////////////////////////// - assign FpLoadM = FResSelM[1]; + assign FpLoadStoreM = FResSelM[1]; postprocess postprocess(.XSgnM, .YSgnM, .ZExpM, .XManM, .YManM, .ZManM, .FrmM, .FmtM, .ProdExpM, .EarlyTermShiftDiv2M, .AddendStickyM, .KillProdM, .XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, .Quot, diff --git a/pipelined/src/ieu/datapath.sv b/pipelined/src/ieu/datapath.sv index b7a6a9644..df711695e 100644 --- a/pipelined/src/ieu/datapath.sv +++ b/pipelined/src/ieu/datapath.sv @@ -124,12 +124,18 @@ module datapath ( flopenrc #(5) RdWReg(clk, reset, FlushW, ~StallW, RdM, RdW); // floating point interactions: fcvt, fp stores - if (`F_SUPPORTED) begin:fpmux + if (`F_SUPPORTED&(`LLEN>`XLEN)) begin:fpmux + logic [`XLEN-1:0] IFCvtResultW; + mux2 #(`XLEN) resultmuxM(IEUResultM, FIntResM, FWriteIntM, IFResultM); + assign WriteDataE = ForwardedSrcBE; + mux2 #(`XLEN) cvtresultmuxW(IFResultW, FCvtIntResW, ~FResSelW[1]&FResSelW[0], IFCvtResultW); + mux5 #(`XLEN) resultmuxW(IFCvtResultW, ReadDataW, CSRReadValW, MDUResultW, SCResultW, ResultSrcW, ResultW); + end else if (`F_SUPPORTED) begin:fpmux logic [`XLEN-1:0] IFCvtResultW; mux2 #(`XLEN) resultmuxM(IEUResultM, FIntResM, FWriteIntM, IFResultM); mux2 #(`XLEN) writedatamux(ForwardedSrcBE, FWriteDataE, ~IllegalFPUInstrE, WriteDataE); mux2 #(`XLEN) cvtresultmuxW(IFResultW, FCvtIntResW, ~FResSelW[1]&FResSelW[0], IFCvtResultW); - mux5 #(`XLEN) resultmuxW(IFCvtResultW, ReadDataW, CSRReadValW, MDUResultW, SCResultW, ResultSrcW, ResultW); + mux5 #(`XLEN) resultmuxW(IFCvtResultW, ReadDataW, CSRReadValW, MDUResultW, SCResultW, ResultSrcW, ResultW); end else begin:fpmux assign IFResultM = IEUResultM; assign WriteDataE = ForwardedSrcBE; mux5 #(`XLEN) resultmuxW(IFResultW, ReadDataW, CSRReadValW, MDUResultW, SCResultW, ResultSrcW, ResultW); diff --git a/pipelined/src/ifu/ifu.sv b/pipelined/src/ifu/ifu.sv index 29d07cc2c..02e748f31 100644 --- a/pipelined/src/ifu/ifu.sv +++ b/pipelined/src/ifu/ifu.sv @@ -227,7 +227,7 @@ module ifu ( icache(.clk, .reset, .CPUBusy, .IgnoreRequestTLB(ITLBMissF), .TrapM(TrapM), .IgnoreRequestTrapM('0), .CacheBusWriteData(ICacheBusWriteData), .CacheBusAck(ICacheBusAck), .CacheBusAdr(ICacheBusAdr), .CacheStall(ICacheStallF), - .CacheFetchLine(ICacheFetchLine), + .CacheFetchLine(ICacheFetchLine), .FWriteDataM(), .FpLoadStoreM(), .FLoad2(), .CacheWriteLine(), .ReadDataWord(FinalInstrRawF), .Cacheable(CacheableF), .CacheMiss(ICacheMiss), .CacheAccess(ICacheAccess), diff --git a/pipelined/src/lsu/lsu.sv b/pipelined/src/lsu/lsu.sv index 7234a7cac..5c56b1356 100644 --- a/pipelined/src/lsu/lsu.sv +++ b/pipelined/src/lsu/lsu.sv @@ -57,7 +57,9 @@ module lsu ( input logic BigEndianM, input logic sfencevmaM, // fpu - input logic FpLoadM, + input logic [`FLEN-1:0] FWriteDataM, + input logic FLoad2, + input logic FpLoadStoreM, // faults output logic LoadPageFaultM, StoreAmoPageFaultM, output logic LoadMisalignedFaultM, LoadAccessFaultM, @@ -235,7 +237,7 @@ module lsu ( .NUMWAYS(`DCACHE_NUMWAYS), .LOGWPL(LOGWPL), .WORDLEN(`LLEN), .MUXINTERVAL(`XLEN), .DCACHE(1)) dcache( .clk, .reset, .CPUBusy, .LSUBusWriteCrit, .RW(LSURWM), .Atomic(LSUAtomicM), .FlushCache(FlushDCacheM), .NextAdr(LSUAdrE), .PAdr(LSUPAdrM), - .ByteMask(ByteMaskM), .WordCount, + .ByteMask(ByteMaskM), .WordCount, .FpLoadStoreM, .FWriteDataM, .FLoad2, .FinalWriteData(FinalWriteDataM), .Cacheable(CacheableM), .CacheStall(DCacheStallM), .CacheMiss(DCacheMiss), .CacheAccess(DCacheAccess), .IgnoreRequestTLB, .IgnoreRequestTrapM, .TrapM(1'b0), .CacheCommitted(DCacheCommittedM), @@ -269,7 +271,7 @@ module lsu ( subwordwrite subwordwrite(.LSUPAdrM(LSUPAdrM[2:0]), .LSUFunct3M, .AMOWriteDataM, .LittleEndianWriteDataM, .ByteMaskM); subwordread subwordread(.ReadDataWordMuxM, .LSUPAdrM(LSUPAdrM[2:0]), - .FpLoadM, .Funct3M(LSUFunct3M), .ReadDataM); + .FpLoadStoreM, .Funct3M(LSUFunct3M), .ReadDataM); ///////////////////////////////////////////////////////////////////////////////////////////// // MW Pipeline Register diff --git a/pipelined/src/lsu/subwordread.sv b/pipelined/src/lsu/subwordread.sv index 4a6d99bfc..d38595d49 100644 --- a/pipelined/src/lsu/subwordread.sv +++ b/pipelined/src/lsu/subwordread.sv @@ -35,7 +35,7 @@ module subwordread input logic [`LLEN-1:0] ReadDataWordMuxM, input logic [2:0] LSUPAdrM, input logic [2:0] Funct3M, - input logic FpLoadM, + input logic FpLoadStoreM, output logic [`LLEN-1:0] ReadDataM ); @@ -83,16 +83,16 @@ module subwordread case(Funct3M) 3'b000: ReadDataM = {{`LLEN-8{ByteM[7]}}, ByteM}; // lb 3'b001: if(`ZFH_SUPPORTED) - ReadDataM = {{`LLEN-16{HalfwordM[15]|FpLoadM}}, HalfwordM[15:0]}; // lh/flh + ReadDataM = {{`LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh else ReadDataM = {{`LLEN-16{HalfwordM[15]}}, HalfwordM[15:0]}; // lh 3'b010: if(`F_SUPPORTED) - ReadDataM = {{`LLEN-32{WordM[31]|FpLoadM}}, WordM[31:0]}; // lw/flw + ReadDataM = {{`LLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw else ReadDataM = {{`LLEN-32{WordM[31]}}, WordM[31:0]}; // lw 3'b011: if(`D_SUPPORTED) - ReadDataM = {{`LLEN-64{DblWordM[63]|FpLoadM}}, DblWordM[63:0]}; // ld/fld + ReadDataM = {{`LLEN-64{DblWordM[63]|FpLoadStoreM}}, DblWordM[63:0]}; // ld/fld else ReadDataM = {{`LLEN-64{DblWordM[63]}}, DblWordM[63:0]}; // ld/fld 3'b100: if(`Q_SUPPORTED) - ReadDataM = FpLoadM ? ReadDataWordMuxM : {{`LLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq + ReadDataM = FpLoadStoreM ? ReadDataWordMuxM : {{`LLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq else ReadDataM = {{`LLEN-8{1'b0}}, ByteM[7:0]}; // lbu 3'b101: ReadDataM = {{`LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu @@ -122,10 +122,10 @@ module subwordread case(Funct3M) 3'b000: ReadDataM = {{`LLEN-8{ByteM[7]}}, ByteM}; // lb 3'b001: if(`ZFH_SUPPORTED) - ReadDataM = {{`LLEN-16{HalfwordM[15]|FpLoadM}}, HalfwordM[15:0]}; // lh/flh + ReadDataM = {{`LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh else ReadDataM = {{`LLEN-16{HalfwordM[15]}}, HalfwordM[15:0]}; // lh 3'b010: if(`F_SUPPORTED) - ReadDataM = {{`LLEN-32{ReadDataWordMuxM[31]|FpLoadM}}, ReadDataWordMuxM[31:0]}; // lw/flw + ReadDataM = {{`LLEN-32{ReadDataWordMuxM[31]|FpLoadStoreM}}, ReadDataWordMuxM[31:0]}; // lw/flw else ReadDataM = {{`LLEN-32{ReadDataWordMuxM[31]}}, ReadDataWordMuxM[31:0]}; // lw 3'b011: ReadDataM = ReadDataWordMuxM; // fld 3'b100: ReadDataM = {{`LLEN-8{1'b0}}, ByteM[7:0]}; // lbu diff --git a/pipelined/src/wally/wallypipelinedcore.sv b/pipelined/src/wally/wallypipelinedcore.sv index b3f11680b..8ef8ec18b 100644 --- a/pipelined/src/wally/wallypipelinedcore.sv +++ b/pipelined/src/wally/wallypipelinedcore.sv @@ -92,13 +92,15 @@ module wallypipelinedcore ( logic FStallD; logic FWriteIntE; logic [`XLEN-1:0] FWriteDataE; + logic FLoad2; + logic [`FLEN-1:0] FWriteDataM; logic [`XLEN-1:0] FIntResM; logic [`XLEN-1:0] FCvtIntResW; logic FDivBusyE; logic IllegalFPUInstrD, IllegalFPUInstrE; logic FRegWriteM; logic FPUStallD; - logic FpLoadM; + logic FpLoadStoreM; logic [1:0] FResSelW; logic [4:0] SetFflagsM; @@ -253,7 +255,8 @@ module wallypipelinedcore ( .AtomicM, .TrapM, .CommittedM, .DCacheMiss, .DCacheAccess, .SquashSCW, - .FpLoadM, + .FpLoadStoreM, + .FWriteDataM, .FLoad2, //.DataMisalignedM(DataMisalignedM), .IEUAdrE, .IEUAdrM, .WriteDataE, .ReadDataW, .FlushDCacheM, @@ -391,10 +394,12 @@ module wallypipelinedcore ( .RdM, .RdW, // which FP register to write to (from IEU) .STATUS_FS, // is floating-point enabled? .FRegWriteM, // FP register write enable - .FpLoadM, + .FpLoadStoreM, + .FLoad2, .FStallD, // Stall the decode stage .FWriteIntE, // integer register write enable .FWriteDataE, // Data to be written to memory + .FWriteDataM, // Data to be written to memory .FIntResM, // data to be written to integer register .FCvtIntResW, // fp -> int conversion result to be stored in int register .FResSelW, // fpu result selection