forked from Github_Repos/cvw
Merge branch 'main' of https://github.com/davidharrishmc/riscv-wally
This commit is contained in:
commit
b8cc06a434
@ -86,7 +86,7 @@
|
||||
// WFI Timeout Wait
|
||||
`define WFI_TIMEOUT_BIT 16
|
||||
|
||||
// Peripheral Physiccal Addresses
|
||||
// Peripheral Physical Addresses
|
||||
// Peripheral memory space extends from BASE to BASE+RANGE
|
||||
// Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits
|
||||
|
||||
|
@ -102,7 +102,7 @@
|
||||
|
||||
// division constants
|
||||
`define RADIX 32'h2
|
||||
`define DIVCOPIES 32'h1
|
||||
`define DIVCOPIES 32'h2
|
||||
`define DIVLEN ((`NF < `XLEN) ? (`XLEN) : (`NF + 3))
|
||||
// `define DIVN (`NF < `XLEN ? `XLEN : `NF+1) // length of input
|
||||
`define DIVN (`NF < `XLEN ? `XLEN : `NF+3) // length of input
|
||||
|
@ -84,7 +84,7 @@ module ahblite (
|
||||
typedef enum logic [1:0] {IDLE, MEMREAD, MEMWRITE, INSTRREAD} statetype;
|
||||
statetype BusState, NextBusState;
|
||||
|
||||
logic GrantData;
|
||||
logic LSUGrant;
|
||||
logic [31:0] AccessAddress;
|
||||
logic [2:0] ISize;
|
||||
|
||||
@ -132,12 +132,12 @@ module ahblite (
|
||||
|
||||
|
||||
// bus outputs
|
||||
assign #1 GrantData = (NextBusState == MEMREAD) | (NextBusState == MEMWRITE);
|
||||
assign AccessAddress = (GrantData) ? LSUBusAdr[31:0] : IFUBusAdr[31:0];
|
||||
assign #1 LSUGrant = (NextBusState == MEMREAD) | (NextBusState == MEMWRITE);
|
||||
assign AccessAddress = (LSUGrant) ? LSUBusAdr[31:0] : IFUBusAdr[31:0];
|
||||
assign HADDR = AccessAddress;
|
||||
assign ISize = 3'b010; // 32 bit instructions for now; later improve for filling cache with full width; ignored on reads anyway
|
||||
assign HSIZE = (GrantData) ? {1'b0, LSUBusSize[1:0]} : ISize;
|
||||
assign HBURST = (GrantData) ? LSUBurstType : IFUBurstType; // If doing memory accesses, use LSUburst, else use Instruction burst.
|
||||
assign HSIZE = (LSUGrant) ? {1'b0, LSUBusSize[1:0]} : ISize;
|
||||
assign HBURST = (LSUGrant) ? LSUBurstType : IFUBurstType; // If doing memory accesses, use LSUburst, else use Instruction burst.
|
||||
|
||||
/* Cache burst read/writes case statement (hopefully) WRAPS only have access to 4 wraps. X changes position based on HSIZE.
|
||||
000: Single (SINGLE)
|
||||
@ -153,7 +153,7 @@ module ahblite (
|
||||
|
||||
|
||||
assign HPROT = 4'b0011; // not used; see Section 3.7
|
||||
assign HTRANS = (GrantData) ? LSUTransType : IFUTransType; // SEQ if not first read or write, NONSEQ if first read or write, IDLE otherwise
|
||||
assign HTRANS = (LSUGrant) ? LSUTransType : IFUTransType; // SEQ if not first read or write, NONSEQ if first read or write, IDLE otherwise
|
||||
assign HMASTLOCK = 0; // no locking supported
|
||||
assign HWRITE = (NextBusState == MEMWRITE);
|
||||
// Byte mask for HWSTRB
|
||||
|
@ -59,13 +59,18 @@ module divsqrt(
|
||||
logic [`DIVb:0] X;
|
||||
logic [`DIVN-2:0] D; // U0.N-1
|
||||
logic [`DIVN-2:0] Dpreproc;
|
||||
logic [`DIVb:0] LastSM;
|
||||
logic [`DIVb-1:0] LastC;
|
||||
logic [`DIVb:0] FirstSM;
|
||||
logic [`DIVb-1:0] FirstC;
|
||||
logic [`DURLEN-1:0] Dur;
|
||||
logic NegSticky;
|
||||
logic [`DIVCOPIES-1:0] qn;
|
||||
|
||||
srtpreproc srtpreproc(.clk, .DivStart(DivStartE), .Xm(XmE), .QeM, .Xe(XeE), .Fmt(FmtE), .Ye(YeE), .Sqrt(SqrtE), .Dur, .Ym(YmE), .XZero(XZeroE), .X, .Dpreproc);
|
||||
|
||||
srtfsm srtfsm(.reset, .D, .XsE, .SqrtE, .SqrtM, .NextWSN, .NextWCN, .WS, .WC, .Dur, .DivBusy, .clk, .DivStart(DivStartE),.StallE, .StallM, .DivDone, .XZeroE, .YZeroE, .DivSE(DivSM), .XNaNE, .YNaNE,
|
||||
srtfsm srtfsm(.reset, .qn, .LastSM, .LastC, .FirstSM, .FirstC, .D, .XsE, .SqrtE, .SqrtM, .NextWSN, .NextWCN, .WS, .WC, .Dur, .DivBusy, .clk, .DivStart(DivStartE),.StallE, .StallM, .DivDone, .XZeroE, .YZeroE, .DivSE(DivSM), .XNaNE, .YNaNE,
|
||||
.StickyWSA, .XInfE, .YInfE, .NegSticky(NegSticky), .EarlyTermShiftE(EarlyTermShiftM));
|
||||
srt srt(.clk, .D, .SqrtE, .SqrtM, .X,.Dpreproc, .NegSticky, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN, .DivStart(DivStartE), .Xe(XeE), .Ye(YeE), .XZeroE, .YZeroE,
|
||||
srt srt(.clk, .qn, .D, .LastSM, .LastC, .FirstSM, .FirstC, .SqrtE, .SqrtM, .X,.Dpreproc, .NegSticky, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN, .DivStart(DivStartE), .Xe(XeE), .Ye(YeE), .XZeroE, .YZeroE,
|
||||
.StickyWSA, .DivBusy, .Qm(QmM));
|
||||
endmodule
|
@ -41,23 +41,24 @@ module fctrl (
|
||||
input logic [2:0] FRM_REGW, // rounding mode from CSR
|
||||
input logic [1:0] STATUS_FS, // is FPU enabled?
|
||||
input logic FDivBusyE, // is the divider busy
|
||||
output logic IllegalFPUInstrD, IllegalFPUInstrM, // Is the instruction an illegal fpu instruction
|
||||
output logic IllegalFPUInstrM, // Is the instruction an illegal fpu instruction
|
||||
output logic FRegWriteM, FRegWriteW, // FP register write enable
|
||||
output logic [2:0] FrmM, // FP rounding mode
|
||||
output logic [`FMTBITS-1:0] FmtE, FmtM, // FP format
|
||||
output logic DivStartE, // Start division or squareroot
|
||||
output logic XEnE, YEnE, ZEnE,
|
||||
output logic YEnForwardE, ZEnForwardE,
|
||||
output logic FWriteIntE, FWriteIntM, // Write to integer register
|
||||
output logic FWriteIntE, FCvtIntE, FWriteIntM, // Write to integer register
|
||||
output logic [2:0] OpCtrlE, OpCtrlM, // Select which opperation to do in each component
|
||||
output logic [1:0] FResSelE, FResSelM, FResSelW, // Select one of the results that finish in the memory stage
|
||||
output logic [1:0] PostProcSelE, PostProcSelM, // select result in the post processing unit
|
||||
output logic FCvtIntW,
|
||||
output logic [4:0] Adr1E, Adr2E, Adr3E // adresses of each input
|
||||
);
|
||||
|
||||
`define FCTRLW 11
|
||||
logic [`FCTRLW-1:0] ControlsD;
|
||||
logic IllegalFPUInstrE;
|
||||
logic IllegalFPUInstrD, IllegalFPUInstrE;
|
||||
logic FRegWriteD; // FP register write enable
|
||||
logic DivStartD; // integer register write enable
|
||||
logic FWriteIntD; // integer register write enable
|
||||
@ -67,22 +68,40 @@ module fctrl (
|
||||
logic [1:0] FResSelD; // Select one of the results that finish in the memory stage
|
||||
logic [2:0] FrmD, FrmE; // FP rounding mode
|
||||
logic [`FMTBITS-1:0] FmtD; // FP format
|
||||
//*** will putting x for don't cares reduce area in synthesis???
|
||||
logic [1:0] Fmt;
|
||||
logic SupportedFmt;
|
||||
|
||||
// FPU Instruction Decoder
|
||||
assign Fmt = Funct7D[1:0];
|
||||
// Note: only Fmt is checked; fcvt does not check destination format
|
||||
assign SupportedFmt = (Fmt == 2'b00 | (Fmt == 2'b01 & `D_SUPPORTED) |
|
||||
(Fmt == 2'b10 & `ZFH_SUPPORTED) | (Fmt == 2'b11 & `Q_SUPPORTED));
|
||||
always_comb
|
||||
if (STATUS_FS == 2'b00) // FPU instructions are illegal when FPU is disabled
|
||||
ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_1;
|
||||
else if (OpD != 7'b0000111 & OpD != 7'b0100111 & ~SupportedFmt)
|
||||
ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_1; // for anything other than loads and stores, check for supported format
|
||||
else case(OpD)
|
||||
// FRegWrite_FWriteInt_FResSel_PostProcSel_FOpCtrl_FDivStart_IllegalFPUInstr
|
||||
7'b0000111: case(Funct3D)
|
||||
3'b010: ControlsD = `FCTRLW'b1_0_10_xx_0xx_0_0; // flw
|
||||
3'b011: ControlsD = `FCTRLW'b1_0_10_xx_0xx_0_0; // fld
|
||||
default: ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_1; // non-implemented instruction
|
||||
3'b010: ControlsD = `FCTRLW'b1_0_10_xx_0xx_0_0; // flw
|
||||
3'b011: if (`D_SUPPORTED) ControlsD = `FCTRLW'b1_0_10_xx_0xx_0_0; // fld
|
||||
else ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_1; // fld not supported
|
||||
3'b100: if (`Q_SUPPORTED) ControlsD = `FCTRLW'b1_0_10_xx_0xx_0_0; // flq
|
||||
else ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_1; // flq not supported
|
||||
3'b001: if (`ZFH_SUPPORTED) ControlsD = `FCTRLW'b1_0_10_xx_0xx_0_0; // flh
|
||||
else ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_1; // flh not supported
|
||||
default: ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_1; // non-implemented instruction
|
||||
endcase
|
||||
7'b0100111: case(Funct3D)
|
||||
3'b010: ControlsD = `FCTRLW'b0_0_10_xx_0xx_0_0; // fsw
|
||||
3'b011: ControlsD = `FCTRLW'b0_0_10_xx_0xx_0_0; // fsd
|
||||
default: ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_1; // non-implemented instruction
|
||||
3'b010: ControlsD = `FCTRLW'b0_0_10_xx_0xx_0_0; // fsw
|
||||
3'b011: if (`D_SUPPORTED) ControlsD = `FCTRLW'b0_0_10_xx_0xx_0_0; // fsd
|
||||
else ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_1; // fsd not supported
|
||||
3'b100: if (`Q_SUPPORTED) ControlsD = `FCTRLW'b0_0_10_xx_0xx_0_0; // fsq
|
||||
else ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_1; // fsq not supported
|
||||
3'b001: if (`ZFH_SUPPORTED) ControlsD = `FCTRLW'b0_0_10_xx_0xx_0_0; // fsh
|
||||
else ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_1; // fsh not supported
|
||||
default: ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_1; // non-implemented instruction
|
||||
endcase
|
||||
7'b1000011: ControlsD = `FCTRLW'b1_0_01_10_000_0_0; // fmadd
|
||||
7'b1000111: ControlsD = `FCTRLW'b1_0_01_10_001_0_0; // fmsub
|
||||
@ -239,23 +258,23 @@ module fctrl (
|
||||
// 10 - xor sign
|
||||
|
||||
// D/E pipeline register
|
||||
flopenrc #(12+`FMTBITS) DECtrlReg3(clk, reset, FlushE, ~StallE,
|
||||
{FRegWriteD, PostProcSelD, FResSelD, FrmD, FmtD, OpCtrlD, FWriteIntD},
|
||||
{FRegWriteE, PostProcSelE, FResSelE, FrmE, FmtE, OpCtrlE, FWriteIntE});
|
||||
flopenrc #(15) DEAdrReg(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]},
|
||||
flopenrc #(13+`FMTBITS) DECtrlReg3(clk, reset, FlushE, ~StallE,
|
||||
{FRegWriteD, PostProcSelD, FResSelD, FrmD, FmtD, OpCtrlD, FWriteIntD, IllegalFPUInstrD},
|
||||
{FRegWriteE, PostProcSelE, FResSelE, FrmE, FmtE, OpCtrlE, FWriteIntE, IllegalFPUInstrE});
|
||||
flopenrc #(15) DEAdrReg(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]},
|
||||
{Adr1E, Adr2E, Adr3E});
|
||||
flopenrc #(1) DEDivStartReg(clk, reset, FlushE, ~StallE|FDivBusyE, DivStartD, DivStartE);
|
||||
if(`FLEN>`XLEN)
|
||||
flopenrc #(1) DEIllegalReg(clk, reset, FlushE, ~StallE, IllegalFPUInstrD, IllegalFPUInstrE);
|
||||
assign FCvtIntE = (FResSelE == 2'b01);
|
||||
|
||||
// E/M pipeline register
|
||||
flopenrc #(12+int'(`FMTBITS)) EMCtrlReg (clk, reset, FlushM, ~StallM,
|
||||
{FRegWriteE, FResSelE, PostProcSelE, FrmE, FmtE, OpCtrlE, FWriteIntE},
|
||||
{FRegWriteM, FResSelM, PostProcSelM, FrmM, FmtM, OpCtrlM, FWriteIntM});
|
||||
if(`FLEN>`XLEN)
|
||||
flopenrc #(1) EMIllegalReg(clk, reset, FlushM, ~StallM, IllegalFPUInstrE, IllegalFPUInstrM);
|
||||
flopenrc #(13+int'(`FMTBITS)) EMCtrlReg (clk, reset, FlushM, ~StallM,
|
||||
{FRegWriteE, FResSelE, PostProcSelE, FrmE, FmtE, OpCtrlE, FWriteIntE, IllegalFPUInstrE},
|
||||
{FRegWriteM, FResSelM, PostProcSelM, FrmM, FmtM, OpCtrlM, FWriteIntM, IllegalFPUInstrM});
|
||||
// M/W pipeline register
|
||||
flopenrc #(3) MWCtrlReg(clk, reset, FlushW, ~StallW,
|
||||
{FRegWriteM, FResSelM},
|
||||
{FRegWriteW, FResSelW});
|
||||
|
||||
assign FCvtIntW = (FResSelW == 2'b01);
|
||||
|
||||
endmodule
|
||||
|
@ -50,7 +50,7 @@ module flags(
|
||||
input logic [`NE+1:0] Me, // exponent of the normalized sum
|
||||
input logic [1:0] CvtNegResMsbs, // the negitive integer result's most significant bits
|
||||
input logic FmaAs, FmaPs, // the product and modified Z signs
|
||||
input logic R, UfL, S, UfPlus1, // bits used to determine rounding
|
||||
input logic R, G, S, UfPlus1, // bits used to determine rounding
|
||||
output logic DivByZero,
|
||||
output logic IntInvalid, Invalid, Overflow, // flags used to select the res
|
||||
output logic [4:0] PostProcFlg // flags
|
||||
@ -126,16 +126,16 @@ module flags(
|
||||
// | | | | and if the result is not exact
|
||||
// | | | | | and if the input isnt infinity or NaN
|
||||
// | | | | | |
|
||||
assign Underflow = ((FullRe[`NE+1] | (FullRe == 0) | ((FullRe == 1) & (Me == 0) & ~(UfPlus1&UfL)))&(R|S))&~(InfIn|NaNIn|DivByZero|Invalid);
|
||||
assign Underflow = ((FullRe[`NE+1] | (FullRe == 0) | ((FullRe == 1) & (Me == 0) & ~(UfPlus1&G)))&(R|S|G))&~(InfIn|NaNIn|DivByZero|Invalid);
|
||||
|
||||
// Set Inexact flag if the result is different from what would be output given infinite precision
|
||||
// - Don't set the underflow flag if an underflowed result isn't output
|
||||
assign FpInexact = (S|Overflow|R)&~(InfIn|NaNIn|DivByZero|Invalid);
|
||||
assign FpInexact = (S|G|Overflow|R)&~(InfIn|NaNIn|DivByZero|Invalid);
|
||||
|
||||
// if the res is too small to be represented and not 0
|
||||
// | and if the res is not invalid (outside the integer bounds)
|
||||
// | |
|
||||
assign IntInexact = ((CvtCe[`NE]&~XZero)|S|R)&~IntInvalid;
|
||||
assign IntInexact = ((CvtCe[`NE]&~XZero)|S|R|G)&~IntInvalid;
|
||||
|
||||
// select the inexact flag to output
|
||||
assign Inexact = ToInt ? IntInexact : FpInexact;
|
||||
|
@ -42,16 +42,15 @@ module fpu (
|
||||
input logic [1:0] STATUS_FS, // Is floating-point enabled? (From privileged unit)
|
||||
output logic FRegWriteM, // FP register write enable (to privileged unit)
|
||||
output logic FpLoadStoreM, // Fp load instruction? (to LSU)
|
||||
output logic FStore2, // store two words into memory (to LSU)
|
||||
output logic FStallD, // Stall the decode stage (To HZU)
|
||||
output logic FWriteIntE, // integer register write enable (to IEU)
|
||||
output logic [`XLEN-1:0] FWriteDataE, // Data to be written to memory (to IEU) - only used if `XLEN >`FLEN
|
||||
output logic [`FLEN-1:0] FWriteDataM, // Data to be written to memory (to IEU) - only used if `XLEN <`FLEN
|
||||
output logic FCvtIntE, // Convert to int (to IEU)
|
||||
output logic [`FLEN-1:0] FWriteDataM, // Data to be written to memory (to LSU)
|
||||
output logic [`XLEN-1:0] FIntResM, // data to be written to integer register (to IEU)
|
||||
output logic [`XLEN-1:0] FCvtIntResW, // convert result to to be written to integer register (to IEU)
|
||||
output logic [1:0] FResSelW, // final result selection (to IEU)
|
||||
output logic FCvtIntW, // select FCvtIntRes (to IEU)
|
||||
output logic FDivBusyE, // Is the divide/sqrt unit busy (stall execute stage) (to HZU)
|
||||
output logic IllegalFPUInstrD, // Is the instruction an illegal fpu instruction (to privileged unit)
|
||||
output logic IllegalFPUInstrM, // Is the instruction an illegal fpu instruction (to privileged unit)
|
||||
output logic [4:0] SetFflagsM // FPU flags (to privileged unit)
|
||||
);
|
||||
|
||||
@ -60,7 +59,7 @@ module fpu (
|
||||
// - if there are any unused bits the most significant bits are filled with 1s
|
||||
// single stored in a double: | 32 1s | single precision value |
|
||||
// - sets the underflow after rounding
|
||||
|
||||
|
||||
// control signals
|
||||
logic FRegWriteW; // FP register write enable
|
||||
logic [2:0] FrmM; // FP rounding mode
|
||||
@ -69,10 +68,9 @@ module fpu (
|
||||
logic FWriteIntM; // Write to integer register
|
||||
logic [1:0] ForwardXE, ForwardYE, ForwardZE; // forwarding mux control signals
|
||||
logic [2:0] OpCtrlE, OpCtrlM; // Select which opperation to do in each component
|
||||
logic [1:0] FResSelE, FResSelM; // Select one of the results that finish in the memory stage
|
||||
logic [1:0] FResSelE, FResSelM, FResSelW; // Select one of the results that finish in the memory stage
|
||||
logic [1:0] PostProcSelE, PostProcSelM; // select result in the post processing unit
|
||||
logic [4:0] Adr1E, Adr2E, Adr3E; // adresses of each input
|
||||
logic IllegalFPUInstrM;
|
||||
logic XEnE, YEnE, ZEnE;
|
||||
logic YEnForwardE, ZEnForwardE;
|
||||
|
||||
@ -149,7 +147,7 @@ module fpu (
|
||||
logic [`FLEN-1:0] AlignedSrcAE; // align SrcA to the floating point format
|
||||
logic [`FLEN-1:0] BoxedZeroE; // Zero value for Z for multiplication, with NaN boxing if needed
|
||||
logic [`FLEN-1:0] BoxedOneE; // Zero value for Z for multiplication, with NaN boxing if needed
|
||||
|
||||
|
||||
// DECODE STAGE
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
@ -165,9 +163,9 @@ module fpu (
|
||||
// calculate FP control signals
|
||||
fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .InstrD,
|
||||
.StallE, .StallM, .StallW, .FlushE, .FlushM, .FlushW, .FRM_REGW, .STATUS_FS, .FDivBusyE,
|
||||
.reset, .clk, .IllegalFPUInstrD, .FRegWriteM, .FRegWriteW, .FrmM, .FmtE, .FmtM, .YEnForwardE, .ZEnForwardE,
|
||||
.DivStartE, .FWriteIntE, .FWriteIntM, .OpCtrlE, .OpCtrlM, .IllegalFPUInstrM, .XEnE, .YEnE, .ZEnE,
|
||||
.FResSelE, .FResSelM, .FResSelW, .PostProcSelE, .PostProcSelM, .Adr1E, .Adr2E, .Adr3E);
|
||||
.reset, .clk, .FRegWriteM, .FRegWriteW, .FrmM, .FmtE, .FmtM, .YEnForwardE, .ZEnForwardE,
|
||||
.DivStartE, .FWriteIntE, .FCvtIntE, .FWriteIntM, .OpCtrlE, .OpCtrlM, .IllegalFPUInstrM, .XEnE, .YEnE, .ZEnE,
|
||||
.FResSelE, .FResSelM, .FResSelW, .PostProcSelE, .PostProcSelM, .FCvtIntW, .Adr1E, .Adr2E, .Adr3E);
|
||||
|
||||
// FP register file
|
||||
fregfile fregfile (.clk, .reset, .we4(FRegWriteW),
|
||||
@ -290,22 +288,7 @@ module fpu (
|
||||
// - FP uses NaN-boxing format
|
||||
// - if there are any unused bits the most significant bits are filled with 1s
|
||||
|
||||
if(`LLEN==`XLEN)
|
||||
assign FWriteDataE = {{`XLEN-`FLEN{1'b1}}, YE};
|
||||
else begin
|
||||
logic [`FLEN-1:0] WriteDataE;
|
||||
if(`FPSIZES == 1) assign WriteDataE = YE;
|
||||
else if(`FPSIZES == 2) assign WriteDataE = FmtE ? YE : {`FLEN/`LEN1{YE[`LEN1-1:0]}};
|
||||
else
|
||||
always_comb
|
||||
case(FmtE)
|
||||
`Q_FMT: WriteDataE = YE;
|
||||
`D_FMT: WriteDataE = {`FLEN/`D_LEN{YE[`D_LEN-1:0]}};
|
||||
`S_FMT: WriteDataE = {`FLEN/`S_LEN{YE[`S_LEN-1:0]}};
|
||||
`H_FMT: WriteDataE = {`FLEN/`H_LEN{YE[`H_LEN-1:0]}};
|
||||
endcase
|
||||
flopenrc #(`FLEN) EMWriteDataReg (clk, reset, FlushM, ~StallM, WriteDataE, FWriteDataM);
|
||||
end
|
||||
flopenrc #(`FLEN) FWriteDataMReg (clk, reset, FlushM, ~StallM, YE, FWriteDataM);
|
||||
|
||||
// NaN Block SrcA
|
||||
generate
|
||||
|
@ -83,15 +83,13 @@ module postprocess (
|
||||
logic [`NE+1:0] Me;
|
||||
logic [`CORRSHIFTSZ-1:0] Mf; // corectly shifted fraction
|
||||
logic [`NE+1:0] FullRe; // Re with bits to determine sign and overflow
|
||||
logic S; // S bit
|
||||
logic UfPlus1; // do you add one (for determining underflow flag)
|
||||
logic R; // bits needed to determine rounding
|
||||
logic [$clog2(`NORMSHIFTSZ)-1:0] ShiftAmt; // normalization shift count
|
||||
logic [`NORMSHIFTSZ-1:0] ShiftIn; // is the sum zero
|
||||
logic [`NORMSHIFTSZ-1:0] Shifted; // the shifted result
|
||||
logic Plus1; // add one to the final result?
|
||||
logic IntInvalid, Overflow, Invalid; // flags
|
||||
logic UfL;
|
||||
logic G, R, S; // bits needed to determine rounding
|
||||
logic [`FMTBITS-1:0] OutFmt;
|
||||
// fma signals
|
||||
logic [`NE+1:0] FmaMe; // exponent of the normalized sum
|
||||
@ -201,16 +199,16 @@ module postprocess (
|
||||
roundsign roundsign(.FmaPs, .FmaAs, .FmaInvA, .FmaOp, .DivOp, .CvtOp, .FmaNegSum,
|
||||
.Sqrt, .FmaSs, .Xs, .Ys, .CvtCs, .Ms);
|
||||
|
||||
round round(.OutFmt, .Frm, .S, .FmaZmS, .Plus1, .PostProcSel, .CvtCe, .Qe,
|
||||
round round(.OutFmt, .Frm, .FmaZmS, .Plus1, .PostProcSel, .CvtCe, .Qe,
|
||||
.Ms, .FmaMe, .FmaOp, .CvtOp, .CvtResDenormUf, .Mf, .ToInt, .CvtResUf,
|
||||
.DivS, .DivDone,
|
||||
.DivOp, .UfPlus1, .FullRe, .Rf, .Re, .R, .UfL, .Me);
|
||||
.DivOp, .UfPlus1, .FullRe, .Rf, .Re, .S, .R, .G, .Me);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Sign calculation
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
resultsign resultsign(.Frm, .FmaPs, .FmaAs, .FmaMe, .R, .S,
|
||||
resultsign resultsign(.Frm, .FmaPs, .FmaAs, .FmaMe, .R, .S, .G,
|
||||
.FmaOp, .ZInf, .InfIn, .FmaSZero, .Mult, .Ms, .Ws);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
@ -220,7 +218,7 @@ module postprocess (
|
||||
flags flags(.XSNaN, .YSNaN, .ZSNaN, .XInf, .YInf, .ZInf, .InfIn, .XZero, .YZero,
|
||||
.Xs, .Sqrt, .ToInt, .IntToFp, .Int64, .Signed, .OutFmt, .CvtCe,
|
||||
.NaNIn, .FmaAs, .FmaPs, .R, .IntInvalid, .DivByZero,
|
||||
.UfL, .S, .UfPlus1, .CvtOp, .DivOp, .FmaOp, .FullRe, .Plus1,
|
||||
.G, .S, .UfPlus1, .CvtOp, .DivOp, .FmaOp, .FullRe, .Plus1,
|
||||
.Me, .CvtNegResMsbs, .Invalid, .Overflow, .PostProcFlg);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
@ -32,7 +32,7 @@
|
||||
|
||||
module qsel2 ( // *** eventually just change to 4 bits
|
||||
input logic [3:0] ps, pc,
|
||||
output logic qp, qz//, qn
|
||||
output logic qp, qz, qn
|
||||
);
|
||||
|
||||
logic [3:0] p, g;
|
||||
@ -46,20 +46,20 @@ module qsel2 ( // *** eventually just change to 4 bits
|
||||
assign p = ps ^ pc;
|
||||
assign g = ps & pc;
|
||||
|
||||
assign magnitude = ~(&p[2:0]);
|
||||
//assign magnitude = ~(&p[2:0]);
|
||||
assign cout = g[2] | (p[2] & (g[1] | p[1] & g[0]));
|
||||
assign sign = p[3] ^ cout;
|
||||
/* assign #1 magnitude = ~((ps[54]^pc[54]) & (ps[53]^pc[53]) &
|
||||
(ps[52]^pc[52]));
|
||||
assign #1 sign = (ps[55]^pc[55])^
|
||||
(ps[54] & pc[54] | ((ps[54]^pc[54]) &
|
||||
(ps[53]&pc[53] | ((ps[53]^pc[53]) &
|
||||
(ps[52]&pc[52]))))); */
|
||||
//assign sign = p[3] ^ cout;
|
||||
assign magnitude = ~((ps[2]^pc[2]) & (ps[1]^pc[1]) &
|
||||
(ps[0]^pc[0]));
|
||||
assign sign = (ps[3]^pc[3])^
|
||||
(ps[2] & pc[2] | ((ps[2]^pc[2]) &
|
||||
(ps[1]&pc[1] | ((ps[1]^pc[1]) &
|
||||
(ps[0]&pc[0])))));
|
||||
|
||||
// Produce quotient = +1, 0, or -1
|
||||
assign qp = magnitude & ~sign;
|
||||
assign qz = ~magnitude;
|
||||
// assign #1 qn = magnitude & sign;
|
||||
assign qn = magnitude & sign;
|
||||
endmodule
|
||||
|
||||
////////////////////////////////////
|
||||
|
@ -39,6 +39,7 @@ module resultsign(
|
||||
input logic Mult,
|
||||
input logic R,
|
||||
input logic S,
|
||||
input logic G,
|
||||
input logic Ms,
|
||||
output logic Ws
|
||||
);
|
||||
@ -60,7 +61,7 @@ module resultsign(
|
||||
// - if a multiply operation is done, then use the product's sign (Ps)
|
||||
// - if the zero sum is not exactly zero i.e. R|S use the sign of the exact result (which is the product's sign)
|
||||
// - if an effective addition occurs (P+A or -P+-A or P--A) then use the product's sign
|
||||
assign Zeros = (FmaPs^FmaAs)&~(R|S)&~Mult ? Frm[1:0] == 2'b10 : FmaPs;
|
||||
assign Zeros = (FmaPs^FmaAs)&~(R|G|S)&~Mult ? Frm[1:0] == 2'b10 : FmaPs;
|
||||
|
||||
|
||||
// is the result negative
|
||||
|
@ -60,16 +60,14 @@ module round(
|
||||
output logic S, // sticky bit
|
||||
output logic [`NE+1:0] Me,
|
||||
output logic Plus1,
|
||||
output logic R, UfL // bits needed to calculate rounding
|
||||
output logic R, G // bits needed to calculate rounding
|
||||
);
|
||||
logic L; // bit used for rounding - least significant bit of the normalized sum
|
||||
logic UfCalcPlus1;
|
||||
logic NormS; // normalized sum's sticky bit
|
||||
logic UfS; // sticky bit for underlow calculation
|
||||
logic [`NF-1:0] RoundFrac;
|
||||
logic FpRes, IntRes;
|
||||
logic UfR;
|
||||
logic FpRound, FpLSBRes, FpUfRound;
|
||||
logic FpG, FpL, FpR;
|
||||
logic L; // lsb of result
|
||||
logic CalcPlus1, FpPlus1;
|
||||
logic [`FLEN:0] RoundAdd; // how much to add to the result
|
||||
|
||||
@ -176,106 +174,101 @@ module round(
|
||||
|
||||
// only add the Addend sticky if doing an FMA operation
|
||||
// - the shifter shifts too far left when there's an underflow (shifting out all possible sticky bits)
|
||||
assign UfS = FmaZmS&FmaOp | NormS | CvtResUf&CvtOp | FmaMe[`NE+1]&FmaOp | DivS&DivOp;
|
||||
assign S = FmaZmS&FmaOp | NormS | CvtResUf&CvtOp | FmaMe[`NE+1]&FmaOp | DivS&DivOp;
|
||||
|
||||
// determine round and LSB of the rounded value
|
||||
// - underflow round bit is used to determine the underflow flag
|
||||
if (`FPSIZES == 1) begin
|
||||
assign FpRound = Mf[`CORRSHIFTSZ-`NF-1];
|
||||
assign FpLSBRes = Mf[`CORRSHIFTSZ-`NF];
|
||||
assign FpUfRound = Mf[`CORRSHIFTSZ-`NF-2];
|
||||
assign FpG = Mf[`CORRSHIFTSZ-`NF-1];
|
||||
assign FpL = Mf[`CORRSHIFTSZ-`NF];
|
||||
assign FpR = Mf[`CORRSHIFTSZ-`NF-2];
|
||||
|
||||
end else if (`FPSIZES == 2) begin
|
||||
assign FpRound = OutFmt ? Mf[`CORRSHIFTSZ-`NF-1] : Mf[`CORRSHIFTSZ-`NF1-1];
|
||||
assign FpLSBRes = OutFmt ? Mf[`CORRSHIFTSZ-`NF] : Mf[`CORRSHIFTSZ-`NF1];
|
||||
assign FpUfRound = OutFmt ? Mf[`CORRSHIFTSZ-`NF-2] : Mf[`CORRSHIFTSZ-`NF1-2];
|
||||
assign FpG = OutFmt ? Mf[`CORRSHIFTSZ-`NF-1] : Mf[`CORRSHIFTSZ-`NF1-1];
|
||||
assign FpL = OutFmt ? Mf[`CORRSHIFTSZ-`NF] : Mf[`CORRSHIFTSZ-`NF1];
|
||||
assign FpR = OutFmt ? Mf[`CORRSHIFTSZ-`NF-2] : Mf[`CORRSHIFTSZ-`NF1-2];
|
||||
|
||||
end else if (`FPSIZES == 3) begin
|
||||
always_comb
|
||||
case (OutFmt)
|
||||
`FMT: begin
|
||||
FpRound = Mf[`CORRSHIFTSZ-`NF-1];
|
||||
FpLSBRes = Mf[`CORRSHIFTSZ-`NF];
|
||||
FpUfRound = Mf[`CORRSHIFTSZ-`NF-2];
|
||||
FpG = Mf[`CORRSHIFTSZ-`NF-1];
|
||||
FpL = Mf[`CORRSHIFTSZ-`NF];
|
||||
FpR = Mf[`CORRSHIFTSZ-`NF-2];
|
||||
end
|
||||
`FMT1: begin
|
||||
FpRound = Mf[`CORRSHIFTSZ-`NF1-1];
|
||||
FpLSBRes = Mf[`CORRSHIFTSZ-`NF1];
|
||||
FpUfRound = Mf[`CORRSHIFTSZ-`NF1-2];
|
||||
FpG = Mf[`CORRSHIFTSZ-`NF1-1];
|
||||
FpL = Mf[`CORRSHIFTSZ-`NF1];
|
||||
FpR = Mf[`CORRSHIFTSZ-`NF1-2];
|
||||
end
|
||||
`FMT2: begin
|
||||
FpRound = Mf[`CORRSHIFTSZ-`NF2-1];
|
||||
FpLSBRes = Mf[`CORRSHIFTSZ-`NF2];
|
||||
FpUfRound = Mf[`CORRSHIFTSZ-`NF2-2];
|
||||
FpG = Mf[`CORRSHIFTSZ-`NF2-1];
|
||||
FpL = Mf[`CORRSHIFTSZ-`NF2];
|
||||
FpR = Mf[`CORRSHIFTSZ-`NF2-2];
|
||||
end
|
||||
default: begin
|
||||
FpRound = 1'bx;
|
||||
FpLSBRes = 1'bx;
|
||||
FpUfRound = 1'bx;
|
||||
FpG = 1'bx;
|
||||
FpL = 1'bx;
|
||||
FpR = 1'bx;
|
||||
end
|
||||
endcase
|
||||
end else if (`FPSIZES == 4) begin
|
||||
always_comb
|
||||
case (OutFmt)
|
||||
2'h3: begin
|
||||
FpRound = Mf[`CORRSHIFTSZ-`Q_NF-1];
|
||||
FpLSBRes = Mf[`CORRSHIFTSZ-`Q_NF];
|
||||
FpUfRound = Mf[`CORRSHIFTSZ-`Q_NF-2];
|
||||
FpG = Mf[`CORRSHIFTSZ-`Q_NF-1];
|
||||
FpL = Mf[`CORRSHIFTSZ-`Q_NF];
|
||||
FpR = Mf[`CORRSHIFTSZ-`Q_NF-2];
|
||||
end
|
||||
2'h1: begin
|
||||
FpRound = Mf[`CORRSHIFTSZ-`D_NF-1];
|
||||
FpLSBRes = Mf[`CORRSHIFTSZ-`D_NF];
|
||||
FpUfRound = Mf[`CORRSHIFTSZ-`D_NF-2];
|
||||
FpG = Mf[`CORRSHIFTSZ-`D_NF-1];
|
||||
FpL = Mf[`CORRSHIFTSZ-`D_NF];
|
||||
FpR = Mf[`CORRSHIFTSZ-`D_NF-2];
|
||||
end
|
||||
2'h0: begin
|
||||
FpRound = Mf[`CORRSHIFTSZ-`S_NF-1];
|
||||
FpLSBRes = Mf[`CORRSHIFTSZ-`S_NF];
|
||||
FpUfRound = Mf[`CORRSHIFTSZ-`S_NF-2];
|
||||
FpG = Mf[`CORRSHIFTSZ-`S_NF-1];
|
||||
FpL = Mf[`CORRSHIFTSZ-`S_NF];
|
||||
FpR = Mf[`CORRSHIFTSZ-`S_NF-2];
|
||||
end
|
||||
2'h2: begin
|
||||
FpRound = Mf[`CORRSHIFTSZ-`H_NF-1];
|
||||
FpLSBRes = Mf[`CORRSHIFTSZ-`H_NF];
|
||||
FpUfRound = Mf[`CORRSHIFTSZ-`H_NF-2];
|
||||
FpG = Mf[`CORRSHIFTSZ-`H_NF-1];
|
||||
FpL = Mf[`CORRSHIFTSZ-`H_NF];
|
||||
FpR = Mf[`CORRSHIFTSZ-`H_NF-2];
|
||||
end
|
||||
endcase
|
||||
end
|
||||
|
||||
assign R = ToInt&CvtOp ? Mf[`CORRSHIFTSZ-`XLEN-1] : FpRound;
|
||||
assign L = ToInt&CvtOp ? Mf[`CORRSHIFTSZ-`XLEN] : FpLSBRes;
|
||||
assign UfR = ToInt&CvtOp ? Mf[`CORRSHIFTSZ-`XLEN-2] : FpUfRound;
|
||||
|
||||
// used to determine underflow flag
|
||||
assign UfL = FpRound;
|
||||
// determine sticky
|
||||
assign S = UfS | UfR;
|
||||
assign G = ToInt&CvtOp ? Mf[`CORRSHIFTSZ-`XLEN-1] : FpG;
|
||||
assign L = ToInt&CvtOp ? Mf[`CORRSHIFTSZ-`XLEN] : FpL;
|
||||
assign R = ToInt&CvtOp ? Mf[`CORRSHIFTSZ-`XLEN-2] : FpR;
|
||||
|
||||
|
||||
always_comb begin
|
||||
// Determine if you add 1
|
||||
case (Frm)
|
||||
3'b000: CalcPlus1 = R & (S| L);//round to nearest even
|
||||
3'b000: CalcPlus1 = G & (R|S|L);//round to nearest even
|
||||
3'b001: CalcPlus1 = 0;//round to zero
|
||||
3'b010: CalcPlus1 = Ms;//round down
|
||||
3'b011: CalcPlus1 = ~Ms;//round up
|
||||
3'b100: CalcPlus1 = R;//round to nearest max magnitude
|
||||
3'b100: CalcPlus1 = G;//round to nearest max magnitude
|
||||
default: CalcPlus1 = 1'bx;
|
||||
endcase
|
||||
// Determine if you add 1 (for underflow flag)
|
||||
case (Frm)
|
||||
3'b000: UfCalcPlus1 = UfR & (UfS| UfL);//round to nearest even
|
||||
3'b000: UfCalcPlus1 = R & (S|G);//round to nearest even
|
||||
3'b001: UfCalcPlus1 = 0;//round to zero
|
||||
3'b010: UfCalcPlus1 = Ms;//round down
|
||||
3'b011: UfCalcPlus1 = ~Ms;//round up
|
||||
3'b100: UfCalcPlus1 = UfR;//round to nearest max magnitude
|
||||
3'b100: UfCalcPlus1 = R;//round to nearest max magnitude
|
||||
default: UfCalcPlus1 = 1'bx;
|
||||
endcase
|
||||
|
||||
end
|
||||
|
||||
// If an answer is exact don't round
|
||||
assign Plus1 = CalcPlus1 & (S | R);
|
||||
assign Plus1 = CalcPlus1 & (S|R|G);
|
||||
assign FpPlus1 = Plus1&~(ToInt&CvtOp);
|
||||
assign UfPlus1 = UfCalcPlus1 & S; // UfR is part of sticky
|
||||
assign UfPlus1 = UfCalcPlus1 & (S|R);
|
||||
|
||||
// Compute rounded result
|
||||
if (`FPSIZES == 1) begin
|
||||
|
@ -45,6 +45,11 @@ module srt(
|
||||
output logic [`DIVN-2:0] D, // U0.N-1
|
||||
output logic [`DIVb+3:0] NextWSN, NextWCN,
|
||||
output logic [`DIVb+3:0] StickyWSA,
|
||||
output logic [`DIVb:0] LastSM,
|
||||
output logic [`DIVb-1:0] LastC,
|
||||
output logic [`DIVb:0] FirstSM,
|
||||
output logic [`DIVb-1:0] FirstC,
|
||||
output logic [`DIVCOPIES-1:0] qn,
|
||||
output logic [`DIVb+3:0] FirstWS, FirstWC
|
||||
);
|
||||
|
||||
@ -119,7 +124,7 @@ module srt(
|
||||
for(i=0; $unsigned(i)<`DIVCOPIES; i++) begin : interations
|
||||
divinteration divinteration(.D, .DBar, .D2, .DBar2, .SqrtM,
|
||||
.WS(WS[i]), .WC(WC[i]), .WSA(WSA[i]), .WCA(WCA[i]), .Q(Q[i]), .QM(QM[i]), .QNext(QNext[i]), .QMNext(QMNext[i]),
|
||||
.C(C[i]), .S(S[i]), .SM(SM[i]), .SNext(SNext[i]), .SMNext(SMNext[i]));
|
||||
.C(C[i]), .S(S[i]), .SM(SM[i]), .SNext(SNext[i]), .SMNext(SMNext[i]), .qn(qn[i]));
|
||||
if(i<(`DIVCOPIES-1)) begin
|
||||
if (`RADIX==2)begin
|
||||
assign WS[i+1] = {WSA[i][`DIVb+2:0], 1'b0};
|
||||
@ -159,6 +164,11 @@ module srt(
|
||||
assign FirstWS = WS[0];
|
||||
assign FirstWC = WC[0];
|
||||
|
||||
assign LastSM = SM[`DIVCOPIES-1];
|
||||
assign LastC = C[`DIVCOPIES-1];
|
||||
assign FirstSM = SM[0];
|
||||
assign FirstC = C[0];
|
||||
|
||||
if(`RADIX==2)
|
||||
if (`DIVCOPIES == 1)
|
||||
assign StickyWSA = {WSA[0][`DIVb+2:0], 1'b0};
|
||||
@ -182,6 +192,7 @@ module divinteration (
|
||||
input logic [`DIVb-1:0] C,
|
||||
input logic SqrtM,
|
||||
output logic [`DIVb:0] QNext, QMNext,
|
||||
output logic qn,
|
||||
output logic [`DIVb:0] SNext, SMNext,
|
||||
output logic [`DIVb+3:0] WSA, WCA
|
||||
);
|
||||
@ -202,7 +213,7 @@ module divinteration (
|
||||
// 0010 = -1
|
||||
// 0001 = -2
|
||||
if(`RADIX == 2) begin : qsel
|
||||
qsel2 qsel2(WS[`DIVb+3:`DIVb], WC[`DIVb+3:`DIVb], qp, qz);
|
||||
qsel2 qsel2(WS[`DIVb+3:`DIVb], WC[`DIVb+3:`DIVb], qp, qz, qn);
|
||||
fgen2 fgen2(.sp(qp), .sz(qz), .C, .S, .SM, .F);
|
||||
end else begin
|
||||
qsel4 qsel4(.D, .WS, .WC, .Sqrt(SqrtM), .q);
|
||||
|
@ -46,12 +46,17 @@ module srtfsm(
|
||||
input logic [`DIVN-2:0] D, // U0.N-1
|
||||
input logic [`DIVb+3:0] StickyWSA,
|
||||
input logic [`DURLEN-1:0] Dur,
|
||||
input logic [`DIVb:0] LastSM,
|
||||
input logic [`DIVb:0] FirstSM,
|
||||
input logic [`DIVb-1:0] LastC,
|
||||
input logic [`DIVb-1:0] FirstC,
|
||||
input logic [`DIVCOPIES-1:0] qn,
|
||||
output logic [`DURLEN-1:0] EarlyTermShiftE,
|
||||
output logic DivSE,
|
||||
output logic DivDone,
|
||||
output logic NegSticky,
|
||||
output logic DivBusy
|
||||
);
|
||||
);
|
||||
|
||||
typedef enum logic [1:0] {IDLE, BUSY, DONE} statetype;
|
||||
statetype state;
|
||||
@ -64,14 +69,18 @@ module srtfsm(
|
||||
assign DivBusy = (state == BUSY);
|
||||
// calculate sticky bit
|
||||
// - there is a chance that a value is subtracted infinitely, resulting in an exact QM result
|
||||
// this is only a problem on radix 2 (and pssibly maximally redundant 4) since minimally redundant
|
||||
// this is only a problem on radix 2 (and possibly maximally redundant 4) since minimally redundant
|
||||
// radix-4 division can't create a QM that continually adds 0's
|
||||
if (`RADIX == 2) begin
|
||||
logic [`DIVb+3:0] FNext;
|
||||
assign FNext = SqrtM ? 0 : {3'b1,D,{`DIVb-`DIVN+2{1'b0}}};
|
||||
logic [`DIVb+3:0] FZero, FSticky;
|
||||
logic [`DIVb+2:0] LastK, FirstK;
|
||||
assign LastK = ({3'b111, LastC} & ~({3'b111, LastC} << 1));
|
||||
assign FirstK = ({3'b111, FirstC<<1} & ~({3'b111, FirstC<<1} << 1));
|
||||
assign FZero = SqrtM ? {LastSM[`DIVb], LastSM, 2'b0} | {LastK,1'b0} : {3'b1,D,{`DIVb-`DIVN+2{1'b0}}};
|
||||
assign FSticky = SqrtM ? {FirstSM[`DIVb], FirstSM, 2'b0} | {FirstK,1'b0} : {3'b1,D,{`DIVb-`DIVN+2{1'b0}}};
|
||||
// *** |... for continual -1 is not an efficient fix - also only needed for radix-2
|
||||
assign WZero = ((NextWSN^NextWCN)=={NextWSN[`DIVb+2:0]|NextWCN[`DIVb+2:0], 1'b0})|((NextWSN+NextWCN+FNext)==0);
|
||||
assign DivSE = |W&~((W+FNext)==0); //***not efficent fix ==
|
||||
assign WZero = ((NextWSN^NextWCN)=={NextWSN[`DIVb+2:0]|NextWCN[`DIVb+2:0], 1'b0})|(((NextWSN+NextWCN+FZero)==0)&qn[`DIVCOPIES-1]);
|
||||
assign DivSE = |W&~((W+FSticky)==0); //***not efficent fix == and need the & qn
|
||||
end else begin
|
||||
assign WZero = ((NextWSN^NextWCN)=={NextWSN[`DIVb+2:0]|NextWCN[`DIVb+2:0], 1'b0});
|
||||
assign DivSE = |W;
|
||||
|
@ -34,15 +34,15 @@ module simpleram #(parameter BASE=0, RANGE = 65535) (
|
||||
input logic clk,
|
||||
input logic [31:0] a,
|
||||
input logic we,
|
||||
input logic [`XLEN/8-1:0] ByteMask,
|
||||
input logic [`XLEN-1:0] wd,
|
||||
output logic [`XLEN-1:0] rd
|
||||
input logic [`LLEN/8-1:0] ByteMask,
|
||||
input logic [`LLEN-1:0] wd,
|
||||
output logic [`LLEN-1:0] rd
|
||||
);
|
||||
|
||||
localparam ADDR_WDITH = $clog2(RANGE/8);
|
||||
localparam OFFSET = $clog2(`XLEN/8);
|
||||
localparam OFFSET = $clog2(`LLEN/8);
|
||||
|
||||
bram1p1rw #(`XLEN/8, 8, ADDR_WDITH)
|
||||
bram1p1rw #(`LLEN/8, 8, ADDR_WDITH)
|
||||
memory(.clk, .we, .bwe(ByteMask), .addr(a[ADDR_WDITH+OFFSET-1:OFFSET]), .dout(rd), .din(wd));
|
||||
endmodule
|
||||
|
||||
|
@ -64,8 +64,10 @@ module hazard(
|
||||
assign StallFCause = CSRWriteFencePendingDEM & ~(TrapM | RetM | BPPredWrongE);
|
||||
// stall in decode if instruction is a load/mul/csr dependent on previous
|
||||
assign StallDCause = (LoadStallD | StoreStallD | MDUStallD | CSRRdStallD | FPUStallD | FStallD) & ~(TrapM | RetM | BPPredWrongE);
|
||||
// assign StallECause = (DivBusyE | FDivBusyE) & ~(TrapM); // *** can we move to decode stage (KP?)
|
||||
assign StallECause = (DivBusyE) & ~(TrapM); // *** can we move to decode stage (KP?)
|
||||
// WFI terminates if any enabled interrupt is pending, even if global interrupts are disabled. It could also terminate with TW trap
|
||||
// assign StallMCause = (wfiM & (~TrapM & ~IntPendingM)); // | FDivBusyE;
|
||||
assign StallMCause = (wfiM & (~TrapM & ~IntPendingM)) | FDivBusyE;
|
||||
assign StallWCause = LSUStallM | IFUStallF;
|
||||
|
||||
|
@ -44,8 +44,6 @@ module datapath (
|
||||
input logic ALUResultSrcE,
|
||||
input logic JumpE,
|
||||
input logic BranchSignedE,
|
||||
input logic IllegalFPUInstrE,
|
||||
input logic [`XLEN-1:0] FWriteDataE,
|
||||
input logic [`XLEN-1:0] PCE,
|
||||
input logic [`XLEN-1:0] PCLinkE,
|
||||
output logic [1:0] FlagsE,
|
||||
@ -53,17 +51,16 @@ module datapath (
|
||||
output logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // *** these are the src outputs before the mux choosing between them and PCE to put in srcA/B
|
||||
// Memory stage signals
|
||||
input logic StallM, FlushM,
|
||||
input logic FWriteIntM,
|
||||
input logic FWriteIntM, FCvtIntW,
|
||||
input logic [`XLEN-1:0] FIntResM,
|
||||
output logic [`XLEN-1:0] SrcAM,
|
||||
output logic [`XLEN-1:0] WriteDataE,
|
||||
output logic [`XLEN-1:0] WriteDataM,
|
||||
// Writeback stage signals
|
||||
input logic StallW, FlushW,
|
||||
(* mark_debug = "true" *) input logic RegWriteW,
|
||||
input logic SquashSCW,
|
||||
input logic [2:0] ResultSrcW,
|
||||
input logic [`XLEN-1:0] FCvtIntResW,
|
||||
input logic [1:0] FResSelW,
|
||||
input logic [`XLEN-1:0] ReadDataW,
|
||||
// input logic [`XLEN-1:0] PCLinkW,
|
||||
input logic [`XLEN-1:0] CSRReadValW, MDUResultW,
|
||||
@ -88,8 +85,8 @@ module datapath (
|
||||
// Writeback stage signals
|
||||
logic [`XLEN-1:0] SCResultW;
|
||||
logic [`XLEN-1:0] ResultW;
|
||||
logic [`XLEN-1:0] IFResultW;
|
||||
|
||||
logic [`XLEN-1:0] IFResultW, IFCvtResultW;
|
||||
|
||||
// Decode stage
|
||||
assign Rs1D = InstrD[19:15];
|
||||
assign Rs2D = InstrD[24:20];
|
||||
@ -118,30 +115,21 @@ module datapath (
|
||||
flopenrc #(`XLEN) SrcAMReg(clk, reset, FlushM, ~StallM, SrcAE, SrcAM);
|
||||
flopenrc #(`XLEN) IEUResultMReg(clk, reset, FlushM, ~StallM, IEUResultE, IEUResultM);
|
||||
flopenrc #(5) RdMReg(clk, reset, FlushM, ~StallM, RdE, RdM);
|
||||
flopenrc #(`XLEN) WriteDataMReg(clk, reset, FlushM, ~StallM, ForwardedSrcBE, WriteDataM);
|
||||
|
||||
// Writeback stage pipeline register and logic
|
||||
flopenrc #(`XLEN) IFResultWReg(clk, reset, FlushW, ~StallW, IFResultM, IFResultW);
|
||||
flopenrc #(5) RdWReg(clk, reset, FlushW, ~StallW, RdM, RdW);
|
||||
|
||||
// *** simplify WriteDataE in this merge
|
||||
// floating point interactions: fcvt, fp stores
|
||||
if (`F_SUPPORTED&(`LLEN>`XLEN)) begin:fpmux
|
||||
logic [`XLEN-1:0] IFCvtResultW;
|
||||
// floating point inputs: FIntResM comes from fclass, fcmp, fmv; FCvtIntResW comes from fcvt
|
||||
if (`F_SUPPORTED) begin:fpmux
|
||||
mux2 #(`XLEN) resultmuxM(IEUResultM, FIntResM, FWriteIntM, IFResultM);
|
||||
assign WriteDataE = ForwardedSrcBE;
|
||||
mux2 #(`XLEN) cvtresultmuxW(IFResultW, FCvtIntResW, ~FResSelW[1]&FResSelW[0], IFCvtResultW);
|
||||
mux5 #(`XLEN) resultmuxW(IFCvtResultW, ReadDataW, CSRReadValW, MDUResultW, SCResultW, ResultSrcW, ResultW);
|
||||
end else if (`F_SUPPORTED) begin:fpmux
|
||||
logic [`XLEN-1:0] IFCvtResultW;
|
||||
mux2 #(`XLEN) resultmuxM(IEUResultM, FIntResM, FWriteIntM, IFResultM);
|
||||
mux2 #(`XLEN) writedatamux(ForwardedSrcBE, FWriteDataE, ~IllegalFPUInstrE, WriteDataE);
|
||||
mux2 #(`XLEN) cvtresultmuxW(IFResultW, FCvtIntResW, ~FResSelW[1]&FResSelW[0], IFCvtResultW);
|
||||
mux5 #(`XLEN) resultmuxW(IFCvtResultW, ReadDataW, CSRReadValW, MDUResultW, SCResultW, ResultSrcW, ResultW);
|
||||
mux2 #(`XLEN) cvtresultmuxW(IFResultW, FCvtIntResW, FCvtIntW, IFCvtResultW);
|
||||
end else begin:fpmux
|
||||
assign IFResultM = IEUResultM; assign WriteDataE = ForwardedSrcBE;
|
||||
mux5 #(`XLEN) resultmuxW(IFResultW, ReadDataW, CSRReadValW, MDUResultW, SCResultW, ResultSrcW, ResultW);
|
||||
assign IFResultM = IEUResultM; assign IFCvtResultW = IFResultW;
|
||||
end
|
||||
|
||||
mux5 #(`XLEN) resultmuxW(IFCvtResultW, ReadDataW, CSRReadValW, MDUResultW, SCResultW, ResultSrcW, ResultW);
|
||||
|
||||
// handle Store Conditional result if atomic extension supported
|
||||
if (`A_SUPPORTED) assign SCResultW = {{(`XLEN-1){1'b0}}, SquashSCW};
|
||||
else assign SCResultW = 0;
|
||||
|
@ -35,7 +35,7 @@ module forward(
|
||||
input logic [4:0] Rs1D, Rs2D, Rs1E, Rs2E, RdE, RdM, RdW,
|
||||
input logic MemReadE, MDUE, CSRReadE,
|
||||
input logic RegWriteM, RegWriteW,
|
||||
input logic FWriteIntE,
|
||||
input logic FCvtIntE,
|
||||
input logic SCE,
|
||||
// Forwarding controls
|
||||
output logic [1:0] ForwardAE, ForwardBE,
|
||||
@ -58,7 +58,7 @@ module forward(
|
||||
|
||||
// Stall on dependent operations that finish in Mem Stage and can't bypass in time
|
||||
assign MatchDE = (Rs1D == RdE) | (Rs2D == RdE); // Decode-stage instruction source depends on result from execute stage instruction
|
||||
assign FPUStallD = 0; // FWriteIntE & MatchDE; // FPU to Integer transfers have single-cycle latency
|
||||
assign FPUStallD = FCvtIntE & MatchDE; // FPU to Integer transfers have single-cycle latency except fcvt
|
||||
assign LoadStallD = (MemReadE|SCE) & MatchDE;
|
||||
assign MDUStallD = MDUE & MatchDE;
|
||||
assign CSRRdStallD = CSRReadE & MatchDE;
|
||||
|
@ -39,9 +39,7 @@ module ieu (
|
||||
// Execute Stage interface
|
||||
input logic [`XLEN-1:0] PCE,
|
||||
input logic [`XLEN-1:0] PCLinkE,
|
||||
input logic FWriteIntE,
|
||||
input logic IllegalFPUInstrE,
|
||||
input logic [`XLEN-1:0] FWriteDataE,
|
||||
input logic FWriteIntE, FCvtIntE, FCvtIntW,
|
||||
output logic [`XLEN-1:0] IEUAdrE,
|
||||
output logic MDUE, W64E,
|
||||
output logic [2:0] Funct3E,
|
||||
@ -51,7 +49,7 @@ module ieu (
|
||||
input logic SquashSCW, // from LSU
|
||||
output logic [1:0] MemRWM, // read/write control goes to LSU
|
||||
output logic [1:0] AtomicM, // atomic control goes to LSU
|
||||
output logic [`XLEN-1:0] WriteDataE, // Address and write data to LSU
|
||||
output logic [`XLEN-1:0] WriteDataM, // write data to LSU
|
||||
|
||||
output logic [2:0] Funct3M, // size and signedness to LSU
|
||||
output logic [`XLEN-1:0] SrcAM, // to privilege and fpu
|
||||
@ -61,7 +59,6 @@ module ieu (
|
||||
|
||||
// Writeback stage
|
||||
input logic [`XLEN-1:0] CSRReadValW, MDUResultW,
|
||||
input logic [1:0] FResSelW,
|
||||
input logic [`XLEN-1:0] FCvtIntResW,
|
||||
output logic [4:0] RdW,
|
||||
input logic [`XLEN-1:0] ReadDataW,
|
||||
@ -106,16 +103,16 @@ module ieu (
|
||||
|
||||
datapath dp(
|
||||
.clk, .reset, .ImmSrcD, .InstrD, .StallE, .FlushE, .ForwardAE, .ForwardBE,
|
||||
.ALUControlE, .Funct3E, .ALUSrcAE, .ALUSrcBE, .ALUResultSrcE, .JumpE, .BranchSignedE, .IllegalFPUInstrE,
|
||||
.FWriteDataE, .PCE, .PCLinkE, .FlagsE, .IEUAdrE, .ForwardedSrcAE, .ForwardedSrcBE,
|
||||
.StallM, .FlushM, .FWriteIntM, .FIntResM, .SrcAM, .WriteDataE, .FResSelW,
|
||||
.ALUControlE, .Funct3E, .ALUSrcAE, .ALUSrcBE, .ALUResultSrcE, .JumpE, .BranchSignedE,
|
||||
.PCE, .PCLinkE, .FlagsE, .IEUAdrE, .ForwardedSrcAE, .ForwardedSrcBE,
|
||||
.StallM, .FlushM, .FWriteIntM, .FIntResM, .SrcAM, .WriteDataM, .FCvtIntW,
|
||||
.StallW, .FlushW, .RegWriteW, .SquashSCW, .ResultSrcW, .ReadDataW, .FCvtIntResW,
|
||||
.CSRReadValW, .MDUResultW, .Rs1D, .Rs2D, .Rs1E, .Rs2E, .RdE, .RdM, .RdW);
|
||||
|
||||
forward fw(
|
||||
.Rs1D, .Rs2D, .Rs1E, .Rs2E, .RdE, .RdM, .RdW,
|
||||
.MemReadE, .MDUE, .CSRReadE, .RegWriteM, .RegWriteW,
|
||||
.FWriteIntE, .SCE, .ForwardAE, .ForwardBE,
|
||||
.FCvtIntE, .SCE, .ForwardAE, .ForwardBE,
|
||||
.FPUStallD, .LoadStallD, .MDUStallD, .CSRRdStallD);
|
||||
endmodule
|
||||
|
||||
|
@ -187,7 +187,7 @@ module ifu (
|
||||
|
||||
if (`IMEM == `MEM_TIM) begin : irom // *** fix up dtim taking PA_BITS rather than XLEN, *** IEUAdr is a bad name. Probably use a ROM rather than DTIM
|
||||
dtim irom(.clk, .reset, .CPUBusy, .LSURWM(2'b10), .IEUAdrM({{(`XLEN-32){1'b0}}, PCPF[31:0]}), .IEUAdrE(PCNextFSpill),
|
||||
.TrapM(1'b0), .FinalWriteDataM(), .ByteMaskM('0),
|
||||
.TrapM(1'b0), .WriteDataM(), .ByteMaskM('0),
|
||||
.ReadDataWordM({{(`XLEN-32){1'b0}}, FinalInstrRawF}), .BusStall, .LSUBusWrite(), .LSUBusRead(IFUBusRead),
|
||||
.BusCommittedM(), .DCacheStallM(ICacheStallF), .Cacheable(CacheableF),
|
||||
.DCacheCommittedM(), .DCacheMiss(ICacheMiss), .DCacheAccess(ICacheAccess));
|
||||
|
@ -34,23 +34,23 @@ module atomic (
|
||||
input logic clk,
|
||||
input logic reset, StallW,
|
||||
input logic [`XLEN-1:0] ReadDataM,
|
||||
input logic [`XLEN-1:0] LSUWriteDataM,
|
||||
input logic [`XLEN-1:0] IMWriteDataM,
|
||||
input logic [`PA_BITS-1:0] LSUPAdrM,
|
||||
input logic [6:0] LSUFunct7M,
|
||||
input logic [2:0] LSUFunct3M,
|
||||
input logic [1:0] LSUAtomicM,
|
||||
input logic [1:0] PreLSURWM,
|
||||
input logic IgnoreRequest,
|
||||
output logic [`XLEN-1:0] AMOWriteDataM,
|
||||
output logic [`XLEN-1:0] IMAWriteDataM,
|
||||
output logic SquashSCW,
|
||||
output logic [1:0] LSURWM);
|
||||
|
||||
logic [`XLEN-1:0] AMOResult;
|
||||
logic MemReadM;
|
||||
|
||||
amoalu amoalu(.srca(ReadDataM), .srcb(LSUWriteDataM), .funct(LSUFunct7M), .width(LSUFunct3M[1:0]),
|
||||
amoalu amoalu(.srca(ReadDataM), .srcb(IMWriteDataM), .funct(LSUFunct7M), .width(LSUFunct3M[1:0]),
|
||||
.result(AMOResult));
|
||||
mux2 #(`XLEN) wdmux(LSUWriteDataM, AMOResult, LSUAtomicM[1], AMOWriteDataM);
|
||||
mux2 #(`XLEN) wdmux(IMWriteDataM, AMOResult, LSUAtomicM[1], IMAWriteDataM);
|
||||
assign MemReadM = PreLSURWM[1] & ~IgnoreRequest;
|
||||
lrsc lrsc(.clk, .reset, .StallW, .MemReadM, .PreLSURWM, .LSUAtomicM, .LSUPAdrM,
|
||||
.SquashSCW, .LSURWM);
|
||||
|
@ -71,14 +71,14 @@ module busdp #(parameter WORDSPERLINE, LINELEN, LOGWPL, CACHE_ENABLED)
|
||||
localparam integer WordCountThreshold = CACHE_ENABLED ? WORDSPERLINE - 1 : 0;
|
||||
logic [`PA_BITS-1:0] LocalLSUBusAdr;
|
||||
logic [LOGWPL-1:0] WordCountDelayed;
|
||||
|
||||
logic BufferCaptureEn;
|
||||
|
||||
// *** implement flops as an array if feasible; DLSUBusBuffer might be a problem
|
||||
// *** better name than DLSUBusBuffer
|
||||
genvar index;
|
||||
for (index = 0; index < WORDSPERLINE; index++) begin:fetchbuffer
|
||||
logic [WORDSPERLINE-1:0] CaptureWord;
|
||||
assign CaptureWord[index] = LSUBusAck & LSUBusRead & (index == WordCountDelayed);
|
||||
assign CaptureWord[index] = BufferCaptureEn & (index == WordCountDelayed);
|
||||
flopen #(`XLEN) fb(.clk, .en(CaptureWord[index]), .d(LSUBusHRDATA),
|
||||
.q(DLSUBusBuffer[(index+1)*`XLEN-1:index*`XLEN]));
|
||||
end
|
||||
@ -90,5 +90,6 @@ module busdp #(parameter WORDSPERLINE, LINELEN, LOGWPL, CACHE_ENABLED)
|
||||
busfsm #(WordCountThreshold, LOGWPL, CACHE_ENABLED) busfsm(
|
||||
.clk, .reset, .IgnoreRequest, .LSURWM, .DCacheFetchLine, .DCacheWriteLine,
|
||||
.LSUBusAck, .LSUBusInit, .CPUBusy, .CacheableM, .BusStall, .LSUBusWrite, .SelLSUBusWord, .LSUBusRead,
|
||||
.BufferCaptureEn,
|
||||
.LSUBurstType, .LSUTransType, .LSUTransComplete, .DCacheBusAck, .BusCommittedM, .SelUncachedAdr, .WordCount, .WordCountDelayed);
|
||||
endmodule
|
||||
|
@ -55,6 +55,7 @@ module busfsm #(parameter integer WordCountThreshold,
|
||||
output logic DCacheBusAck,
|
||||
output logic BusCommittedM,
|
||||
output logic SelUncachedAdr,
|
||||
output logic BufferCaptureEn,
|
||||
output logic [LOGWPL-1:0] WordCount, WordCountDelayed);
|
||||
|
||||
|
||||
@ -78,6 +79,8 @@ module busfsm #(parameter integer WordCountThreshold,
|
||||
STATE_BUS_UNCACHED_READ_DONE,
|
||||
STATE_BUS_CPU_BUSY} busstatetype;
|
||||
|
||||
typedef enum logic [1:0] {AHB_IDLE = 2'b00, AHB_BUSY = 2'b01, AHB_NONSEQ = 2'b10, AHB_SEQ = 2'b11} ahbtranstype;
|
||||
|
||||
(* mark_debug = "true" *) busstatetype BusCurrState, BusNextState;
|
||||
|
||||
// Used to send address for address stage of AHB.
|
||||
@ -154,7 +157,7 @@ module busfsm #(parameter integer WordCountThreshold,
|
||||
assign LSUBurstType = (UnCachedRW) ? 3'b0 : LocalBurstType; // Don't want to use burst when doing an Uncached Access.
|
||||
assign LSUTransComplete = (UnCachedRW) ? LSUBusAck : WordCountFlag & LSUBusAck;
|
||||
// Use SEQ if not doing first word, NONSEQ if doing the first read/write, and IDLE if finishing up.
|
||||
assign LSUTransType = (|WordCount) & ~UnCachedRW ? 2'b11 : (LSUBusRead | LSUBusWrite) & (~LSUTransComplete) ? 2'b10 : 2'b00;
|
||||
assign LSUTransType = (|WordCount) & ~UnCachedRW ? AHB_SEQ : (LSUBusRead | LSUBusWrite) & (~LSUTransComplete) ? AHB_NONSEQ : AHB_IDLE;
|
||||
// Reset if we aren't initiating a transaction or if we are finishing a transaction.
|
||||
assign CntReset = BusCurrState == STATE_BUS_READY & ~(DCacheFetchLine | DCacheWriteLine) | LSUTransComplete;
|
||||
|
||||
@ -165,15 +168,15 @@ module busfsm #(parameter integer WordCountThreshold,
|
||||
(BusCurrState == STATE_BUS_WRITE);
|
||||
assign UnCachedLSUBusWrite = (BusCurrState == STATE_BUS_READY & UnCachedAccess & LSURWM[0] & ~IgnoreRequest) |
|
||||
(BusCurrState == STATE_BUS_UNCACHED_WRITE);
|
||||
assign LSUBusWrite = UnCachedLSUBusWrite | (BusCurrState == STATE_BUS_WRITE);
|
||||
assign LSUBusWrite = UnCachedLSUBusWrite | (BusCurrState == STATE_BUS_WRITE & ~WordCountFlag);
|
||||
assign SelLSUBusWord = (BusCurrState == STATE_BUS_READY & UnCachedAccess & LSURWM[0]) |
|
||||
(BusCurrState == STATE_BUS_UNCACHED_WRITE) |
|
||||
(BusCurrState == STATE_BUS_WRITE);
|
||||
|
||||
assign UnCachedLSUBusRead = (BusCurrState == STATE_BUS_READY & UnCachedAccess & LSURWM[1] & ~IgnoreRequest) |
|
||||
(BusCurrState == STATE_BUS_UNCACHED_READ);
|
||||
assign LSUBusRead = UnCachedLSUBusRead | (BusCurrState == STATE_BUS_FETCH) | (BusCurrState == STATE_BUS_READY & DCacheFetchLine);
|
||||
|
||||
assign LSUBusRead = UnCachedLSUBusRead | (BusCurrState == STATE_BUS_FETCH & ~(WordCountFlag)) | (BusCurrState == STATE_BUS_READY & DCacheFetchLine);
|
||||
assign BufferCaptureEn = UnCachedLSUBusRead | BusCurrState == STATE_BUS_FETCH;
|
||||
|
||||
// Makes bus only do uncached reads/writes when we actually do uncached reads/writes. Needed because CacheableM is 0 when flushing cache.
|
||||
assign UnCachedRW = UnCachedLSUBusWrite | UnCachedLSUBusRead;
|
||||
|
@ -36,10 +36,10 @@ module dtim(
|
||||
input logic [`XLEN-1:0] IEUAdrM,
|
||||
input logic [`XLEN-1:0] IEUAdrE,
|
||||
input logic TrapM,
|
||||
input logic [`XLEN-1:0] FinalWriteDataM,
|
||||
input logic [`XLEN/8-1:0] ByteMaskM,
|
||||
input logic [`LLEN-1:0] WriteDataM,
|
||||
input logic [`LLEN/8-1:0] ByteMaskM,
|
||||
input logic Cacheable,
|
||||
output logic [`XLEN-1:0] ReadDataWordM,
|
||||
output logic [`LLEN-1:0] ReadDataWordM,
|
||||
output logic BusStall,
|
||||
output logic LSUBusWrite,
|
||||
output logic LSUBusRead,
|
||||
@ -53,7 +53,7 @@ module dtim(
|
||||
.clk, .ByteMask(ByteMaskM),
|
||||
.a(CPUBusy | LSURWM[0] | reset ? IEUAdrM[31:0] : IEUAdrE[31:0]), // move mux out; this shouldn't be needed when stails are handled differently ***
|
||||
.we(LSURWM[0] & Cacheable & ~TrapM), // have to ignore write if Trap.
|
||||
.wd(FinalWriteDataM), .rd(ReadDataWordM));
|
||||
.wd(WriteDataM), .rd(ReadDataWordM));
|
||||
|
||||
// since we have a local memory the bus connections are all disabled.
|
||||
// There are no peripherals supported.
|
||||
|
@ -50,7 +50,7 @@ module lsu (
|
||||
// address and write data
|
||||
input logic [`XLEN-1:0] IEUAdrE,
|
||||
(* mark_debug = "true" *)output logic [`XLEN-1:0] IEUAdrM,
|
||||
input logic [`XLEN-1:0] WriteDataE,
|
||||
(* mark_debug = "true" *)input logic [`XLEN-1:0] WriteDataM,
|
||||
output logic [`LLEN-1:0] ReadDataW,
|
||||
// cpu privilege
|
||||
input logic [1:0] PrivilegeModeW,
|
||||
@ -58,7 +58,6 @@ module lsu (
|
||||
input logic sfencevmaM,
|
||||
// fpu
|
||||
input logic [`FLEN-1:0] FWriteDataM,
|
||||
input logic FStore2,
|
||||
input logic FpLoadStoreM,
|
||||
// faults
|
||||
output logic LoadPageFaultM, StoreAmoPageFaultM,
|
||||
@ -111,15 +110,14 @@ module lsu (
|
||||
logic BusCommittedM, DCacheCommittedM;
|
||||
logic SelLSUBusWord;
|
||||
logic DataDAPageFaultM;
|
||||
logic [`XLEN-1:0] LSUWriteDataM;
|
||||
logic [`XLEN-1:0] WriteDataM;
|
||||
logic [`XLEN-1:0] IMWriteDataM, IMAWriteDataM;
|
||||
logic [`LLEN-1:0] IMAFWriteDataM;
|
||||
logic [`LLEN-1:0] ReadDataM;
|
||||
logic [(`LLEN-1)/8:0] ByteMaskM, FinalByteMaskM;
|
||||
logic [(`LLEN-1)/8:0] ByteMaskM;
|
||||
|
||||
// *** TO DO: Burst mode
|
||||
|
||||
flopenrc #(`XLEN) AddressMReg(clk, reset, FlushM, ~StallM, IEUAdrE, IEUAdrM);
|
||||
flopenrc #(`XLEN) WriteDataMReg(clk, reset, FlushM, ~StallM, WriteDataE, WriteDataM);
|
||||
assign IEUAdrExtM = {2'b00, IEUAdrM};
|
||||
assign LSUStallM = DCacheStallM | InterlockStall | BusStall;
|
||||
|
||||
@ -134,7 +132,7 @@ module lsu (
|
||||
.TrapM, .DCacheStallM, .SATP_REGW, .PCF,
|
||||
.STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP, .PrivilegeModeW,
|
||||
.ReadDataM(ReadDataM[`XLEN-1:0]), .WriteDataM, .Funct3M, .LSUFunct3M, .Funct7M, .LSUFunct7M,
|
||||
.IEUAdrExtM, .PTE, .LSUWriteDataM, .PageType, .PreLSURWM, .LSUAtomicM, .IEUAdrE,
|
||||
.IEUAdrExtM, .PTE, .IMWriteDataM, .PageType, .PreLSURWM, .LSUAtomicM, .IEUAdrE,
|
||||
.LSUAdrE, .PreLSUPAdrM, .CPUBusy, .InterlockStall, .SelHPTW,
|
||||
.IgnoreRequestTLB);
|
||||
end else begin
|
||||
@ -143,7 +141,7 @@ module lsu (
|
||||
assign LSUAdrE = IEUAdrE[11:0];
|
||||
assign PreLSUPAdrM = IEUAdrExtM;
|
||||
assign LSUFunct3M = Funct3M; assign LSUFunct7M = Funct7M; assign LSUAtomicM = AtomicM;
|
||||
assign LSUWriteDataM = WriteDataM;
|
||||
assign IMWriteDataM = WriteDataM;
|
||||
end
|
||||
|
||||
// CommittedM tells the CPU's privilege unit the current instruction
|
||||
@ -191,18 +189,20 @@ module lsu (
|
||||
// Memory System
|
||||
// Either Data Cache or Data Tightly Integrated Memory or just bus interface
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////
|
||||
logic [`XLEN-1:0] AMOWriteDataM, IEUWriteDataM, LittleEndianWriteDataM;
|
||||
logic [`LLEN-1:0] FinalWriteDataM;
|
||||
logic [`LLEN-1:0] LSUWriteDataM, LittleEndianWriteDataM;
|
||||
logic [`LLEN-1:0] ReadDataWordM, LittleEndianReadDataWordM;
|
||||
logic [`LLEN-1:0] ReadDataWordMuxM;
|
||||
logic IgnoreRequest;
|
||||
logic SelUncachedAdr;
|
||||
assign IgnoreRequest = IgnoreRequestTLB | TrapM;
|
||||
|
||||
// The LSU allows both a DTIM and bus with cache. However, the PMA decoding presently
|
||||
// uses the same RAM_BASE address for both the DTIM and any RAM in the Uncore.
|
||||
|
||||
if (`DMEM == `MEM_TIM) begin : dtim
|
||||
// *** directly instantiate RAM or ROM here. Instantiate SRAM1P1RW.
|
||||
// Merge SimpleRAM and SRAM1p1rw into one that is good for synthesis and RAM libraries and flops
|
||||
dtim dtim(.clk, .reset, .CPUBusy, .LSURWM, .IEUAdrM, .IEUAdrE, .TrapM, .FinalWriteDataM(IEUWriteDataM), //*** fix the dtim FinalWriteData
|
||||
dtim dtim(.clk, .reset, .CPUBusy, .LSURWM, .IEUAdrM, .IEUAdrE, .TrapM, .WriteDataM(LSUWriteDataM), //*** fix the dtim FinalWriteData
|
||||
.ReadDataWordM(ReadDataWordM[`XLEN-1:0]), .BusStall, .LSUBusWrite,.LSUBusRead, .BusCommittedM,
|
||||
.DCacheStallM, .DCacheCommittedM, .ByteMaskM(ByteMaskM[`XLEN/8-1:0]), .Cacheable(CacheableM),
|
||||
.DCacheMiss, .DCacheAccess);
|
||||
@ -230,20 +230,15 @@ module lsu (
|
||||
|
||||
mux2 #(`LLEN) UnCachedDataMux(.d0(LittleEndianReadDataWordM), .d1({{`LLEN-`XLEN{1'b0}}, DLSUBusBuffer[`XLEN-1:0]}),
|
||||
.s(SelUncachedAdr), .y(ReadDataWordMuxM));
|
||||
mux2 #(`XLEN) LsuBushwdataMux(.d0(ReadDataWordM[`XLEN-1:0]), .d1(IEUWriteDataM),
|
||||
mux2 #(`XLEN) LsuBushwdataMux(.d0(ReadDataWordM[`XLEN-1:0]), .d1(LSUWriteDataM[`XLEN-1:0]),
|
||||
.s(SelUncachedAdr), .y(LSUBusHWDATA));
|
||||
|
||||
if(CACHE_ENABLED) begin : dcache
|
||||
if (`LLEN>`XLEN)
|
||||
mux2 #(`LLEN) datamux({IEUWriteDataM, IEUWriteDataM}, FWriteDataM, FpLoadStoreM, FinalWriteDataM);
|
||||
else
|
||||
assign FinalWriteDataM = {{`LLEN-`XLEN{1'b0}}, IEUWriteDataM};
|
||||
cache #(.LINELEN(`DCACHE_LINELENINBITS), .NUMLINES(`DCACHE_WAYSIZEINBYTES*8/LINELEN),
|
||||
.NUMWAYS(`DCACHE_NUMWAYS), .LOGBWPL(LOGBWPL), .WORDLEN(`LLEN), .MUXINTERVAL(`XLEN), .DCACHE(1)) dcache(
|
||||
.clk, .reset, .CPUBusy, .SelLSUBusWord, .RW(LSURWM), .Atomic(LSUAtomicM),
|
||||
.FlushCache(FlushDCacheM), .NextAdr(LSUAdrE), .PAdr(LSUPAdrM),
|
||||
.ByteMask(FinalByteMaskM), .WordCount,
|
||||
.FinalWriteData(FinalWriteDataM), .Cacheable(CacheableM),
|
||||
.ByteMask(ByteMaskM), .WordCount,
|
||||
.FinalWriteData(LSUWriteDataM), .Cacheable(CacheableM),
|
||||
.CacheStall(DCacheStallM), .CacheMiss(DCacheMiss), .CacheAccess(DCacheAccess),
|
||||
.IgnoreRequestTLB, .TrapM, .CacheCommitted(DCacheCommittedM),
|
||||
.CacheBusAdr(DCacheBusAdr), .ReadDataWord(ReadDataWordM),
|
||||
@ -263,26 +258,27 @@ module lsu (
|
||||
// Atomic operations
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////
|
||||
if (`A_SUPPORTED) begin:atomic
|
||||
atomic atomic(.clk, .reset, .StallW, .ReadDataM(ReadDataM[`XLEN-1:0]), .LSUWriteDataM, .LSUPAdrM,
|
||||
atomic atomic(.clk, .reset, .StallW, .ReadDataM(ReadDataM[`XLEN-1:0]), .IMWriteDataM, .LSUPAdrM,
|
||||
.LSUFunct7M, .LSUFunct3M, .LSUAtomicM, .PreLSURWM, .IgnoreRequest,
|
||||
.AMOWriteDataM, .SquashSCW, .LSURWM);
|
||||
.IMAWriteDataM, .SquashSCW, .LSURWM);
|
||||
end else begin:lrsc
|
||||
assign SquashSCW = 0; assign LSURWM = PreLSURWM; assign AMOWriteDataM = LSUWriteDataM;
|
||||
assign SquashSCW = 0; assign LSURWM = PreLSURWM; assign IMAWriteDataM = IMWriteDataM;
|
||||
end
|
||||
|
||||
if (`F_SUPPORTED)
|
||||
mux2 #(`LLEN) datamux({{{`LLEN-`XLEN}{1'b0}}, IMAWriteDataM}, FWriteDataM, FpLoadStoreM, IMAFWriteDataM);
|
||||
else assign IMAFWriteDataM = IMAWriteDataM;
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Subword Accesses
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////
|
||||
subwordread subwordread(.ReadDataWordMuxM, .LSUPAdrM(LSUPAdrM[2:0]),
|
||||
.FpLoadStoreM, .Funct3M(LSUFunct3M), .ReadDataM);
|
||||
subwordwrite subwordwrite(.LSUPAdrM(LSUPAdrM[2:0]),
|
||||
.LSUFunct3M, .AMOWriteDataM, .LittleEndianWriteDataM);
|
||||
.LSUFunct3M, .IMAFWriteDataM, .LittleEndianWriteDataM);
|
||||
|
||||
// Compute byte masks
|
||||
swbytemaskword #(`LLEN) swbytemask(.Size(LSUFunct3M), .Adr(LSUPAdrM[$clog2(`LLEN/8)-1:0]), .ByteMask(ByteMaskM));
|
||||
// *** fix when when fstore2 is valid. I'm not sure this is even needed if LSUFunct3M can be 3'b100 for a 16 byte write.
|
||||
//assign FinalByteMaskM = FStore2 ? '1 : ByteMaskM;
|
||||
assign FinalByteMaskM = ByteMaskM;
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// MW Pipeline Register
|
||||
@ -296,10 +292,10 @@ module lsu (
|
||||
// swap the bytes when read from big-endian memory
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////
|
||||
if (`BIGENDIAN_SUPPORTED) begin:endian
|
||||
bigendianswap #(`XLEN) storeswap(.BigEndianM, .a(LittleEndianWriteDataM), .y(IEUWriteDataM));
|
||||
bigendianswap #(`LLEN) storeswap(.BigEndianM, .a(LittleEndianWriteDataM), .y(LSUWriteDataM));
|
||||
bigendianswap #(`LLEN) loadswap(.BigEndianM, .a(ReadDataWordM), .y(LittleEndianReadDataWordM));
|
||||
end else begin
|
||||
assign IEUWriteDataM = LittleEndianWriteDataM;
|
||||
assign LSUWriteDataM = LittleEndianWriteDataM;
|
||||
assign LittleEndianReadDataWordM = ReadDataWordM;
|
||||
end
|
||||
|
||||
|
@ -54,7 +54,7 @@ module lsuvirtmem(
|
||||
output logic [6:0] LSUFunct7M,
|
||||
input logic [`XLEN-1:0] IEUAdrE,
|
||||
output logic [`XLEN-1:0] PTE,
|
||||
output logic [`XLEN-1:0] LSUWriteDataM,
|
||||
output logic [`XLEN-1:0] IMWriteDataM,
|
||||
output logic [1:0] PageType,
|
||||
output logic [1:0] PreLSURWM,
|
||||
output logic [1:0] LSUAtomicM,
|
||||
@ -112,8 +112,8 @@ module lsuvirtmem(
|
||||
mux2 #(12) adremux(IEUAdrE[11:0], HPTWAdr[11:0], SelHPTW, PreLSUAdrE);
|
||||
mux2 #(`XLEN+2) lsupadrmux(IEUAdrExtM, HPTWAdrExt, SelHPTWAdr, PreLSUPAdrM);
|
||||
if(`HPTW_WRITES_SUPPORTED)
|
||||
mux2 #(`XLEN) lsuwritedatamux(WriteDataM, PTE, SelHPTW, LSUWriteDataM);
|
||||
else assign LSUWriteDataM = WriteDataM;
|
||||
mux2 #(`XLEN) lsuwritedatamux(WriteDataM, PTE, SelHPTW, IMWriteDataM);
|
||||
else assign IMWriteDataM = WriteDataM;
|
||||
mux2 #(12) replaymux(PreLSUAdrE, IEUAdrExtM[11:0], SelReplayMemE, LSUAdrE); // replay cpu request after hptw. *** redudant with mux in cache.
|
||||
|
||||
// always block interrupts when using the hardware page table walker.
|
||||
|
@ -33,25 +33,34 @@
|
||||
module subwordwrite (
|
||||
input logic [2:0] LSUPAdrM,
|
||||
input logic [2:0] LSUFunct3M,
|
||||
input logic [`XLEN-1:0] AMOWriteDataM,
|
||||
output logic [`XLEN-1:0] LittleEndianWriteDataM);
|
||||
input logic [`LLEN-1:0] IMAFWriteDataM,
|
||||
output logic [`LLEN-1:0] LittleEndianWriteDataM);
|
||||
|
||||
// Replicate data for subword writes
|
||||
if (`XLEN == 64) begin:sww
|
||||
if (`LLEN == 128) begin:sww
|
||||
always_comb
|
||||
case(LSUFunct3M[2:0])
|
||||
3'b000: LittleEndianWriteDataM = {16{IMAFWriteDataM[7:0]}}; // sb
|
||||
3'b001: LittleEndianWriteDataM = {8{IMAFWriteDataM[15:0]}}; // sh
|
||||
3'b010: LittleEndianWriteDataM = {4{IMAFWriteDataM[31:0]}}; // sw
|
||||
3'b011: LittleEndianWriteDataM = {2{IMAFWriteDataM[63:0]}}; // sd
|
||||
default: LittleEndianWriteDataM = IMAFWriteDataM; // sq
|
||||
endcase
|
||||
end else if (`LLEN == 64) begin:sww
|
||||
always_comb
|
||||
case(LSUFunct3M[1:0])
|
||||
2'b00: LittleEndianWriteDataM = {8{AMOWriteDataM[7:0]}}; // sb
|
||||
2'b01: LittleEndianWriteDataM = {4{AMOWriteDataM[15:0]}}; // sh
|
||||
2'b10: LittleEndianWriteDataM = {2{AMOWriteDataM[31:0]}}; // sw
|
||||
2'b11: LittleEndianWriteDataM = AMOWriteDataM; // sw
|
||||
2'b00: LittleEndianWriteDataM = {8{IMAFWriteDataM[7:0]}}; // sb
|
||||
2'b01: LittleEndianWriteDataM = {4{IMAFWriteDataM[15:0]}}; // sh
|
||||
2'b10: LittleEndianWriteDataM = {2{IMAFWriteDataM[31:0]}}; // sw
|
||||
2'b11: LittleEndianWriteDataM = IMAFWriteDataM; // sd
|
||||
endcase
|
||||
end else begin:sww // 32-bit
|
||||
always_comb
|
||||
case(LSUFunct3M[1:0])
|
||||
2'b00: LittleEndianWriteDataM = {4{AMOWriteDataM[7:0]}}; // sb
|
||||
2'b01: LittleEndianWriteDataM = {2{AMOWriteDataM[15:0]}}; // sh
|
||||
2'b10: LittleEndianWriteDataM = AMOWriteDataM; // sw
|
||||
default: LittleEndianWriteDataM = AMOWriteDataM; // shouldn't happen
|
||||
2'b00: LittleEndianWriteDataM = {4{IMAFWriteDataM[7:0]}}; // sb
|
||||
2'b01: LittleEndianWriteDataM = {2{IMAFWriteDataM[15:0]}}; // sh
|
||||
2'b10: LittleEndianWriteDataM = IMAFWriteDataM; // sw
|
||||
default: LittleEndianWriteDataM = IMAFWriteDataM; // shouldn't happen
|
||||
endcase
|
||||
end
|
||||
endmodule
|
||||
|
@ -38,17 +38,17 @@ module adrdecs (
|
||||
output logic [8:0] SelRegions
|
||||
);
|
||||
|
||||
localparam logic [3:0] SUPPORTED_SIZE = (`LLEN == 32 ? 4'b0111 : 4'b1111);
|
||||
// Determine which region of physical memory (if any) is being accessed
|
||||
// *** eventually uncomment Access signals
|
||||
adrdec ddr4dec(PhysicalAddress, `EXT_MEM_BASE, `EXT_MEM_RANGE, `EXT_MEM_SUPPORTED, AccessRWX, Size, 4'b1111, SelRegions[7]);
|
||||
adrdec boottimdec(PhysicalAddress, `BOOTROM_BASE, `BOOTROM_RANGE, `BOOTROM_SUPPORTED, /*1'b1*/AccessRX, Size, 4'b1111, SelRegions[6]);
|
||||
adrdec timdec(PhysicalAddress, `RAM_BASE, `RAM_RANGE, `RAM_SUPPORTED, /*1'b1*/AccessRWX, Size, 4'b1111, SelRegions[5]);
|
||||
adrdec ddr4dec(PhysicalAddress, `EXT_MEM_BASE, `EXT_MEM_RANGE, `EXT_MEM_SUPPORTED, AccessRWX, Size, SUPPORTED_SIZE, SelRegions[7]);
|
||||
adrdec boottimdec(PhysicalAddress, `BOOTROM_BASE, `BOOTROM_RANGE, `BOOTROM_SUPPORTED, AccessRX, Size, SUPPORTED_SIZE, SelRegions[6]);
|
||||
adrdec timdec(PhysicalAddress, `RAM_BASE, `RAM_RANGE, `RAM_SUPPORTED, AccessRWX, Size, SUPPORTED_SIZE, SelRegions[5]);
|
||||
|
||||
adrdec clintdec(PhysicalAddress, `CLINT_BASE, `CLINT_RANGE, `CLINT_SUPPORTED, AccessRW, Size, 4'b1111, SelRegions[4]);
|
||||
adrdec clintdec(PhysicalAddress, `CLINT_BASE, `CLINT_RANGE, `CLINT_SUPPORTED, AccessRW, Size, SUPPORTED_SIZE, SelRegions[4]);
|
||||
adrdec gpiodec(PhysicalAddress, `GPIO_BASE, `GPIO_RANGE, `GPIO_SUPPORTED, AccessRW, Size, 4'b0100, SelRegions[3]);
|
||||
adrdec uartdec(PhysicalAddress, `UART_BASE, `UART_RANGE, `UART_SUPPORTED, AccessRW, Size, 4'b0001, SelRegions[2]);
|
||||
adrdec plicdec(PhysicalAddress, `PLIC_BASE, `PLIC_RANGE, `PLIC_SUPPORTED, AccessRW, Size, 4'b0100, SelRegions[1]);
|
||||
adrdec sdcdec(PhysicalAddress, `SDC_BASE, `SDC_RANGE, `SDC_SUPPORTED, AccessRW, Size, 4'b1100, SelRegions[0]); // *** PMA chapter says xlen only like CLINT
|
||||
adrdec sdcdec(PhysicalAddress, `SDC_BASE, `SDC_RANGE, `SDC_SUPPORTED, AccessRW, Size, SUPPORTED_SIZE & 4'b1100, SelRegions[0]);
|
||||
|
||||
assign SelRegions[8] = ~|(SelRegions[7:0]);
|
||||
|
||||
|
@ -48,7 +48,7 @@ module hptw
|
||||
output logic [1:0] PageType, // page type to TLBs
|
||||
(* mark_debug = "true" *) output logic ITLBWriteF, DTLBWriteM, // write TLB with new entry
|
||||
output logic [`PA_BITS-1:0] HPTWAdr,
|
||||
output logic [1:0] HPTWRW, // HPTW requesting to read memory
|
||||
output logic [1:0] HPTWRW, // HPTW requesting to write or read memory
|
||||
output logic [2:0] HPTWSize // 32 or 64 bit access.
|
||||
);
|
||||
|
||||
@ -114,13 +114,15 @@ module hptw
|
||||
logic [`PA_BITS-1:0] HPTWWriteAdr;
|
||||
logic SetDirty;
|
||||
logic Dirty, Accessed;
|
||||
logic [`XLEN-1:0] AccessedPTE;
|
||||
|
||||
assign NextPTE = UpdatePTE ? {PTE[`XLEN-1:8], (SetDirty | PTE[7]), 1'b1, PTE[5:0]} : HPTWReadPTE;
|
||||
assign AccessedPTE = {PTE[`XLEN-1:8], (SetDirty | PTE[7]), 1'b1, PTE[5:0]}; // set accessed bit, conditionally set dirty bit
|
||||
mux2 #(`XLEN) NextPTEMux(HPTWReadPTE, AccessedPTE, UpdatePTE, NextPTE);
|
||||
flopenr #(`PA_BITS) HPTWAdrWriteReg(clk, reset, SaveHPTWAdr, HPTWReadAdr, HPTWWriteAdr);
|
||||
|
||||
assign SaveHPTWAdr = WalkerState == L0_ADR;
|
||||
assign SelHPTWWriteAdr = UpdatePTE | HPTWRW[0];
|
||||
mux2 #(`PA_BITS) HPTWWriteAdrMux(HPTWReadAdr, HPTWWriteAdr, SelHPTWWriteAdr, HPTWAdr);
|
||||
|
||||
|
||||
assign {Dirty, Accessed} = PTE[7:6];
|
||||
assign WriteAccess = MemRWM[0] | (|AtomicM);
|
||||
@ -255,9 +257,7 @@ module hptw
|
||||
else NextWalkerState = LEAF;
|
||||
LEAF: if (DAPageFault) NextWalkerState = UPDATE_PTE;
|
||||
else NextWalkerState = IDLE;
|
||||
// *** TODO update PTE with dirty/access. write to TLB and update memory.
|
||||
// probably want to write the PTE in UPDATE_PTE then go to leaf and update TLB.
|
||||
UPDATE_PTE: if(`HPTW_WRITES_SUPPORTED & DCacheStallM) NextWalkerState = UPDATE_PTE;
|
||||
UPDATE_PTE: if(`HPTW_WRITES_SUPPORTED & DCacheStallM) NextWalkerState = UPDATE_PTE;
|
||||
else NextWalkerState = LEAF;
|
||||
default: begin
|
||||
NextWalkerState = IDLE; // should never be reached
|
||||
|
@ -43,7 +43,7 @@ module privdec (
|
||||
output logic EcallFaultM, BreakpointFaultM,
|
||||
output logic sretM, mretM, wfiM, sfencevmaM);
|
||||
|
||||
logic IllegalPrivilegedInstrM, IllegalOrDisabledFPUInstrM;
|
||||
logic IllegalPrivilegedInstrM;
|
||||
logic WFITimeoutM;
|
||||
logic StallMQ;
|
||||
logic ebreakM, ecallM;
|
||||
@ -92,7 +92,6 @@ module privdec (
|
||||
// Fault on illegal instructions
|
||||
///////////////////////////////////////////
|
||||
assign IllegalPrivilegedInstrM = PrivilegedM & ~(sretM|mretM|ecallM|ebreakM|wfiM|sfencevmaM);
|
||||
assign IllegalOrDisabledFPUInstrM = IllegalFPUInstrM | (STATUS_FS == 2'b00);
|
||||
assign IllegalInstrFaultM = (IllegalIEUInstrFaultM & IllegalOrDisabledFPUInstrM) | IllegalPrivilegedInstrM | IllegalCSRAccessM |
|
||||
assign IllegalInstrFaultM = (IllegalIEUInstrFaultM & IllegalFPUInstrM) | IllegalPrivilegedInstrM | IllegalCSRAccessM |
|
||||
WFITimeoutM;
|
||||
endmodule
|
||||
|
@ -52,7 +52,7 @@ module privileged (
|
||||
input logic ICacheAccess,
|
||||
input logic PrivilegedM,
|
||||
input logic InstrPageFaultF, LoadPageFaultM, StoreAmoPageFaultM,
|
||||
input logic InstrMisalignedFaultM, IllegalIEUInstrFaultD, IllegalFPUInstrD,
|
||||
input logic InstrMisalignedFaultM, IllegalIEUInstrFaultD, IllegalFPUInstrM,
|
||||
input logic LoadMisalignedFaultM,
|
||||
input logic StoreAmoMisalignedFaultM,
|
||||
input logic MTimerInt, MExtInt, SExtInt, MSwInt,
|
||||
@ -69,7 +69,6 @@ module privileged (
|
||||
input logic StoreAmoAccessFaultM,
|
||||
input logic SelHPTW,
|
||||
|
||||
output logic IllegalFPUInstrE,
|
||||
output logic [1:0] PrivilegeModeW,
|
||||
output logic [`XLEN-1:0] SATP_REGW,
|
||||
output logic STATUS_MXR, STATUS_SUM, STATUS_MPRV,
|
||||
@ -88,7 +87,6 @@ module privileged (
|
||||
logic sretM, mretM;
|
||||
logic IllegalCSRAccessM;
|
||||
logic IllegalIEUInstrFaultM;
|
||||
logic IllegalFPUInstrM;
|
||||
logic InstrPageFaultM;
|
||||
logic InstrAccessFaultM;
|
||||
logic IllegalInstrFaultM;
|
||||
@ -148,9 +146,8 @@ module privileged (
|
||||
.IllegalCSRAccessM, .BigEndianM);
|
||||
|
||||
privpiperegs ppr(.clk, .reset, .StallD, .StallE, .StallM, .FlushD, .FlushE, .FlushM,
|
||||
.InstrPageFaultF, .InstrAccessFaultF, .IllegalIEUInstrFaultD, .IllegalFPUInstrD,
|
||||
.IllegalFPUInstrE,
|
||||
.InstrPageFaultM, .InstrAccessFaultM, .IllegalIEUInstrFaultM, .IllegalFPUInstrM);
|
||||
.InstrPageFaultF, .InstrAccessFaultF, .IllegalIEUInstrFaultD,
|
||||
.InstrPageFaultM, .InstrAccessFaultM, .IllegalIEUInstrFaultM);
|
||||
|
||||
trap trap(.reset,
|
||||
.InstrMisalignedFaultM, .InstrAccessFaultM, .IllegalInstrFaultM,
|
||||
|
@ -35,10 +35,9 @@ module privpiperegs (
|
||||
input logic StallD, StallE, StallM,
|
||||
input logic FlushD, FlushE, FlushM,
|
||||
input logic InstrPageFaultF, InstrAccessFaultF,
|
||||
input logic IllegalIEUInstrFaultD, IllegalFPUInstrD,
|
||||
output logic IllegalFPUInstrE,
|
||||
input logic IllegalIEUInstrFaultD,
|
||||
output logic InstrPageFaultM, InstrAccessFaultM,
|
||||
output logic IllegalIEUInstrFaultM, IllegalFPUInstrM
|
||||
output logic IllegalIEUInstrFaultM
|
||||
);
|
||||
|
||||
logic InstrPageFaultD, InstrAccessFaultD;
|
||||
@ -49,10 +48,10 @@ module privpiperegs (
|
||||
flopenrc #(2) faultregD(clk, reset, FlushD, ~StallD,
|
||||
{InstrPageFaultF, InstrAccessFaultF},
|
||||
{InstrPageFaultD, InstrAccessFaultD});
|
||||
flopenrc #(4) faultregE(clk, reset, FlushE, ~StallE,
|
||||
{IllegalIEUInstrFaultD, InstrPageFaultD, InstrAccessFaultD, IllegalFPUInstrD},
|
||||
{IllegalIEUInstrFaultE, InstrPageFaultE, InstrAccessFaultE, IllegalFPUInstrE});
|
||||
flopenrc #(4) faultregM(clk, reset, FlushM, ~StallM,
|
||||
{IllegalIEUInstrFaultE, InstrPageFaultE, InstrAccessFaultE, IllegalFPUInstrE},
|
||||
{IllegalIEUInstrFaultM, InstrPageFaultM, InstrAccessFaultM, IllegalFPUInstrM});
|
||||
flopenrc #(3) faultregE(clk, reset, FlushE, ~StallE,
|
||||
{IllegalIEUInstrFaultD, InstrPageFaultD, InstrAccessFaultD},
|
||||
{IllegalIEUInstrFaultE, InstrPageFaultE, InstrAccessFaultE});
|
||||
flopenrc #(3) faultregM(clk, reset, FlushM, ~StallM,
|
||||
{IllegalIEUInstrFaultE, InstrPageFaultE, InstrAccessFaultE},
|
||||
{IllegalIEUInstrFaultM, InstrPageFaultM, InstrAccessFaultM});
|
||||
endmodule
|
@ -92,13 +92,12 @@ module wallypipelinedcore (
|
||||
logic [4:0] RdM, RdW;
|
||||
logic FStallD;
|
||||
logic FWriteIntE;
|
||||
logic [`XLEN-1:0] FWriteDataE;
|
||||
logic FStore2;
|
||||
logic [`FLEN-1:0] FWriteDataM;
|
||||
logic [`XLEN-1:0] FIntResM;
|
||||
logic [`XLEN-1:0] FCvtIntResW;
|
||||
logic [`XLEN-1:0] FCvtIntResW;
|
||||
logic FCvtIntW;
|
||||
logic FDivBusyE;
|
||||
logic IllegalFPUInstrD, IllegalFPUInstrE;
|
||||
logic IllegalFPUInstrM;
|
||||
logic FRegWriteM;
|
||||
logic FPUStallD;
|
||||
logic FpLoadStoreM;
|
||||
@ -131,7 +130,7 @@ module wallypipelinedcore (
|
||||
// cpu lsu interface
|
||||
logic [2:0] Funct3M;
|
||||
logic [`XLEN-1:0] IEUAdrE;
|
||||
(* mark_debug = "true" *) logic [`XLEN-1:0] WriteDataE;
|
||||
(* mark_debug = "true" *) logic [`XLEN-1:0] WriteDataM;
|
||||
(* mark_debug = "true" *) logic [`XLEN-1:0] IEUAdrM;
|
||||
logic [`LLEN-1:0] ReadDataW;
|
||||
logic CommittedM;
|
||||
@ -172,6 +171,7 @@ module wallypipelinedcore (
|
||||
logic BreakpointFaultM, EcallFaultM;
|
||||
logic InstrDAPageFaultF;
|
||||
logic BigEndianM;
|
||||
logic FCvtIntE;
|
||||
|
||||
ifu ifu(
|
||||
.clk, .reset,
|
||||
@ -219,15 +219,15 @@ module wallypipelinedcore (
|
||||
.IllegalBaseInstrFaultD,
|
||||
|
||||
// Execute Stage interface
|
||||
.PCE, .PCLinkE, .FWriteIntE, .IllegalFPUInstrE,
|
||||
.FWriteDataE, .IEUAdrE, .MDUE, .W64E,
|
||||
.PCE, .PCLinkE, .FWriteIntE, .FCvtIntE,
|
||||
.IEUAdrE, .MDUE, .W64E,
|
||||
.Funct3E, .ForwardedSrcAE, .ForwardedSrcBE, // *** these are the src outputs before the mux choosing between them and PCE to put in srcA/B
|
||||
|
||||
// Memory stage interface
|
||||
.SquashSCW, // from LSU
|
||||
.MemRWM, // read/write control goes to LSU
|
||||
.AtomicM, // atomic control goes to LSU
|
||||
.WriteDataE, // Write data to LSU
|
||||
.WriteDataM, // Write data to LSU
|
||||
.Funct3M, // size and signedness to LSU
|
||||
.SrcAM, // to privilege and fpu
|
||||
.RdM, .FIntResM, .InvalidateICacheM, .FlushDCacheM,
|
||||
@ -237,7 +237,7 @@ module wallypipelinedcore (
|
||||
.RdW, .ReadDataW(ReadDataW[`XLEN-1:0]),
|
||||
.InstrValidM,
|
||||
.FCvtIntResW,
|
||||
.FResSelW,
|
||||
.FCvtIntW,
|
||||
|
||||
// hazards
|
||||
.StallD, .StallE, .StallM, .StallW,
|
||||
@ -258,9 +258,9 @@ module wallypipelinedcore (
|
||||
.CommittedM, .DCacheMiss, .DCacheAccess,
|
||||
.SquashSCW,
|
||||
.FpLoadStoreM,
|
||||
.FWriteDataM, .FStore2,
|
||||
.FWriteDataM,
|
||||
//.DataMisalignedM(DataMisalignedM),
|
||||
.IEUAdrE, .IEUAdrM, .WriteDataE,
|
||||
.IEUAdrE, .IEUAdrM, .WriteDataM,
|
||||
.ReadDataW, .FlushDCacheM,
|
||||
// connected to ahb (all stay the same)
|
||||
.LSUBusAdr, .LSUBusRead, .LSUBusWrite, .LSUBusAck, .LSUBusInit,
|
||||
@ -346,7 +346,7 @@ module wallypipelinedcore (
|
||||
.RASPredPCWrongM, .BPPredClassNonCFIWrongM,
|
||||
.InstrClassM, .DCacheMiss, .DCacheAccess, .ICacheMiss, .ICacheAccess, .PrivilegedM,
|
||||
.InstrPageFaultF, .LoadPageFaultM, .StoreAmoPageFaultM,
|
||||
.InstrMisalignedFaultM, .IllegalIEUInstrFaultD, .IllegalFPUInstrD,
|
||||
.InstrMisalignedFaultM, .IllegalIEUInstrFaultD,
|
||||
.LoadMisalignedFaultM, .StoreAmoMisalignedFaultM,
|
||||
.MTimerInt, .MExtInt, .SExtInt, .MSwInt,
|
||||
.MTIME_CLINT,
|
||||
@ -356,7 +356,7 @@ module wallypipelinedcore (
|
||||
// *** do these need to be split up into one for dmem and one for ifu?
|
||||
// instead, could we only care about the instr and F pins that come from ifu and only care about the load/store and m pins that come from dmem?
|
||||
.InstrAccessFaultF, .LoadAccessFaultM, .StoreAmoAccessFaultM, .SelHPTW,
|
||||
.IllegalFPUInstrE,
|
||||
.IllegalFPUInstrM,
|
||||
.PrivilegeModeW, .SATP_REGW,
|
||||
.STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP, .STATUS_FS,
|
||||
.PMPCFG_ARRAY_REGW, .PMPADDR_ARRAY_REGW,
|
||||
@ -397,25 +397,24 @@ module wallypipelinedcore (
|
||||
.STATUS_FS, // is floating-point enabled?
|
||||
.FRegWriteM, // FP register write enable
|
||||
.FpLoadStoreM,
|
||||
.FStore2,
|
||||
.FStallD, // Stall the decode stage
|
||||
.FWriteIntE, // integer register write enable
|
||||
.FWriteDataE, // Data to be written to memory
|
||||
.FStallD, // Stall the decode stage
|
||||
.FWriteIntE, .FCvtIntE, // integer register write enable, conversion operation
|
||||
.FWriteDataM, // Data to be written to memory
|
||||
.FIntResM, // data to be written to integer register
|
||||
.FCvtIntResW, // fp -> int conversion result to be stored in int register
|
||||
.FResSelW, // fpu result selection
|
||||
.FCvtIntW, // fpu result selection
|
||||
.FDivBusyE, // Is the divide/sqrt unit busy (stall execute stage)
|
||||
.IllegalFPUInstrD, // Is the instruction an illegal fpu instruction
|
||||
.IllegalFPUInstrM, // Is the instruction an illegal fpu instruction
|
||||
.SetFflagsM // FPU flags (to privileged unit)
|
||||
); // floating point unit
|
||||
end else begin // no F_SUPPORTED or D_SUPPORTED; tie outputs low
|
||||
assign FStallD = 0;
|
||||
assign FWriteIntE = 0;
|
||||
assign FWriteDataE = 0;
|
||||
assign FCvtIntE = 0;
|
||||
assign FIntResM = 0;
|
||||
assign FCvtIntW = 0;
|
||||
assign FDivBusyE = 0;
|
||||
assign IllegalFPUInstrD = 1;
|
||||
assign IllegalFPUInstrM = 1;
|
||||
assign SetFflagsM = 0;
|
||||
end
|
||||
endmodule
|
||||
|
@ -395,9 +395,11 @@ module riscvassertions;
|
||||
assert (`PMP_ENTRIES == 0 | `PMP_ENTRIES==16 | `PMP_ENTRIES==64) else $error("Illegal number of PMP entries: PMP_ENTRIES must be 0, 16, or 64");
|
||||
assert (`S_SUPPORTED | `VIRTMEM_SUPPORTED == 0) else $error("Virtual memory requires S mode support");
|
||||
assert (`DIV_BITSPERCYCLE == 1 | `DIV_BITSPERCYCLE==2 | `DIV_BITSPERCYCLE==4) else $error("Illegal number of divider bits/cycle: DIV_BITSPERCYCLE must be 1, 2, or 4");
|
||||
assert (`F_SUPPORTED | ~`D_SUPPORTED) else $error("Can't support double (D) without supporting float (F)");
|
||||
assert (`F_SUPPORTED | ~`D_SUPPORTED) else $error("Can't support double fp (D) without supporting float (F)");
|
||||
assert (`D_SUPPORTED | ~`Q_SUPPORTED) else $error("Can't support quad fp (Q) without supporting double (D)");
|
||||
assert (`F_SUPPORTED | ~`ZFH_SUPPORTED) else $error("Can't support half-precision fp (ZFH) without supporting float (F)");
|
||||
assert (`DMEM == `MEM_CACHE | ~`F_SUPPORTED | `FLEN <= `XLEN) else $error("Data cache required to support FLEN > XLEN because AHB bus width is XLEN");
|
||||
assert (`I_SUPPORTED ^ `E_SUPPORTED) else $error("Exactly one of I and E must be supported");
|
||||
// assert (`XLEN == 64 | ~`D_SUPPORTED) else $error("Wally does not yet support D extensions on RV32");
|
||||
assert (`FLEN<=`XLEN | `DMEM == `MEM_CACHE) else $error("Wally does not support FLEN > XLEN unleses data cache is supported");
|
||||
assert (`DCACHE_WAYSIZEINBYTES <= 4096 | (`DMEM != `MEM_CACHE) | `VIRTMEM_SUPPORTED == 0) else $error("DCACHE_WAYSIZEINBYTES cannot exceed 4 KiB when caches and vitual memory is enabled (to prevent aliasing)");
|
||||
assert (`DCACHE_LINELENINBITS >= 128 | (`DMEM != `MEM_CACHE)) else $error("DCACHE_LINELENINBITS must be at least 128 when caches are enabled");
|
||||
@ -423,6 +425,8 @@ module riscvassertions;
|
||||
assert (`DCACHE_LINELENINBITS <= `XLEN*16 | (`DMEM != `MEM_CACHE)) else $error("DCACHE_LINELENINBITS must not exceed 16 words because max AHB burst size is 1");
|
||||
assert (`DCACHE_LINELENINBITS % 4 == 0) else $error("DCACHE_LINELENINBITS must hold 4, 8, or 16 words");
|
||||
end
|
||||
|
||||
// *** DH 8/23/
|
||||
endmodule
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user