From 39ae7435437bde3135ca5873a5a0ddb507ff548f Mon Sep 17 00:00:00 2001 From: bbracker Date: Fri, 28 May 2021 23:11:37 -0400 Subject: [PATCH 1/4] turns out I should not have tried renaming FStallD to FPUStallD because that name was already used! All the same it does feel weird to have two such signals floating around \(ah pun!\) --- wally-pipelined/regression/wally-pipelined.do | 2 +- .../regression/wave-dos/peripheral-waves.do | 5 + wally-pipelined/src/hazard/hazard.sv | 40 +++---- wally-pipelined/src/ifu/ifu.sv | 9 +- wally-pipelined/src/privileged/csr.sv | 38 ++++--- wally-pipelined/src/privileged/csrsr.sv | 102 +++++++++--------- wally-pipelined/src/privileged/privileged.sv | 4 +- .../src/wally/wallypipelinedhart.sv | 4 +- .../testbench/testbench-imperas.sv | 13 ++- 9 files changed, 118 insertions(+), 99 deletions(-) diff --git a/wally-pipelined/regression/wally-pipelined.do b/wally-pipelined/regression/wally-pipelined.do index 51335b82..500e1fe6 100644 --- a/wally-pipelined/regression/wally-pipelined.do +++ b/wally-pipelined/regression/wally-pipelined.do @@ -40,7 +40,7 @@ vsim workopt view wave -- display input and output signals as hexidecimal values -do ./wave-dos/default-waves.do +do ./wave-dos/peripheral-waves.do -- Run the Simulation #run 5000 diff --git a/wally-pipelined/regression/wave-dos/peripheral-waves.do b/wally-pipelined/regression/wave-dos/peripheral-waves.do index f92c1af5..3c4945c7 100644 --- a/wally-pipelined/regression/wave-dos/peripheral-waves.do +++ b/wally-pipelined/regression/wave-dos/peripheral-waves.do @@ -48,6 +48,11 @@ add wave /testbench/dut/hart/ieu/dp/RegWriteW add wave -hex /testbench/dut/hart/ieu/dp/ResultW add wave -hex /testbench/dut/hart/ieu/dp/RdW add wave -divider +add wave -hex /testbench/dut/hart/priv/csr/ProposedEPCM +add wave -hex /testbench/dut/hart/priv/csr/TrapM +add wave -hex /testbench/dut/hart/priv/csr/UnalignedNextEPCM +add wave -hex /testbench/dut/hart/priv/csr/genblk1/csrm/WriteMEPCM +add wave -hex 
/testbench/dut/hart/priv/csr/genblk1/csrm/MEPC_REGW add wave -divider # peripherals diff --git a/wally-pipelined/src/hazard/hazard.sv b/wally-pipelined/src/hazard/hazard.sv index 35aa9835..72857fb3 100644 --- a/wally-pipelined/src/hazard/hazard.sv +++ b/wally-pipelined/src/hazard/hazard.sv @@ -30,16 +30,15 @@ module hazard( input logic reset, // Detect hazards input logic BPPredWrongE, CSRWritePendingDEM, RetM, TrapM, - input logic FPUStallD, LoadStallD, MulDivStallD, CSRRdStallD, + input logic LoadStallD, MulDivStallD, CSRRdStallD, input logic DataStall, ICacheStallF, - input logic FStallD, + input logic FPUStallD, input logic DivBusyE, // Stall & flush outputs output logic StallF, StallD, StallE, StallM, StallW, output logic FlushF, FlushD, FlushE, FlushM, FlushW ); - logic BranchFlushDE; logic StallFCause, StallDCause, StallECause, StallMCause, StallWCause; logic FirstUnstalledD, FirstUnstalledE, FirstUnstalledM, FirstUnstalledW; @@ -56,34 +55,29 @@ module hazard( // A stage must stall if the next stage is stalled // If any stages are stalled, the first stage that isn't stalled must flush. 
- assign BranchFlushDE = BPPredWrongE | RetM | TrapM; - - assign StallFCause = CSRWritePendingDEM & ~(BranchFlushDE); - assign StallDCause = (FPUStallD | LoadStallD | MulDivStallD | CSRRdStallD | FStallD) & ~(BranchFlushDE); // stall in decode if instruction is a load/mul/csr dependent on previous -// assign StallDCause = LoadStallD | MulDivStallD | CSRRdStallD; // stall in decode if instruction is a load/mul/csr dependent on previous + assign StallFCause = CSRWritePendingDEM && ~(TrapM || RetM || BPPredWrongE); + assign StallDCause = (LoadStallD || MulDivStallD || CSRRdStallD || FPUStallD) && ~(TrapM || RetM || BPPredWrongE); // stall in decode if instruction is a load/mul/csr dependent on previous assign StallECause = DivBusyE; assign StallMCause = 0; - assign StallWCause = DataStall | ICacheStallF; + assign StallWCause = DataStall || ICacheStallF; - // Each stage stalls if the next stage is stalled or there is a cause to stall this stage. - assign StallF = StallD | StallFCause; - - assign StallD = StallE | StallDCause; - assign StallE = StallM | StallECause; - assign StallM = StallW | StallMCause; + assign StallF = StallFCause || StallD; + assign StallD = StallDCause || StallE; + assign StallE = StallECause || StallM; + assign StallM = StallMCause || StallW; assign StallW = StallWCause; //assign FirstUnstalledD = (~StallD & StallF & ~MulDivStallD); - assign FirstUnstalledD = (~StallD & StallF); //assign FirstUnstalledE = (~StallE & StallD & ~MulDivStallD); - assign FirstUnstalledE = (~StallE & StallD); - assign FirstUnstalledM = (~StallM & StallE); - assign FirstUnstalledW = (~StallW & StallM);; + assign FirstUnstalledD = (~StallD && StallF); + assign FirstUnstalledE = (~StallE && StallD); + assign FirstUnstalledM = (~StallM && StallE); + assign FirstUnstalledW = (~StallW && StallM); // Each stage flushes if the previous stage is the last one stalled (for cause) or the system has reason to flush assign FlushF = BPPredWrongE; - assign FlushD = FirstUnstalledD || 
BranchFlushDE; // PCSrcE |InstrStall | CSRWritePendingDEM | RetM | TrapM; - assign FlushE = FirstUnstalledE || BranchFlushDE; // LoadStallD | PCSrcE | RetM | TrapM; - assign FlushM = FirstUnstalledM || RetM || TrapM; - assign FlushW = FirstUnstalledW | TrapM; + assign FlushD = FirstUnstalledD || TrapM || RetM || BPPredWrongE; + assign FlushE = FirstUnstalledE || TrapM || RetM || BPPredWrongE; + assign FlushM = FirstUnstalledM || TrapM || RetM; + assign FlushW = FirstUnstalledW || TrapM; endmodule diff --git a/wally-pipelined/src/ifu/ifu.sv b/wally-pipelined/src/ifu/ifu.sv index 994288bd..28f7597e 100644 --- a/wally-pipelined/src/ifu/ifu.sv +++ b/wally-pipelined/src/ifu/ifu.sv @@ -37,7 +37,8 @@ module ifu ( output logic [`XLEN-1:0] InstrPAdrF, output logic InstrReadF, output logic ICacheStallF, - // Decode + // Decode + output logic [`XLEN-1:0] PCD, // Execute output logic [`XLEN-1:0] PCLinkE, input logic PCSrcE, @@ -47,7 +48,7 @@ module ifu ( // Mem input logic RetM, TrapM, input logic [`XLEN-1:0] PrivilegedNextPCM, - output logic [31:0] InstrD, InstrM, + output logic [31:0] InstrD, InstrE, InstrM, InstrW, output logic [`XLEN-1:0] PCM, output logic [4:0] InstrClassM, output logic BPPredDirWrongM, @@ -76,9 +77,9 @@ module ifu ( logic misaligned, BranchMisalignedFaultE, BranchMisalignedFaultM, TrapMisalignedFaultM; logic PrivilegedChangePCM; logic IllegalCompInstrD; - logic [`XLEN-1:0] PCPlusUpperF, PCPlus2or4F, PCD, PCW, PCLinkD, PCLinkM, PCNextPF, PCPF; + logic [`XLEN-1:0] PCPlusUpperF, PCPlus2or4F, PCW, PCLinkD, PCLinkM, PCNextPF, PCPF; logic CompressedF; - logic [31:0] InstrRawD, InstrE, InstrW; + logic [31:0] InstrRawD; localparam [31:0] nop = 32'h00000013; // instruction for NOP logic reset_q; // *** look at this later. 
diff --git a/wally-pipelined/src/privileged/csr.sv b/wally-pipelined/src/privileged/csr.sv index 744b8f9b..89d71fb5 100644 --- a/wally-pipelined/src/privileged/csr.sv +++ b/wally-pipelined/src/privileged/csr.sv @@ -34,8 +34,8 @@ module csr #(parameter ) ( input logic clk, reset, input logic FlushW, StallD, StallE, StallM, StallW, - input logic [31:0] InstrM, - input logic [`XLEN-1:0] PCM, SrcAM, + input logic [31:0] InstrD,InstrE,InstrM, + input logic [`XLEN-1:0] PCF, PCD, PCE, PCM, SrcAM, input logic InterruptM, input logic CSRReadM, CSRWriteM, TrapM, MTrapM, STrapM, UTrapM, mretM, sretM, uretM, input logic TimerIntM, ExtIntM, SwIntM, @@ -47,6 +47,9 @@ module csr #(parameter input logic [4:0] InstrClassM, input logic [1:0] NextPrivilegeModeM, PrivilegeModeW, input logic [`XLEN-1:0] CauseM, NextFaultMtvalM, + input logic BreakpointFaultM, EcallFaultM, + input logic InstrMisalignedFaultM, InstrAccessFaultM, IllegalInstrFaultM, + input logic LoadMisalignedFaultM, StoreMisalignedFaultM, LoadAccessFaultM, StoreAccessFaultM, output logic [1:0] STATUS_MPP, output logic STATUS_SPP, STATUS_TSR, output logic [`XLEN-1:0] MEPC_REGW, SEPC_REGW, UEPC_REGW, UTVEC_REGW, STVEC_REGW, MTVEC_REGW, @@ -65,6 +68,7 @@ module csr #(parameter output logic IllegalCSRAccessM ); + localparam NOP = 32'h13; logic [`XLEN-1:0] CSRMReadValM, CSRSReadValM, CSRUReadValM, CSRNReadValM, CSRCReadValM, CSRReadValM; logic [`XLEN-1:0] CSRSrcM, CSRRWM, CSRRSM, CSRRCM, CSRWriteValM; @@ -73,22 +77,32 @@ module csr #(parameter logic WriteMSTATUSM, WriteSSTATUSM, WriteUSTATUSM; logic CSRMWriteM, CSRSWriteM, CSRUWriteM; - logic [`XLEN-1:0] UnalignedNextEPCM, NextEPCM, preservedPCM, readPCM, NextCauseM, NextMtvalM; - - always_ff @(posedge clk) begin - preservedPCM <= PCM; - end - - mux2 #(`XLEN) pcmux(PCM, preservedPCM, InterruptM, readPCM); - //flop #(`XLEN) CSRReadPCMreg(clk, reset, PCM, readPCM); + logic MStageFailed; + logic [`XLEN-1:0] ProposedEPCM, UnalignedNextEPCM, NextEPCM, NextCauseM, NextMtvalM; 
logic [11:0] CSRAdrM; logic [11:0] SIP_REGW, SIE_REGW; //logic [11:0] UIP_REGW, UIE_REGW = 0; // N user-mode exceptions not supported logic IllegalCSRCAccessM, IllegalCSRMAccessM, IllegalCSRSAccessM, IllegalCSRUAccessM, IllegalCSRNAccessM, InsufficientCSRPrivilegeM; - logic IllegalCSRMWriteReadonlyM; + assign MStageFailed = BreakpointFaultM || EcallFaultM || InstrMisalignedFaultM || InstrAccessFaultM || IllegalInstrFaultM || LoadMisalignedFaultM || StoreMisalignedFaultM || LoadAccessFaultM || StoreAccessFaultM; + always_comb begin + if (MStageFailed) + casez({InstrD==NOP,InstrE==NOP,InstrM==NOP}) + 3'b??0: ProposedEPCM = PCM; + 3'b?01: ProposedEPCM = PCE; + 3'b011: ProposedEPCM = PCD; + 3'b111: ProposedEPCM = PCF; + endcase + else + casez({InstrD==NOP,InstrE==NOP}) + 2'b?0: ProposedEPCM = PCE; + 2'b01: ProposedEPCM = PCD; + 2'b11: ProposedEPCM = PCF; + endcase + end + generate if (`ZCSR_SUPPORTED) begin // modify CSRs @@ -109,7 +123,7 @@ module csr #(parameter // write CSRs assign CSRAdrM = InstrM[31:20]; - assign UnalignedNextEPCM = TrapM ? readPCM : CSRWriteValM; + assign UnalignedNextEPCM = TrapM ? ProposedEPCM : CSRWriteValM; assign NextEPCM = `C_SUPPORTED ? {UnalignedNextEPCM[`XLEN-1:1], 1'b0} : {UnalignedNextEPCM[`XLEN-1:2], 2'b00}; // 3.1.15 alignment assign NextCauseM = TrapM ? CauseM : CSRWriteValM; assign NextMtvalM = TrapM ? 
NextFaultMtvalM : CSRWriteValM; diff --git a/wally-pipelined/src/privileged/csrsr.sv b/wally-pipelined/src/privileged/csrsr.sv index 8c5c7a3d..0b36df49 100644 --- a/wally-pipelined/src/privileged/csrsr.sv +++ b/wally-pipelined/src/privileged/csrsr.sv @@ -109,74 +109,74 @@ module csrsr ( // complex register with reset, write enable, and the ability to update other bits in certain cases always_ff @(posedge clk, posedge reset) if (reset) begin - STATUS_SUM_INT <= 0; - STATUS_MPRV_INT <= 0; // Per Priv 3.3 - STATUS_FS_INT <= 0; //2'b01; // busybear: change all these reset values to 0 - STATUS_MPP <= 0; //`M_MODE; - STATUS_SPP <= 0; //1'b1; - STATUS_MPIE <= 0; //1; - STATUS_SPIE <= 0; //`S_SUPPORTED; - STATUS_UPIE <= 0; // `U_SUPPORTED; - STATUS_MIE <= 0; // Per Priv 3.3 - STATUS_SIE <= 0; //`S_SUPPORTED; - STATUS_UIE <= 0; //`U_SUPPORTED; + STATUS_SUM_INT <= #1 0; + STATUS_MPRV_INT <= #1 0; // Per Priv 3.3 + STATUS_FS_INT <= #1 0; //2'b01; // busybear: change all these reset values to 0 + STATUS_MPP <= #1 0; //`M_MODE; + STATUS_SPP <= #1 0; //1'b1; + STATUS_MPIE <= #1 0; //1; + STATUS_SPIE <= #1 0; //`S_SUPPORTED; + STATUS_UPIE <= #1 0; // `U_SUPPORTED; + STATUS_MIE <= #1 0; // Per Priv 3.3 + STATUS_SIE <= #1 0; //`S_SUPPORTED; + STATUS_UIE <= #1 0; //`U_SUPPORTED; end else if (~StallW) begin if (WriteMSTATUSM) begin - STATUS_SUM_INT <= CSRWriteValM[18]; - STATUS_MPRV_INT <= CSRWriteValM[17]; - STATUS_FS_INT <= CSRWriteValM[14:13]; - STATUS_MPP <= STATUS_MPP_NEXT; - STATUS_SPP <= `S_SUPPORTED & CSRWriteValM[8]; - STATUS_MPIE <= CSRWriteValM[7]; - STATUS_SPIE <= `S_SUPPORTED & CSRWriteValM[5]; - STATUS_UPIE <= `U_SUPPORTED & CSRWriteValM[4]; - STATUS_MIE <= CSRWriteValM[3]; - STATUS_SIE <= `S_SUPPORTED & CSRWriteValM[1]; - STATUS_UIE <= `U_SUPPORTED & CSRWriteValM[0]; + STATUS_SUM_INT <= #1 CSRWriteValM[18]; + STATUS_MPRV_INT <= #1 CSRWriteValM[17]; + STATUS_FS_INT <= #1 CSRWriteValM[14:13]; + STATUS_MPP <= #1 STATUS_MPP_NEXT; + STATUS_SPP <= #1 `S_SUPPORTED & 
CSRWriteValM[8]; + STATUS_MPIE <= #1 CSRWriteValM[7]; + STATUS_SPIE <= #1 `S_SUPPORTED & CSRWriteValM[5]; + STATUS_UPIE <= #1 `U_SUPPORTED & CSRWriteValM[4]; + STATUS_MIE <= #1 CSRWriteValM[3]; + STATUS_SIE <= #1 `S_SUPPORTED & CSRWriteValM[1]; + STATUS_UIE <= #1 `U_SUPPORTED & CSRWriteValM[0]; end else if (WriteSSTATUSM) begin // write a subset of the STATUS bits - STATUS_SUM_INT <= CSRWriteValM[18]; - STATUS_FS_INT <= CSRWriteValM[14:13]; - STATUS_SPP <= `S_SUPPORTED & CSRWriteValM[8]; - STATUS_SPIE <= `S_SUPPORTED & CSRWriteValM[5]; - STATUS_UPIE <= `U_SUPPORTED & CSRWriteValM[4]; - STATUS_SIE <= `S_SUPPORTED & CSRWriteValM[1]; - STATUS_UIE <= `U_SUPPORTED & CSRWriteValM[0]; + STATUS_SUM_INT <= #1 CSRWriteValM[18]; + STATUS_FS_INT <= #1 CSRWriteValM[14:13]; + STATUS_SPP <= #1 `S_SUPPORTED & CSRWriteValM[8]; + STATUS_SPIE <= #1 `S_SUPPORTED & CSRWriteValM[5]; + STATUS_UPIE <= #1 `U_SUPPORTED & CSRWriteValM[4]; + STATUS_SIE <= #1 `S_SUPPORTED & CSRWriteValM[1]; + STATUS_UIE <= #1 `U_SUPPORTED & CSRWriteValM[0]; end else if (WriteUSTATUSM) begin // write a subset of the STATUS bits - STATUS_FS_INT <= CSRWriteValM[14:13]; - STATUS_UPIE <= `U_SUPPORTED & CSRWriteValM[4]; - STATUS_UIE <= `U_SUPPORTED & CSRWriteValM[0]; + STATUS_FS_INT <= #1 CSRWriteValM[14:13]; + STATUS_UPIE <= #1 `U_SUPPORTED & CSRWriteValM[4]; + STATUS_UIE <= #1 `U_SUPPORTED & CSRWriteValM[0]; end else begin - if (FloatRegWriteW) STATUS_FS_INT <=2'b11; // mark Float State dirty + if (FloatRegWriteW) STATUS_FS_INT <= #1 2'b11; // mark Float State dirty (same #1 intra-assignment delay as every other STATUS write) if (TrapM) begin // Update interrupt enables per Privileged Spec p. 
21 // y = PrivilegeModeW // x = NextPrivilegeModeM // Modes: 11 = Machine, 01 = Supervisor, 00 = User if (NextPrivilegeModeM == `M_MODE) begin - STATUS_MPIE <= STATUS_MIE; - STATUS_MIE <= 0; - STATUS_MPP <= PrivilegeModeW; + STATUS_MPIE <= #1 STATUS_MIE; + STATUS_MIE <= #1 0; + STATUS_MPP <= #1 PrivilegeModeW; end else if (NextPrivilegeModeM == `S_MODE) begin - STATUS_SPIE <= STATUS_SIE; - STATUS_SIE <= 0; - STATUS_SPP <= PrivilegeModeW[0]; // *** seems to disagree with P. 56 + STATUS_SPIE <= #1 STATUS_SIE; + STATUS_SIE <= #1 0; + STATUS_SPP <= #1 PrivilegeModeW[0]; // *** seems to disagree with P. 56 end else begin // user mode - STATUS_UPIE <= STATUS_UIE; - STATUS_UIE <= 0; + STATUS_UPIE <= #1 STATUS_UIE; + STATUS_UIE <= #1 0; end end else if (mretM) begin // Privileged 3.1.6.1 - STATUS_MIE <= STATUS_MPIE; - STATUS_MPIE <= 1; - STATUS_MPP <= `U_SUPPORTED ? `U_MODE : `M_MODE; // per spec, not sure why - STATUS_MPRV_INT <= 0; // per 20210108 draft spec + STATUS_MIE <= #1 STATUS_MPIE; + STATUS_MPIE <= #1 1; + STATUS_MPP <= #1 `U_SUPPORTED ? 
`U_MODE : `M_MODE; // per spec, not sure why + STATUS_MPRV_INT <= #1 0; // per 20210108 draft spec end else if (sretM) begin - STATUS_SIE <= STATUS_SPIE; - STATUS_SPIE <= `S_SUPPORTED; - STATUS_SPP <= 0; // Privileged 4.1.1 - STATUS_MPRV_INT <= 0; // per 20210108 draft spec + STATUS_SIE <= #1 STATUS_SPIE; + STATUS_SPIE <= #1 `S_SUPPORTED; + STATUS_SPP <= #1 0; // Privileged 4.1.1 + STATUS_MPRV_INT <= #1 0; // per 20210108 draft spec end else if (uretM) begin - STATUS_UIE <= STATUS_UPIE; - STATUS_UPIE <= `U_SUPPORTED; + STATUS_UIE <= #1 STATUS_UPIE; + STATUS_UPIE <= #1 `U_SUPPORTED; end // *** add code to track STATUS_FS_INT for dirty floating point registers end diff --git a/wally-pipelined/src/privileged/privileged.sv b/wally-pipelined/src/privileged/privileged.sv index 2e3af3e2..41d685c4 100644 --- a/wally-pipelined/src/privileged/privileged.sv +++ b/wally-pipelined/src/privileged/privileged.sv @@ -31,8 +31,8 @@ module privileged ( input logic FlushW, input logic CSRReadM, CSRWriteM, input logic [`XLEN-1:0] SrcAM, - input logic [31:0] InstrM, - input logic [`XLEN-1:0] PCM, + input logic [`XLEN-1:0] PCF,PCD,PCE,PCM, + input logic [31:0] InstrD, InstrE, InstrM, InstrW, output logic [`XLEN-1:0] CSRReadValW, output logic [`XLEN-1:0] PrivilegedNextPCM, output logic RetM, TrapM, diff --git a/wally-pipelined/src/wally/wallypipelinedhart.sv b/wally-pipelined/src/wally/wallypipelinedhart.sv index e49cc6c6..00ae8493 100644 --- a/wally-pipelined/src/wally/wallypipelinedhart.sv +++ b/wally-pipelined/src/wally/wallypipelinedhart.sv @@ -68,8 +68,8 @@ module wallypipelinedhart ( logic [`XLEN-1:0] SrcAM; logic [2:0] Funct3E; // logic [31:0] InstrF; - logic [31:0] InstrD, InstrM; - logic [`XLEN-1:0] PCE, PCM, PCLinkE, PCLinkW; + logic [31:0] InstrD, InstrE, InstrM, InstrW; + logic [`XLEN-1:0] PCD, PCE, PCM, PCLinkE, PCLinkW; logic [`XLEN-1:0] PCTargetE; logic [`XLEN-1:0] CSRReadValW, MulDivResultW; logic [`XLEN-1:0] PrivilegedNextPCM; diff --git 
a/wally-pipelined/testbench/testbench-imperas.sv b/wally-pipelined/testbench/testbench-imperas.sv index c5abff91..ea693900 100644 --- a/wally-pipelined/testbench/testbench-imperas.sv +++ b/wally-pipelined/testbench/testbench-imperas.sv @@ -29,6 +29,7 @@ module testbench(); parameter DEBUG = 0; parameter TESTSPERIPH = 0; // set to 0 for regression + parameter TESTSPRIV = 0; // set to 0 for regression logic clk; logic reset; @@ -516,9 +517,11 @@ string tests32f[] = '{ tests = testsBP64; // testsbp should not run the other tests. It starts at address 0 rather than // 0x8000_0000, the next if must remain an else if. - end else if (TESTSPERIPH) begin + end else if (TESTSPERIPH) tests = tests64periph; - end else begin + else if (TESTSPRIV) + tests = tests64p; + else begin tests = {tests64p,tests64i,tests64periph}; if (`C_SUPPORTED) tests = {tests, tests64ic}; else tests = {tests, tests64iNOc}; @@ -531,9 +534,11 @@ string tests32f[] = '{ //tests = {tests64a, tests}; end else begin // RV32 // *** add the 32 bit bp tests - if (TESTSPERIPH) begin + if (TESTSPERIPH) tests = tests32periph; - end else begin + else if (TESTSPRIV) + tests = tests32p; + else begin tests = {tests32i, tests32p};//,tests32periph}; *** broken at the moment if (`C_SUPPORTED % 2 == 1) tests = {tests, tests32ic}; else tests = {tests, tests32iNOc}; From 12c34c25f3f122c90b8a99f6fdb4590f721fe0d2 Mon Sep 17 00:00:00 2001 From: "James E. Stine" Date: Mon, 31 May 2021 08:36:19 -0400 Subject: [PATCH 2/4] Modify elements of generics for LZD and shifter wrote for integer divider. 
--- wally-pipelined/src/generic/lzd.sv | 195 +++++++++++++++++++++++++++ wally-pipelined/src/generic/lzd.sv~ | 195 +++++++++++++++++++++++++++ wally-pipelined/src/generic/shift.sv | 76 +++++++++++ wally-pipelined/src/muldiv/div.sv | 146 +------------------- 4 files changed, 471 insertions(+), 141 deletions(-) create mode 100755 wally-pipelined/src/generic/lzd.sv create mode 100755 wally-pipelined/src/generic/lzd.sv~ create mode 100755 wally-pipelined/src/generic/shift.sv diff --git a/wally-pipelined/src/generic/lzd.sv b/wally-pipelined/src/generic/lzd.sv new file mode 100755 index 00000000..98642c15 --- /dev/null +++ b/wally-pipelined/src/generic/lzd.sv @@ -0,0 +1,195 @@ +/////////////////////////////////////////// +// lzd.sv +// +// Written: James.Stine@okstate.edu 1 February 2021 +// Modified: +// +// Purpose: Integer Divide instructions +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, +// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT +// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +/////////////////////////////////////////// + +`include "wally-config.vh" +/* verilator lint_off DECLFILENAME */ + +// Original idea came from V. G. Oklobdzija, "An algorithmic and novel +// design of a leading zero detector circuit: comparison with logic +// synthesis," in IEEE Transactions on Very Large Scale Integration +// (VLSI) Systems, vol. 2, no. 1, pp. 124-128, March 1994, doi: +// 10.1109/92.273153. + +// Modified to be more hierarchical + +module lzd2 (P, V, B); + + input logic [1:0] B; + + output logic P; + output logic V; + + assign V = B[0] | B[1]; + assign P = B[0] & ~B[1]; + +endmodule // lzd2 + +module lzd_hier #(parameter WIDTH=8) + (input logic [WIDTH-1:0] B, + output logic [$clog2(WIDTH)-1:0] ZP, + output logic ZV); + + if (WIDTH == 128) + lzd128 lz128 (ZP, ZV, B); + else if (WIDTH == 64) + lzd64 lz64 (ZP, ZV, B); + else if (WIDTH == 32) + lzd32 lz32 (ZP, ZV, B); + else if (WIDTH == 16) + lzd16 lz16 (ZP, ZV, B); + else if (WIDTH == 8) + lzd8 lz8 (ZP, ZV, B); + else if (WIDTH == 4) + lzd4 lz4 (ZP, ZV, B); + +endmodule // lzd_hier + +module lzd4 (ZP, ZV, B); + + input logic [3:0] B; + + logic ZPa; + logic ZPb; + logic ZVa; + logic ZVb; + + output logic [1:0] ZP; + output logic ZV; + + lzd2 l1(ZPa, ZVa, B[1:0]); // lower half + lzd2 l2(ZPb, ZVb, B[3:2]); // upper half + + assign ZP[0:0] = ZVb ? ZPb : ZPa; + assign ZP[1] = ~ZVb; + assign ZV = ZVa | ZVb; + +endmodule // lzd4 + +module lzd8 (ZP, ZV, B); + + input logic [7:0] B; + + logic [1:0] ZPa; + logic [1:0] ZPb; + logic ZVa; + logic ZVb; + + output logic [2:0] ZP; + output logic ZV; + + lzd4 l1(ZPa, ZVa, B[3:0]); // lower half + lzd4 l2(ZPb, ZVb, B[7:4]); // upper half + + assign ZP[1:0] = ZVb ? 
ZPb : ZPa; + assign ZP[2] = ~ZVb; + assign ZV = ZVa | ZVb; + +endmodule // lzd8 + +module lzd16 (ZP, ZV, B); + + input logic [15:0] B; + + logic [2:0] ZPa; + logic [2:0] ZPb; + logic ZVa; + logic ZVb; + + output logic [3:0] ZP; + output logic ZV; + + lzd8 l1(ZPa, ZVa, B[7:0]); // lower half + lzd8 l2(ZPb, ZVb, B[15:8]); // upper half + + assign ZP[2:0] = ZVb ? ZPb : ZPa; + assign ZP[3] = ~ZVb; + assign ZV = ZVa | ZVb; + +endmodule // lzd16 + +module lzd32 (ZP, ZV, B); + + input logic [31:0] B; + + logic [3:0] ZPa; + logic [3:0] ZPb; + logic ZVa; + logic ZVb; + + output logic [4:0] ZP; + output logic ZV; + + lzd16 l1(ZPa, ZVa, B[15:0]); // lower half + lzd16 l2(ZPb, ZVb, B[31:16]); // upper half + + assign ZP[3:0] = ZVb ? ZPb : ZPa; + assign ZP[4] = ~ZVb; + assign ZV = ZVa | ZVb; + +endmodule // lzd32 + +module lzd64 (ZP, ZV, B); + + input logic [63:0] B; + + logic [4:0] ZPa; + logic [4:0] ZPb; + logic ZVa; + logic ZVb; + + output logic [5:0] ZP; + output logic ZV; + + lzd32 l1(ZPa, ZVa, B[31:0]); // lower half + lzd32 l2(ZPb, ZVb, B[63:32]); // upper half + + assign ZP[4:0] = ZVb ? ZPb : ZPa; + assign ZP[5] = ~ZVb; + assign ZV = ZVa | ZVb; + +endmodule // lzd64 + +module lzd128 (ZP, ZV, B); + + input logic [127:0] B; + + logic [5:0] ZPa; + logic [5:0] ZPb; + logic ZVa; + logic ZVb; + + output logic [6:0] ZP; + output logic ZV; + + lzd64 l1(ZPa, ZVa, B[63:0]); // lower 64 bits + lzd64 l2(ZPb, ZVb, B[127:64]); // upper 64 bits + + assign ZP[5:0] = ZVb ? ZPb : ZPa; + assign ZP[6] = ~ZVb; + assign ZV = ZVa | ZVb; + +endmodule // lzd128 + +/* verilator lint_on DECLFILENAME */ diff --git a/wally-pipelined/src/generic/lzd.sv~ b/wally-pipelined/src/generic/lzd.sv~ new file mode 100755 index 00000000..bfffe5e5 --- /dev/null +++ b/wally-pipelined/src/generic/lzd.sv~ @@ -0,0 +1,195 @@ +/////////////////////////////////////////// +// lzd.sv +// +// Written: James.Stine@okstate.edu 1 February 2021 +// Modified: +// +// Purpose: Integer Divide instructions +// +// A component of the Wally configurable RISC-V project. 
+// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, +// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT +// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +/////////////////////////////////////////// + +`include "wally-config.vh" +/* verilator lint_off DECLFILENAME */ + +// Original idea came from V. G. Oklobdzija, "An algorithmic and novel +// design of a leading zero detector circuit: comparison with logic +// synthesis," in IEEE Transactions on Very Large Scale Integration +// (VLSI) Systems, vol. 2, no. 1, pp. 124-128, March 1994, doi: +// 10.1109/92.273153. 
+ +// Modified to be more hierarchical + +module lz2 (P, V, B); + + input logic [1:0] B; + + output logic P; + output logic V; + + assign V = B[0] | B[1]; + assign P = B[0] & ~B[1]; + +endmodule // lz2 + +module lzd_hier #(parameter WIDTH=8) + (input logic [WIDTH-1:0] B, + output logic [$clog2(WIDTH)-1:0] ZP, + output logic ZV); + + if (WIDTH == 128) + lz128 lzd127 (ZP, ZV, B); + else if (WIDTH == 64) + lz64 lzd64 (ZP, ZV, B); + else if (WIDTH == 32) + lz32 lzd32 (ZP, ZV, B); + else if (WIDTH == 16) + lz16 lzd16 (ZP, ZV, B); + else if (WIDTH == 8) + lz8 lzd8 (ZP, ZV, B); + else if (WIDTH == 4) + lz4 lzd4 (ZP, ZV, B); + +endmodule // lzd_hier + +module lz4 (ZP, ZV, B); + + input logic [3:0] B; + + logic ZPa; + logic ZPb; + logic ZVa; + logic ZVb; + + output logic [1:0] ZP; + output logic ZV; + + lz2 l1(ZPa, ZVa, B[1:0]); + lz2 l2(ZPb, ZVb, B[3:2]); + + assign ZP[0:0] = ZVb ? ZPb : ZPa; + assign ZP[1] = ~ZVb; + assign ZV = ZVa | ZVb; + +endmodule + +module lz8 (ZP, ZV, B); + + input logic [7:0] B; + + logic [1:0] ZPa; + logic [1:0] ZPb; + logic ZVa; + logic ZVb; + + output logic [2:0] ZP; + output logic ZV; + + lz4 l1(ZPa, ZVa, B[3:0]); + lz4 l2(ZPb, ZVb, B[7:4]); + + assign ZP[1:0] = ZVb ? ZPb : ZPa; + assign ZP[2] = ~ZVb; + assign ZV = ZVa | ZVb; + +endmodule + +module lz16 (ZP, ZV, B); + + input logic [15:0] B; + + logic [2:0] ZPa; + logic [2:0] ZPb; + logic ZVa; + logic ZVb; + + output logic [3:0] ZP; + output logic ZV; + + lz8 l1(ZPa, ZVa, B[7:0]); + lz8 l2(ZPb, ZVb, B[15:8]); + + assign ZP[2:0] = ZVb ? ZPb : ZPa; + assign ZP[3] = ~ZVb; + assign ZV = ZVa | ZVb; + +endmodule // lz16 + +module lz32 (ZP, ZV, B); + + input logic [31:0] B; + + logic [3:0] ZPa; + logic [3:0] ZPb; + logic ZVa; + logic ZVb; + + output logic [4:0] ZP; + output logic ZV; + + lz16 l1(ZPa, ZVa, B[15:0]); + lz16 l2(ZPb, ZVb, B[31:16]); + + assign ZP[3:0] = ZVb ? 
ZPb : ZPa; + assign ZP[4] = ~ZVb; + assign ZV = ZVa | ZVb; + +endmodule // lz32 + +module lz64 (ZP, ZV, B); + + input logic [63:0] B; + + logic [4:0] ZPa; + logic [4:0] ZPb; + logic ZVa; + logic ZVb; + + output logic [5:0] ZP; + output logic ZV; + + lz32 l1(ZPa, ZVa, B[31:0]); + lz32 l2(ZPb, ZVb, B[63:32]); + + assign ZP[4:0] = ZVb ? ZPb : ZPa; + assign ZP[5] = ~ZVb; + assign ZV = ZVa | ZVb; + +endmodule // lz64 + +module lz128 (ZP, ZV, B); + + input logic [127:0] B; + + logic [5:0] ZPa; + logic [5:0] ZPb; + logic ZVa; + logic ZVb; + + output logic [6:0] ZP; + output logic ZV; + + lz64 l1(ZPa, ZVa, B[64:0]); + lz64 l2(ZPb, ZVb, B[127:63]); + + assign ZP[5:0] = ZVb ? ZPb : ZPa; + assign ZP[6] = ~ZVb; + assign ZV = ZVa | ZVb; + +endmodule // lz128 + +/* verilator lint_on DECLFILENAME */ diff --git a/wally-pipelined/src/generic/shift.sv b/wally-pipelined/src/generic/shift.sv new file mode 100755 index 00000000..88152588 --- /dev/null +++ b/wally-pipelined/src/generic/shift.sv @@ -0,0 +1,76 @@ +/////////////////////////////////////////// +// shifters.sv +// +// Written: James.Stine@okstate.edu 1 February 2021 +// Modified: +// +// Purpose: Integer Divide instructions +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, +// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT +// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +/////////////////////////////////////////// + +`include "wally-config.vh" +/* verilator lint_off DECLFILENAME */ +/* verilator lint_off UNOPTFLAT */ + +module shift_right #(parameter WIDTH=8) + (input logic [WIDTH-1:0] A, + input logic [$clog2(WIDTH)-1:0] Shift, + output logic [WIDTH-1:0] Z); + + logic [WIDTH-1:0] stage [$clog2(WIDTH):0]; + logic sign; + genvar i; + + assign stage[0] = A; + generate + for (i=0;i<$clog2(WIDTH);i=i+1) + begin : genbit + mux2 #(WIDTH) mux_inst (stage[i], + {{(WIDTH/(2**(i+1))){1'b0}}, stage[i][WIDTH-1:WIDTH/(2**(i+1))]}, + Shift[$clog2(WIDTH)-i-1], + stage[i+1]); + end + endgenerate + assign Z = stage[$clog2(WIDTH)]; + +endmodule // shift_right + +module shift_left #(parameter WIDTH=8) + (input logic [WIDTH-1:0] A, + input logic [$clog2(WIDTH)-1:0] Shift, + output logic [WIDTH-1:0] Z); + + logic [WIDTH-1:0] stage [$clog2(WIDTH):0]; + genvar i; + + assign stage[0] = A; + generate + for (i=0;i<$clog2(WIDTH);i=i+1) + begin : genbit + mux2 #(WIDTH) mux_inst (stage[i], + {stage[i][WIDTH-1-WIDTH/(2**(i+1)):0], {(WIDTH/(2**(i+1))){1'b0}}}, + Shift[$clog2(WIDTH)-i-1], + stage[i+1]); + end + endgenerate + assign Z = stage[$clog2(WIDTH)]; + +endmodule // shift_left + +/* verilator lint_on DECLFILENAME */ +/* verilator lint_on UNOPTFLAT */ diff --git a/wally-pipelined/src/muldiv/div.sv b/wally-pipelined/src/muldiv/div.sv index db830ca3..4266ae61 100755 --- a/wally-pipelined/src/muldiv/div.sv +++ b/wally-pipelined/src/muldiv/div.sv @@ -78,11 +78,7 @@ module div (Qf, remf, 
done, divBusy, div0, N, D, clk, reset, start, S); assign D_NegOne = &D; // Divider goes the distance to 37 cycles - // (thanks the evil divisor for D = 0x1) - // but could theoretically be stopped when - // divdone is asserted. The enable signal - // turns off register storage thus invalidating - // any future cycles. + // (thanks to the evil divisor for D = 0x1) // Shift D, if needed (for integer) // needed to allow qst to be in range for integer @@ -93,8 +89,8 @@ module div (Qf, remf, done, divBusy, div0, N, D, clk, reset, start, S); // exception is given to FSM to tell the operation to // quit gracefully. - lz64 p1 (P, V, twoD); - shifter_l64 p2 (op2, twoD, P); + lzd_hier #(64) p1 (.ZP(P), .ZV(V), .B(twoD)); + shift_left #(64) p2 (twoD, P, op2); assign op1 = twoN; assign div0 = ~V; @@ -141,9 +137,8 @@ module div (Qf, remf, done, divBusy, div0, N, D, clk, reset, start, S); assign Q = Qd2[63:0]; assign Rem5 = Rd2[64:1]; - // Adjust remainder by m (no need to adjust by - // n ln(r) - shifter_r64 p4 (rem0, Rem5, RemShift); + // Adjust remainder by m + shift_right #(64) p4 (Rem5, RemShift, rem0); // Adjust Q/Rem for Signed assign tcQ = (SignN ^ SignD) & S; @@ -368,8 +363,6 @@ module qst4 (input logic [6:0] s, input logic [2:0] d, endmodule // qst4 -// LZD - module lz2 (P, V, B0, B1); input logic B0; @@ -497,7 +490,6 @@ module lz64 (ZP, ZV, B); endmodule // lz64 // FSM Control for Integer Divider - module fsm64 (en, state0, done, divdone, otfzero, divBusy, start, error, NumIter, clk, reset); @@ -1505,134 +1497,6 @@ module magcompare8 (LT, EQ, A, B); endmodule // magcompare8 -module shifter_l64 (Z, A, Shift); - - input logic [63:0] A; - input logic [5:0] Shift; - - logic [63:0] stage1; - logic [63:0] stage2; - logic [63:0] stage3; - logic [63:0] stage4; - logic [63:0] stage5; - - output logic [63:0] Z; - - mux2 #(64) mx01(A, {A[31:0], 32'h0}, Shift[5], stage1); - mux2 #(64) mx02(stage1, {stage1[47:0], 16'h0}, Shift[4], stage2); - mux2 #(64) mx03(stage2, {stage2[55:0], 
8'h0}, Shift[3], stage3); - mux2 #(64) mx04(stage3, {stage3[59:0], 4'h0}, Shift[2], stage4); - mux2 #(64) mx05(stage4, {stage4[61:0], 2'h0}, Shift[1], stage5); - mux2 #(64) mx06(stage5, {stage5[62:0], 1'h0}, Shift[0], Z); - -endmodule // shifter_l64 - -module shifter_r64 (Z, A, Shift); - - input logic [63:0] A; - input logic [5:0] Shift; - - logic [63:0] stage1; - logic [63:0] stage2; - logic [63:0] stage3; - logic [63:0] stage4; - logic [63:0] stage5; - - output logic [63:0] Z; - - mux2 #(64) mx01(A, {32'h0, A[63:32]}, Shift[5], stage1); - mux2 #(64) mx02(stage1, {16'h0, stage1[63:16]}, Shift[4], stage2); - mux2 #(64) mx03(stage2, {8'h0, stage2[63:8]}, Shift[3], stage3); - mux2 #(64) mx04(stage3, {4'h0, stage3[63:4]}, Shift[2], stage4); - mux2 #(64) mx05(stage4, {2'h0, stage4[63:2]}, Shift[1], stage5); - mux2 #(64) mx06(stage5, {1'h0, stage5[63:1]}, Shift[0], Z); - -endmodule // shifter_r64 - -module shifter_l32 (Z, A, Shift); - - input logic [31:0] A; - input logic [4:0] Shift; - - logic [31:0] stage1; - logic [31:0] stage2; - logic [31:0] stage3; - logic [31:0] stage4; - - output logic [31:0] Z; - - mux2 #(32) mx01(A, {A[15:0], 16'h0}, Shift[4], stage1); - mux2 #(32) mx02(stage1, {stage1[23:0], 8'h0}, Shift[3], stage2); - mux2 #(32) mx03(stage2, {stage2[27:0], 4'h0}, Shift[2], stage3); - mux2 #(32) mx04(stage3, {stage3[29:0], 2'h0}, Shift[1], stage4); - mux2 #(32) mx05(stage4, {stage4[30:0], 1'h0}, Shift[0], Z); - -endmodule // shifter_l32 - -module shifter_r32 (Z, A, Shift); - - input logic [31:0] A; - input logic [4:0] Shift; - - logic [31:0] stage1; - logic [31:0] stage2; - logic [31:0] stage3; - logic [31:0] stage4; - - output logic [31:0] Z; - - mux2 #(32) mx01(A, {16'h0, A[31:16]}, Shift[4], stage1); - mux2 #(32) mx02(stage1, {8'h0, stage1[31:8]}, Shift[3], stage2); - mux2 #(32) mx03(stage2, {4'h0, stage2[31:4]}, Shift[2], stage3); - mux2 #(32) mx04(stage3, {2'h0, stage3[31:2]}, Shift[1], stage4); - mux2 #(32) mx05(stage4, {1'h0, stage4[31:1]}, Shift[0], 
Z); - -endmodule // shifter_r32 - -module shift_right #(parameter WIDTH=8) - (input logic [`XLEN-1:0] A, - input logic [$clog2(`XLEN)-1:0] Shift, - output logic [`XLEN-1:0] Z); - - logic [`XLEN-1:0] stage [$clog2(`XLEN):0]; - genvar i; - - assign stage[0] = A; - generate - for (i=0;i<$clog2(`XLEN);i=i+1) - begin : genbit - mux2 #(`XLEN) mux_inst (stage[i], - {{(`XLEN/(2**(i+1))){1'b0}}, stage[i][`XLEN-1:`XLEN/(2**(i+1))]}, - Shift[$clog2(`XLEN)-i-1], - stage[i+1]); - end - endgenerate - assign Z = stage[$clog2(`XLEN)]; - -endmodule // shift_right - -module shift_left #(parameter WIDTH=8) - (input logic [`XLEN-1:0] A, - input logic [$clog2(`XLEN)-1:0] Shift, - output logic [`XLEN-1:0] Z); - - logic [`XLEN-1:0] stage [$clog2(`XLEN):0]; - genvar i; - - assign stage[0] = A; - generate - for (i=0;i<$clog2(`XLEN);i=i+1) - begin : genbit - mux2 #(`XLEN) mux_inst (stage[i], - {stage[i][`XLEN-1-`XLEN/(2**(i+1)):0], {(`XLEN/(2**(i+1))){1'b0}}}, - Shift[$clog2(`XLEN)-i-1], - stage[i+1]); - end - endgenerate - assign Z = stage[$clog2(`XLEN)]; - -endmodule // shift_right - module exception_int (Q, rem, op1, S, div0, Max_N, D_NegOne, Qf, remf); input logic [63:0] Q; From 9954d16fc91017dae8df34f0b60f6ab188242708 Mon Sep 17 00:00:00 2001 From: "James E. Stine" Date: Mon, 31 May 2021 09:12:21 -0400 Subject: [PATCH 3/4] Add enhancements to integer divider including: - better comments - optimize FSM to end earlier - passes for 32-bit or 64-bit depending on parameter to intdiv Left div.bak in just in case have to revert back to original for now. 
--- wally-pipelined/src/muldiv/div.bak | 1560 ++++++++++++++++++++++++++ wally-pipelined/src/muldiv/div.sv | 614 ++++------ wally-pipelined/src/muldiv/muldiv.sv | 3 +- 3 files changed, 1773 insertions(+), 404 deletions(-) create mode 100755 wally-pipelined/src/muldiv/div.bak diff --git a/wally-pipelined/src/muldiv/div.bak b/wally-pipelined/src/muldiv/div.bak new file mode 100755 index 00000000..4266ae61 --- /dev/null +++ b/wally-pipelined/src/muldiv/div.bak @@ -0,0 +1,1560 @@ +/////////////////////////////////////////// +// mul.sv +// +// Written: James.Stine@okstate.edu 1 February 2021 +// Modified: +// +// Purpose: Integer Divide instructions +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, +// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT +// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +/////////////////////////////////////////// + +// *** I added these verilator controls to clean up the +// lint output. 
The linter warnings should be fixed, but now the output is at +// least readable. +/* verilator lint_off COMBDLY */ +/* verilator lint_off IMPLICIT */ + +`include "wally-config.vh" + +module div (Qf, remf, done, divBusy, div0, N, D, clk, reset, start, S); + + input logic [63:0] N, D; + input logic clk; + input logic reset; + input logic start; + input logic S; + + output logic [63:0] Qf; + output logic [63:0] remf; + output logic div0; + output logic done; + output logic divBusy; + + logic divdone; + logic enable; + logic state0; + logic V; + logic [7:0] Num; + logic [5:0] P, NumIter, RemShift; + logic [63:0] op1, op2, op1shift, Rem5; + logic [64:0] Qd, Rd, Qd2, Rd2; + logic [63:0] Q, rem0; + logic [3:0] quotient; + logic otfzero; + logic shiftResult; + logic enablev, state0v, donev, divdonev, oftzerov, divBusyv, ulp; + + logic [63:0] twoD; + logic [63:0] twoN; + logic SignD; + logic SignN; + logic [63:0] QT, remT; + logic D_NegOne; + logic Max_N; + + // Check if negative (two's complement) + // If so, convert to positive + adder #(64) cpa1 ((D ^ {64{D[63]&S}}), {63'h0, D[63]&S}, twoD); + adder #(64) cpa2 ((N ^ {64{N[63]&S}}), {63'h0, N[63]&S}, twoN); + assign SignD = D[63]; + assign SignN = N[63]; + // Max N and D = -1 (Overflow) + assign Max_N = (~|N[62:0]) & N[63]; + assign D_NegOne = &D; + + // Divider goes the distance to 37 cycles + // (thanks to the evil divisor for D = 0x1) + + // Shift D, if needed (for integer) + // needed to allow qst to be in range for integer + // division [1,2) and allow integer divide to work. + // + // The V or valid bit can be used to determine if D + // is 0 and thus a divide by 0 exception. This div0 + // exception is given to FSM to tell the operation to + // quit gracefully. 
+ + lzd_hier #(64) p1 (.ZP(P), .ZV(V), .B(twoD)); + shift_left #(64) p2 (twoD, P, op2); + assign op1 = twoN; + assign div0 = ~V; + + // #iter: N = m+v+s = m+(s+2) = m+2+s (mod k = 0) + // v = 2 since \rho < 1 (add 4 to make sure its a ceil) + adder #(8) cpa3 ({2'b0, P}, + {5'h0, shiftResult, ~shiftResult, 1'b0}, + Num); + + // Determine whether need to add just Q/Rem + assign shiftResult = P[0]; + // div by 2 (ceil) + assign NumIter = Num[6:1]; + assign RemShift = P; + + // FSM to control integer divider + // assume inputs are postive edge and + // datapath (divider) is negative edge + fsm64 fsm1 (enablev, state0v, donev, divdonev, otfzerov, divBusyv, + start, div0, NumIter, ~clk, reset); + + flopr #(1) rega (~clk, reset, donev, done); + flopr #(1) regb (~clk, reset, divdonev, divdone); + flopr #(1) regc (~clk, reset, otfzerov, otfzero); + flopr #(1) regd (~clk, reset, enablev, enable); + flopr #(1) rege (~clk, reset, state0v, state0); + flopr #(1) regf (~clk, reset, divBusyv, divBusy); + + // To obtain a correct remainder the last bit of the + // quotient has to be aligned with a radix-r boundary. + // Since the quotient is in the range 1/2 < q < 2 (one + // integer bit and m fractional bits), this is achieved by + // shifting N right by v+s so that (m+v+s) mod k = 0. And, + // the quotient has to be aligned to the integer position. + + divide4x64 p3 (Qd, Rd, quotient, op1, op2, clk, reset, state0, + enable, otfzero, shiftResult); + + // Storage registers to hold contents stable + flopenr #(65) reg3 (clk, reset, enable, Rd, Rd2); + flopenr #(65) reg4 (clk, reset, enable, Qd, Qd2); + + // Probably not needed - just assigns results + assign Q = Qd2[63:0]; + assign Rem5 = Rd2[64:1]; + + // Adjust remainder by m + shift_right #(64) p4 (Rem5, RemShift, rem0); + + // Adjust Q/Rem for Signed + assign tcQ = (SignN ^ SignD) & S; + assign tcR = SignN & S; + // Signed Divide + // - When N and D are negative: Remainder is negative (undergoes a two's complement). 
+ // - When N is negative: Quotient and Remainder are both negative (undergo a two's complement). + // - When D is negative: Quotient is negative (undergoes a two's complement). + adder #(64) cpa4 ((rem0 ^ {64{tcR}}), {63'h0, tcR}, remT); + adder #(64) cpa5 ((Q ^ {64{tcQ}}), {63'h0, tcQ}, QT); + + // RISC-V has exceptions for divide by 0 and overflow (see Table 6.1 of spec) + exception_int exc (QT, remT, N, S, div0, Max_N, D_NegOne, Qf, remf); + +endmodule // int32div + +module divide4x64 (Q, rem0, quotient, op1, op2, clk, reset, state0, + enable, otfzero, shiftResult); + + input logic [63:0] op1, op2; + input logic clk, state0; + input logic reset; + input logic enable; + input logic otfzero; + input logic shiftResult; + + output logic [64:0] rem0; + output logic [64:0] Q; + output logic [3:0] quotient; + + logic [67:0] Sum, Carry; + logic [64:0] Qstar; + logic [64:0] QMstar; + logic [7:0] qtotal; + logic [67:0] SumN, CarryN, SumN2, CarryN2; + logic [67:0] divi1, divi2, divi1c, divi2c, dive1; + logic [67:0] mdivi_temp, mdivi; + logic zero; + logic [1:0] qsel; + logic [1:0] Qin, QMin; + logic CshiftQ, CshiftQM; + logic [67:0] rem1, rem2, rem3; + logic [67:0] SumR, CarryR; + logic [64:0] Qt; + + // Create one's complement values of Divisor (for q*D) + assign divi1 = {3'h0, op2, 1'b0}; + assign divi2 = {2'h0, op2, 2'b0}; + assign divi1c = ~divi1; + assign divi2c = ~divi2; + // Shift x1 if not mod k + mux2 #(68) mx1 ({3'b000, op1, 1'b0}, {4'h0, op1}, shiftResult, dive1); + + // I I I . F F F F F ... 
(Robertson Criteria - \rho * qmax * D) + mux2 #(68) mx2 ({CarryN2[65:0], 2'h0}, 68'h0, state0, CarryN); + mux2 #(68) mx3 ({SumN2[65:0], 2'h0}, dive1, state0, SumN); + // Simplify QST + adder #(8) cpa1 (SumN[67:60], CarryN[67:60], qtotal); + // q = {+2, +1, -1, -2} else q = 0 + qst4 pd1 (qtotal[7:1], divi1[63:61], quotient); + assign ulp = quotient[2]|quotient[3]; + assign zero = ~(quotient[3]|quotient[2]|quotient[1]|quotient[0]); + // Map to binary encoding + assign qsel[1] = quotient[3]|quotient[2]; + assign qsel[0] = quotient[3]|quotient[1]; + mux4 #(68) mx4 (divi2, divi1, divi1c, divi2c, qsel, mdivi_temp); + mux2 #(68) mx5 (mdivi_temp, 68'h0, zero, mdivi); + csa #(68) csa1 (mdivi, SumN, {CarryN[67:1], ulp}, Sum, Carry); + // regs : save CSA + flopenr #(68) reg1 (clk, reset, enable, Sum, SumN2); + flopenr #(68) reg2 (clk, reset, enable, Carry, CarryN2); + // OTF + ls_control otf1 (quotient, Qin, QMin, CshiftQ, CshiftQM); + otf #(65) otf2 (Qin, QMin, CshiftQ, CshiftQM, clk, + otfzero, enable, Qstar, QMstar); + + // Correction and generation of Remainder + adder #(68) cpa2 (SumN2[67:0], CarryN2[67:0], rem1); + // Add back +D as correction + csa #(68) csa2 (CarryN2[67:0], SumN2[67:0], divi1, SumR, CarryR); + adder #(68) cpa3 (SumR, CarryR, rem2); + // Choose remainder (Rem or Rem+D) + mux2 #(68) mx6 (rem1, rem2, rem1[67], rem3); + // Choose correct Q or QM + mux2 #(65) mx7 (Qstar, QMstar, rem1[67], Qt); + // Final results + assign rem0 = rem3[64:0]; + assign Q = Qt; + +endmodule // divide4x64 + +module ls_control (quot, Qin, QMin, CshiftQ, CshiftQM); + + input logic [3:0] quot; + + output logic [1:0] Qin; + output logic [1:0] QMin; + output logic CshiftQ; + output logic CshiftQM; + + // Load/Store Control for OTF + assign Qin[1] = (quot[1]) | (quot[3]) | (quot[0]); + assign Qin[0] = (quot[1]) | (quot[2]); + assign QMin[1] = (quot[1]) | (!quot[3]&!quot[2]&!quot[1]&!quot[0]); + assign QMin[0] = (quot[3]) | (quot[0]) | + (!quot[3]&!quot[2]&!quot[1]&!quot[0]); + assign 
CshiftQ = (quot[1]) | (quot[0]); + assign CshiftQM = (quot[3]) | (quot[2]); + +endmodule + +// On-the-fly Conversion per Ercegovac/Lang + +module otf #(parameter WIDTH=8) + (Qin, QMin, CshiftQ, CshiftQM, clk, reset, enable, R2Q, R1Q); + + input logic [1:0] Qin, QMin; + input logic CshiftQ, CshiftQM; + input logic clk; + input logic reset; + input logic enable; + + output logic [WIDTH-1:0] R2Q; + output logic [WIDTH-1:0] R1Q; + + logic [WIDTH-1:0] Qstar, QMstar; + logic [WIDTH-1:0] M1Q, M2Q; + + // QM + mux2 #(WIDTH) m1 (QMstar, Qstar, CshiftQM, M1Q); + flopenr #(WIDTH) r1 (clk, reset, enable, {M1Q[WIDTH-3:0], QMin}, R1Q); + // Q + mux2 #(WIDTH) m2 (Qstar, QMstar, CshiftQ, M2Q); + flopenr #(WIDTH) r2 (clk, reset, enable, {M2Q[WIDTH-3:0], Qin}, R2Q); + + assign Qstar = R2Q; + assign QMstar = R1Q; + +endmodule // otf8 + +module adder #(parameter WIDTH=8) (input logic [WIDTH-1:0] a, b, + output logic [WIDTH-1:0] y); + + assign y = a + b; + +endmodule // adder + +module fa (input logic a, b, c, output logic sum, carry); + + assign sum = a^b^c; + assign carry = a&b|a&c|b&c; + +endmodule // fa + +module csa #(parameter WIDTH=8) (input logic [WIDTH-1:0] a, b, c, + output logic [WIDTH-1:0] sum, carry); + + logic [WIDTH:0] carry_temp; + genvar i; + generate + for (i=0;i B. LT and GT are both '0' if A = B. + +module magcompare2b (LT, GT, A, B); + + input logic [1:0] A; + input logic [1:0] B; + + output logic LT; + output logic GT; + + // Determine if A < B using a minimized sum-of-products expression + assign LT = ~A[1]&B[1] | ~A[1]&~A[0]&B[0] | ~A[0]&B[1]&B[0]; + // Determine if A > B using a minimized sum-of-products expression + assign GT = A[1]&~B[1] | A[1]&A[0]&~B[0] | A[0]&~B[1]&~B[0]; + +endmodule // magcompare2b + +// J. E. Stine and M. J. Schulte, "A combined two's complement and +// floating-point comparator," 2005 IEEE International Symposium on +// Circuits and Systems, Kobe, 2005, pp. 89-92 Vol. 1. 
+// doi: 10.1109/ISCAS.2005.1464531 + +module magcompare8 (LT, EQ, A, B); + + input logic [7:0] A; + input logic [7:0] B; + + logic [3:0] s; + logic [3:0] t; + logic [1:0] u; + logic [1:0] v; + logic GT; + //wire LT; + + output logic EQ; + output logic LT; + + magcompare2b mag1 (s[0], t[0], A[1:0], B[1:0]); + magcompare2b mag2 (s[1], t[1], A[3:2], B[3:2]); + magcompare2b mag3 (s[2], t[2], A[5:4], B[5:4]); + magcompare2b mag4 (s[3], t[3], A[7:6], B[7:6]); + + magcompare2b mag5 (u[0], v[0], t[1:0], s[1:0]); + magcompare2b mag6 (u[1], v[1], t[3:2], s[3:2]); + + magcompare2b mag7 (LT, GT, v[1:0], u[1:0]); + + assign EQ = ~(GT | LT); + +endmodule // magcompare8 + +module exception_int (Q, rem, op1, S, div0, Max_N, D_NegOne, Qf, remf); + + input logic [63:0] Q; + input logic [63:0] rem; + input logic [63:0] op1; + input logic S; + input logic div0; + input logic Max_N; + input logic D_NegOne; + + output logic [63:0] Qf; + output logic [63:0] remf; + + // Needs to be optimized + always_comb + case ({div0, S, Max_N, D_NegOne}) + 4'b0000 : Qf = Q; + 4'b0001 : Qf = Q; + 4'b0010 : Qf = Q; + 4'b0011 : Qf = Q; + 4'b0100 : Qf = Q; + 4'b0101 : Qf = Q; + 4'b0110 : Qf = Q; + 4'b0111 : Qf = {1'b1, 31'h0}; + 4'b1000 : Qf = {64{1'b1}}; + 4'b1001 : Qf = {64{1'b1}}; + 4'b1010 : Qf = {64{1'b1}}; + 4'b1011 : Qf = {64{1'b1}}; + 4'b1100 : Qf = {64{1'b1}}; + 4'b1101 : Qf = {64{1'b1}}; + 4'b1110 : Qf = {64{1'b1}}; + 4'b1111 : Qf = {64{1'b1}}; + default: Qf = Q; + endcase + + always_comb + case ({div0, S, Max_N, D_NegOne}) + 4'b0000 : remf = rem; + 4'b0001 : remf = rem; + 4'b0010 : remf = rem; + 4'b0011 : remf = rem; + 4'b0100 : remf = rem; + 4'b0101 : remf = rem; + 4'b0110 : remf = rem; + 4'b0111 : remf = 64'h0; + 4'b1000 : remf = op1; + 4'b1001 : remf = op1; + 4'b1010 : remf = op1; + 4'b1011 : remf = op1; + 4'b1100 : remf = op1; + 4'b1101 : remf = op1; + 4'b1110 : remf = op1; + 4'b1111 : remf = op1; + default: remf = rem; + endcase + +endmodule // exception_int + +/* verilator lint_on 
COMBDLY */ +/* verilator lint_on IMPLICIT */ + diff --git a/wally-pipelined/src/muldiv/div.sv b/wally-pipelined/src/muldiv/div.sv index 4266ae61..107b002f 100755 --- a/wally-pipelined/src/muldiv/div.sv +++ b/wally-pipelined/src/muldiv/div.sv @@ -1,5 +1,5 @@ /////////////////////////////////////////// -// mul.sv +// divide4x64.sv // // Written: James.Stine@okstate.edu 1 February 2021 // Modified: @@ -29,54 +29,53 @@ /* verilator lint_off COMBDLY */ /* verilator lint_off IMPLICIT */ -`include "wally-config.vh" +module intdiv #(parameter WIDTH=64) + (Qf, remf, done, divBusy, div0, N, D, clk, reset, start, S); -module div (Qf, remf, done, divBusy, div0, N, D, clk, reset, start, S); - - input logic [63:0] N, D; - input logic clk; - input logic reset; - input logic start; - input logic S; + input logic [WIDTH-1:0] N, D; + input logic clk; + input logic reset; + input logic start; + input logic S; + + output logic [WIDTH-1:0] Qf; + output logic [WIDTH-1:0] remf; + output logic div0; + output logic done; + output logic divBusy; + + logic enable; + logic state0; + logic V; + logic [$clog2(WIDTH):0] Num; + logic [$clog2(WIDTH)-1:0] P, NumIter, RemShift; + logic [WIDTH-1:0] op1, op2, op1shift, Rem5; + logic [WIDTH:0] Qd, Rd, Qd2, Rd2; + logic [WIDTH-1:0] Q, rem0; + logic [3:0] quotient; + logic otfzero; + logic shiftResult; + logic enablev, state0v, donev, divdonev, oftzerov, divBusyv, ulp; + + logic [WIDTH-1:0] twoD; + logic [WIDTH-1:0] twoN; + logic SignD; + logic SignN; + logic [WIDTH-1:0] QT, remT; + logic D_NegOne; + logic Max_N; - output logic [63:0] Qf; - output logic [63:0] remf; - output logic div0; - output logic done; - output logic divBusy; - - logic divdone; - logic enable; - logic state0; - logic V; - logic [7:0] Num; - logic [5:0] P, NumIter, RemShift; - logic [63:0] op1, op2, op1shift, Rem5; - logic [64:0] Qd, Rd, Qd2, Rd2; - logic [63:0] Q, rem0; - logic [3:0] quotient; - logic otfzero; - logic shiftResult; - logic enablev, state0v, donev, divdonev, oftzerov, 
divBusyv, ulp; - - logic [63:0] twoD; - logic [63:0] twoN; - logic SignD; - logic SignN; - logic [63:0] QT, remT; - logic D_NegOne; - logic Max_N; // Check if negative (two's complement) // If so, convert to positive - adder #(64) cpa1 ((D ^ {64{D[63]&S}}), {63'h0, D[63]&S}, twoD); - adder #(64) cpa2 ((N ^ {64{N[63]&S}}), {63'h0, N[63]&S}, twoN); - assign SignD = D[63]; - assign SignN = N[63]; + adder #(WIDTH) cpa1 ((D ^ {WIDTH{D[WIDTH-1]&S}}), {{WIDTH-1{1'b0}}, D[WIDTH-1]&S}, twoD); + adder #(WIDTH) cpa2 ((N ^ {WIDTH{N[WIDTH-1]&S}}), {{WIDTH-1{1'b0}}, N[WIDTH-1]&S}, twoN); + assign SignD = D[WIDTH-1]; + assign SignN = N[WIDTH-1]; // Max N and D = -1 (Overflow) - assign Max_N = (~|N[62:0]) & N[63]; + assign Max_N = (~|N[WIDTH-2:0]) & N[WIDTH-1]; assign D_NegOne = &D; - + // Divider goes the distance to 37 cycles // (thanks to the evil divisor for D = 0x1) @@ -89,31 +88,31 @@ module div (Qf, remf, done, divBusy, div0, N, D, clk, reset, start, S); // exception is given to FSM to tell the operation to // quit gracefully. 
- lzd_hier #(64) p1 (.ZP(P), .ZV(V), .B(twoD)); - shift_left #(64) p2 (twoD, P, op2); - assign op1 = twoN; + lzd_hier #(WIDTH) p1 (.ZP(P), .ZV(V), .B(twoD)); + shift_left #(WIDTH) p2 (twoD, P, op2); + assign op1 = twoN; assign div0 = ~V; - // #iter: N = m+v+s = m+(s+2) = m+2+s (mod k = 0) + // #iter: N = m+v+s = m+2+s (mod k = 0) // v = 2 since \rho < 1 (add 4 to make sure its a ceil) - adder #(8) cpa3 ({2'b0, P}, - {5'h0, shiftResult, ~shiftResult, 1'b0}, - Num); + // k = 2 (r = 2^k) + adder #($clog2(WIDTH)+1) cpa3 ({1'b0, P}, + {{$clog2(WIDTH)+1-3{1'b0}}, shiftResult, ~shiftResult, 1'b0}, + Num); // Determine whether need to add just Q/Rem assign shiftResult = P[0]; // div by 2 (ceil) - assign NumIter = Num[6:1]; + assign NumIter = Num[$clog2(WIDTH):1]; assign RemShift = P; // FSM to control integer divider // assume inputs are postive edge and // datapath (divider) is negative edge - fsm64 fsm1 (enablev, state0v, donev, divdonev, otfzerov, divBusyv, - start, div0, NumIter, ~clk, reset); + fsm64 #($clog2(WIDTH)) fsm1 (enablev, state0v, donev, otfzerov, divBusyv, + start, div0, NumIter, ~clk, reset); flopr #(1) rega (~clk, reset, donev, done); - flopr #(1) regb (~clk, reset, divdonev, divdone); flopr #(1) regc (~clk, reset, otfzerov, otfzero); flopr #(1) regd (~clk, reset, enablev, enable); flopr #(1) rege (~clk, reset, state0v, state0); @@ -125,64 +124,66 @@ module div (Qf, remf, done, divBusy, div0, N, D, clk, reset, start, S); // integer bit and m fractional bits), this is achieved by // shifting N right by v+s so that (m+v+s) mod k = 0. And, // the quotient has to be aligned to the integer position. 
- - divide4x64 p3 (Qd, Rd, quotient, op1, op2, clk, reset, state0, - enable, otfzero, shiftResult); + divide4 #(WIDTH) p3 (Qd, Rd, quotient, op1, op2, clk, reset, state0, + enable, otfzero, shiftResult); // Storage registers to hold contents stable - flopenr #(65) reg3 (clk, reset, enable, Rd, Rd2); - flopenr #(65) reg4 (clk, reset, enable, Qd, Qd2); + flopenr #(WIDTH+1) reg3 (clk, reset, enable, Rd, Rd2); + flopenr #(WIDTH+1) reg4 (clk, reset, enable, Qd, Qd2); // Probably not needed - just assigns results - assign Q = Qd2[63:0]; - assign Rem5 = Rd2[64:1]; + assign Q = Qd2[WIDTH-1:0]; + assign Rem5 = Rd2[WIDTH:1]; - // Adjust remainder by m - shift_right #(64) p4 (Rem5, RemShift, rem0); + // Adjust remainder by m (no need to adjust by + shift_right #(WIDTH) p4 (Rem5, RemShift, rem0); // Adjust Q/Rem for Signed assign tcQ = (SignN ^ SignD) & S; assign tcR = SignN & S; - // Signed Divide + + // When Dividend (N) and/or Divisor (D) are negative (first bit is '1'): // - When N and D are negative: Remainder is negative (undergoes a two's complement). // - When N is negative: Quotient and Remainder are both negative (undergo a two's complement). // - When D is negative: Quotient is negative (undergoes a two's complement). 
- adder #(64) cpa4 ((rem0 ^ {64{tcR}}), {63'h0, tcR}, remT); - adder #(64) cpa5 ((Q ^ {64{tcQ}}), {63'h0, tcQ}, QT); + adder #(WIDTH) cpa4 ((rem0 ^ {WIDTH{tcR}}), {{WIDTH-1{1'b0}}, tcR}, remT); + adder #(WIDTH) cpa5 ((Q ^ {WIDTH{tcQ}}), {{WIDTH-1{1'b0}}, tcQ}, QT); // RISC-V has exceptions for divide by 0 and overflow (see Table 6.1 of spec) - exception_int exc (QT, remT, N, S, div0, Max_N, D_NegOne, Qf, remf); - + exception_int #(WIDTH) exc (QT, remT, N, S, div0, Max_N, D_NegOne, Qf, remf); + endmodule // int32div -module divide4x64 (Q, rem0, quotient, op1, op2, clk, reset, state0, - enable, otfzero, shiftResult); +// Division by Recurrence (r=4) +module divide4 #(parameter WIDTH=64) + (Q, rem0, quotient, op1, op2, clk, reset, state0, + enable, otfzero, shiftResult); - input logic [63:0] op1, op2; - input logic clk, state0; - input logic reset; - input logic enable; - input logic otfzero; - input logic shiftResult; + input logic [WIDTH-1:0] op1, op2; + input logic clk, state0; + input logic reset; + input logic enable; + input logic otfzero; + input logic shiftResult; - output logic [64:0] rem0; - output logic [64:0] Q; - output logic [3:0] quotient; + output logic [WIDTH:0] rem0; + output logic [WIDTH:0] Q; + output logic [3:0] quotient; - logic [67:0] Sum, Carry; - logic [64:0] Qstar; - logic [64:0] QMstar; - logic [7:0] qtotal; - logic [67:0] SumN, CarryN, SumN2, CarryN2; - logic [67:0] divi1, divi2, divi1c, divi2c, dive1; - logic [67:0] mdivi_temp, mdivi; - logic zero; - logic [1:0] qsel; - logic [1:0] Qin, QMin; - logic CshiftQ, CshiftQM; - logic [67:0] rem1, rem2, rem3; - logic [67:0] SumR, CarryR; - logic [64:0] Qt; + logic [WIDTH+3:0] Sum, Carry; + logic [WIDTH:0] Qstar; + logic [WIDTH:0] QMstar; + logic [7:0] qtotal; + logic [WIDTH+3:0] SumN, CarryN, SumN2, CarryN2; + logic [WIDTH+3:0] divi1, divi2, divi1c, divi2c, dive1; + logic [WIDTH+3:0] mdivi_temp, mdivi; + logic zero; + logic [1:0] qsel; + logic [1:0] Qin, QMin; + logic CshiftQ, CshiftQM; + logic 
[WIDTH+3:0] rem1, rem2, rem3; + logic [WIDTH+3:0] SumR, CarryR; + logic [WIDTH:0] Qt; // Create one's complement values of Divisor (for q*D) assign divi1 = {3'h0, op2, 1'b0}; @@ -190,42 +191,42 @@ module divide4x64 (Q, rem0, quotient, op1, op2, clk, reset, state0, assign divi1c = ~divi1; assign divi2c = ~divi2; // Shift x1 if not mod k - mux2 #(68) mx1 ({3'b000, op1, 1'b0}, {4'h0, op1}, shiftResult, dive1); + mux2 #(WIDTH+4) mx1 ({3'b000, op1, 1'b0}, {4'h0, op1}, shiftResult, dive1); // I I I . F F F F F ... (Robertson Criteria - \rho * qmax * D) - mux2 #(68) mx2 ({CarryN2[65:0], 2'h0}, 68'h0, state0, CarryN); - mux2 #(68) mx3 ({SumN2[65:0], 2'h0}, dive1, state0, SumN); + mux2 #(WIDTH+4) mx2 ({CarryN2[WIDTH+1:0], 2'h0}, {WIDTH+4{1'b0}}, state0, CarryN); + mux2 #(WIDTH+4) mx3 ({SumN2[WIDTH+1:0], 2'h0}, dive1, state0, SumN); // Simplify QST - adder #(8) cpa1 (SumN[67:60], CarryN[67:60], qtotal); + adder #(8) cpa1 (SumN[WIDTH+3:WIDTH-4], CarryN[WIDTH+3:WIDTH-4], qtotal); // q = {+2, +1, -1, -2} else q = 0 - qst4 pd1 (qtotal[7:1], divi1[63:61], quotient); + qst4 pd1 (qtotal[7:1], divi1[WIDTH-1:WIDTH-3], quotient); assign ulp = quotient[2]|quotient[3]; assign zero = ~(quotient[3]|quotient[2]|quotient[1]|quotient[0]); // Map to binary encoding assign qsel[1] = quotient[3]|quotient[2]; assign qsel[0] = quotient[3]|quotient[1]; - mux4 #(68) mx4 (divi2, divi1, divi1c, divi2c, qsel, mdivi_temp); - mux2 #(68) mx5 (mdivi_temp, 68'h0, zero, mdivi); - csa #(68) csa1 (mdivi, SumN, {CarryN[67:1], ulp}, Sum, Carry); + mux4 #(WIDTH+4) mx4 (divi2, divi1, divi1c, divi2c, qsel, mdivi_temp); + mux2 #(WIDTH+4) mx5 (mdivi_temp, {WIDTH+4{1'b0}}, zero, mdivi); + csa #(WIDTH+4) csa1 (mdivi, SumN, {CarryN[WIDTH+3:1], ulp}, Sum, Carry); // regs : save CSA - flopenr #(68) reg1 (clk, reset, enable, Sum, SumN2); - flopenr #(68) reg2 (clk, reset, enable, Carry, CarryN2); + flopenr #(WIDTH+4) reg1 (clk, reset, enable, Sum, SumN2); + flopenr #(WIDTH+4) reg2 (clk, reset, enable, Carry, CarryN2); // 
OTF ls_control otf1 (quotient, Qin, QMin, CshiftQ, CshiftQM); - otf #(65) otf2 (Qin, QMin, CshiftQ, CshiftQM, clk, - otfzero, enable, Qstar, QMstar); + otf #(WIDTH+1) otf2 (Qin, QMin, CshiftQ, CshiftQM, clk, + otfzero, enable, Qstar, QMstar); // Correction and generation of Remainder - adder #(68) cpa2 (SumN2[67:0], CarryN2[67:0], rem1); + adder #(WIDTH+4) cpa2 (SumN2[WIDTH+3:0], CarryN2[WIDTH+3:0], rem1); // Add back +D as correction - csa #(68) csa2 (CarryN2[67:0], SumN2[67:0], divi1, SumR, CarryR); - adder #(68) cpa3 (SumR, CarryR, rem2); + csa #(WIDTH+4) csa2 (CarryN2[WIDTH+3:0], SumN2[WIDTH+3:0], divi1, SumR, CarryR); + adder #(WIDTH+4) cpa3 (SumR, CarryR, rem2); // Choose remainder (Rem or Rem+D) - mux2 #(68) mx6 (rem1, rem2, rem1[67], rem3); + mux2 #(WIDTH+4) mx6 (rem1, rem2, rem1[WIDTH+3], rem3); // Choose correct Q or QM - mux2 #(65) mx7 (Qstar, QMstar, rem1[67], Qt); + mux2 #(WIDTH+1) mx7 (Qstar, QMstar, rem1[WIDTH+3], Qt); // Final results - assign rem0 = rem3[64:0]; + assign rem0 = rem3[WIDTH:0]; assign Q = Qt; endmodule // divide4x64 @@ -304,10 +305,9 @@ module csa #(parameter WIDTH=8) (input logic [WIDTH-1:0] a, b, c, fa fa_inst (a[i], b[i], c[i], sum[i], carry_temp[i+1]); end endgenerate - //assign carry = {1'b0, carry_temp[WIDTH-1:1], 1'b0}; // trmimmed excess bit dh 5/3/21 - assign carry = {carry_temp[WIDTH-1:1], 1'b0}; + assign carry = {1'b0, carry_temp[WIDTH-1:1], 1'b0}; -endmodule // adder +endmodule // csa module eqcmp #(parameter WIDTH = 8) (input logic [WIDTH-1:0] a, b, @@ -490,26 +490,24 @@ module lz64 (ZP, ZV, B); endmodule // lz64 // FSM Control for Integer Divider -module fsm64 (en, state0, done, divdone, otfzero, divBusy, - start, error, NumIter, clk, reset); +module fsm64 #(parameter WIDTH=6) + (en, state0, done, otfzero, divBusy, start, error, NumIter, clk, reset); - input logic [5:0] NumIter; - input logic clk; - input logic reset; - input logic start; - input logic error; + input logic [WIDTH-1:0] NumIter; + input logic clk; + input 
logic reset; + input logic start; + input logic error; - output logic done; - output logic en; - output logic state0; - output logic divdone; - output logic otfzero; - output logic divBusy; + output logic done; + output logic en; + output logic state0; + output logic otfzero; + output logic divBusy; - logic LT, EQ; - logic Divide0; - logic [5:0] CURRENT_STATE; - logic [5:0] NEXT_STATE; + logic LT, EQ; + logic [5:0] CURRENT_STATE; + logic [5:0] NEXT_STATE; parameter [5:0] S0=6'd0, S1=6'd1, S2=6'd2, @@ -534,12 +532,8 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, CURRENT_STATE<=NEXT_STATE; end - // Going to cheat and hard code number of states - // needed into FSM instead of using a counter - // FIXME: could counter be better - // Cheated and made 8 - let synthesis do its magic - magcompare8 comp1 (LT, EQ, {2'h0, CURRENT_STATE}, {2'h0, NumIter}); + magcompare8 comp1 (LT, EQ, {2'h0, CURRENT_STATE}, {{8-WIDTH{1'b0}}, NumIter}); always @(CURRENT_STATE or start) begin @@ -552,7 +546,6 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b0; divBusy = 1'b0; state0 = 1'b0; - divdone = 1'b0; done = 1'b0; NEXT_STATE <= S0; end @@ -560,30 +553,21 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, begin otfzero = 1'b0; en = 1'b1; - divBusy = 1'b1; + divBusy = 1'b1; state0 = 1'b1; - if (EQ) - divdone = 1'b1; - else - divdone = 1'b0; done = 1'b0; - divdone = 1'b0; NEXT_STATE <= S1; end end S1: begin - otfzero = 1'b0; - divBusy = 1'b1; + otfzero = 1'b0; + divBusy = 1'b1; if (LT|EQ) begin en = 1'b1; state0 = 1'b0; done = 1'b0; - if (EQ) - divdone = 1'b1; - else - divdone = 1'b0; NEXT_STATE <= S2; end else @@ -591,8 +575,7 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b0; state0 = 1'b0; done = 1'b0; - divdone = 1'b0; - NEXT_STATE <= S2; + NEXT_STATE <= S36; end end // case: S1 S2: @@ -604,10 +587,6 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b1; state0 = 1'b0; done = 1'b0; - if (EQ) - divdone 
= 1'b1; - else - divdone = 1'b0; NEXT_STATE <= S3; end // if (LT|EQ) else @@ -615,8 +594,7 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b0; state0 = 1'b0; done = 1'b0; - divdone = 1'b0; - NEXT_STATE <= S3; + NEXT_STATE <= S36; end end // case: S2 S3: @@ -628,10 +606,6 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b1; state0 = 1'b0; done = 1'b0; - if (EQ) - divdone = 1'b1; - else - divdone = 1'b0; NEXT_STATE <= S4; end else @@ -639,8 +613,7 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b0; state0 = 1'b0; done = 1'b0; - divdone = 1'b0; - NEXT_STATE <= S4; + NEXT_STATE <= S36; end end // case: S3 S4: @@ -652,10 +625,6 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b1; state0 = 1'b0; done = 1'b0; - if (EQ) - divdone = 1'b1; - else - divdone = 1'b0; NEXT_STATE <= S5; end else @@ -663,8 +632,7 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b0; state0 = 1'b0; done = 1'b0; - divdone = 1'b0; - NEXT_STATE <= S5; + NEXT_STATE <= S36; end end // case: S4 S5: @@ -676,10 +644,6 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b1; state0 = 1'b0; done = 1'b0; - if (EQ) - divdone = 1'b1; - else - divdone = 1'b0; NEXT_STATE <= S6; end // if (LT|EQ) else @@ -687,8 +651,7 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b0; state0 = 1'b0; done = 1'b0; - divdone = 1'b0; - NEXT_STATE <= S6; + NEXT_STATE <= S36; end end // case: S5 S6: @@ -700,10 +663,6 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b1; state0 = 1'b0; done = 1'b0; - if (EQ) - divdone = 1'b1; - else - divdone = 1'b0; NEXT_STATE <= S7; end // if (LT|EQ) else @@ -711,8 +670,7 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b0; state0 = 1'b0; done = 1'b0; - divdone = 1'b0; - NEXT_STATE <= S7; + NEXT_STATE <= S36; end end // case: S6 S7: @@ -724,10 +682,6 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b1; 
state0 = 1'b0; done = 1'b0; - if (EQ) - divdone = 1'b1; - else - divdone = 1'b0; NEXT_STATE <= S8; end // if (LT|EQ) else @@ -735,8 +689,7 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b0; state0 = 1'b0; done = 1'b0; - divdone = 1'b0; - NEXT_STATE <= S8; + NEXT_STATE <= S36; end end // case: S7 S8: @@ -748,10 +701,6 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b1; state0 = 1'b0; done = 1'b0; - if (EQ) - divdone = 1'b1; - else - divdone = 1'b0; NEXT_STATE <= S9; end // if (LT|EQ) else @@ -759,8 +708,7 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b0; state0 = 1'b0; done = 1'b0; - divdone = 1'b0; - NEXT_STATE <= S9; + NEXT_STATE <= S36; end end // case: S8 S9: @@ -772,10 +720,6 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b1; state0 = 1'b0; done = 1'b0; - if (EQ) - divdone = 1'b1; - else - divdone = 1'b0; NEXT_STATE <= S10; end // if (LT|EQ) else @@ -783,8 +727,7 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b0; state0 = 1'b0; done = 1'b0; - divdone = 1'b0; - NEXT_STATE <= S10; + NEXT_STATE <= S36; end end // case: S9 S10: @@ -796,10 +739,6 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b1; state0 = 1'b0; done = 1'b0; - if (EQ) - divdone = 1'b1; - else - divdone = 1'b0; NEXT_STATE <= S11; end // if (LT|EQ) else @@ -807,8 +746,7 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b0; state0 = 1'b0; done = 1'b0; - divdone = 1'b0; - NEXT_STATE <= S11; + NEXT_STATE <= S36; end end // case: S10 S11: @@ -820,10 +758,6 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b1; state0 = 1'b0; done = 1'b0; - if (EQ) - divdone = 1'b1; - else - divdone = 1'b0; NEXT_STATE <= S12; end // if (LT|EQ) else @@ -831,8 +765,7 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b0; state0 = 1'b0; done = 1'b0; - divdone = 1'b0; - NEXT_STATE <= S12; + NEXT_STATE <= S36; end end // case: S11 S12: @@ 
-844,10 +777,6 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b1; state0 = 1'b0; done = 1'b0; - if (EQ) - divdone = 1'b1; - else - divdone = 1'b0; NEXT_STATE <= S13; end // if (LT|EQ) else @@ -855,8 +784,7 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b0; state0 = 1'b0; done = 1'b0; - divdone = 1'b0; - NEXT_STATE <= S13; + NEXT_STATE <= S36; end end // case: S12 S13: @@ -868,10 +796,6 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b1; state0 = 1'b0; done = 1'b0; - if (EQ) - divdone = 1'b1; - else - divdone = 1'b0; NEXT_STATE <= S14; end // if (LT|EQ) else @@ -879,23 +803,18 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b0; state0 = 1'b0; done = 1'b0; - divdone = 1'b0; - NEXT_STATE <= S14; + NEXT_STATE <= S36; end end // case: S13 S14: begin otfzero = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; if (LT|EQ) begin en = 1'b1; state0 = 1'b0; done = 1'b0; - if (EQ) - divdone = 1'b1; - else - divdone = 1'b0; NEXT_STATE <= S15; end // if (LT|EQ) else @@ -903,23 +822,18 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b0; state0 = 1'b0; done = 1'b0; - divdone = 1'b0; - NEXT_STATE <= S15; + NEXT_STATE <= S36; end end // case: S14 S15: begin otfzero = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; if (LT|EQ) begin en = 1'b1; state0 = 1'b0; done = 1'b0; - if (EQ) - divdone = 1'b1; - else - divdone = 1'b0; NEXT_STATE <= S16; end // if (LT|EQ) else @@ -927,23 +841,18 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b0; state0 = 1'b0; done = 1'b0; - divdone = 1'b0; - NEXT_STATE <= S16; + NEXT_STATE <= S36; end end // case: S15 S16: begin otfzero = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; if (LT|EQ) begin en = 1'b1; state0 = 1'b0; done = 1'b0; - if (EQ) - divdone = 1'b1; - else - divdone = 1'b0; NEXT_STATE <= S17; end // if (LT|EQ) else @@ -951,23 +860,18 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b0; state0 = 1'b0; done = 1'b0; - 
divdone = 1'b0; - NEXT_STATE <= S17; + NEXT_STATE <= S36; end end // case: S16 S17: begin otfzero = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; if (LT|EQ) begin en = 1'b1; state0 = 1'b0; done = 1'b0; - if (EQ) - divdone = 1'b1; - else - divdone = 1'b0; NEXT_STATE <= S18; end // if (LT|EQ) else @@ -975,23 +879,18 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b0; state0 = 1'b0; done = 1'b0; - divdone = 1'b0; - NEXT_STATE <= S18; + NEXT_STATE <= S36; end end // case: S17 S18: begin otfzero = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; if (LT|EQ) begin en = 1'b1; state0 = 1'b0; done = 1'b0; - if (EQ) - divdone = 1'b1; - else - divdone = 1'b0; NEXT_STATE <= S19; end // if (LT|EQ) else @@ -999,23 +898,18 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b0; state0 = 1'b0; done = 1'b0; - divdone = 1'b0; - NEXT_STATE <= S19; + NEXT_STATE <= S36; end end // case: S18 S19: begin otfzero = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; if (LT|EQ) begin en = 1'b1; state0 = 1'b0; done = 1'b0; - if (EQ) - divdone = 1'b1; - else - divdone = 1'b0; NEXT_STATE <= S20; end // if (LT|EQ) else @@ -1023,23 +917,18 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b0; state0 = 1'b0; done = 1'b0; - divdone = 1'b0; - NEXT_STATE <= S20; + NEXT_STATE <= S36; end end // case: S19 S20: begin otfzero = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; if (LT|EQ) begin en = 1'b1; state0 = 1'b0; done = 1'b0; - if (EQ) - divdone = 1'b1; - else - divdone = 1'b0; NEXT_STATE <= S21; end // if (LT|EQ) else @@ -1047,23 +936,18 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b0; state0 = 1'b0; done = 1'b0; - divdone = 1'b0; - NEXT_STATE <= S21; + NEXT_STATE <= S36; end end // case: S20 S21: begin otfzero = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; if (LT|EQ) begin en = 1'b1; state0 = 1'b0; done = 1'b0; - if (EQ) - divdone = 1'b1; - else - divdone = 1'b0; NEXT_STATE <= S22; end // if (LT|EQ) else @@ -1071,23 +955,18 @@ module fsm64 (en, 
state0, done, divdone, otfzero, divBusy, en = 1'b0; state0 = 1'b0; done = 1'b0; - divdone = 1'b0; - NEXT_STATE <= S22; + NEXT_STATE <= S36; end end // case: S21 S22: begin otfzero = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; if (LT|EQ) begin en = 1'b1; state0 = 1'b0; done = 1'b0; - if (EQ) - divdone = 1'b1; - else - divdone = 1'b0; NEXT_STATE <= S23; end // if (LT|EQ) else @@ -1095,23 +974,18 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b0; state0 = 1'b0; done = 1'b0; - divdone = 1'b0; - NEXT_STATE <= S23; + NEXT_STATE <= S36; end end // case: S22 S23: begin otfzero = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; if (LT|EQ) begin en = 1'b1; state0 = 1'b0; done = 1'b0; - if (EQ) - divdone = 1'b1; - else - divdone = 1'b0; NEXT_STATE <= S24; end // if (LT|EQ) else @@ -1119,23 +993,18 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b0; state0 = 1'b0; done = 1'b0; - divdone = 1'b0; - NEXT_STATE <= S24; + NEXT_STATE <= S36; end end // case: S23 S24: begin otfzero = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; if (LT|EQ) begin en = 1'b1; state0 = 1'b0; done = 1'b0; - if (EQ) - divdone = 1'b1; - else - divdone = 1'b0; NEXT_STATE <= S25; end // if (LT|EQ) else @@ -1143,23 +1012,18 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b0; state0 = 1'b0; done = 1'b0; - divdone = 1'b0; - NEXT_STATE <= S25; + NEXT_STATE <= S36; end end // case: S24 S25: begin otfzero = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; if (LT|EQ) begin en = 1'b1; state0 = 1'b0; done = 1'b0; - if (EQ) - divdone = 1'b1; - else - divdone = 1'b0; NEXT_STATE <= S26; end // if (LT|EQ) else @@ -1167,23 +1031,18 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b0; state0 = 1'b0; done = 1'b0; - divdone = 1'b0; - NEXT_STATE <= S26; + NEXT_STATE <= S36; end end // case: S25 S26: begin otfzero = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; if (LT|EQ) begin en = 1'b1; state0 = 1'b0; done = 1'b0; - if (EQ) - divdone = 1'b1; - else - divdone = 1'b0; 
NEXT_STATE <= S27; end // if (LT|EQ) else @@ -1191,23 +1050,18 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b0; state0 = 1'b0; done = 1'b0; - divdone = 1'b0; - NEXT_STATE <= S27; + NEXT_STATE <= S36; end end // case: S26 S27: begin otfzero = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; if (LT|EQ) begin en = 1'b1; state0 = 1'b0; done = 1'b0; - if (EQ) - divdone = 1'b1; - else - divdone = 1'b0; NEXT_STATE <= S28; end // if (LT|EQ) else @@ -1215,23 +1069,18 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b0; state0 = 1'b0; done = 1'b0; - divdone = 1'b0; - NEXT_STATE <= S28; + NEXT_STATE <= S36; end end // case: S27 S28: begin otfzero = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; if (LT|EQ) begin en = 1'b1; state0 = 1'b0; done = 1'b0; - if (EQ) - divdone = 1'b1; - else - divdone = 1'b0; NEXT_STATE <= S29; end // if (LT|EQ) else @@ -1239,23 +1088,18 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b0; state0 = 1'b0; done = 1'b0; - divdone = 1'b0; - NEXT_STATE <= S29; + NEXT_STATE <= S36; end end // case: S28 S29: begin otfzero = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; if (LT|EQ) begin en = 1'b1; state0 = 1'b0; done = 1'b0; - if (EQ) - divdone = 1'b1; - else - divdone = 1'b0; NEXT_STATE <= S30; end // if (LT|EQ) else @@ -1263,23 +1107,18 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b0; state0 = 1'b0; done = 1'b0; - divdone = 1'b0; - NEXT_STATE <= S30; + NEXT_STATE <= S36; end end // case: S29 S30: begin otfzero = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; if (LT|EQ) begin en = 1'b1; state0 = 1'b0; done = 1'b0; - if (EQ) - divdone = 1'b1; - else - divdone = 1'b0; NEXT_STATE <= S31; end // if (LT|EQ) else @@ -1287,8 +1126,7 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b0; state0 = 1'b0; done = 1'b0; - divdone = 1'b0; - NEXT_STATE <= S31; + NEXT_STATE <= S36; end end // case: S30 S31: @@ -1300,10 +1138,6 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, 
en = 1'b1; state0 = 1'b0; done = 1'b0; - if (EQ) - divdone = 1'b1; - else - divdone = 1'b0; NEXT_STATE <= S32; end // if (LT|EQ) else @@ -1311,8 +1145,7 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b0; state0 = 1'b0; done = 1'b0; - divdone = 1'b0; - NEXT_STATE <= S32; + NEXT_STATE <= S36; end end // case: S31 S32: @@ -1324,10 +1157,6 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b1; state0 = 1'b0; done = 1'b0; - if (EQ) - divdone = 1'b1; - else - divdone = 1'b0; NEXT_STATE <= S33; end // if (LT|EQ) else @@ -1335,8 +1164,7 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b0; state0 = 1'b0; done = 1'b0; - divdone = 1'b0; - NEXT_STATE <= S33; + NEXT_STATE <= S36; end end // case: S32 S33: @@ -1348,10 +1176,6 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b1; state0 = 1'b0; done = 1'b0; - if (EQ) - divdone = 1'b1; - else - divdone = 1'b0; NEXT_STATE <= S34; end // if (LT|EQ) else @@ -1359,23 +1183,18 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b0; state0 = 1'b0; done = 1'b0; - divdone = 1'b0; - NEXT_STATE <= S34; + NEXT_STATE <= S36; end end // case: S33 S34: begin otfzero = 1'b0; - divBusy = 1'b1; + divBusy = 1'b1; if (LT|EQ) begin en = 1'b1; state0 = 1'b0; done = 1'b0; - if (EQ) - divdone = 1'b1; - else - divdone = 1'b0; NEXT_STATE <= S35; end // if (LT|EQ) else @@ -1383,8 +1202,7 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b0; state0 = 1'b0; done = 1'b0; - divdone = 1'b0; - NEXT_STATE <= S35; + NEXT_STATE <= S36; end end // case: S34 S35: @@ -1396,10 +1214,6 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b1; state0 = 1'b0; done = 1'b0; - if (EQ) - divdone = 1'b1; - else - divdone = 1'b0; NEXT_STATE <= S36; end // if (LT|EQ) else @@ -1407,7 +1221,6 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, en = 1'b0; state0 = 1'b0; done = 1'b0; - divdone = 1'b0; NEXT_STATE <= S36; end end // case: S35 @@ 
-1419,12 +1232,10 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, done = 1'b1; if (EQ) begin - divdone = 1'b1; en = 1'b1; end else begin - divdone = 1'b0; en = 1'b0; end NEXT_STATE <= S0; @@ -1432,11 +1243,10 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy, default: begin otfzero = 1'b0; - divBusy = 1'b1; + divBusy = 1'b0; en = 1'b0; state0 = 1'b0; done = 1'b0; - divdone = 1'b0; NEXT_STATE <= S0; end endcase // case(CURRENT_STATE) @@ -1497,38 +1307,39 @@ module magcompare8 (LT, EQ, A, B); endmodule // magcompare8 -module exception_int (Q, rem, op1, S, div0, Max_N, D_NegOne, Qf, remf); +// RISC-V Exception Logic for Divide by 0 and Overflow (Signed Integer Divide) +module exception_int #(parameter WIDTH=8) + (Q, rem, op1, S, div0, Max_N, D_NegOne, Qf, remf); - input logic [63:0] Q; - input logic [63:0] rem; - input logic [63:0] op1; - input logic S; - input logic div0; - input logic Max_N; - input logic D_NegOne; + input logic [WIDTH-1:0] Q; + input logic [WIDTH-1:0] rem; + input logic [WIDTH-1:0] op1; + input logic S; + input logic div0; + input logic Max_N; + input logic D_NegOne; - output logic [63:0] Qf; - output logic [63:0] remf; + output logic [WIDTH-1:0] Qf; + output logic [WIDTH-1:0] remf; - // Needs to be optimized always_comb case ({div0, S, Max_N, D_NegOne}) 4'b0000 : Qf = Q; 4'b0001 : Qf = Q; - 4'b0010 : Qf = Q; - 4'b0011 : Qf = Q; + 4'b0010 : Qf = Q; + 4'b0011 : Qf = Q; 4'b0100 : Qf = Q; - 4'b0101 : Qf = Q; + 4'b0101 : Qf = Q; 4'b0110 : Qf = Q; - 4'b0111 : Qf = {1'b1, 31'h0}; - 4'b1000 : Qf = {64{1'b1}}; - 4'b1001 : Qf = {64{1'b1}}; - 4'b1010 : Qf = {64{1'b1}}; - 4'b1011 : Qf = {64{1'b1}}; - 4'b1100 : Qf = {64{1'b1}}; - 4'b1101 : Qf = {64{1'b1}}; - 4'b1110 : Qf = {64{1'b1}}; - 4'b1111 : Qf = {64{1'b1}}; + 4'b0111 : Qf = {1'b1, {WIDTH-1{1'h0}}}; + 4'b1000 : Qf = {WIDTH{1'b1}}; + 4'b1001 : Qf = {WIDTH{1'b1}}; + 4'b1010 : Qf = {WIDTH{1'b1}}; + 4'b1011 : Qf = {WIDTH{1'b1}}; + 4'b1100 : Qf = {WIDTH{1'b1}}; + 4'b1101 : Qf = 
{WIDTH{1'b1}}; + 4'b1110 : Qf = {WIDTH{1'b1}}; + 4'b1111 : Qf = {WIDTH{1'b1}}; default: Qf = Q; endcase @@ -1536,18 +1347,18 @@ module exception_int (Q, rem, op1, S, div0, Max_N, D_NegOne, Qf, remf); case ({div0, S, Max_N, D_NegOne}) 4'b0000 : remf = rem; 4'b0001 : remf = rem; - 4'b0010 : remf = rem; + 4'b0010 : remf = rem; 4'b0011 : remf = rem; 4'b0100 : remf = rem; 4'b0101 : remf = rem; 4'b0110 : remf = rem; - 4'b0111 : remf = 64'h0; + 4'b0111 : remf = {WIDTH{1'h0}}; 4'b1000 : remf = op1; 4'b1001 : remf = op1; 4'b1010 : remf = op1; 4'b1011 : remf = op1; 4'b1100 : remf = op1; - 4'b1101 : remf = op1; + 4'b1101 : remf = op1; 4'b1110 : remf = op1; 4'b1111 : remf = op1; default: remf = rem; @@ -1557,4 +1368,3 @@ endmodule // exception_int /* verilator lint_on COMBDLY */ /* verilator lint_on IMPLICIT */ - diff --git a/wally-pipelined/src/muldiv/muldiv.sv b/wally-pipelined/src/muldiv/muldiv.sv index 17c4aac5..f4096fd1 100644 --- a/wally-pipelined/src/muldiv/muldiv.sv +++ b/wally-pipelined/src/muldiv/muldiv.sv @@ -78,7 +78,7 @@ module muldiv ( .en(startDivideE), .clear(DivDoneE), .reset(reset), .clk(~gclk)); assign signedDivide = (Funct3E[2]&~Funct3E[1]&~Funct3E[0]) | (Funct3E[2]&Funct3E[1]&~Funct3E[0]); - div div (QuotE, RemE, DivDoneE, DivBusyE, div0error, N, D, gclk, reset, startDivideE, signedDivide); + intdiv #(`XLEN) div (QuotE, RemE, DivDoneE, DivBusyE, div0error, N, D, gclk, reset, startDivideE, signedDivide); // Added for debugging of start signal for divide assign startDivideE = MulDivE&DivStartE&~DivBusyE; @@ -93,7 +93,6 @@ module muldiv ( // Select result always_comb - // case (DivDoneE ? Funct3E_Q : Funct3E) case (Funct3E) 3'b000: PrelimResultE = ProdE[`XLEN-1:0]; 3'b001: PrelimResultE = ProdE[`XLEN*2-1:`XLEN]; From 46a232b862249262e91fd0241c48f7b662bac599 Mon Sep 17 00:00:00 2001 From: "James E. 
Stine" Date: Mon, 31 May 2021 09:16:30 -0400 Subject: [PATCH 4/4] Cosmetic changes on integer divider --- wally-pipelined/src/muldiv/div.sv | 7 ++++--- wally-pipelined/src/muldiv/muldiv.sv | 1 - 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/wally-pipelined/src/muldiv/div.sv b/wally-pipelined/src/muldiv/div.sv index 107b002f..8b4e0463 100755 --- a/wally-pipelined/src/muldiv/div.sv +++ b/wally-pipelined/src/muldiv/div.sv @@ -55,7 +55,7 @@ module intdiv #(parameter WIDTH=64) logic [3:0] quotient; logic otfzero; logic shiftResult; - logic enablev, state0v, donev, divdonev, oftzerov, divBusyv, ulp; + logic enablev, state0v, donev, oftzerov, divBusyv, ulp; logic [WIDTH-1:0] twoD; logic [WIDTH-1:0] twoN; @@ -231,6 +231,7 @@ module divide4 #(parameter WIDTH=64) endmodule // divide4x64 +// Load/Control for OTFC module ls_control (quot, Qin, QMin, CshiftQ, CshiftQM); input logic [3:0] quot; @@ -251,8 +252,7 @@ module ls_control (quot, Qin, QMin, CshiftQ, CshiftQM); endmodule -// On-the-fly Conversion per Ercegovac/Lang - +// On-the-fly Conversion (OTFC) module otf #(parameter WIDTH=8) (Qin, QMin, CshiftQ, CshiftQM, clk, reset, enable, R2Q, R1Q); @@ -317,6 +317,7 @@ module eqcmp #(parameter WIDTH = 8) endmodule // eqcmp +// QST for r=4 module qst4 (input logic [6:0] s, input logic [2:0] d, output logic [3:0] q); diff --git a/wally-pipelined/src/muldiv/muldiv.sv b/wally-pipelined/src/muldiv/muldiv.sv index f4096fd1..ccabe341 100644 --- a/wally-pipelined/src/muldiv/muldiv.sv +++ b/wally-pipelined/src/muldiv/muldiv.sv @@ -47,7 +47,6 @@ module muldiv ( logic [`XLEN-1:0] MulDivResultE, MulDivResultM; logic [`XLEN-1:0] PrelimResultE; logic [`XLEN-1:0] QuotE, RemE; - //logic [`XLEN-1:0] Q, R; logic [`XLEN*2-1:0] ProdE; logic enable_q;