From 60cfa0d69cdeadaae7151b1bb57d7e7cd5191c27 Mon Sep 17 00:00:00 2001 From: David Harris Date: Fri, 4 Nov 2022 15:21:09 -0700 Subject: [PATCH 01/10] HPTW cleanup --- pipelined/src/mmu/hptw.sv | 118 +++++++++++++------------------- pipelined/src/mmu/tlb.sv | 20 +++--- pipelined/src/mmu/tlbcontrol.sv | 14 +--- pipelined/src/mmu/vm64check.sv | 50 ++++++++++++++ 4 files changed, 110 insertions(+), 92 deletions(-) create mode 100644 pipelined/src/mmu/vm64check.sv diff --git a/pipelined/src/mmu/hptw.sv b/pipelined/src/mmu/hptw.sv index 7b303ff43..e2b2573ed 100644 --- a/pipelined/src/mmu/hptw.sv +++ b/pipelined/src/mmu/hptw.sv @@ -42,7 +42,7 @@ module hptw input logic [1:0] STATUS_MPP, input logic [1:0] PrivilegeModeW, (* mark_debug = "true" *) input logic ITLBMissOrDAFaultNoTrapF, DTLBMissOrDAFaultNoTrapM, // TLB Miss - input logic [`XLEN-1:0] HPTWReadPTE, // page table entry from LSU + input logic [`XLEN-1:0] HPTWReadPTE, // page table entry from LSU *** change to ReadDataM input logic DCacheStallM, // stall from LSU output logic [`XLEN-1:0] PTE, // page table entry to TLBs output logic [1:0] PageType, // page type to TLBs @@ -106,7 +106,6 @@ module hptw if(`HPTW_WRITES_SUPPORTED) begin : hptwwrites - logic SV39Mode; logic ReadAccess, WriteAccess; logic InvalidRead, InvalidWrite; logic UpperBitsUnequalPageFault; @@ -136,19 +135,9 @@ module hptw assign ImproperPrivilege = ((EffectivePrivilegeMode == `U_MODE) & ~PTE_U) | ((EffectivePrivilegeMode == `S_MODE) & PTE_U & (~STATUS_SUM & DTLBWalk)); - // *** turn into module common with code in tlbcontrol. - if (`XLEN==64) begin:rv64 - assign SV39Mode = (SATP_REGW[`XLEN-1:`XLEN-`SVMODE_BITS] == `SV39); - // page fault if upper bits aren't all the same - logic UpperEqual39, UpperEqual48; - assign UpperEqual39 = &(TranslationVAdr[63:38]) | ~|(TranslationVAdr[63:38]); - assign UpperEqual48 = &(TranslationVAdr[63:47]) | ~|(TranslationVAdr[63:47]); - assign UpperBitsUnequalPageFault = SV39Mode ? 
~UpperEqual39 : ~UpperEqual48; - end else begin - assign SV39Mode = 0; - assign UpperBitsUnequalPageFault = 0; - end - + // Check for page faults + vm64check vm64check(.SATP_MODE(SATP_REGW[`XLEN-1:`XLEN-`SVMODE_BITS]), .VAdr(TranslationVAdr), + .SV39Mode(), .UpperBitsUnequalPageFault); assign InvalidRead = ReadAccess & ~Readable & (~STATUS_MXR | ~Executable); assign InvalidWrite = WriteAccess & ~Writable; assign OtherPageFault = DTLBWalk? ImproperPrivilege | InvalidRead | InvalidWrite | UpperBitsUnequalPageFault | Misaligned | ~Valid : @@ -190,26 +179,26 @@ module hptw // HPTWAdr muxing if (`XLEN==32) begin // RV32 - logic [9:0] VPN; - logic [`PPN_BITS-1:0] PPN; - assign VPN = ((WalkerState == L1_ADR) | (WalkerState == L1_RD)) ? TranslationVAdr[31:22] : TranslationVAdr[21:12]; // select VPN field based on HPTW state - assign PPN = ((WalkerState == L1_ADR) | (WalkerState == L1_RD)) ? BasePageTablePPN : CurrentPPN; - assign HPTWReadAdr = {PPN, VPN, 2'b00}; - assign HPTWSize = 3'b010; + logic [9:0] VPN; + logic [`PPN_BITS-1:0] PPN; + assign VPN = ((WalkerState == L1_ADR) | (WalkerState == L1_RD)) ? TranslationVAdr[31:22] : TranslationVAdr[21:12]; // select VPN field based on HPTW state + assign PPN = ((WalkerState == L1_ADR) | (WalkerState == L1_RD)) ? BasePageTablePPN : CurrentPPN; + assign HPTWReadAdr = {PPN, VPN, 2'b00}; + assign HPTWSize = 3'b010; end else begin // RV64 - logic [8:0] VPN; - logic [`PPN_BITS-1:0] PPN; - always_comb - case (WalkerState) // select VPN field based on HPTW state - L3_ADR, L3_RD: VPN = TranslationVAdr[47:39]; - L2_ADR, L2_RD: VPN = TranslationVAdr[38:30]; - L1_ADR, L1_RD: VPN = TranslationVAdr[29:21]; - default: VPN = TranslationVAdr[20:12]; - endcase - assign PPN = ((WalkerState == L3_ADR) | (WalkerState == L3_RD) | - (SvMode != `SV48 & ((WalkerState == L2_ADR) | (WalkerState == L2_RD)))) ? 
BasePageTablePPN : CurrentPPN; - assign HPTWReadAdr = {PPN, VPN, 3'b000}; - assign HPTWSize = 3'b011; + logic [8:0] VPN; + logic [`PPN_BITS-1:0] PPN; + always_comb + case (WalkerState) // select VPN field based on HPTW state + L3_ADR, L3_RD: VPN = TranslationVAdr[47:39]; + L2_ADR, L2_RD: VPN = TranslationVAdr[38:30]; + L1_ADR, L1_RD: VPN = TranslationVAdr[29:21]; + default: VPN = TranslationVAdr[20:12]; + endcase + assign PPN = ((WalkerState == L3_ADR) | (WalkerState == L3_RD) | + (SvMode != `SV48 & ((WalkerState == L2_ADR) | (WalkerState == L2_RD)))) ? BasePageTablePPN : CurrentPPN; + assign HPTWReadAdr = {PPN, VPN, 3'b000}; + assign HPTWSize = 3'b011; end // Initial state and misalignment for RV32/64 @@ -228,44 +217,33 @@ module hptw end // Page Table Walker FSM - // If the setup time on the D$ RAM is short, it should be possible to merge the LEVELx_READ and LEVELx states - // to decrease the latency of the HPTW. However, if the D$ is a cycle limiter, it's better to leave the - // HPTW as shown below to keep the D$ setup time out of the critical path. - // *** Is this really true. Talk with Ross. Seems like it's the next state logic on critical path instead. 
- // *** address TYPE(statetype) flopenl #(.TYPE(statetype)) WalkerStateReg(clk, reset, 1'b1, NextWalkerState, IDLE, WalkerState); always_comb - case (WalkerState) - IDLE: if (TLBMiss) NextWalkerState = InitialWalkerState; - else NextWalkerState = IDLE; - L3_ADR: NextWalkerState = L3_RD; // first access in SV48 - L3_RD: if (DCacheStallM) NextWalkerState = L3_RD; - else NextWalkerState = L2_ADR; - L2_ADR: if (InitialWalkerState == L2_ADR) NextWalkerState = L2_RD; // first access in SV39 - else if (ValidLeafPTE & ~Misaligned) NextWalkerState = LEAF; // could shortcut this by a cyle for all Lx_ADR superpages - else if (ValidNonLeafPTE) NextWalkerState = L2_RD; - else NextWalkerState = LEAF; - L2_RD: if (DCacheStallM) NextWalkerState = L2_RD; - else NextWalkerState = L1_ADR; - L1_ADR: if (InitialWalkerState == L1_ADR) NextWalkerState = L1_RD; // first access in SV32 - else if (ValidLeafPTE & ~Misaligned) NextWalkerState = LEAF; // could shortcut this by a cyle for all Lx_ADR superpages - else if (ValidNonLeafPTE) NextWalkerState = L1_RD; - else NextWalkerState = LEAF; - L1_RD: if (DCacheStallM) NextWalkerState = L1_RD; - else NextWalkerState = L0_ADR; - L0_ADR: if (ValidLeafPTE & ~Misaligned) NextWalkerState = LEAF; // could shortcut this by a cyle for all Lx_ADR superpages - else if (ValidNonLeafPTE) NextWalkerState = L0_RD; - else NextWalkerState = LEAF; - L0_RD: if (DCacheStallM) NextWalkerState = L0_RD; - else NextWalkerState = LEAF; - LEAF: if (DAPageFault) NextWalkerState = UPDATE_PTE; - else NextWalkerState = IDLE; - UPDATE_PTE: if(`HPTW_WRITES_SUPPORTED & DCacheStallM) NextWalkerState = UPDATE_PTE; - else NextWalkerState = LEAF; - default: begin - NextWalkerState = IDLE; // should never be reached - end - endcase // case (WalkerState) + case (WalkerState) + IDLE: if (TLBMiss) NextWalkerState = InitialWalkerState; + else NextWalkerState = IDLE; + L3_ADR: NextWalkerState = L3_RD; // first access in SV48 + L3_RD: if (DCacheStallM) NextWalkerState = L3_RD; + else 
NextWalkerState = L2_ADR; + L2_ADR: if (InitialWalkerState == L2_ADR | ValidNonLeafPTE) NextWalkerState = L2_RD; // first access in SV39 + else NextWalkerState = LEAF; + L2_RD: if (DCacheStallM) NextWalkerState = L2_RD; + else NextWalkerState = L1_ADR; + L1_ADR: if (InitialWalkerState == L1_ADR | ValidNonLeafPTE) NextWalkerState = L1_RD; // first access in SV32 + else if (ValidNonLeafPTE) NextWalkerState = L1_RD; + else NextWalkerState = LEAF; + L1_RD: if (DCacheStallM) NextWalkerState = L1_RD; + else NextWalkerState = L0_ADR; + L0_ADR: if (ValidNonLeafPTE) NextWalkerState = L0_RD; + else NextWalkerState = LEAF; + L0_RD: if (DCacheStallM) NextWalkerState = L0_RD; + else NextWalkerState = LEAF; + LEAF: if (DAPageFault) NextWalkerState = UPDATE_PTE; + else NextWalkerState = IDLE; + UPDATE_PTE: if(`HPTW_WRITES_SUPPORTED & DCacheStallM) NextWalkerState = UPDATE_PTE; + else NextWalkerState = LEAF; + default: NextWalkerState = IDLE; // should never be reached + endcase // case (WalkerState) assign IgnoreRequestTLB = WalkerState == IDLE & TLBMiss; assign SelHPTW = WalkerState != IDLE; diff --git a/pipelined/src/mmu/tlb.sv b/pipelined/src/mmu/tlb.sv index 6954e1d95..2f4fd5560 100644 --- a/pipelined/src/mmu/tlb.sv +++ b/pipelined/src/mmu/tlb.sv @@ -116,16 +116,16 @@ module tlb #(parameter TLB_ENTRIES = 8, // we cache Misaligned along with the PTE? This only has to be computed once // in the hptw as it is always the same regardless of the VPN. 
if(`XLEN == 32) begin - assign MegapageMisaligned = |(PPN[9:0]); // must have zero PPN0 - assign Misaligned = (HitPageType == 2'b01) & MegapageMisaligned; + assign MegapageMisaligned = |(PPN[9:0]); // must have zero PPN0 + assign Misaligned = (HitPageType == 2'b01) & MegapageMisaligned; end else begin - logic GigapageMisaligned, TerapageMisaligned; - assign TerapageMisaligned = |(PPN[26:0]); // must have zero PPN2, PPN1, PPN0 - assign GigapageMisaligned = |(PPN[17:0]); // must have zero PPN1 and PPN0 - assign MegapageMisaligned = |(PPN[8:0]); // must have zero PPN0 - assign Misaligned = ((HitPageType == 2'b11) & TerapageMisaligned) | - ((HitPageType == 2'b10) & GigapageMisaligned) | - ((HitPageType == 2'b01) & MegapageMisaligned); + logic GigapageMisaligned, TerapageMisaligned; + assign TerapageMisaligned = |(PPN[26:0]); // must have zero PPN2, PPN1, PPN0 + assign GigapageMisaligned = |(PPN[17:0]); // must have zero PPN1 and PPN0 + assign MegapageMisaligned = |(PPN[8:0]); // must have zero PPN0 + assign Misaligned = ((HitPageType == 2'b11) & TerapageMisaligned) | + ((HitPageType == 2'b10) & GigapageMisaligned) | + ((HitPageType == 2'b01) & MegapageMisaligned); end assign VPN = VAdr[`VPN_BITS+11:12]; @@ -137,7 +137,7 @@ module tlb #(parameter TLB_ENTRIES = 8, tlblru #(TLB_ENTRIES) lru(.clk, .reset, .TLBWrite, .TLBFlush, .Matches, .CAMHit, .WriteEnables); tlbcam #(TLB_ENTRIES, `VPN_BITS + `ASID_BITS, `VPN_SEGMENT_BITS) - tlbcam(.clk, .reset, .VPN, .PageTypeWriteVal, .SV39Mode, .TLBFlush, .WriteEnables, .PTE_Gs, + tlbcam(.clk, .reset, .VPN, .PageTypeWriteVal, .SV39Mode, .TLBFlush, .WriteEnables, .PTE_Gs, .SATP_ASID, .Matches, .HitPageType, .CAMHit); tlbram #(TLB_ENTRIES) tlbram(.clk, .reset, .PTE, .Matches, .WriteEnables, .PPN, .PTEAccessBits, .PTE_Gs); diff --git a/pipelined/src/mmu/tlbcontrol.sv b/pipelined/src/mmu/tlbcontrol.sv index 5a9e4852d..8b3da2f35 100644 --- a/pipelined/src/mmu/tlbcontrol.sv +++ b/pipelined/src/mmu/tlbcontrol.sv @@ -68,22 +68,12 @@ module 
tlbcontrol #(parameter ITLB = 0) ( // Grab the sv mode from SATP and determine whether translation should occur assign EffectivePrivilegeMode = (ITLB == 1) ? PrivilegeModeW : (STATUS_MPRV ? STATUS_MPP : PrivilegeModeW); // DTLB uses MPP mode when MPRV is 1 assign Translate = (SATP_MODE != `NO_TRANSLATE) & (EffectivePrivilegeMode != `M_MODE) & ~DisableTranslation; - if (`XLEN==64) begin:rv64 - assign SV39Mode = (SATP_MODE == `SV39); - // page fault if upper bits aren't all the same - logic UpperEqual39, UpperEqual48; - assign UpperEqual39 = &(VAdr[63:38]) | ~|(VAdr[63:38]); - assign UpperEqual48 = &(VAdr[63:47]) | ~|(VAdr[63:47]); - assign UpperBitsUnequalPageFault = SV39Mode ? ~UpperEqual39 : ~UpperEqual48; - end else begin - assign SV39Mode = 0; - assign UpperBitsUnequalPageFault = 0; - end // Determine whether TLB is being used assign TLBAccess = ReadAccess | WriteAccess; // Check whether upper bits of virtual addresss are all equal + vm64check vm64check(.SATP_MODE, .VAdr, .SV39Mode, .UpperBitsUnequalPageFault); // unswizzle useful PTE bits assign {PTE_D, PTE_A} = PTEAccessBits[7:6]; @@ -99,7 +89,7 @@ module tlbcontrol #(parameter ITLB = 0) ( assign DAPageFault = Translate & TLBHit & ~PTE_A & ~TLBPageFault; assign TLBPageFault = (Translate & TLBHit & (ImproperPrivilege | ~PTE_X | UpperBitsUnequalPageFault | Misaligned | ~PTE_V)); end else begin - // fault for software handling if access bit is off + // fault for software handling if access bit is off assign DAPageFault = ~PTE_A; assign TLBPageFault = (Translate & TLBHit & (ImproperPrivilege | ~PTE_X | DAPageFault | UpperBitsUnequalPageFault | Misaligned | ~PTE_V)); end diff --git a/pipelined/src/mmu/vm64check.sv b/pipelined/src/mmu/vm64check.sv new file mode 100644 index 000000000..cedeb5267 --- /dev/null +++ b/pipelined/src/mmu/vm64check.sv @@ -0,0 +1,50 @@ +/////////////////////////////////////////// +// vm64check.sv +// +// Written: David_Harris@hmc.edu 4 November 2022 +// Modified: +// +// Purpose: Check for 
good upper address bits in RV64 mode +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// MIT LICENSE +// Permission is hereby granted, free of charge, to any person obtaining a copy of this +// software and associated documentation files (the "Software"), to deal in the Software +// without restriction, including without limitation the rights to use, copy, modify, merge, +// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons +// to whom the Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or +// substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. +//////////////////////////////////////////////////////////////////////////////////////////////// + +`include "wally-config.vh" + +module vm64check ( + input logic [`SVMODE_BITS-1:0] SATP_MODE, + input logic [`XLEN-1:0] VAdr, + output logic SV39Mode, UpperBitsUnequalPageFault +); + + if (`XLEN==64) begin:rv64 + assign SV39Mode = (SATP_MODE == `SV39); + // page fault if upper bits aren't all the same + logic UpperEqual39, UpperEqual48; + assign UpperEqual39 = &(VAdr[63:38]) | ~|(VAdr[63:38]); + assign UpperEqual48 = &(VAdr[63:47]) | ~|(VAdr[63:47]); + assign UpperBitsUnequalPageFault = SV39Mode ? 
~UpperEqual39 : ~UpperEqual48; + end else begin + assign SV39Mode = 0; + assign UpperBitsUnequalPageFault = 0; + end +endmodule From 53a88fec8f104537cc236c49ef35b43be8a8e4ff Mon Sep 17 00:00:00 2001 From: David Harris Date: Fri, 4 Nov 2022 15:21:51 -0700 Subject: [PATCH 02/10] Reorder embench tests to prevent crash --- pipelined/testbench/tests.vh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelined/testbench/tests.vh b/pipelined/testbench/tests.vh index 91d3dcf12..633ecb81d 100644 --- a/pipelined/testbench/tests.vh +++ b/pipelined/testbench/tests.vh @@ -55,9 +55,9 @@ string tvpaths[] = '{ "bd_speedopt_speed/src/matmult-int/matmult-int", // "bd_speedopt_speed/src/md5sum/md5sum", //commenting out tests from embench 2.0. When embench 2.0 launches stabilty, add these tests back "bd_speedopt_speed/src/minver/minver", - "bd_speedopt_speed/src/nbody/nbody", "bd_speedopt_speed/src/nettle-aes/nettle-aes", "bd_speedopt_speed/src/nettle-sha256/nettle-sha256", + "bd_speedopt_speed/src/nbody/nbody", "bd_speedopt_speed/src/nsichneu/nsichneu", "bd_speedopt_speed/src/picojpeg/picojpeg", // "bd_speedopt_speed/src/primecount/primecount", From 90ef371abc97b01ab73bf6f31bf17eb40132585f Mon Sep 17 00:00:00 2001 From: Kip Macsai-Goren Date: Sat, 5 Nov 2022 13:34:24 -0700 Subject: [PATCH 03/10] fixed fifo timeout handling. 
error now in data ready interrupt --- .../rv32i_m/privilege/src/WALLY-TEST-LIB-32.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-TEST-LIB-32.h b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-TEST-LIB-32.h index 454d05be5..ca197876c 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-TEST-LIB-32.h +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-TEST-LIB-32.h @@ -1072,9 +1072,9 @@ uart_data_wait: li t3, 0x10000002 // IIR li a4, 0x61 uart_read_LSR_IIR: - lb t4, 0(t3) // save IIR before reading LSR mgith clear it + lbu t4, 0(t3) // save IIR before reading LSR might clear it // check if IIR is the rxfifotimeout interrupt. if it is, then read the fifo then go back and repeat this. - li t5, 6 + li t5, 0xCC // Value in IIR for Fifo Enabled, with timeout interrupt pending beq t4, t5, uart_rxfifo_timout lb t5, 0(t2) // read LSR andi t6, t5, 0x61 // wait until all transmissions are done and data is ready @@ -1083,7 +1083,6 @@ uart_read_LSR_IIR: uart_rxfifo_timout: li t4, 0x10000000 // read from the fifo lb t5, 0(t4) - lb t5, 0(t4) //read the fifo until empty j uart_read_LSR_IIR From 6bc4c1318eb5e37f47aab51691a63f5860333883 Mon Sep 17 00:00:00 2001 From: cturek Date: Sun, 6 Nov 2022 21:53:48 +0000 Subject: [PATCH 04/10] Added new macros for int div preprocessing, added p, n, and rightshiftx logic --- pipelined/config/shared/wally-shared.vh | 8 +++- pipelined/src/fpu/fdivsqrt/fdivsqrt.sv | 3 +- pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv | 42 +++++++++++++------ 3 files changed, 38 insertions(+), 15 deletions(-) diff --git a/pipelined/config/shared/wally-shared.vh b/pipelined/config/shared/wally-shared.vh index ca93d7e7b..97feac9e7 100644 --- a/pipelined/config/shared/wally-shared.vh +++ b/pipelined/config/shared/wally-shared.vh @@ -110,7 +110,7 @@ // division constants `define RADIX 
32'h4 -`define DIVCOPIES 32'h3 +`define DIVCOPIES 32'h2 `define DIVLEN ((`NF < `XLEN) ? (`XLEN) : `NF+3) // `define DIVN (`NF < `XLEN ? `XLEN : `NF+1) // length of input `define DIVN (`NF<`XLEN ? `XLEN : (`NF + 3)) // length of input @@ -118,12 +118,16 @@ `define EXTRAINTBITS ((`NF < `XLEN) ? 0 : (`NF - `XLEN + 3)) `define DIVRESLEN ((`NF>`XLEN) ? (`NF + 4) : `XLEN) `define LOGR ((`RADIX==2) ? 32'h1 : 32'h2) -// FPDUR = ceil(DIVRESLEN/(LOGR*DIVCOPIES)) +`define RK (`DIVCOPIES*`LOGR) // r*k used for intdiv preproc +`define LOGK ($clog2(`DIVCOPIES)) +`define LOGRK ($clog2(`RK)) +// FPDUR = ceil(DIVRESLEN/(LOGR*DIVCOPIES)) // one iteration is required for the integer bit for minimally redundent radix-4 `define FPDUR ((`DIVN+2+(`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES)+(`RADIX/4)) `define DURLEN ($clog2(`FPDUR+1)) `define QLEN (`FPDUR*`LOGR*`DIVCOPIES) `define DIVb (`QLEN-1) +`define DIVBLEN ($clog2(`DIVb)) `define USE_SRAM 0 diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv index 604a0711f..3f6199933 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv @@ -64,10 +64,11 @@ module fdivsqrt( logic Firstun; logic WZero; logic SpecialCaseM; + logic [`DIVBLEN:0] n; fdivsqrtpreproc fdivsqrtpreproc( .clk, .DivStartE, .Xm(XmE), .QeM, .Xe(XeE), .Fmt(FmtE), .Ye(YeE), - .Sqrt(SqrtE), .Ym(YmE), .XZero(XZeroE), .X, .Dpreproc, + .Sqrt(SqrtE), .Ym(YmE), .XZero(XZeroE), .X, .Dpreproc, .n, .ForwardedSrcAE, .ForwardedSrcBE, .Funct3E, .Funct3M, .MDUE, .W64E); fdivsqrtfsm fdivsqrtfsm( .clk, .reset, .FmtE, .XsE, .SqrtE, diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv index f1882ad6f..ae015a583 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv @@ -41,7 +41,8 @@ module fdivsqrtpreproc ( input logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // *** these are the src outputs before the mux 
choosing between them and PCE to put in srcA/B input logic [2:0] Funct3E, Funct3M, input logic MDUE, W64E, - output logic [`NE+1:0] QeM, + output logic [`DIVBLEN:0] n, + output logic [`NE+1:0] QeM, output logic [`DIVb+3:0] X, output logic [`DIVN-2:0] Dpreproc ); @@ -53,33 +54,50 @@ module fdivsqrtpreproc ( logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt; logic [`NE+1:0] Qe; // Intdiv signals - logic [`DIVN-1:0] ZeroBufX, ZeroBufY; + logic [`DIVb-1:0] ZeroBufX, ZeroBufY; logic [`XLEN-1:0] PosA, PosB; - logic Signed, Aneg, Bneg; + logic As, Bs; + logic [`XLEN-1:0] A64, B64; + logic [`DIVBLEN:0] p, ZeroDiff, IntBits, RightShiftX; + logic [`DIVBLEN:0] pPlusr, pPrTrunc, pPrCeil; + logic [`DIVb+3:0] PreShiftX; // ***can probably merge X LZC with conversion // cout the number of leading zeros - // Muxes needed for Int; add after Cedar Commit - assign ZeroBufX = MDUE ? {ForwardedSrcAE, {`DIVN-`XLEN{1'b0}}} : {Xm, {`DIVN-`NF-1{1'b0}}}; - assign ZeroBufY = MDUE ? {ForwardedSrcBE, {`DIVN-`XLEN{1'b0}}} : {Ym, {`DIVN-`NF-1{1'b0}}}; + + assign As = ForwardedSrcAE[`XLEN-1] & Funct3E[0]; + assign Bs = ForwardedSrcBE[`XLEN-1] & Funct3E[0]; + assign A64 = W64E ? {{(`XLEN-32){As}}, ForwardedSrcAE[31:0]} : ForwardedSrcAE; + assign B64 = W64E ? {{(`XLEN-32){Bs}}, ForwardedSrcBE[31:0]} : ForwardedSrcBE; + + assign PosA = As ? -A64 : A64; + assign PosB = Bs ? -B64 : B64; + + assign ZeroBufX = MDUE ? {PosA, {`DIVb-`XLEN{1'b0}}} : {Xm, {`DIVb-`NF-1{1'b0}}}; + assign ZeroBufY = MDUE ? {PosB, {`DIVb-`XLEN{1'b0}}} : {Ym, {`DIVb-`NF-1{1'b0}}}; lzc #(`NF+1) lzcX (Xm, XZeroCnt); lzc #(`NF+1) lzcY (Ym, YZeroCnt); - assign Signed = Funct3E[0]; - assign Aneg = ForwardedSrcAE[`XLEN-1] & Signed; - assign Bneg = ForwardedSrcBE[`XLEN-1] & Signed; - assign PosA = Aneg ? -ForwardedSrcAE : ForwardedSrcAE; - assign PosB = Bneg ? 
-ForwardedSrcBE : ForwardedSrcBE; - assign PreprocX = Xm[`NF-1:0]<> `LOGRK) + |(pPrTrunc); + // assign n = (pPrCeil << `LOGK) - ((`DIVBLEN)'b1); + // assign IntBits = (`DIVBLEN)'(`RK) + p; + // assign RightShiftX = (`DIVBLEN)'(`RK) - {{(`DIVBLEN-`RK){1'b0}}, IntBits[`RK-1:0]}; + assign SqrtX = Xe[0]^XZeroCnt[0] ? {1'b0, ~XZero, PreprocX} : {~XZero, PreprocX, 1'b0}; assign DivX = {3'b000, ~XZero, PreprocX, {`DIVb-`NF{1'b0}}}; // *** explain why X is shifted between radices (initial assignment of WS=RX) if (`RADIX == 2) assign X = Sqrt ? {3'b111, SqrtX, {`DIVb-1-`NF{1'b0}}} : DivX; else assign X = Sqrt ? {2'b11, SqrtX, {`DIVb-1-`NF{1'b0}}, 1'b0} : DivX; + // assign X = MDUE ? PreShiftX >> RightShiftX : PreShiftX; assign Dpreproc = {PreprocY, {`DIVN-1-`NF{1'b0}}}; // radix 2 radix 4 From 2cbe2fd70b53196297ead05eda5fe313548f8460 Mon Sep 17 00:00:00 2001 From: cturek Date: Sun, 6 Nov 2022 22:08:18 +0000 Subject: [PATCH 05/10] Added n, p, and m signals between fdivsqrt submodules. Added w64 and mdue to divsqrt testbench. 
--- pipelined/src/fpu/fdivsqrt/fdivsqrt.sv | 5 +++-- pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv | 1 + pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv | 4 ++-- pipelined/testbench/testbench-fp.sv | 1 + 4 files changed, 7 insertions(+), 4 deletions(-) diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv index 3f6199933..cab1531e9 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv @@ -64,11 +64,11 @@ module fdivsqrt( logic Firstun; logic WZero; logic SpecialCaseM; - logic [`DIVBLEN:0] n; + logic [`DIVBLEN:0] n, p, m; fdivsqrtpreproc fdivsqrtpreproc( .clk, .DivStartE, .Xm(XmE), .QeM, .Xe(XeE), .Fmt(FmtE), .Ye(YeE), - .Sqrt(SqrtE), .Ym(YmE), .XZero(XZeroE), .X, .Dpreproc, .n, + .Sqrt(SqrtE), .Ym(YmE), .XZero(XZeroE), .X, .Dpreproc, .n, .p, .m, .ForwardedSrcAE, .ForwardedSrcBE, .Funct3E, .Funct3M, .MDUE, .W64E); fdivsqrtfsm fdivsqrtfsm( .clk, .reset, .FmtE, .XsE, .SqrtE, @@ -83,5 +83,6 @@ module fdivsqrt( fdivsqrtpostproc fdivsqrtpostproc( .WS, .WC, .D, .FirstU, .FirstUM, .FirstC, .Firstun, .SqrtM, .SpecialCaseM, .RemOp(Funct3E[1]), + .n, .p, .m, .QmM, .WZero, .DivSM); endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv index 92bb1bd9b..9e9bdb10b 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv @@ -39,6 +39,7 @@ module fdivsqrtpostproc( input logic SqrtM, input logic SpecialCaseM, input logic RemOp, + input logic [`DIVBLEN:0] n, p, m, output logic [`DIVb:0] QmM, output logic WZero, output logic DivSM diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv index ae015a583..893863032 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv @@ -41,7 +41,7 @@ module fdivsqrtpreproc ( input logic [`XLEN-1:0] ForwardedSrcAE, 
ForwardedSrcBE, // *** these are the src outputs before the mux choosing between them and PCE to put in srcA/B input logic [2:0] Funct3E, Funct3M, input logic MDUE, W64E, - output logic [`DIVBLEN:0] n, + output logic [`DIVBLEN:0] n, p, m, output logic [`NE+1:0] QeM, output logic [`DIVb+3:0] X, output logic [`DIVN-2:0] Dpreproc @@ -58,7 +58,7 @@ module fdivsqrtpreproc ( logic [`XLEN-1:0] PosA, PosB; logic As, Bs; logic [`XLEN-1:0] A64, B64; - logic [`DIVBLEN:0] p, ZeroDiff, IntBits, RightShiftX; + logic [`DIVBLEN:0] ZeroDiff, IntBits, RightShiftX; logic [`DIVBLEN:0] pPlusr, pPrTrunc, pPrCeil; logic [`DIVb+3:0] PreShiftX; diff --git a/pipelined/testbench/testbench-fp.sv b/pipelined/testbench/testbench-fp.sv index 748be2280..228bc88f9 100644 --- a/pipelined/testbench/testbench-fp.sv +++ b/pipelined/testbench/testbench-fp.sv @@ -718,6 +718,7 @@ module testbenchfp; if (TEST === "div" | TEST === "sqrt" | TEST === "all") begin: fdivsqrt fdivsqrt fdivsqrt(.clk, .reset, .XsE(Xs), .FmtE(ModFmt), .XmE(Xm), .YmE(Ym), .XeE(Xe), .YeE(Ye), .SqrtE(OpCtrlVal[0]), .SqrtM(OpCtrlVal[0]), .XInfE(XInf), .YInfE(YInf), .XZeroE(XZero), .YZeroE(YZero), .XNaNE(XNaN), .YNaNE(YNaN), .DivStartE(DivStart), + .MDUE(1'b0), .W64E(1'b0), .StallE(1'b0), .StallM(1'b0), .DivSM(DivSticky), .DivBusy, .QeM(DivCalcExp), .QmM(Quot), .DivDone); end From 83051a53515a35fd2615b8405d2e8b1a1cef36c4 Mon Sep 17 00:00:00 2001 From: cturek Date: Sun, 6 Nov 2022 22:21:35 +0000 Subject: [PATCH 06/10] Changed lzc names, started int/fp size merge in preproc --- pipelined/config/shared/wally-shared.vh | 2 +- pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv | 30 +++++++++---------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/pipelined/config/shared/wally-shared.vh b/pipelined/config/shared/wally-shared.vh index 97feac9e7..a69814b58 100644 --- a/pipelined/config/shared/wally-shared.vh +++ b/pipelined/config/shared/wally-shared.vh @@ -127,7 +127,7 @@ `define DURLEN ($clog2(`FPDUR+1)) `define QLEN 
(`FPDUR*`LOGR*`DIVCOPIES) `define DIVb (`QLEN-1) -`define DIVBLEN ($clog2(`DIVb)) +`define DIVBLEN ($clog2(`DIVb+1)-1) `define USE_SRAM 0 diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv index 893863032..4d90185c3 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv @@ -51,7 +51,7 @@ module fdivsqrtpreproc ( logic [`NF-1:0] PreprocB, PreprocY; logic [`NF+1:0] SqrtX; logic [`DIVb+3:0] DivX; - logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt; + logic [`DIVBLEN:0] L; logic [`NE+1:0] Qe; // Intdiv signals logic [`DIVb-1:0] ZeroBufX, ZeroBufY; @@ -75,13 +75,13 @@ module fdivsqrtpreproc ( assign ZeroBufX = MDUE ? {PosA, {`DIVb-`XLEN{1'b0}}} : {Xm, {`DIVb-`NF-1{1'b0}}}; assign ZeroBufY = MDUE ? {PosB, {`DIVb-`XLEN{1'b0}}} : {Ym, {`DIVb-`NF-1{1'b0}}}; - lzc #(`NF+1) lzcX (Xm, XZeroCnt); - lzc #(`NF+1) lzcY (Ym, YZeroCnt); + lzc #(`DIVb) lzcX (ZeroBufX, L); + lzc #(`DIVb) lzcY (ZeroBufY, m); - assign PreprocX = Xm[`NF-1:0]< Date: Sun, 6 Nov 2022 22:24:21 +0000 Subject: [PATCH 07/10] p calculation --- pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv index 4d90185c3..50f3e68f0 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv @@ -81,8 +81,8 @@ module fdivsqrtpreproc ( assign PreprocX = Xm[`NF-1:0]< Date: Sun, 6 Nov 2022 22:31:48 +0000 Subject: [PATCH 08/10] Added n and rightshiftx --- pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv index 50f3e68f0..a16e5f795 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv @@ -59,7 
+59,8 @@ module fdivsqrtpreproc ( logic As, Bs; logic [`XLEN-1:0] A64, B64; logic [`DIVBLEN:0] ZeroDiff, IntBits, RightShiftX; - logic [`DIVBLEN:0] pPlusr, pPrTrunc, pPrCeil; + logic [`DIVBLEN:0] pPlusr, pPrCeil; + logic [`LOGRK-1:0] pPrTrunc; logic [`DIVb+3:0] PreShiftX; // ***can probably merge X LZC with conversion @@ -84,12 +85,12 @@ module fdivsqrtpreproc ( assign ZeroDiff = m - L; assign p = ZeroDiff[`DIVBLEN] ? '0 : ZeroDiff; - // assign pPlusr = (`DIVBLEN)'(`LOGR) + p; - // assign pPrTrunc = pPlusr[`LOGRK-1:0]; - // assign pPrCeil = (pPlusr >> `LOGRK) + |(pPrTrunc); - // assign n = (pPrCeil << `LOGK) - ((`DIVBLEN)'b1); - // assign IntBits = (`DIVBLEN)'(`RK) + p; - // assign RightShiftX = (`DIVBLEN)'(`RK) - {{(`DIVBLEN-`RK){1'b0}}, IntBits[`RK-1:0]}; + assign pPlusr = (`DIVBLEN)'(`LOGR) + p; + assign pPrTrunc = pPlusr[`LOGRK-1:0]; + assign pPrCeil = (pPlusr >> `LOGRK) + {{`DIVBLEN-1{1'b0}}, |(pPrTrunc)}; + assign n = (pPrCeil << `LOGK) - 1; + assign IntBits = (`DIVBLEN)'(`RK) + p; + assign RightShiftX = (`DIVBLEN)'(`RK) - {{(`DIVBLEN-`RK){1'b0}}, IntBits[`RK-1:0]}; assign SqrtX = Xe[0]^L[0] ? 
{1'b0, ~XZero, PreprocX} : {~XZero, PreprocX, 1'b0}; assign DivX = {3'b000, ~XZero, PreprocX, {`DIVb-`NF{1'b0}}}; From c3e635c78808960cd27c03d5fa070836a268ec7a Mon Sep 17 00:00:00 2001 From: cturek Date: Sun, 6 Nov 2022 22:40:21 +0000 Subject: [PATCH 09/10] Finished Int Preprocessing --- pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv index a16e5f795..44a57af7b 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv @@ -96,9 +96,9 @@ module fdivsqrtpreproc ( assign DivX = {3'b000, ~XZero, PreprocX, {`DIVb-`NF{1'b0}}}; // *** explain why X is shifted between radices (initial assignment of WS=RX) - if (`RADIX == 2) assign X = Sqrt ? {3'b111, SqrtX, {`DIVb-1-`NF{1'b0}}} : DivX; - else assign X = Sqrt ? {2'b11, SqrtX, {`DIVb-1-`NF{1'b0}}, 1'b0} : DivX; - // assign X = MDUE ? PreShiftX >> RightShiftX : PreShiftX; + if (`RADIX == 2) assign PreShiftX = Sqrt ? {3'b111, SqrtX, {`DIVb-1-`NF{1'b0}}} : DivX; + else assign PreShiftX = Sqrt ? {2'b11, SqrtX, {`DIVb-1-`NF{1'b0}}, 1'b0} : DivX; + assign X = MDUE ? 
PreShiftX >> RightShiftX : PreShiftX; assign Dpreproc = {PreprocY, {`DIVN-1-`NF{1'b0}}}; // radix 2 radix 4 From 54f09f3616bb89be85f3d45aa9f3102163bd46b8 Mon Sep 17 00:00:00 2001 From: cturek Date: Sun, 6 Nov 2022 23:09:09 +0000 Subject: [PATCH 10/10] Added conditional OTFC swap for simplified int postprocessing --- pipelined/config/shared/wally-shared.vh | 1 + pipelined/src/fpu/fdivsqrt/fdivsqrt.sv | 5 +++-- pipelined/src/fpu/fdivsqrt/fdivsqrtiter.sv | 1 + pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv | 6 +++++- pipelined/src/fpu/fdivsqrt/fdivsqrtstage2.sv | 2 +- 5 files changed, 11 insertions(+), 4 deletions(-) diff --git a/pipelined/config/shared/wally-shared.vh b/pipelined/config/shared/wally-shared.vh index a69814b58..506cc7c50 100644 --- a/pipelined/config/shared/wally-shared.vh +++ b/pipelined/config/shared/wally-shared.vh @@ -127,6 +127,7 @@ `define DURLEN ($clog2(`FPDUR+1)) `define QLEN (`FPDUR*`LOGR*`DIVCOPIES) `define DIVb (`QLEN-1) +`define DIVa (`DIVb+4-`XLEN) `define DIVBLEN ($clog2(`DIVb+1)-1) diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv index cab1531e9..3f9c7e8a5 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv @@ -65,10 +65,11 @@ module fdivsqrt( logic WZero; logic SpecialCaseM; logic [`DIVBLEN:0] n, p, m; + logic OTFCSwap; fdivsqrtpreproc fdivsqrtpreproc( .clk, .DivStartE, .Xm(XmE), .QeM, .Xe(XeE), .Fmt(FmtE), .Ye(YeE), - .Sqrt(SqrtE), .Ym(YmE), .XZero(XZeroE), .X, .Dpreproc, .n, .p, .m, + .Sqrt(SqrtE), .Ym(YmE), .XZero(XZeroE), .X, .Dpreproc, .n, .p, .m, .OTFCSwap, .ForwardedSrcAE, .ForwardedSrcBE, .Funct3E, .Funct3M, .MDUE, .W64E); fdivsqrtfsm fdivsqrtfsm( .clk, .reset, .FmtE, .XsE, .SqrtE, @@ -78,7 +79,7 @@ module fdivsqrt( fdivsqrtiter fdivsqrtiter( .clk, .Firstun, .D, .FirstU, .FirstUM, .FirstC, .SqrtE, .SqrtM, .X,.Dpreproc, .FirstWS(WS), .FirstWC(WC), - .DivStartE, .Xe(XeE), .Ye(YeE), .XZeroE, .YZeroE, + .DivStartE, .Xe(XeE), .Ye(YeE), .XZeroE, 
.YZeroE, .OTFCSwap, .DivBusy); fdivsqrtpostproc fdivsqrtpostproc( .WS, .WC, .D, .FirstU, .FirstUM, .FirstC, .Firstun, diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtiter.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtiter.sv index 17cc3f5c2..d234144c4 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtiter.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtiter.sv @@ -38,6 +38,7 @@ module fdivsqrtiter( input logic XZeroE, YZeroE, input logic SqrtE, input logic SqrtM, + input logic OTFCSwap, input logic [`DIVb+3:0] X, input logic [`DIVN-2:0] Dpreproc, output logic [`DIVN-2:0] D, // U0.N-1 diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv index 44a57af7b..756c5cc9f 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv @@ -42,6 +42,7 @@ module fdivsqrtpreproc ( input logic [2:0] Funct3E, Funct3M, input logic MDUE, W64E, output logic [`DIVBLEN:0] n, p, m, + output logic OTFCSwap, output logic [`NE+1:0] QeM, output logic [`DIVb+3:0] X, output logic [`DIVN-2:0] Dpreproc @@ -56,7 +57,7 @@ module fdivsqrtpreproc ( // Intdiv signals logic [`DIVb-1:0] ZeroBufX, ZeroBufY; logic [`XLEN-1:0] PosA, PosB; - logic As, Bs; + logic As, Bs, OTFCSwapTemp; logic [`XLEN-1:0] A64, B64; logic [`DIVBLEN:0] ZeroDiff, IntBits, RightShiftX; logic [`DIVBLEN:0] pPlusr, pPrCeil; @@ -70,6 +71,8 @@ module fdivsqrtpreproc ( assign Bs = ForwardedSrcBE[`XLEN-1] & Funct3E[0]; assign A64 = W64E ? {{(`XLEN-32){As}}, ForwardedSrcAE[31:0]} : ForwardedSrcAE; assign B64 = W64E ? {{(`XLEN-32){Bs}}, ForwardedSrcBE[31:0]} : ForwardedSrcBE; + + assign OTFCSwapTemp = (As ^ Bs) & MDUE; assign PosA = As ? -A64 : A64; assign PosB = Bs ? 
-B64 : B64; @@ -111,6 +114,7 @@ module fdivsqrtpreproc ( // r = 1 or 2 // DIVRESLEN/(r*`DIVCOPIES) flopen #(`NE+2) expflop(clk, DivStartE, Qe, QeM); + flopen #(1) swapflop(clk, DivStartE, OTFCSwapTemp, OTFCSwap); expcalc expcalc(.Fmt, .Xe, .Ye, .Sqrt, .XZero, .L, .m, .Qe); endmodule diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtstage2.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtstage2.sv index 8ed1664af..09f82da81 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtstage2.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtstage2.sv @@ -61,7 +61,7 @@ module fdivsqrtstage2 ( // 0001 = -2 fdivsqrtqsel2 qsel2(WS[`DIVb+3:`DIVb], WC[`DIVb+3:`DIVb], up, uz, un); - // Sqrt F generatin + // Sqrt F generation fdivsqrtfgen2 fgen2(.up, .uz, .C(CNext), .U, .UM, .F); // Divisor multiple