From 648c09e5ef15f517392ecd831f4953d371b9169c Mon Sep 17 00:00:00 2001 From: David Harris Date: Fri, 2 Jul 2021 11:04:13 -0400 Subject: [PATCH 1/4] Optimized PMP checker logic and added support for configurable number of PMP registers --- wally-pipelined/src/ifu/ifu.sv | 2 +- wally-pipelined/src/lsu/lsu.sv | 2 +- wally-pipelined/src/mmu/mmu.sv | 4 +- wally-pipelined/src/mmu/pmpchecker.sv | 79 ++++++++++-------- wally-pipelined/src/privileged/csr.sv | 2 +- wally-pipelined/src/privileged/csrm.sv | 80 +++++++++---------- wally-pipelined/src/privileged/privileged.sv | 2 +- .../src/wally/wallypipelinedhart.sv | 2 +- 8 files changed, 89 insertions(+), 84 deletions(-) diff --git a/wally-pipelined/src/ifu/ifu.sv b/wally-pipelined/src/ifu/ifu.sv index afae5ff4f..b08a1503e 100644 --- a/wally-pipelined/src/ifu/ifu.sv +++ b/wally-pipelined/src/ifu/ifu.sv @@ -79,7 +79,7 @@ module ifu ( input logic [2:0] HSIZE, HBURST, input logic HWRITE, input logic ExecuteAccessF, //read, write, and atomic access are all set to zero because this mmu is onlt working with instructinos in the F stage. - input logic [63:0] PMPCFG01_REGW, PMPCFG23_REGW, // *** all of these come from the privileged unit, so they're gonna have to come over into ifu and dmem + input var logic [63:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES/8-1:0], input var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0], output logic PMPInstrAccessFaultF, PMAInstrAccessFaultF, diff --git a/wally-pipelined/src/lsu/lsu.sv b/wally-pipelined/src/lsu/lsu.sv index ffa79adfe..8c9de2ff7 100644 --- a/wally-pipelined/src/lsu/lsu.sv +++ b/wally-pipelined/src/lsu/lsu.sv @@ -70,7 +70,7 @@ module lsu ( input logic [2:0] HSIZE, HBURST, input logic HWRITE, input logic AtomicAccessM, WriteAccessM, ReadAccessM, // execute access is hardwired to zero in this mmu because we're only working with data in the M stage. 
- input logic [63:0] PMPCFG01_REGW, PMPCFG23_REGW, // *** all of these come from the privileged unit, so thwyre gonna have to come over into ifu and dmem + input var logic [63:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES/8-1:0], input var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0], // *** this one especially has a large note attached to it in pmpchecker. output logic PMALoadAccessFaultM, PMAStoreAccessFaultM, diff --git a/wally-pipelined/src/mmu/mmu.sv b/wally-pipelined/src/mmu/mmu.sv index ff315f128..32309baaf 100644 --- a/wally-pipelined/src/mmu/mmu.sv +++ b/wally-pipelined/src/mmu/mmu.sv @@ -70,8 +70,8 @@ module mmu #(parameter ENTRY_BITS = 3, input logic [2:0] HSIZE, HBURST, input logic HWRITE, input logic AtomicAccessM, ExecuteAccessF, WriteAccessM, ReadAccessM, - input logic [63:0] PMPCFG01_REGW, PMPCFG23_REGW, // *** all of these come from the privileged unit, so thwyre gonna have to come over into ifu and dmem - input var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0], + input var logic [63:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES/8-1:0], + input var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0], output logic SquashBusAccess, // *** send to privileged unit output logic PMPInstrAccessFaultF, PMPLoadAccessFaultM, PMPStoreAccessFaultM, diff --git a/wally-pipelined/src/mmu/pmpchecker.sv b/wally-pipelined/src/mmu/pmpchecker.sv index f88d56fa0..5344249c7 100644 --- a/wally-pipelined/src/mmu/pmpchecker.sv +++ b/wally-pipelined/src/mmu/pmpchecker.sv @@ -35,7 +35,6 @@ module pmpchecker ( input logic [1:0] PrivilegeModeW, - input logic [63:0] PMPCFG01_REGW, PMPCFG23_REGW, // *** ModelSim has a switch -svinputport which controls whether input ports // are nets (wires) or vars by default. The default setting of this switch is @@ -48,6 +47,7 @@ module pmpchecker ( // boundary. 
It would be better to store the PMP address registers in a module // somewhere in the CSR hierarchy and do PMP checking _within_ that module, so // we don't have to pass around 16 whole registers. + input var logic [63:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES/8-1:0], input var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0], input logic ExecuteAccessF, WriteAccessM, ReadAccessM, @@ -60,29 +60,23 @@ module pmpchecker ( ); // Bit i is high when the address falls in PMP region i - logic [15:0] Regions; - logic [3:0] MatchedRegion; - logic Match, EnforcePMP; + logic [`PMP_ENTRIES-1:0] Regions, FirstMatch; + //logic [3:0] MatchedRegion; + logic EnforcePMP; - logic [7:0] PMPCFG [15:0]; + logic [7:0] PMPCFG [`PMP_ENTRIES-1:0]; // Bit i is high when the address is greater than or equal to PMPADR[i] // Used for determining whether TOR PMP regions match - logic [15:0] AboveRegion; + logic [`PMP_ENTRIES-1:0] AboveRegion; // Bit i is high if PMP register i is non-null - logic [15:0] ActiveRegion; + logic [`PMP_ENTRIES-1:0] ActiveRegion; - logic L_Bit, X_Bit, W_Bit, R_Bit; - logic InvalidExecute, InvalidWrite, InvalidRead; + logic [`PMP_ENTRIES-1:0] L_Bits, X_Bits, W_Bits, R_Bits; + //logic InvalidExecute, InvalidWrite, InvalidRead; - // *** extend to optionally 64 configurations - - assign {PMPCFG[15], PMPCFG[14], PMPCFG[13], PMPCFG[12], - PMPCFG[11], PMPCFG[10], PMPCFG[9], PMPCFG[8]} = PMPCFG23_REGW; - - assign {PMPCFG[7], PMPCFG[6], PMPCFG[5], PMPCFG[4], - PMPCFG[3], PMPCFG[2], PMPCFG[1], PMPCFG[0]} = PMPCFG01_REGW; + genvar i,j; pmpadrdec pmpadrdec(.HADDR(HADDR), .AdrMode(PMPCFG[0][4:3]), .CurrentPMPAdr(PMPADDR_ARRAY_REGW[0]), @@ -92,7 +86,6 @@ module pmpchecker ( assign ActiveRegion[0] = |PMPCFG[0][4:3]; generate // *** only for PMP_ENTRIES > 0 - genvar i; for (i = 1; i < `PMP_ENTRIES; i++) begin pmpadrdec pmpadrdec(.HADDR(HADDR), .AdrMode(PMPCFG[i][4:3]), .CurrentPMPAdr(PMPADDR_ARRAY_REGW[i]), @@ -104,12 +97,34 @@ module pmpchecker ( end endgenerate - assign Match = 
|Regions; + //assign Match = |Regions; - // Only enforce PMP checking for S and U modes when at least one PMP is active - assign EnforcePMP = |ActiveRegion; - - // *** extend to up to 64, fold bit extraction to avoid need for binary encoding of region + // verilator lint_off UNOPTFLAT + logic [`PMP_ENTRIES-1:0] NoLowerMatch; +// assign NoLowerMatch[0] = 1; + generate + // verilator lint_off WIDTH + for (j=0; j<`PMP_ENTRIES; j = j+8) begin + assign {PMPCFG[j+7], PMPCFG[j+6], PMPCFG[j+5], PMPCFG[j+4], + PMPCFG[j+3], PMPCFG[j+2], PMPCFG[j+1], PMPCFG[j]} = PMPCFG_ARRAY_REGW[j/8]; + end + // verilator lint_on WIDTH + for (i=0; i<`PMP_ENTRIES; i++) begin + if (i==0) begin + assign FirstMatch[i] = Regions[i]; + assign NoLowerMatch[i] = ~Regions[i]; + end else begin + assign FirstMatch[i] = Regions[i] & NoLowerMatch[i]; + assign NoLowerMatch[i] = NoLowerMatch[i-1] & ~Regions[i]; + end + assign L_Bits[i] = PMPCFG[i][7] & FirstMatch[i]; + assign X_Bits[i] = PMPCFG[i][2] & FirstMatch[i]; + assign W_Bits[i] = PMPCFG[i][1] & FirstMatch[i]; + assign R_Bits[i] = PMPCFG[i][0] & FirstMatch[i]; + end + // verilator lint_on UNOPTFLAT + endgenerate +/* // *** extend to up to 64, fold bit extraction to avoid need for binary encoding of region always_comb casez (Regions) 16'b???????????????1: MatchedRegion = 0; @@ -134,22 +149,18 @@ module pmpchecker ( assign L_Bit = PMPCFG[MatchedRegion][7] && Match; assign X_Bit = PMPCFG[MatchedRegion][2] && Match; assign W_Bit = PMPCFG[MatchedRegion][1] && Match; - assign R_Bit = PMPCFG[MatchedRegion][0] && Match; + assign R_Bit = PMPCFG[MatchedRegion][0] && Match; assign InvalidExecute = ExecuteAccessF && ~X_Bit; assign InvalidWrite = WriteAccessM && ~W_Bit; - assign InvalidRead = ReadAccessM && ~R_Bit; + assign InvalidRead = ReadAccessM && ~R_Bit;*/ - // *** don't cause faults when there are no PMPs - assign PMPInstrAccessFaultF = (PrivilegeModeW == `M_MODE) ? 
- Match && L_Bit && InvalidExecute : - EnforcePMP && InvalidExecute; - assign PMPStoreAccessFaultM = (PrivilegeModeW == `M_MODE) ? - Match && L_Bit && InvalidWrite : - EnforcePMP && InvalidWrite; - assign PMPLoadAccessFaultM = (PrivilegeModeW == `M_MODE) ? - Match && L_Bit && InvalidRead : - EnforcePMP && InvalidRead; + // Only enforce PMP checking for S and U modes when at least one PMP is active or in Machine mode when L bit is set in selected region + assign EnforcePMP = (PrivilegeModeW == `M_MODE) ? |L_Bits : |ActiveRegion; + + assign PMPInstrAccessFaultF = EnforcePMP && ExecuteAccessF && ~|X_Bits; + assign PMPStoreAccessFaultM = EnforcePMP && WriteAccessM && ~|W_Bits; + assign PMPLoadAccessFaultM = EnforcePMP && ReadAccessM && ~|R_Bits; assign PMPSquashBusAccess = PMPInstrAccessFaultF || PMPLoadAccessFaultM || PMPStoreAccessFaultM; diff --git a/wally-pipelined/src/privileged/csr.sv b/wally-pipelined/src/privileged/csr.sv index 213bcde33..dfac55711 100644 --- a/wally-pipelined/src/privileged/csr.sv +++ b/wally-pipelined/src/privileged/csr.sv @@ -60,7 +60,7 @@ module csr #(parameter output logic STATUS_MIE, STATUS_SIE, output logic STATUS_MXR, STATUS_SUM, output logic STATUS_MPRV, - output logic [63:0] PMPCFG01_REGW, PMPCFG23_REGW, + output var logic [63:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES/8-1:0], output var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0], input logic [4:0] SetFflagsM, output logic [2:0] FRM_REGW, diff --git a/wally-pipelined/src/privileged/csrm.sv b/wally-pipelined/src/privileged/csrm.sv index 33b903a83..f30ebb4ff 100644 --- a/wally-pipelined/src/privileged/csrm.sv +++ b/wally-pipelined/src/privileged/csrm.sv @@ -48,25 +48,9 @@ module csrm #(parameter MTVAL = 12'h343, MIP = 12'h344, PMPCFG0 = 12'h3A0, - PMPCFG1 = 12'h3A1, - PMPCFG2 = 12'h3A2, - PMPCFG3 = 12'h3A3, + // .. 
up to 15 more at consecutive addresses PMPADDR0 = 12'h3B0, - PMPADDR1 = 12'h3B1, - PMPADDR2 = 12'h3B2, - PMPADDR3 = 12'h3B3, - PMPADDR4 = 12'h3B4, - PMPADDR5 = 12'h3B5, - PMPADDR6 = 12'h3B6, - PMPADDR7 = 12'h3B7, - PMPADDR8 = 12'h3B8, - PMPADDR9 = 12'h3B9, - PMPADDR10 = 12'h3BA, - PMPADDR11 = 12'h3BB, - PMPADDR12 = 12'h3BC, - PMPADDR13 = 12'h3BD, - PMPADDR14 = 12'h3BE, - PMPADDR15 = 12'h3BF, + // ... up to 63 more at consecutive addresses TSELECT = 12'h7A0, TDATA1 = 12'h7A1, TDATA2 = 12'h7A2, @@ -90,7 +74,7 @@ module csrm #(parameter output logic [31:0] MCOUNTEREN_REGW, MCOUNTINHIBIT_REGW, output logic [`XLEN-1:0] MEDELEG_REGW, MIDELEG_REGW, // 64-bit registers in RV64, or two 32-bit registers in RV32 - output logic [63:0] PMPCFG01_REGW, PMPCFG23_REGW, + output var logic [63:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES/8-1:0], output var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0], input logic [11:0] MIP_REGW, MIE_REGW, output logic WriteMSTATUSM, @@ -103,8 +87,8 @@ module csrm #(parameter logic WriteMTVECM, WriteMEDELEGM, WriteMIDELEGM; logic WriteMSCRATCHM, WriteMEPCM, WriteMCAUSEM, WriteMTVALM; logic WriteMCOUNTERENM, WriteMCOUNTINHIBITM; - logic WritePMPCFG0M, WritePMPCFG2M; - logic WritePMPADDRM [15:0]; + logic [`PMP_ENTRIES/8-1:0] WritePMPCFGM, WritePMPCFGHM ; + logic [`PMP_ENTRIES-1:0] WritePMPADDRM ; localparam MISA_26 = (`MISA) & 32'h03ffffff; @@ -120,7 +104,7 @@ module csrm #(parameter assign WriteMEPCM = MTrapM | (CSRMWriteM && (CSRAdrM == MEPC)) && ~StallW; assign WriteMCAUSEM = MTrapM | (CSRMWriteM && (CSRAdrM == MCAUSE)) && ~StallW; assign WriteMTVALM = MTrapM | (CSRMWriteM && (CSRAdrM == MTVAL)) && ~StallW; - assign WritePMPCFG0M = (CSRMWriteM && (CSRAdrM == PMPCFG0)) && ~StallW; +/* assign WritePMPCFG0M = (CSRMWriteM && (CSRAdrM == PMPCFG0)) && ~StallW; assign WritePMPCFG2M = (CSRMWriteM && (CSRAdrM == PMPCFG2)) && ~StallW; assign WritePMPADDRM[0] = (CSRMWriteM && (CSRAdrM == PMPADDR0)) && ~StallW; assign WritePMPADDRM[1] = (CSRMWriteM && (CSRAdrM 
== PMPADDR1)) && ~StallW; @@ -137,10 +121,13 @@ module csrm #(parameter assign WritePMPADDRM[12] = (CSRMWriteM && (CSRAdrM == PMPADDR12)) && ~StallW; assign WritePMPADDRM[13] = (CSRMWriteM && (CSRAdrM == PMPADDR13)) && ~StallW; assign WritePMPADDRM[14] = (CSRMWriteM && (CSRAdrM == PMPADDR14)) && ~StallW; - assign WritePMPADDRM[15] = (CSRMWriteM && (CSRAdrM == PMPADDR15)) && ~StallW; + assign WritePMPADDRM[15] = (CSRMWriteM && (CSRAdrM == PMPADDR15)) && ~StallW; */ assign WriteMCOUNTERENM = CSRMWriteM && (CSRAdrM == MCOUNTEREN) && ~StallW; assign WriteMCOUNTINHIBITM = CSRMWriteM && (CSRAdrM == MCOUNTINHIBIT) && ~StallW; + + + assign IllegalCSRMWriteReadonlyM = CSRMWriteM && (CSRAdrM == MVENDORID || CSRAdrM == MARCHID || CSRAdrM == MIMPID || CSRAdrM == MHARTID); // CSRs @@ -172,33 +159,39 @@ module csrm #(parameter flopenl #(32) MCOUNTINHIBITreg(clk, reset, WriteMCOUNTINHIBITM, CSRWriteValM[31:0], 32'hFFFFFFFF, MCOUNTINHIBIT_REGW); // There are PMP_ENTRIES = 0, 16, or 64 PMPADDR registers, each of which has its own flop + + // *** need to add support for locked PMPCFG and PMPADR + genvar i; generate - genvar i; - for (i = 0; i < `PMP_ENTRIES; i++) begin: pmp_flop + for(i=0; i<`PMP_ENTRIES; i++) begin + assign WritePMPADDRM[i] = (CSRMWriteM && (CSRAdrM == PMPADDR0+i)) && ~StallW; flopenr #(`XLEN) PMPADDRreg(clk, reset, WritePMPADDRM[i], CSRWriteValM, PMPADDR_ARRAY_REGW[i]); end + for (i=0; i<`PMP_ENTRIES/8; i++) begin + if (`XLEN==64) begin + assign WritePMPCFGM[i] = (CSRMWriteM && (CSRAdrM == PMPCFG0+2*i)) && ~StallW; + flopenr #(`XLEN) PMPCFGreg(clk, reset, WritePMPCFGM[i], CSRWriteValM, PMPCFG_ARRAY_REGW[i]); + end else begin + assign WritePMPCFGM[i] = (CSRMWriteM && (CSRAdrM == PMPCFG0+2*i)) && ~StallW; + assign WritePMPCFGHM[i] = (CSRMWriteM && (CSRAdrM == PMPCFG0+2*i+1)) && ~StallW; + flopenr #(`XLEN) PMPCFGreg(clk, reset, WritePMPCFGM[i], CSRWriteValM, PMPCFG_ARRAY_REGW[i][31:0]); + flopenr #(`XLEN) PMPCFGHreg(clk, reset, WritePMPCFGHM[i], CSRWriteValM, 
PMPCFG_ARRAY_REGW[i][63:32]); + end + end endgenerate - // PMPCFG registers are a pair of 64-bit in RV64 and four 32-bit in RV32 - generate - if (`XLEN==64) begin - flopenr #(`XLEN) PMPCFG01reg(clk, reset, WritePMPCFG0M, CSRWriteValM, PMPCFG01_REGW); - flopenr #(`XLEN) PMPCFG23reg(clk, reset, WritePMPCFG2M, CSRWriteValM, PMPCFG23_REGW); - end else begin - logic WritePMPCFG1M, WritePMPCFG3M; - assign WritePMPCFG1M = MTrapM | (CSRMWriteM && (CSRAdrM == PMPCFG1)); - assign WritePMPCFG3M = MTrapM | (CSRMWriteM && (CSRAdrM == PMPCFG3)); - flopenr #(`XLEN) PMPCFG0reg(clk, reset, WritePMPCFG0M, CSRWriteValM, PMPCFG01_REGW[31:0]); - flopenr #(`XLEN) PMPCFG1reg(clk, reset, WritePMPCFG1M, CSRWriteValM, PMPCFG01_REGW[63:32]); - flopenr #(`XLEN) PMPCFG2reg(clk, reset, WritePMPCFG2M, CSRWriteValM, PMPCFG23_REGW[31:0]); - flopenr #(`XLEN) PMPCFG3reg(clk, reset, WritePMPCFG3M, CSRWriteValM, PMPCFG23_REGW[63:32]); - end - endgenerate // Read machine mode CSRs + // verilator lint_off WIDTH always_comb begin IllegalCSRMAccessM = !(`S_SUPPORTED | `U_SUPPORTED & `N_SUPPORTED) && (CSRAdrM == MEDELEG || CSRAdrM == MIDELEG); // trap on DELEG register access when no S or N-mode - case (CSRAdrM) + if (CSRAdrM >= PMPADDR0 && CSRAdrM < PMPADDR0 + `PMP_ENTRIES) // reading a PMP entry + CSRMReadValM = PMPADDR_ARRAY_REGW[CSRAdrM - PMPADDR0]; + else if (CSRAdrM >= PMPCFG0 && CSRAdrM < PMPCFG0 + `PMP_ENTRIES/4) begin // PMP_ENTRIES/4 pmpcfg addresses; each 64-bit array element covers two of them, matching the PMPCFG0+2*i / PMPCFG0+2*i+1 write decode + if (~CSRAdrM[0]) CSRMReadValM = PMPCFG_ARRAY_REGW[(CSRAdrM - PMPCFG0)/2][`XLEN-1:0]; + else CSRMReadValM = {{(`XLEN-32){1'b0}}, PMPCFG_ARRAY_REGW[(CSRAdrM - PMPCFG0)/2][63:32]}; + end + else case (CSRAdrM) MISA_ADR: CSRMReadValM = MISA_REGW; MVENDORID: CSRMReadValM = 0; MARCHID: CSRMReadValM = 0; @@ -219,7 +212,7 @@ module csrm #(parameter MTVAL: CSRMReadValM = MTVAL_REGW; MCOUNTEREN:CSRMReadValM = {{(`XLEN-32){1'b0}}, MCOUNTEREN_REGW}; MCOUNTINHIBIT:CSRMReadValM = {{(`XLEN-32){1'b0}}, MCOUNTINHIBIT_REGW}; - PMPCFG0: CSRMReadValM = 
PMPCFG01_REGW[`XLEN-1:0]; PMPCFG1: CSRMReadValM = {{(`XLEN-32){1'b0}}, PMPCFG01_REGW[63:32]}; PMPCFG2: CSRMReadValM = PMPCFG23_REGW[`XLEN-1:0]; PMPCFG3: CSRMReadValM = {{(`XLEN-32){1'b0}}, PMPCFG23_REGW[63:32]}; @@ -238,11 +231,12 @@ module csrm #(parameter PMPADDR12: CSRMReadValM = PMPADDR_ARRAY_REGW[12]; PMPADDR13: CSRMReadValM = PMPADDR_ARRAY_REGW[13]; PMPADDR14: CSRMReadValM = PMPADDR_ARRAY_REGW[14]; - PMPADDR15: CSRMReadValM = PMPADDR_ARRAY_REGW[15]; + PMPADDR15: CSRMReadValM = PMPADDR_ARRAY_REGW[15]; */ default: begin CSRMReadValM = 0; IllegalCSRMAccessM = 1; end endcase end + // verilator lint_on WIDTH endmodule diff --git a/wally-pipelined/src/privileged/privileged.sv b/wally-pipelined/src/privileged/privileged.sv index 1275cd4b8..5ed8c8807 100644 --- a/wally-pipelined/src/privileged/privileged.sv +++ b/wally-pipelined/src/privileged/privileged.sv @@ -68,7 +68,7 @@ module privileged ( output logic [1:0] PrivilegeModeW, output logic [`XLEN-1:0] SATP_REGW, output logic STATUS_MXR, STATUS_SUM, - output logic [63:0] PMPCFG01_REGW, PMPCFG23_REGW, + output var logic [63:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES/8-1:0], output var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0], output logic [2:0] FRM_REGW ); diff --git a/wally-pipelined/src/wally/wallypipelinedhart.sv b/wally-pipelined/src/wally/wallypipelinedhart.sv index a77c3ab01..9358417b1 100644 --- a/wally-pipelined/src/wally/wallypipelinedhart.sv +++ b/wally-pipelined/src/wally/wallypipelinedhart.sv @@ -126,7 +126,7 @@ module wallypipelinedhart ( logic DSquashBusAccessM, ISquashBusAccessF; logic [5:0] DHSELRegionsM, IHSELRegionsF; var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0]; - logic [63:0] PMPCFG01_REGW, PMPCFG23_REGW; // signals being sent from privileged unit to pmp/pma in dmem and ifu. + var logic [63:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES/8-1:0]; assign HSELRegions = ExecuteAccessF ? IHSELRegionsF : DHSELRegionsM; // *** this is a pure guess on how one of these should be selected. 
it passes tests, but is it the right way to do this? // IMem stalls From cd6cabac2f1669784e3e7184911a69ab3d32c064 Mon Sep 17 00:00:00 2001 From: David Harris Date: Fri, 2 Jul 2021 11:05:25 -0400 Subject: [PATCH 2/4] Optimized PMP checker logic and added support for configurable number of PMP registers --- wally-pipelined/config/rv64ic/wally-config.vh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wally-pipelined/config/rv64ic/wally-config.vh b/wally-pipelined/config/rv64ic/wally-config.vh index 954e126bb..a6f1c0133 100644 --- a/wally-pipelined/config/rv64ic/wally-config.vh +++ b/wally-pipelined/config/rv64ic/wally-config.vh @@ -53,7 +53,7 @@ `define DTLB_ENTRY_BITS 5 // Legal number of PMP entries are 0, 16, or 64 -`define PMP_ENTRIES 16 +`define PMP_ENTRIES 64 // Address space `define RESET_VECTOR 64'h0000000080000000 From 3f61e313d2707d69c8b1db73806941eb0d536c32 Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Fri, 2 Jul 2021 12:40:58 -0400 Subject: [PATCH 3/4] FPU update --- wally-pipelined/src/fpu/FPregfile.sv | 54 -- wally-pipelined/src/fpu/bk128.sv | 599 -------------------- wally-pipelined/src/fpu/bk13.sv | 97 ---- wally-pipelined/src/fpu/bk14.sv | 86 --- wally-pipelined/src/fpu/csa.sv | 70 --- wally-pipelined/src/fpu/divconv.sv | 18 +- wally-pipelined/src/fpu/fctrl.sv | 67 ++- wally-pipelined/src/fpu/fma2.sv | 10 +- wally-pipelined/src/fpu/fpadd_denorm.sv | 4 +- wally-pipelined/src/fpu/fpdiv.sv | 256 --------- wally-pipelined/src/fpu/fpu.sv | 369 ++++++------ wally-pipelined/src/fpu/fpuaddcvt1.sv | 4 +- wally-pipelined/src/fpu/fpuaddcvt2.sv | 14 +- wally-pipelined/src/fpu/fpuclassify.sv | 50 -- wally-pipelined/src/fpu/fpucmp1.sv | 465 --------------- wally-pipelined/src/fpu/fpucmp2.sv | 243 -------- wally-pipelined/src/fpu/fpuhazard.sv | 67 --- wally-pipelined/src/fpu/freg.sv | 515 ----------------- wally-pipelined/src/fpu/fsgn.sv | 19 +- wally-pipelined/src/fpu/ling_bk13.sv | 89 --- wally-pipelined/src/fpu/lzd_denorm.sv | 1 + 
wally-pipelined/src/fpu/mult_R4_64_64_cs.sv | 0 wally-pipelined/src/fpu/rounder_denorm.sv | 6 +- wally-pipelined/src/fpu/sbtm_a4.sv | 204 ------- wally-pipelined/src/fpu/sk14.sv | 90 --- 25 files changed, 254 insertions(+), 3143 deletions(-) delete mode 100644 wally-pipelined/src/fpu/FPregfile.sv delete mode 100755 wally-pipelined/src/fpu/bk128.sv delete mode 100755 wally-pipelined/src/fpu/bk13.sv delete mode 100755 wally-pipelined/src/fpu/bk14.sv delete mode 100644 wally-pipelined/src/fpu/csa.sv delete mode 100755 wally-pipelined/src/fpu/fpdiv.sv delete mode 100644 wally-pipelined/src/fpu/fpuclassify.sv delete mode 100755 wally-pipelined/src/fpu/fpucmp1.sv delete mode 100755 wally-pipelined/src/fpu/fpucmp2.sv delete mode 100644 wally-pipelined/src/fpu/fpuhazard.sv delete mode 100755 wally-pipelined/src/fpu/freg.sv delete mode 100755 wally-pipelined/src/fpu/ling_bk13.sv mode change 100755 => 100644 wally-pipelined/src/fpu/mult_R4_64_64_cs.sv delete mode 100755 wally-pipelined/src/fpu/sbtm_a4.sv delete mode 100755 wally-pipelined/src/fpu/sk14.sv diff --git a/wally-pipelined/src/fpu/FPregfile.sv b/wally-pipelined/src/fpu/FPregfile.sv deleted file mode 100644 index 99d18bce9..000000000 --- a/wally-pipelined/src/fpu/FPregfile.sv +++ /dev/null @@ -1,54 +0,0 @@ -/////////////////////////////////////////// -// regfile.sv -// -// Written: David_Harris@hmc.edu 9 January 2021 -// Modified: -// -// Purpose: 4-port register file -// -// A component of the Wally configurable RISC-V project. 
-// -// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University -// -// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, -// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software -// is furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT -// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-/////////////////////////////////////////// - -`include "wally-config.vh" - -module FPregfile ( - input logic clk, reset, - input logic we4, - input logic [ 4:0] a1, a2, a3, a4, - input logic [63:0] wd4, //KEP `XLEN-1 changed to 63 (lint warning) *** figure out if double can be suported when XLEN = 32 - output logic [63:0] rd1, rd2, rd3); - - logic [63:0] rf[31:0]; - integer i; - - // three ported register file - // read three ports combinationally (A1/RD1, A2/RD2, A3/RD3) - // write fourth port on rising edge of clock (A4/WD4/WE4) - // write occurs on falling edge of clock - - // reset is intended for simulation only, not synthesis - - always_ff @(negedge clk or posedge reset) - if (reset) for(i=0; i<32; i++) rf[i] <= 0; - else if (we4) rf[a4] <= wd4; - - assign #2 rd1 = rf[a1]; - assign #2 rd2 = rf[a2]; - assign #2 rd3 = rf[a3]; - -endmodule // regfile - diff --git a/wally-pipelined/src/fpu/bk128.sv b/wally-pipelined/src/fpu/bk128.sv deleted file mode 100755 index a302a0310..000000000 --- a/wally-pipelined/src/fpu/bk128.sv +++ /dev/null @@ -1,599 +0,0 @@ -// Brent-Kung Carry-save Prefix Adder - -module bk128 (cout, sum, a, b, cin); - - input [127:0] a, b; - input cin; - - output [127:0] sum; - output cout; - - wire [128:0] p,g,t; - wire [127:0] c; - - // pre-computation - assign p={a^b,1'b0}; - assign g={a&b, cin}; - assign t[1]=p[1]; - assign t[2]=p[2]; - assign t[3]=p[3]^g[2]; - assign t[4]=p[4]; - assign t[5]=p[5]^g[4]; - assign t[6]=p[6]; - assign t[7]=p[7]^g[6]; - assign t[8]=p[8]; - assign t[9]=p[9]^g[8]; - assign t[10]=p[10]; - assign t[11]=p[11]^g[10]; - assign t[12]=p[12]; - assign t[13]=p[13]^g[12]; - assign t[14]=p[14]; - assign t[15]=p[15]^g[14]; - assign t[16]=p[16]; - assign t[17]=p[17]^g[16]; - assign t[18]=p[18]; - assign t[19]=p[19]^g[18]; - assign t[20]=p[20]; - assign t[21]=p[21]^g[20]; - assign t[22]=p[22]; - assign t[23]=p[23]^g[22]; - assign t[24]=p[24]; - assign t[25]=p[25]^g[24]; - assign t[26]=p[26]; - assign t[27]=p[27]^g[26]; - assign 
t[28]=p[28]; - assign t[29]=p[29]^g[28]; - assign t[30]=p[30]; - assign t[31]=p[31]^g[30]; - assign t[32]=p[32]; - assign t[33]=p[33]^g[32]; - assign t[34]=p[34]; - assign t[35]=p[35]^g[34]; - assign t[36]=p[36]; - assign t[37]=p[37]^g[36]; - assign t[38]=p[38]; - assign t[39]=p[39]^g[38]; - assign t[40]=p[40]; - assign t[41]=p[41]^g[40]; - assign t[42]=p[42]; - assign t[43]=p[43]^g[42]; - assign t[44]=p[44]; - assign t[45]=p[45]^g[44]; - assign t[46]=p[46]; - assign t[47]=p[47]^g[46]; - assign t[48]=p[48]; - assign t[49]=p[49]^g[48]; - assign t[50]=p[50]; - assign t[51]=p[51]^g[50]; - assign t[52]=p[52]; - assign t[53]=p[53]^g[52]; - assign t[54]=p[54]; - assign t[55]=p[55]^g[54]; - assign t[56]=p[56]; - assign t[57]=p[57]^g[56]; - assign t[58]=p[58]; - assign t[59]=p[59]^g[58]; - assign t[60]=p[60]; - assign t[61]=p[61]^g[60]; - assign t[62]=p[62]; - assign t[63]=p[63]^g[62]; - assign t[64]=p[64]; - assign t[65]=p[65]^g[64]; - assign t[66]=p[66]; - assign t[67]=p[67]^g[66]; - assign t[68]=p[68]; - assign t[69]=p[69]^g[68]; - assign t[70]=p[70]; - assign t[71]=p[71]^g[70]; - assign t[72]=p[72]; - assign t[73]=p[73]^g[72]; - assign t[74]=p[74]; - assign t[75]=p[75]^g[74]; - assign t[76]=p[76]; - assign t[77]=p[77]^g[76]; - assign t[78]=p[78]; - assign t[79]=p[79]^g[78]; - assign t[80]=p[80]; - assign t[81]=p[81]^g[80]; - assign t[82]=p[82]; - assign t[83]=p[83]^g[82]; - assign t[84]=p[84]; - assign t[85]=p[85]^g[84]; - assign t[86]=p[86]; - assign t[87]=p[87]^g[86]; - assign t[88]=p[88]; - assign t[89]=p[89]^g[88]; - assign t[90]=p[90]; - assign t[91]=p[91]^g[90]; - assign t[92]=p[92]; - assign t[93]=p[93]^g[92]; - assign t[94]=p[94]; - assign t[95]=p[95]^g[94]; - assign t[96]=p[96]; - assign t[97]=p[97]^g[96]; - assign t[98]=p[98]; - assign t[99]=p[99]^g[98]; - assign t[100]=p[100]; - assign t[101]=p[101]^g[100]; - assign t[102]=p[102]; - assign t[103]=p[103]^g[102]; - assign t[104]=p[104]; - assign t[105]=p[105]^g[104]; - assign t[106]=p[106]; - assign 
t[107]=p[107]^g[106]; - assign t[108]=p[108]; - assign t[109]=p[109]^g[108]; - assign t[110]=p[110]; - assign t[111]=p[111]^g[110]; - assign t[112]=p[112]; - assign t[113]=p[113]^g[112]; - assign t[114]=p[114]; - assign t[115]=p[115]^g[114]; - assign t[116]=p[116]; - assign t[117]=p[117]^g[116]; - assign t[118]=p[118]; - assign t[119]=p[119]^g[118]; - assign t[120]=p[120]; - assign t[121]=p[121]^g[120]; - assign t[122]=p[122]; - assign t[123]=p[123]^g[122]; - assign t[124]=p[124]; - assign t[125]=p[125]^g[124]; - assign t[126]=p[126]; - assign t[127]=p[127]^g[126]; - assign t[128]=p[128]; - - // prefix tree - brent_kung_cs128 prefix_tree(c, p[127:0], g[127:0]); - - // post-computation - assign sum=p[128:1]^c; - assign cout=g[128]|(p[128]&c[127]); - -endmodule - -module brent_kung_cs128 (c, p, g); - - input [127:0] p; - input [127:0] g; - output [128:1] c; - - - // parallel-prefix, Brent-Kung - - // Stage 1: Generates G/P pairs that span 1 bits - grey b_1_0 (G_1_0, {g[1],g[0]}, p[1]); - black b_3_2 (G_3_2, P_3_2, {g[3],g[2]}, {p[3],p[2]}); - black b_5_4 (G_5_4, P_5_4, {g[5],g[4]}, {p[5],p[4]}); - black b_7_6 (G_7_6, P_7_6, {g[7],g[6]}, {p[7],p[6]}); - black b_9_8 (G_9_8, P_9_8, {g[9],g[8]}, {p[9],p[8]}); - black b_11_10 (G_11_10, P_11_10, {g[11],g[10]}, {p[11],p[10]}); - black b_13_12 (G_13_12, P_13_12, {g[13],g[12]}, {p[13],p[12]}); - black b_15_14 (G_15_14, P_15_14, {g[15],g[14]}, {p[15],p[14]}); - - black b_17_16 (G_17_16, P_17_16, {g[17],g[16]}, {p[17],p[16]}); - black b_19_18 (G_19_18, P_19_18, {g[19],g[18]}, {p[19],p[18]}); - black b_21_20 (G_21_20, P_21_20, {g[21],g[20]}, {p[21],p[20]}); - black b_23_22 (G_23_22, P_23_22, {g[23],g[22]}, {p[23],p[22]}); - black b_25_24 (G_25_24, P_25_24, {g[25],g[24]}, {p[25],p[24]}); - black b_27_26 (G_27_26, P_27_26, {g[27],g[26]}, {p[27],p[26]}); - black b_29_28 (G_29_28, P_29_28, {g[29],g[28]}, {p[29],p[28]}); - black b_31_30 (G_31_30, P_31_30, {g[31],g[30]}, {p[31],p[30]}); - - black b_33_32 (G_33_32, P_33_32, 
{g[33],g[32]}, {p[33],p[32]}); - black b_35_34 (G_35_34, P_35_34, {g[35],g[34]}, {p[35],p[34]}); - black b_37_36 (G_37_36, P_37_36, {g[37],g[36]}, {p[37],p[36]}); - black b_39_38 (G_39_38, P_39_38, {g[39],g[38]}, {p[39],p[38]}); - black b_41_40 (G_41_40, P_41_40, {g[41],g[40]}, {p[41],p[40]}); - black b_43_42 (G_43_42, P_43_42, {g[43],g[42]}, {p[43],p[42]}); - black b_45_44 (G_45_44, P_45_44, {g[45],g[44]}, {p[45],p[44]}); - black b_47_46 (G_47_46, P_47_46, {g[47],g[46]}, {p[47],p[46]}); - - black b_49_48 (G_49_48, P_49_48, {g[49],g[48]}, {p[49],p[48]}); - black b_51_50 (G_51_50, P_51_50, {g[51],g[50]}, {p[51],p[50]}); - black b_53_52 (G_53_52, P_53_52, {g[53],g[52]}, {p[53],p[52]}); - black b_55_54 (G_55_54, P_55_54, {g[55],g[54]}, {p[55],p[54]}); - black b_57_56 (G_57_56, P_57_56, {g[57],g[56]}, {p[57],p[56]}); - black b_59_58 (G_59_58, P_59_58, {g[59],g[58]}, {p[59],p[58]}); - black b_61_60 (G_61_60, P_61_60, {g[61],g[60]}, {p[61],p[60]}); - black b_63_62 (G_63_62, P_63_62, {g[63],g[62]}, {p[63],p[62]}); - - black b_65_64 (G_65_64, P_65_64, {g[65],g[64]}, {p[65],p[64]}); - black b_67_66 (G_67_66, P_67_66, {g[67],g[66]}, {p[67],p[66]}); - black b_69_68 (G_69_68, P_69_68, {g[69],g[68]}, {p[69],p[68]}); - black b_71_70 (G_71_70, P_71_70, {g[71],g[70]}, {p[71],p[70]}); - black b_73_72 (G_73_72, P_73_72, {g[73],g[72]}, {p[73],p[72]}); - black b_75_74 (G_75_74, P_75_74, {g[75],g[74]}, {p[75],p[74]}); - black b_77_76 (G_77_76, P_77_76, {g[77],g[76]}, {p[77],p[76]}); - black b_79_78 (G_79_78, P_79_78, {g[79],g[78]}, {p[79],p[78]}); - - black b_81_80 (G_81_80, P_81_80, {g[81],g[80]}, {p[81],p[80]}); - black b_83_82 (G_83_82, P_83_82, {g[83],g[82]}, {p[83],p[82]}); - black b_85_84 (G_85_84, P_85_84, {g[85],g[84]}, {p[85],p[84]}); - black b_87_86 (G_87_86, P_87_86, {g[87],g[86]}, {p[87],p[86]}); - black b_89_88 (G_89_88, P_89_88, {g[89],g[88]}, {p[89],p[88]}); - black b_91_90 (G_91_90, P_91_90, {g[91],g[90]}, {p[91],p[90]}); - black b_93_92 (G_93_92, P_93_92, 
{g[93],g[92]}, {p[93],p[92]}); - black b_95_94 (G_95_94, P_95_94, {g[95],g[94]}, {p[95],p[94]}); - - black b_97_96 (G_97_96, P_97_96, {g[97],g[96]}, {p[97],p[96]}); - black b_99_98 (G_99_98, P_99_98, {g[99],g[98]}, {p[99],p[98]}); - black b_101_100 (G_101_100, P_101_100, {g[101],g[100]}, {p[101],p[100]}); - black b_103_102 (G_103_102, P_103_102, {g[103],g[102]}, {p[103],p[102]}); - black b_105_104 (G_105_104, P_105_104, {g[105],g[104]}, {p[105],p[104]}); - black b_107_106 (G_107_106, P_107_106, {g[107],g[106]}, {p[107],p[106]}); - black b_109_108 (G_109_108, P_109_108, {g[109],g[108]}, {p[109],p[108]}); - black b_111_110 (G_111_110, P_111_110, {g[111],g[110]}, {p[111],p[110]}); - - black b_113_112 (G_113_112, P_113_112, {g[113],g[112]}, {p[113],p[112]}); - black b_115_114 (G_115_114, P_115_114, {g[115],g[114]}, {p[115],p[114]}); - black b_117_116 (G_117_116, P_117_116, {g[117],g[116]}, {p[117],p[116]}); - black b_119_118 (G_119_118, P_119_118, {g[119],g[118]}, {p[119],p[118]}); - black b_121_120 (G_121_120, P_121_120, {g[121],g[120]}, {p[121],p[120]}); - black b_123_122 (G_123_122, P_123_122, {g[123],g[122]}, {p[123],p[122]}); - black b_125_124 (G_125_124, P_125_124, {g[125],g[124]}, {p[125],p[124]}); - black b_127_126 (G_127_126, P_127_126, {g[127],g[126]}, {p[127],p[126]}); - - - // Stage 2: Generates G/P pairs that span 2 bits - grey g_3_0 (G_3_0, {G_3_2,G_1_0}, P_3_2); - black b_7_4 (G_7_4, P_7_4, {G_7_6,G_5_4}, {P_7_6,P_5_4}); - black b_11_8 (G_11_8, P_11_8, {G_11_10,G_9_8}, {P_11_10,P_9_8}); - black b_15_12 (G_15_12, P_15_12, {G_15_14,G_13_12}, {P_15_14,P_13_12}); - black b_19_16 (G_19_16, P_19_16, {G_19_18,G_17_16}, {P_19_18,P_17_16}); - black b_23_20 (G_23_20, P_23_20, {G_23_22,G_21_20}, {P_23_22,P_21_20}); - black b_27_24 (G_27_24, P_27_24, {G_27_26,G_25_24}, {P_27_26,P_25_24}); - black b_31_28 (G_31_28, P_31_28, {G_31_30,G_29_28}, {P_31_30,P_29_28}); - - black b_35_32 (G_35_32, P_35_32, {G_35_34,G_33_32}, {P_35_34,P_33_32}); - black b_39_36 (G_39_36, 
P_39_36, {G_39_38,G_37_36}, {P_39_38,P_37_36}); - black b_43_40 (G_43_40, P_43_40, {G_43_42,G_41_40}, {P_43_42,P_41_40}); - black b_47_44 (G_47_44, P_47_44, {G_47_46,G_45_44}, {P_47_46,P_45_44}); - black b_51_48 (G_51_48, P_51_48, {G_51_50,G_49_48}, {P_51_50,P_49_48}); - black b_55_52 (G_55_52, P_55_52, {G_55_54,G_53_52}, {P_55_54,P_53_52}); - black b_59_56 (G_59_56, P_59_56, {G_59_58,G_57_56}, {P_59_58,P_57_56}); - black b_63_60 (G_63_60, P_63_60, {G_63_62,G_61_60}, {P_63_62,P_61_60}); - - black b_67_64 (G_67_64, P_67_64, {G_67_66,G_65_64}, {P_67_66,P_65_64}); - black b_71_68 (G_71_68, P_71_68, {G_71_70,G_69_68}, {P_71_70,P_69_68}); - black b_75_72 (G_75_72, P_75_72, {G_75_74,G_73_72}, {P_75_74,P_73_72}); - black b_79_76 (G_79_76, P_79_76, {G_79_78,G_77_76}, {P_79_78,P_77_76}); - black b_83_80 (G_83_80, P_83_80, {G_83_82,G_81_80}, {P_83_82,P_81_80}); - black b_87_84 (G_87_84, P_87_84, {G_87_86,G_85_84}, {P_87_86,P_85_84}); - black b_91_88 (G_91_88, P_91_88, {G_91_90,G_89_88}, {P_91_90,P_89_88}); - black b_95_92 (G_95_92, P_95_92, {G_95_94,G_93_92}, {P_95_94,P_93_92}); - - black b_99_96 (G_99_96, P_99_96, {G_99_98,G_97_96}, {P_99_98,P_97_96}); - black b_103_100 (G_103_100, P_103_100, {G_103_102,G_101_100}, {P_103_102,P_101_100}); - black b_107_104 (G_107_104, P_107_104, {G_107_106,G_105_104}, {P_107_106,P_105_104}); - black b_111_108 (G_111_108, P_111_108, {G_111_110,G_109_108}, {P_111_110,P_109_108}); - black b_115_112 (G_115_112, P_115_112, {G_115_114,G_113_112}, {P_115_114,P_113_112}); - black b_119_116 (G_119_116, P_119_116, {G_119_118,G_117_116}, {P_119_118,P_117_116}); - black b_123_120 (G_123_120, P_123_120, {G_123_122,G_121_120}, {P_123_122,P_121_120}); - black b_127_124 (G_127_124, P_127_124, {G_127_126,G_125_124}, {P_127_126,P_125_124}); - - - // Stage 3: Generates G/P pairs that span 4 bits - grey g_7_0 (G_7_0, {G_7_4,G_3_0}, P_7_4); - black b_15_8 (G_15_8, P_15_8, {G_15_12,G_11_8}, {P_15_12,P_11_8}); - black b_23_16 (G_23_16, P_23_16, {G_23_20,G_19_16}, 
{P_23_20,P_19_16}); - black b_31_24 (G_31_24, P_31_24, {G_31_28,G_27_24}, {P_31_28,P_27_24}); - black b_39_32 (G_39_32, P_39_32, {G_39_36,G_35_32}, {P_39_36,P_35_32}); - black b_47_40 (G_47_40, P_47_40, {G_47_44,G_43_40}, {P_47_44,P_43_40}); - black b_55_48 (G_55_48, P_55_48, {G_55_52,G_51_48}, {P_55_52,P_51_48}); - black b_63_56 (G_63_56, P_63_56, {G_63_60,G_59_56}, {P_63_60,P_59_56}); - - black b_71_64 (G_71_64, P_71_64, {G_71_68,G_67_64}, {P_71_68,P_67_64}); - black b_79_72 (G_79_72, P_79_72, {G_79_76,G_75_72}, {P_79_76,P_75_72}); - black b_87_80 (G_87_80, P_87_80, {G_87_84,G_83_80}, {P_87_84,P_83_80}); - black b_95_88 (G_95_88, P_95_88, {G_95_92,G_91_88}, {P_95_92,P_91_88}); - black b_103_96 (G_103_96, P_103_96, {G_103_100,G_99_96}, {P_103_100,P_99_96}); - black b_111_104 (G_111_104, P_111_104, {G_111_108,G_107_104}, {P_111_108,P_107_104}); - black b_119_112 (G_119_112, P_119_112, {G_119_116,G_115_112}, {P_119_116,P_115_112}); - black b_127_120 (G_127_120, P_127_120, {G_127_124,G_123_120}, {P_127_124,P_123_120}); - - - // Stage 4: Generates G/P pairs that span 8 bits - grey g_15_0 (G_15_0, {G_15_8,G_7_0}, P_15_8); - black b_31_16 (G_31_16, P_31_16, {G_31_24,G_23_16}, {P_31_24,P_23_16}); - black b_47_32 (G_47_32, P_47_32, {G_47_40,G_39_32}, {P_47_40,P_39_32}); - black b_63_48 (G_63_48, P_63_48, {G_63_56,G_55_48}, {P_63_56,P_55_48}); - black b_79_64 (G_79_64, P_79_64, {G_79_72,G_71_64}, {P_79_72,P_71_64}); - black b_95_80 (G_95_80, P_95_80, {G_95_88,G_87_80}, {P_95_88,P_87_80}); - black b_111_96 (G_111_96, P_111_96, {G_111_104,G_103_96}, {P_111_104,P_103_96}); - black b_127_112 (G_127_112, P_127_112, {G_127_120,G_119_112}, {P_127_120,P_119_112}); - - - // Stage 5: Generates G/P pairs that span 16 bits - grey g_31_0 (G_31_0, {G_31_16,G_15_0}, P_31_16); - black b_63_32 (G_63_32, P_63_32, {G_63_48,G_47_32}, {P_63_48,P_47_32}); - black b_95_64 (G_95_64, P_95_64, {G_95_80,G_79_64}, {P_95_80,P_79_64}); - black b_127_96 (G_127_96, P_127_96, {G_127_112,G_111_96}, 
{P_127_112,P_111_96}); - - // Stage 6: Generates G/P pairs that span 32 bits - grey g_63_0 (G_63_0, {G_63_32,G_31_0}, P_63_32); - black b_127_64 (G_127_64, P_127_64, {G_127_96,G_95_64}, {P_127_96,P_95_64}); - - // Stage 7: Generates G/P pairs that span 64 bits - grey g_127_0 (G_127_0, {G_127_64,G_63_0}, P_127_64); - - // Stage 8: Generates G/P pairs that span 32 bits - grey g_95_0 (G_95_0, {G_95_64,G_63_0}, P_95_64); - - // Stage 9: Generates G/P pairs that span 16 bits - grey g_47_0 (G_47_0, {G_47_32,G_31_0}, P_47_32); - grey g_79_0 (G_79_0, {G_79_64,G_63_0}, P_79_64); - grey g_111_0 (G_111_0, {G_111_96,G_95_0}, P_111_96); - - // Stage 10: Generates G/P pairs that span 8 bits - grey g_23_0 (G_23_0, {G_23_16,G_15_0}, P_23_16); - grey g_39_0 (G_39_0, {G_39_32,G_31_0}, P_39_32); - grey g_55_0 (G_55_0, {G_55_48,G_47_0}, P_55_48); - grey g_71_0 (G_71_0, {G_71_64,G_63_0}, P_71_64); - grey g_87_0 (G_87_0, {G_87_80,G_79_0}, P_87_80); - grey g_103_0 (G_103_0, {G_103_96,G_95_0}, P_103_96); - grey g_119_0 (G_119_0, {G_119_112,G_111_0}, P_119_112); - - // Stage 11: Generates G/P pairs that span 4 bits - grey g_11_0 (G_11_0, {G_11_8,G_7_0}, P_11_8); - grey g_19_0 (G_19_0, {G_19_16,G_15_0}, P_19_16); - grey g_27_0 (G_27_0, {G_27_24,G_23_0}, P_27_24); - grey g_35_0 (G_35_0, {G_35_32,G_31_0}, P_35_32); - grey g_43_0 (G_43_0, {G_43_40,G_39_0}, P_43_40); - grey g_51_0 (G_51_0, {G_51_48,G_47_0}, P_51_48); - grey g_59_0 (G_59_0, {G_59_56,G_55_0}, P_59_56); - grey g_67_0 (G_67_0, {G_67_64,G_63_0}, P_67_64); - grey g_75_0 (G_75_0, {G_75_72,G_71_0}, P_75_72); - grey g_83_0 (G_83_0, {G_83_80,G_79_0}, P_83_80); - grey g_91_0 (G_91_0, {G_91_88,G_87_0}, P_91_88); - grey g_99_0 (G_99_0, {G_99_96,G_95_0}, P_99_96); - grey g_107_0 (G_107_0, {G_107_104,G_103_0}, P_107_104); - grey g_115_0 (G_115_0, {G_115_112,G_111_0}, P_115_112); - grey g_123_0 (G_123_0, {G_123_120,G_119_0}, P_123_120); - - // Stage 12: Generates G/P pairs that span 2 bits - grey g_5_0 (G_5_0, {G_5_4,G_3_0}, P_5_4); - grey 
g_9_0 (G_9_0, {G_9_8,G_7_0}, P_9_8); - grey g_13_0 (G_13_0, {G_13_12,G_11_0}, P_13_12); - grey g_17_0 (G_17_0, {G_17_16,G_15_0}, P_17_16); - grey g_21_0 (G_21_0, {G_21_20,G_19_0}, P_21_20); - grey g_25_0 (G_25_0, {G_25_24,G_23_0}, P_25_24); - grey g_29_0 (G_29_0, {G_29_28,G_27_0}, P_29_28); - grey g_33_0 (G_33_0, {G_33_32,G_31_0}, P_33_32); - grey g_37_0 (G_37_0, {G_37_36,G_35_0}, P_37_36); - grey g_41_0 (G_41_0, {G_41_40,G_39_0}, P_41_40); - grey g_45_0 (G_45_0, {G_45_44,G_43_0}, P_45_44); - grey g_49_0 (G_49_0, {G_49_48,G_47_0}, P_49_48); - grey g_53_0 (G_53_0, {G_53_52,G_51_0}, P_53_52); - grey g_57_0 (G_57_0, {G_57_56,G_55_0}, P_57_56); - grey g_61_0 (G_61_0, {G_61_60,G_59_0}, P_61_60); - grey g_65_0 (G_65_0, {G_65_64,G_63_0}, P_65_64); - grey g_69_0 (G_69_0, {G_69_68,G_67_0}, P_69_68); - grey g_73_0 (G_73_0, {G_73_72,G_71_0}, P_73_72); - grey g_77_0 (G_77_0, {G_77_76,G_75_0}, P_77_76); - grey g_81_0 (G_81_0, {G_81_80,G_79_0}, P_81_80); - grey g_85_0 (G_85_0, {G_85_84,G_83_0}, P_85_84); - grey g_89_0 (G_89_0, {G_89_88,G_87_0}, P_89_88); - grey g_93_0 (G_93_0, {G_93_92,G_91_0}, P_93_92); - grey g_97_0 (G_97_0, {G_97_96,G_95_0}, P_97_96); - grey g_101_0 (G_101_0, {G_101_100,G_99_0}, P_101_100); - grey g_105_0 (G_105_0, {G_105_104,G_103_0}, P_105_104); - grey g_109_0 (G_109_0, {G_109_108,G_107_0}, P_109_108); - grey g_113_0 (G_113_0, {G_113_112,G_111_0}, P_113_112); - grey g_117_0 (G_117_0, {G_117_116,G_115_0}, P_117_116); - grey g_121_0 (G_121_0, {G_121_120,G_119_0}, P_121_120); - grey g_125_0 (G_125_0, {G_125_124,G_123_0}, P_125_124); - - // Last grey cell stage - grey g_2_0 (G_2_0, {g[2],G_1_0}, p[2]); - grey g_4_0 (G_4_0, {g[4],G_3_0}, p[4]); - grey g_6_0 (G_6_0, {g[6],G_5_0}, p[6]); - grey g_8_0 (G_8_0, {g[8],G_7_0}, p[8]); - grey g_10_0 (G_10_0, {g[10],G_9_0}, p[10]); - grey g_12_0 (G_12_0, {g[12],G_11_0}, p[12]); - grey g_14_0 (G_14_0, {g[14],G_13_0}, p[14]); - grey g_16_0 (G_16_0, {g[16],G_15_0}, p[16]); - grey g_18_0 (G_18_0, {g[18],G_17_0}, p[18]); - 
grey g_20_0 (G_20_0, {g[20],G_19_0}, p[20]); - grey g_22_0 (G_22_0, {g[22],G_21_0}, p[22]); - grey g_24_0 (G_24_0, {g[24],G_23_0}, p[24]); - grey g_26_0 (G_26_0, {g[26],G_25_0}, p[26]); - grey g_28_0 (G_28_0, {g[28],G_27_0}, p[28]); - grey g_30_0 (G_30_0, {g[30],G_29_0}, p[30]); - grey g_32_0 (G_32_0, {g[32],G_31_0}, p[32]); - grey g_34_0 (G_34_0, {g[34],G_33_0}, p[34]); - grey g_36_0 (G_36_0, {g[36],G_35_0}, p[36]); - grey g_38_0 (G_38_0, {g[38],G_37_0}, p[38]); - grey g_40_0 (G_40_0, {g[40],G_39_0}, p[40]); - grey g_42_0 (G_42_0, {g[42],G_41_0}, p[42]); - grey g_44_0 (G_44_0, {g[44],G_43_0}, p[44]); - grey g_46_0 (G_46_0, {g[46],G_45_0}, p[46]); - grey g_48_0 (G_48_0, {g[48],G_47_0}, p[48]); - grey g_50_0 (G_50_0, {g[50],G_49_0}, p[50]); - grey g_52_0 (G_52_0, {g[52],G_51_0}, p[52]); - grey g_54_0 (G_54_0, {g[54],G_53_0}, p[54]); - grey g_56_0 (G_56_0, {g[56],G_55_0}, p[56]); - grey g_58_0 (G_58_0, {g[58],G_57_0}, p[58]); - grey g_60_0 (G_60_0, {g[60],G_59_0}, p[60]); - grey g_62_0 (G_62_0, {g[62],G_61_0}, p[62]); - grey g_64_0 (G_64_0, {g[64],G_63_0}, p[64]); - grey g_66_0 (G_66_0, {g[66],G_65_0}, p[66]); - grey g_68_0 (G_68_0, {g[68],G_67_0}, p[68]); - grey g_70_0 (G_70_0, {g[70],G_69_0}, p[70]); - grey g_72_0 (G_72_0, {g[72],G_71_0}, p[72]); - grey g_74_0 (G_74_0, {g[74],G_73_0}, p[74]); - grey g_76_0 (G_76_0, {g[76],G_75_0}, p[76]); - grey g_78_0 (G_78_0, {g[78],G_77_0}, p[78]); - grey g_80_0 (G_80_0, {g[80],G_79_0}, p[80]); - grey g_82_0 (G_82_0, {g[82],G_81_0}, p[82]); - grey g_84_0 (G_84_0, {g[84],G_83_0}, p[84]); - grey g_86_0 (G_86_0, {g[86],G_85_0}, p[86]); - grey g_88_0 (G_88_0, {g[88],G_87_0}, p[88]); - grey g_90_0 (G_90_0, {g[90],G_89_0}, p[90]); - grey g_92_0 (G_92_0, {g[92],G_91_0}, p[92]); - grey g_94_0 (G_94_0, {g[94],G_93_0}, p[94]); - grey g_96_0 (G_96_0, {g[96],G_95_0}, p[96]); - grey g_98_0 (G_98_0, {g[98],G_97_0}, p[98]); - grey g_100_0 (G_100_0, {g[100],G_99_0}, p[100]); - grey g_102_0 (G_102_0, {g[102],G_101_0}, p[102]); - grey g_104_0 
(G_104_0, {g[104],G_103_0}, p[104]); - grey g_106_0 (G_106_0, {g[106],G_105_0}, p[106]); - grey g_108_0 (G_108_0, {g[108],G_107_0}, p[108]); - grey g_110_0 (G_110_0, {g[110],G_109_0}, p[110]); - grey g_112_0 (G_112_0, {g[112],G_111_0}, p[112]); - grey g_114_0 (G_114_0, {g[114],G_113_0}, p[114]); - grey g_116_0 (G_116_0, {g[116],G_115_0}, p[116]); - grey g_118_0 (G_118_0, {g[118],G_117_0}, p[118]); - grey g_120_0 (G_120_0, {g[120],G_119_0}, p[120]); - grey g_122_0 (G_122_0, {g[122],G_121_0}, p[122]); - grey g_124_0 (G_124_0, {g[124],G_123_0}, p[124]); - grey g_126_0 (G_126_0, {g[126],G_125_0}, p[126]); - - // Final Stage: Apply c_k+1=G_k_0 - assign c[1]=g[0]; - assign c[2]=G_1_0; - assign c[3]=G_2_0; - assign c[4]=G_3_0; - assign c[5]=G_4_0; - assign c[6]=G_5_0; - assign c[7]=G_6_0; - assign c[8]=G_7_0; - assign c[9]=G_8_0; - - assign c[10]=G_9_0; - assign c[11]=G_10_0; - assign c[12]=G_11_0; - assign c[13]=G_12_0; - assign c[14]=G_13_0; - assign c[15]=G_14_0; - assign c[16]=G_15_0; - assign c[17]=G_16_0; - - assign c[18]=G_17_0; - assign c[19]=G_18_0; - assign c[20]=G_19_0; - assign c[21]=G_20_0; - assign c[22]=G_21_0; - assign c[23]=G_22_0; - assign c[24]=G_23_0; - assign c[25]=G_24_0; - - assign c[26]=G_25_0; - assign c[27]=G_26_0; - assign c[28]=G_27_0; - assign c[29]=G_28_0; - assign c[30]=G_29_0; - assign c[31]=G_30_0; - assign c[32]=G_31_0; - assign c[33]=G_32_0; - - assign c[34]=G_33_0; - assign c[35]=G_34_0; - assign c[36]=G_35_0; - assign c[37]=G_36_0; - assign c[38]=G_37_0; - assign c[39]=G_38_0; - assign c[40]=G_39_0; - assign c[41]=G_40_0; - - assign c[42]=G_41_0; - assign c[43]=G_42_0; - assign c[44]=G_43_0; - assign c[45]=G_44_0; - assign c[46]=G_45_0; - assign c[47]=G_46_0; - assign c[48]=G_47_0; - assign c[49]=G_48_0; - - assign c[50]=G_49_0; - assign c[51]=G_50_0; - assign c[52]=G_51_0; - assign c[53]=G_52_0; - assign c[54]=G_53_0; - assign c[55]=G_54_0; - assign c[56]=G_55_0; - assign c[57]=G_56_0; - - assign c[58]=G_57_0; - assign c[59]=G_58_0; - 
assign c[60]=G_59_0; - assign c[61]=G_60_0; - assign c[62]=G_61_0; - assign c[63]=G_62_0; - assign c[64]=G_63_0; - assign c[65]=G_64_0; - - assign c[66]=G_65_0; - assign c[67]=G_66_0; - assign c[68]=G_67_0; - assign c[69]=G_68_0; - assign c[70]=G_69_0; - assign c[71]=G_70_0; - assign c[72]=G_71_0; - assign c[73]=G_72_0; - - assign c[74]=G_73_0; - assign c[75]=G_74_0; - assign c[76]=G_75_0; - assign c[77]=G_76_0; - assign c[78]=G_77_0; - assign c[79]=G_78_0; - assign c[80]=G_79_0; - assign c[81]=G_80_0; - - assign c[82]=G_81_0; - assign c[83]=G_82_0; - assign c[84]=G_83_0; - assign c[85]=G_84_0; - assign c[86]=G_85_0; - assign c[87]=G_86_0; - assign c[88]=G_87_0; - assign c[89]=G_88_0; - - assign c[90]=G_89_0; - assign c[91]=G_90_0; - assign c[92]=G_91_0; - assign c[93]=G_92_0; - assign c[94]=G_93_0; - assign c[95]=G_94_0; - assign c[96]=G_95_0; - assign c[97]=G_96_0; - - assign c[98]=G_97_0; - assign c[99]=G_98_0; - assign c[100]=G_99_0; - assign c[101]=G_100_0; - assign c[102]=G_101_0; - assign c[103]=G_102_0; - assign c[104]=G_103_0; - assign c[105]=G_104_0; - - assign c[106]=G_105_0; - assign c[107]=G_106_0; - assign c[108]=G_107_0; - assign c[109]=G_108_0; - assign c[110]=G_109_0; - assign c[111]=G_110_0; - assign c[112]=G_111_0; - assign c[113]=G_112_0; - - assign c[114]=G_113_0; - assign c[115]=G_114_0; - assign c[116]=G_115_0; - assign c[117]=G_116_0; - assign c[118]=G_117_0; - assign c[119]=G_118_0; - assign c[120]=G_119_0; - assign c[121]=G_120_0; - - assign c[122]=G_121_0; - assign c[123]=G_122_0; - assign c[124]=G_123_0; - assign c[125]=G_124_0; - assign c[126]=G_125_0; - assign c[127]=G_126_0; - assign c[128]=G_127_0; - -endmodule // brent_kung_cs - - diff --git a/wally-pipelined/src/fpu/bk13.sv b/wally-pipelined/src/fpu/bk13.sv deleted file mode 100755 index 84158db98..000000000 --- a/wally-pipelined/src/fpu/bk13.sv +++ /dev/null @@ -1,97 +0,0 @@ -// Brent-Kung Carry-save Prefix Adder - -module bk13 (cout, sum, a, b, cin); - input [12:0] a, b; - input 
cin; - output [12:0] sum; - output cout; - - wire [13:0] p,g,t; - wire [12:0] c; - -// pre-computation - assign p={a^b,1'b0}; - assign g={a&b, cin}; - assign t[1]=p[1]; - assign t[2]=p[2]; - assign t[3]=p[3]^g[2]; - assign t[4]=p[4]; - assign t[5]=p[5]^g[4]; - assign t[6]=p[6]; - assign t[7]=p[7]^g[6]; - assign t[8]=p[8]; - assign t[9]=p[9]^g[8]; - assign t[10]=p[10]; - assign t[11]=p[11]^g[10]; - assign t[12]=p[12]; - assign t[13]=p[13]; - -// prefix tree - brent_kung_cs13 prefix_tree(c, p[12:0], g[12:0]); - -// post-computation - assign sum=p[13:1]^c; - assign cout=g[13]|(p[13]&c[12]); - -endmodule - -module brent_kung_cs13 (c, p, g); - - input [13:0] p; - input [13:0] g; - output [13:1] c; - - - // parallel-prefix, Brent-Kung - - // Stage 1: Generates G/P pairs that span 1 bits - grey b_1_0 (G_1_0, {g[1],g[0]}, p[1]); - black b_3_2 (G_3_2, P_3_2, {g[3],g[2]}, {p[3],p[2]}); - black b_5_4 (G_5_4, P_5_4, {g[5],g[4]}, {p[5],p[4]}); - black b_7_6 (G_7_6, P_7_6, {g[7],g[6]}, {p[7],p[6]}); - black b_9_8 (G_9_8, P_9_8, {g[9],g[8]}, {p[9],p[8]}); - black b_11_10 (G_11_10, P_11_10, {g[11],g[10]}, {p[11],p[10]}); - black b_13_12 (G_13_12, P_13_12, {g[13],g[12]}, {p[13],p[12]}); - - // Stage 2: Generates G/P pairs that span 2 bits - grey g_3_0 (G_3_0, {G_3_2,G_1_0}, P_3_2); - black b_7_4 (G_7_4, P_7_4, {G_7_6,G_5_4}, {P_7_6,P_5_4}); - black b_11_8 (G_11_8, P_11_8, {G_11_10,G_9_8}, {P_11_10,P_9_8}); - - // Stage 3: Generates G/P pairs that span 4 bits - grey g_7_0 (G_7_0, {G_7_4,G_3_0}, P_7_4); - - // Stage 4: Generates G/P pairs that span 8 bits - - // Stage 5: Generates G/P pairs that span 4 bits - grey g_11_0 (G_11_0, {G_11_8,G_7_0}, P_11_8); - - // Stage 6: Generates G/P pairs that span 2 bits - grey g_5_0 (G_5_0, {G_5_4,G_3_0}, P_5_4); - grey g_9_0 (G_9_0, {G_9_8,G_7_0}, P_9_8); - - // Last grey cell stage - grey g_2_0 (G_2_0, {g[2],G_1_0}, p[2]); - grey g_4_0 (G_4_0, {g[4],G_3_0}, p[4]); - grey g_6_0 (G_6_0, {g[6],G_5_0}, p[6]); - grey g_8_0 (G_8_0, {g[8],G_7_0}, 
p[8]); - grey g_10_0 (G_10_0, {g[10],G_9_0}, p[10]); - grey g_12_0 (G_12_0, {g[12],G_11_0}, p[12]); - - // Final Stage: Apply c_k+1=G_k_0 - assign c[1]=g[0]; - assign c[2]=G_1_0; - assign c[3]=G_2_0; - assign c[4]=G_3_0; - assign c[5]=G_4_0; - assign c[6]=G_5_0; - assign c[7]=G_6_0; - assign c[8]=G_7_0; - assign c[9]=G_8_0; - - assign c[10]=G_9_0; - assign c[11]=G_10_0; - assign c[12]=G_11_0; - assign c[13]=G_12_0; - -endmodule diff --git a/wally-pipelined/src/fpu/bk14.sv b/wally-pipelined/src/fpu/bk14.sv deleted file mode 100755 index 46872167e..000000000 --- a/wally-pipelined/src/fpu/bk14.sv +++ /dev/null @@ -1,86 +0,0 @@ -// Brent-Kung Prefix Adder - -module bk14 (cout, sum, a, b, cin); - input [13:0] a, b; - input cin; - output [13:0] sum; - output cout; - - wire [14:0] p,g; - wire [13:0] c; - -// pre-computation - assign p={a^b,1'b0}; - assign g={a&b, cin}; - -// prefix tree - brent_kung14 prefix_tree(c, p[13:0], g[13:0]); - -// post-computation - assign sum=p[14:1]^c; - assign cout=g[14]|(p[14]&c[13]); - -endmodule - -module brent_kung14 (c, p, g); - - input [13:0] p; - input [13:0] g; - output [14:1] c; - - - // parallel-prefix, Brent-Kung - - // Stage 1: Generates G/P pairs that span 1 bits - grey b_1_0 (G_1_0, {g[1],g[0]}, p[1]); - black b_3_2 (G_3_2, P_3_2, {g[3],g[2]}, {p[3],p[2]}); - black b_5_4 (G_5_4, P_5_4, {g[5],g[4]}, {p[5],p[4]}); - black b_7_6 (G_7_6, P_7_6, {g[7],g[6]}, {p[7],p[6]}); - black b_9_8 (G_9_8, P_9_8, {g[9],g[8]}, {p[9],p[8]}); - black b_11_10 (G_11_10, P_11_10, {g[11],g[10]}, {p[11],p[10]}); - black b_13_12 (G_13_12, P_13_12, {g[13],g[12]}, {p[13],p[12]}); - - // Stage 2: Generates G/P pairs that span 2 bits - grey g_3_0 (G_3_0, {G_3_2,G_1_0}, P_3_2); - black b_7_4 (G_7_4, P_7_4, {G_7_6,G_5_4}, {P_7_6,P_5_4}); - black b_11_8 (G_11_8, P_11_8, {G_11_10,G_9_8}, {P_11_10,P_9_8}); - - // Stage 3: Generates G/P pairs that span 4 bits - grey g_7_0 (G_7_0, {G_7_4,G_3_0}, P_7_4); - - // Stage 4: Generates G/P pairs that span 8 bits - - // 
Stage 5: Generates G/P pairs that span 4 bits - grey g_11_0 (G_11_0, {G_11_8,G_7_0}, P_11_8); - - // Stage 6: Generates G/P pairs that span 2 bits - grey g_5_0 (G_5_0, {G_5_4,G_3_0}, P_5_4); - grey g_9_0 (G_9_0, {G_9_8,G_7_0}, P_9_8); - grey g_13_0 (G_13_0, {G_13_12,G_11_0}, P_13_12); - - // Last grey cell stage - grey g_2_0 (G_2_0, {g[2],G_1_0}, p[2]); - grey g_4_0 (G_4_0, {g[4],G_3_0}, p[4]); - grey g_6_0 (G_6_0, {g[6],G_5_0}, p[6]); - grey g_8_0 (G_8_0, {g[8],G_7_0}, p[8]); - grey g_10_0 (G_10_0, {g[10],G_9_0}, p[10]); - grey g_12_0 (G_12_0, {g[12],G_11_0}, p[12]); - - // Final Stage: Apply c_k+1=G_k_0 - assign c[1]=g[0]; - assign c[2]=G_1_0; - assign c[3]=G_2_0; - assign c[4]=G_3_0; - assign c[5]=G_4_0; - assign c[6]=G_5_0; - assign c[7]=G_6_0; - assign c[8]=G_7_0; - assign c[9]=G_8_0; - - assign c[10]=G_9_0; - assign c[11]=G_10_0; - assign c[12]=G_11_0; - assign c[13]=G_12_0; - assign c[14]=G_13_0; - -endmodule diff --git a/wally-pipelined/src/fpu/csa.sv b/wally-pipelined/src/fpu/csa.sv deleted file mode 100644 index 1e5682cfc..000000000 --- a/wally-pipelined/src/fpu/csa.sv +++ /dev/null @@ -1,70 +0,0 @@ -module ha (C, S, A, B) ; - - input A, B; - output S, C; - - assign S = A^B; - assign C = A&B; - -endmodule // HA - -// module fa (input logic a, b, c, output logic sum, carry); - -// assign sum = a^b^c; -// assign carry = a&b|a&c|b&c; - -// endmodule // fa - -// module csa #(parameter WIDTH=8) (a, b,c, sum, carry, cout); - -// input logic [WIDTH-1:0] a, b, c; - -// output logic [WIDTH-1:0] sum, carry; -// output logic cout; - -// logic [WIDTH:0] carry_temp; -// genvar i; -// generate -// for (i=0;i fp // fmv.w.x = ???0 // fmv.w.d = ???1 diff --git a/wally-pipelined/src/fpu/fma2.sv b/wally-pipelined/src/fpu/fma2.sv index 131f98394..518b7a76c 100644 --- a/wally-pipelined/src/fpu/fma2.sv +++ b/wally-pipelined/src/fpu/fma2.sv @@ -16,8 +16,8 @@ module fma2( input logic XZeroM, YZeroM, ZZeroM, // inputs are zero input logic XInfM, YInfM, ZInfM, // inputs are 
infinity input logic XNaNM, YNaNM, ZNaNM, // inputs are NaN - output logic [63:0] FmaResultM, // FMA final result - output logic [4:0] FmaFlagsM); // FMA flags {invalid, divide by zero, overflow, underflow, inexact} + output logic [63:0] FMAResM, // FMA final result + output logic [4:0] FMAFlgM); // FMA flags {invalid, divide by zero, overflow, underflow, inexact} @@ -57,7 +57,7 @@ module fma2( logic [12:0] MaxExp; // maximum value of the exponent logic [12:0] FracLen; // length of the fraction logic SigNaN; // is an input a signaling NaN - logic UnderflowFlag; // Underflow singal used in FmaFlagsM (used to avoid a circular depencency) + logic UnderflowFlag; // Underflow singal used in FMAFlgM (used to avoid a circular depencency) logic [63:0] XNaNResult, YNaNResult, ZNaNResult, InvalidResult, OverflowResult, KillProdResult, UnderflowResult; // possible results @@ -316,7 +316,7 @@ module fma2( // Combine flags // - FMA can't set the Divide by zero flag // - Don't set the underflow flag if the result was rounded up to a normal number - assign FmaFlagsM = {Invalid, 1'b0, Overflow, UnderflowFlag, Inexact}; + assign FMAFlgM = {Invalid, 1'b0, Overflow, UnderflowFlag, Inexact}; @@ -337,7 +337,7 @@ module fma2( assign InvalidResult = FmtM ? {ResultSgn, 11'h7ff, 1'b1, 51'b0} : {ResultSgn, 8'hff, 1'b1, 54'b0}; assign KillProdResult = FmtM ?{ResultSgn, Addend[62:0] - {62'b0, (Minus1&AddendStickyM)}} + {62'b0, (Plus1&AddendStickyM)} : {ResultSgn, Addend[62:32] - {30'b0, (Minus1&AddendStickyM)} + {30'b0, (Plus1&AddendStickyM)}, 32'b0}; assign UnderflowResult = FmtM ? {ResultSgn, 63'b0} + {63'b0, (CalcPlus1&(AddendStickyM|FrmM[1]))} : {{ResultSgn, 31'b0} + {31'b0, (CalcPlus1&(AddendStickyM|FrmM[1]))}, 32'b0}; - assign FmaResultM = XNaNM ? XNaNResult : + assign FMAResM = XNaNM ? XNaNResult : YNaNM ? YNaNResult : ZNaNM ? ZNaNResult : Invalid ? 
InvalidResult : // has to be before inf diff --git a/wally-pipelined/src/fpu/fpadd_denorm.sv b/wally-pipelined/src/fpu/fpadd_denorm.sv index eabfcd3a1..43de30879 100755 --- a/wally-pipelined/src/fpu/fpadd_denorm.sv +++ b/wally-pipelined/src/fpu/fpadd_denorm.sv @@ -229,11 +229,11 @@ module fpadd (AS_Result, Flags, Denorm, op1, op2, rm, op_type, P, OvEn, UnEn); assign corr_sign = ~op_type[2]&~op_type[1]&op_type[0]&swap; // 64-bit Mantissa Adder/Subtractor - cla64 add1 (sum, mantissaA3, mantissaB3, sub); + cla64 add1 (sum, mantissaA3, mantissaB3, sub); //***adder // 64-bit Mantissa Subtractor - to get the two's complement of the // result when the sign from the adder/subtractor is negative. - cla_sub64 sub1 (sum_tc, mantissaB3, mantissaA3); + cla_sub64 sub1 (sum_tc, mantissaB3, mantissaA3); //***adder // Determine the correct sign of the result assign sign_corr = ((corr_sign ^ signA) & ~convert) ^ sum[63]; diff --git a/wally-pipelined/src/fpu/fpdiv.sv b/wally-pipelined/src/fpu/fpdiv.sv deleted file mode 100755 index 8c305f3ea..000000000 --- a/wally-pipelined/src/fpu/fpdiv.sv +++ /dev/null @@ -1,256 +0,0 @@ -// -// File name : fpdiv -// Title : Floating-Point Divider/Square-Root -// project : FPU -// Library : fpdiv -// Author(s) : James E. Stine, Jr. -// Purpose : definition of main unit to floating-point div/sqrt -// notes : -// -// Copyright Oklahoma State University -// -// Basic Operations -// -// Step 1: Load operands, set flags, and convert SP to DP -// Step 2: Check for special inputs ( +/- Infinity, NaN) -// Step 3: Exponent Logic -// Step 4: Divide/Sqrt using Goldschmidt -// Step 5: Normalize the result.// -// Shift left until normalized. Normalized when the value to the -// left of the binrary point is 1. -// Step 6: Round the result.// -// Step 7: Put quotient/remainder onto output. 
-// - -// `timescale 1ps/1ps -module fpdiv (FDivSqrtDoneE, FDivResultM, FDivFlagsM, DivDenormM, DivInput1E, DivInput2E, FrmE, DivOpType, FmtE, DivOvEn, DivUnEn, - FDivStartE, reset, clk, FDivBusyE, HoldInputs); - - input [63:0] DivInput1E; // 1st input operand (A) - input [63:0] DivInput2E; // 2nd input operand (B) - input [2:0] FrmE; // Rounding mode - specify values - input DivOpType; // Function opcode - input FmtE; // Result Precision (0 for double, 1 for single) //***will need to swap this - input DivOvEn; // Overflow trap enabled - input DivUnEn; // Underflow trap enabled - - input FDivStartE; - input reset; - input clk; - - output [63:0] FDivResultM; // Result of operation - output [4:0] FDivFlagsM; // IEEE exception flags - output DivDenormM; // DivDenormM on input or output - output FDivSqrtDoneE; - output FDivBusyE, HoldInputs; - - supply1 vdd; - supply0 vss; - - wire [63:0] Float1; - wire [63:0] Float2; - wire [63:0] IntValue; - - wire [12:0] exp1, exp2, expF; - wire [12:0] exp_diff, bias; - wire [13:0] exp_sqrt; - wire [12:0] exp_s; - wire [12:0] exp_c; - - wire [10:0] exponent, exp_pre; - wire [63:0] Result; - wire [52:0] mantissaA; - wire [52:0] mantissaB; - wire [63:0] sum, sum_tc, sum_corr, sum_norm; - - wire [5:0] align_shift; - wire [5:0] norm_shift; - wire [2:0] sel_inv; - wire op1_Norm, op2_Norm; - wire opA_Norm, opB_Norm; - wire Invalid; - wire DenormIn, DenormIO; - wire [4:0] FlagsIn; - wire exp_gt63; - wire Sticky_out; - wire signResult, sign_corr; - wire corr_sign; - wire zeroB; - wire convert; - wire swap; - wire sub; - - wire [63:0] q1, qm1, qp1, q0, qm0, qp0; - wire [63:0] rega_out, regb_out, regc_out, regd_out; - wire [127:0] regr_out; - wire [2:0] sel_muxa, sel_muxb; - wire sel_muxr; - wire load_rega, load_regb, load_regc, load_regd, load_regr, load_regs; - - wire donev, sel_muxrv, sel_muxsv; - wire [1:0] sel_muxav, sel_muxbv; - wire load_regav, load_regbv, load_regcv; - wire load_regrv, load_regsv; - - logic exp_cout1, exp_cout2, 
exp_odd, open; - - - // Convert the input operands to their appropriate forms based on - // the orignal operands, the DivOpType , and their precision FmtE. - // Single precision inputs are converted to double precision - // and the sign of the first operand is set appropratiately based on - // if the operation is absolute value or negation. - convert_inputs_div divconv1 (Float1, Float2, DivInput1E, DivInput2E, DivOpType, FmtE); - - // Test for exceptions and return the "Invalid Operation" and - // "Denormalized" Input FDivFlagsM. The "sel_inv" is used in - // the third pipeline stage to select the result. Also, op1_Norm - // and op2_Norm are one if DivInput1E and DivInput2E are not zero or denormalized. - // sub is one if the effective operation is subtaction. - exception_div divexc1 (sel_inv, Invalid, DenormIn, op1_Norm, op2_Norm, - Float1, Float2, DivOpType); - - // Determine Sign/Mantissa - assign signResult = ((Float1[63]^Float2[63])&~DivOpType) | Float1[63]&DivOpType; - assign mantissaA = {vdd, Float1[51:0]}; - assign mantissaB = {vdd, Float2[51:0]}; - // Perform Exponent Subtraction - expA - expB + Bias - assign exp1 = {2'b0, Float1[62:52]}; - assign exp2 = {2'b0, Float2[62:52]}; - // bias : DP = 2^{11-1}-1 = 1023 - assign bias = {3'h0, 10'h3FF}; - // Divide exponent - csa #(13) csa1 (exp1, ~exp2, bias, exp_s, exp_c); - exp_add explogic1 (exp_cout1, {open, exp_diff}, - {vss, exp_s}, {vss, exp_c}, 1'b1); - // Sqrt exponent (check if exponent is odd) - assign exp_odd = Float1[52] ? vss : vdd; - exp_add explogic2 (exp_cout2, exp_sqrt, - {vss, exp1}, {4'h0, 10'h3ff}, exp_odd); - // Choose correct exponent - assign expF = DivOpType ? 
exp_sqrt[13:1] : exp_diff; - - // Main Goldschmidt/Division Routine - divconv goldy (q1, qm1, qp1, q0, qm0, qp0, - rega_out, regb_out, regc_out, regd_out, - regr_out, mantissaB, mantissaA, - sel_muxa, sel_muxb, sel_muxr, - reset, clk, - load_rega, load_regb, load_regc, load_regd, - load_regr, load_regs, FmtE, DivOpType, exp_odd); - - // FSM : control divider - fsm control (FDivSqrtDoneE, load_rega, load_regb, load_regc, load_regd, - load_regr, load_regs, sel_muxa, sel_muxb, sel_muxr, - clk, reset, FDivStartE, DivOpType, FDivBusyE, HoldInputs); - - // Round the mantissa to a 52-bit value, with the leading one - // removed. The rounding units also handles special cases and - // set the exception flags. - //***add max magnitude and swap negitive and positive infinity - rounder_div divround1 (Result, DenormIO, FlagsIn, - FrmE, FmtE, DivOvEn, DivUnEn, expF, - sel_inv, Invalid, DenormIn, signResult, - q1, qm1, qp1, q0, qm0, qp0, regr_out); - - // Store the final result and the exception flags in registers. 
- flopenr #(64) rega (clk, reset, FDivSqrtDoneE, Result, FDivResultM); - flopenr #(1) regb (clk, reset, FDivSqrtDoneE, DenormIO, DivDenormM); - flopenr #(5) regc (clk, reset, FDivSqrtDoneE, FlagsIn, FDivFlagsM); - -endmodule // fpadd - -// -// Brent-Kung Prefix Adder -// (yes, it is 14 bits as my generator is broken for 13 bits :( -// assume, synthesizer will delete stuff not needed ) -// -module exp_add (cout, sum, a, b, cin); - - input [13:0] a, b; - input cin; - - output [13:0] sum; - output cout; - - wire [14:0] p,g; - wire [13:0] c; - - // pre-computation - assign p={a^b,1'b0}; - assign g={a&b, cin}; - - // prefix tree - brent_kung prefix_tree(c, p[13:0], g[13:0]); - - // post-computation - assign sum=p[14:1]^c; - assign cout=g[14]|(p[14]&c[13]); - -endmodule // exp_add - -module brent_kung (c, p, g); - - input [13:0] p; - input [13:0] g; - output [14:1] c; - - logic G_1_0, G_3_2,G_5_4,G_7_6,G_9_8,G_11_10,G_13_12,G_3_0,G_7_4,G_11_8; - logic P_3_2,P_5_4,P_7_6,P_9_8,P_11_10,P_13_12,P_7_4,P_11_8; - logic G_7_0,G_11_0,G_5_0,G_9_0,G_13_0,G_2_0,G_4_0,G_6_0,G_8_0,G_10_0,G_12_0; - // parallel-prefix, Brent-Kung - - // Stage 1: Generates G/FmtE pairs that span 1 bits - grey b_1_0 (G_1_0, {g[1],g[0]}, p[1]); - black b_3_2 (G_3_2, P_3_2, {g[3],g[2]}, {p[3],p[2]}); - black b_5_4 (G_5_4, P_5_4, {g[5],g[4]}, {p[5],p[4]}); - black b_7_6 (G_7_6, P_7_6, {g[7],g[6]}, {p[7],p[6]}); - black b_9_8 (G_9_8, P_9_8, {g[9],g[8]}, {p[9],p[8]}); - black b_11_10 (G_11_10, P_11_10, {g[11],g[10]}, {p[11],p[10]}); - black b_13_12 (G_13_12, P_13_12, {g[13],g[12]}, {p[13],p[12]}); - - // Stage 2: Generates G/FmtE pairs that span 2 bits - grey g_3_0 (G_3_0, {G_3_2,G_1_0}, P_3_2); - black b_7_4 (G_7_4, P_7_4, {G_7_6,G_5_4}, {P_7_6,P_5_4}); - black b_11_8 (G_11_8, P_11_8, {G_11_10,G_9_8}, {P_11_10,P_9_8}); - - // Stage 3: Generates G/FmtE pairs that span 4 bits - grey g_7_0 (G_7_0, {G_7_4,G_3_0}, P_7_4); - - // Stage 4: Generates G/FmtE pairs that span 8 bits - - // Stage 5: Generates G/FmtE 
pairs that span 4 bits - grey g_11_0 (G_11_0, {G_11_8,G_7_0}, P_11_8); - - // Stage 6: Generates G/FmtE pairs that span 2 bits - grey g_5_0 (G_5_0, {G_5_4,G_3_0}, P_5_4); - grey g_9_0 (G_9_0, {G_9_8,G_7_0}, P_9_8); - grey g_13_0 (G_13_0, {G_13_12,G_11_0}, P_13_12); - - // Last grey cell stage - grey g_2_0 (G_2_0, {g[2],G_1_0}, p[2]); - grey g_4_0 (G_4_0, {g[4],G_3_0}, p[4]); - grey g_6_0 (G_6_0, {g[6],G_5_0}, p[6]); - grey g_8_0 (G_8_0, {g[8],G_7_0}, p[8]); - grey g_10_0 (G_10_0, {g[10],G_9_0}, p[10]); - grey g_12_0 (G_12_0, {g[12],G_11_0}, p[12]); - - // Final Stage: Apply c_k+1=G_k_0 - assign c[1]=g[0]; - assign c[2]=G_1_0; - assign c[3]=G_2_0; - assign c[4]=G_3_0; - assign c[5]=G_4_0; - assign c[6]=G_5_0; - assign c[7]=G_6_0; - assign c[8]=G_7_0; - assign c[9]=G_8_0; - - assign c[10]=G_9_0; - assign c[11]=G_10_0; - assign c[12]=G_11_0; - assign c[13]=G_12_0; - assign c[14]=G_13_0; - -endmodule // brent_kung - diff --git a/wally-pipelined/src/fpu/fpu.sv b/wally-pipelined/src/fpu/fpu.sv index 5c15268ed..ff29dfd70 100755 --- a/wally-pipelined/src/fpu/fpu.sv +++ b/wally-pipelined/src/fpu/fpu.sv @@ -34,7 +34,7 @@ module fpu ( input logic [`XLEN-1:0] SrcAM, // Integer input being written into fpreg input logic StallE, StallM, StallW, input logic FlushE, FlushM, FlushW, - output logic FStallD, // Stall the decode stage if Div/Sqrt instruction + output logic FStallD, // Stall the decode stage output logic FWriteIntE, FWriteIntM, FWriteIntW, // Write integer register enable output logic [`XLEN-1:0] FWriteDataE, // Data to be written to memory output logic [`XLEN-1:0] FIntResM, @@ -42,48 +42,38 @@ module fpu ( output logic IllegalFPUInstrD, // Is the instruction an illegal fpu instruction output logic [4:0] SetFflagsM, // FPU flags output logic [`XLEN-1:0] FPUResultW); // FPU result - +// *** change FMA to do 16 - 32 - 64 - 128 FEXPBITS // control logic signal instantiation logic FWriteEnD, FWriteEnE, FWriteEnM, FWriteEnW; // FP register write enable - logic [2:0] FrmD, 
FrmE, FrmM, FrmW; // FP rounding mode + logic [2:0] FrmD, FrmE, FrmM; // FP rounding mode logic FmtD, FmtE, FmtM, FmtW; // FP precision 0-single 1-double logic FDivStartD, FDivStartE; // Start division logic FWriteIntD; // Write to integer register - logic FOutputInput2D, FOutputInput2E; // Put Input2 in Input1 if a store instruction - logic [1:0] FMemRWD; // Read and write enable for memory - logic [1:0] ForwardXD, ForwardXE; // Input1 forwarding mux control signal - logic [1:0] ForwardYD, ForwardYE; // Input2 forwarding mux control signal - logic [1:0] ForwardZD, ForwardZE; // Input3 forwarding mux control signal - logic SrcYUsedD; // Is input 2 used - logic SrcZUsedD; // Is input 3 used + logic [1:0] ForwardXE, ForwardYE, ForwardZE; // Input3 forwarding mux control signal logic [2:0] FResultSelD, FResultSelE, FResultSelM, FResultSelW; // Select FP result - logic [3:0] FOpCtrlD, FOpCtrlE, FOpCtrlM, FOpCtrlW; // Select which opperation to do in each component - logic [1:0] FResSelD, FResSelE, FResSelM; - logic [1:0] FIntResSelD, FIntResSelE, FIntResSelM; + logic [3:0] FOpCtrlD, FOpCtrlE, FOpCtrlM; // Select which opperation to do in each component + logic [1:0] FResSelD, FResSelE, FResSelM; + logic [1:0] FIntResSelD, FIntResSelE, FIntResSelM; logic [4:0] Adr1E, Adr2E, Adr3E; // regfile signals logic [4:0] RdE, RdM, RdW; // what adress to write to // ***Can take from ieu insted of pipelining - logic [63:0] FWDM; // Write data for FP register logic [63:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage logic [63:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage - logic [63:0] SrcXE, SrcXM, SrcXW; // Input 1 to the various units (after forwarding) logic [`XLEN-1:0] SrcXMAligned; - logic [63:0] SrcYE, SrcYM, SrcYW; // Input 2 to the various units (after forwarding) + logic [63:0] SrcXE, SrcXM; // Input 1 to the various units (after forwarding) + logic [63:0] SrcYE, SrcYM; // Input 2 to the various units (after forwarding) logic 
[63:0] SrcZE, SrcZM; // Input 3 to the various units (after forwarding) - logic [63:0] FLoadResultW, FLoadStoreResultM, FLoadStoreResultW; // Result for load, store, and move to int-reg instructions // div/sqrt signals - logic DivDenormE, DivDenormM, DivDenormW; - logic DivOvEn, DivUnEn; - logic [63:0] FDivResultE, FDivResultM, FDivResultW; - logic [4:0] FDivFlagsE, FDivFlagsM, FDivFlagsW; - logic FDivSqrtDoneE, FDivSqrtDoneM; + logic [63:0] FDivResultM, FDivResultW; + logic [4:0] FDivSqrtFlgM, FDivSqrtFlgW; + logic FDivSqrtDoneE; logic [63:0] DivInput1E, DivInput2E; logic HoldInputs; // keep forwarded inputs arround durring division // FMA signals - logic [105:0] ProdManE, ProdManM; + logic [105:0] ProdManE, ProdManM; ///*** put pipline stages in units logic [161:0] AlignedAddendE, AlignedAddendM; logic [12:0] ProdExpE, ProdExpM; logic AddendStickyE, AddendStickyM; @@ -91,93 +81,112 @@ module fpu ( logic XZeroE, YZeroE, ZZeroE, XZeroM, YZeroM, ZZeroM; logic XInfE, YInfE, ZInfE, XInfM, YInfM, ZInfM; logic XNaNE, YNaNE, ZNaNE, XNaNM, YNaNM, ZNaNM; - logic [63:0] FmaResultM, FmaResultW; - logic [4:0] FmaFlagsM, FmaFlagsW; + logic [63:0] FMAResM, FMAResW; + logic [4:0] FMAFlgM, FMAFlgW; // add/cvt signals - logic [63:0] AddSumE, AddSumTcE; - logic [3:0] AddSelInvE; - logic [10:0] AddExpPostSumE; - logic AddCorrSignE, AddOp1NormE, AddOp2NormE, AddOpANormE, AddOpBNormE, AddInvalidE; - logic AddDenormInE, AddSwapE, AddNormOvflowE, AddSignAE; - logic AddConvertE; - logic [63:0] AddFloat1E, AddFloat2E; - logic [11:0] AddExp1DenormE, AddExp2DenormE; - logic [10:0] AddExponentE; - logic [2:0] AddRmE; - logic [3:0] AddOpTypeE; - logic AddPE, AddOvEnE, AddUnEnE; - logic AddDenormM; - logic [63:0] AddSumM, AddSumTcM; - logic [3:0] AddSelInvM; - logic [10:0] AddExpPostSumM; - logic AddCorrSignM, AddOp1NormM, AddOp2NormM, AddOpANormM, AddOpBNormM, AddInvalidM; - logic AddDenormInM, AddSwapM, AddNormOvflowM, AddSignAM; - logic AddConvertM, AddSignM; - logic [63:0] AddFloat1M, 
AddFloat2M; - logic [11:0] AddExp1DenormM, AddExp2DenormM; - logic [10:0] AddExponentM; - logic [63:0] AddOp1M, AddOp2M; - logic [2:0] AddRmM; - logic [3:0] AddOpTypeM; - logic AddPM, AddOvEnM, AddUnEnM; - logic [63:0] FAddResultM, FAddResultW; - logic [4:0] FAddFlagsM, FAddFlagsW; + logic [63:0] AddSumE, AddSumM; + logic [63:0] AddSumTcE, AddSumTcM; + logic [3:0] AddSelInvE, AddSelInvM; + logic [10:0] AddExpPostSumE,AddExpPostSumM; + logic AddCorrSignE, AddCorrSignM; + logic AddOp1NormE, AddOp1NormM; + logic AddOp2NormE, AddOp2NormM; + logic AddOpANormE, AddOpANormM; + logic AddOpBNormE, AddOpBNormM; + logic AddInvalidE, AddInvalidM; + logic AddDenormInE, AddDenormInM; + logic AddSwapE, AddSwapM; + logic AddNormOvflowE, AddNormOvflowM; //***this isn't used in addcvt2 + logic AddSignAE, AddSignAM; + logic AddConvertE, AddConvertM; + logic [63:0] AddFloat1E, AddFloat2E, AddFloat1M, AddFloat2M; + logic [11:0] AddExp1DenormE, AddExp2DenormE, AddExp1DenormM, AddExp2DenormM; + logic [10:0] AddExponentE, AddExponentM; + logic [63:0] FAddResM, FAddResW; + logic [4:0] FAddFlgM, FAddFlgW; // cmp signals - logic CmpInvalidE, CmpInvalidM, CmpInvalidW; - logic [63:0] FCmpResultE, FCmpResultM, FCmpResultW; + logic CmpNVE, CmpNVM, CmpNVW; + logic [63:0] CmpResE, CmpResM, CmpResW; // fsgn signals - logic [63:0] SgnResultE, SgnResultM, SgnResultW; - logic [4:0] SgnFlagsE, SgnFlagsM, SgnFlagsW; + logic [63:0] SgnResE, SgnResM; + logic SgnNVE, SgnNVM, SgnNVW; logic [63:0] FResM, FResW; - logic FFlgM, FFlgW; + logic FFlgM, FFlgW; // instantiation of W stage regfile signals - logic [63:0] AlignedSrcAM, ForwardSrcAM, SrcAW; + logic [63:0] AlignedSrcAM; // classify signals - logic [63:0] ClassResultE, ClassResultM, ClassResultW; + logic [63:0] ClassResE, ClassResM; // 64-bit FPU result - logic [63:0] FPUResult64W, FPUResult64E; + logic [63:0] FPUResult64W; logic [4:0] FPUFlagsW; + + + + + + + //DECODE STAGE // top-level controller for FPU - fctrl ctrl (.Funct7D(InstrD[31:25]), 
.OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .*); + fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), + .FRM_REGW, .IllegalFPUInstrD, .FWriteEnD, .FDivStartD, .FResultSelD, .FOpCtrlD, .FResSelD, + .FIntResSelD, .FmtD, .FrmD, .FWriteIntD); // regfile instantiation - FPregfile fpregfile (clk, reset, FWriteEnW, + fregfile fregfile (clk, reset, FWriteEnW, InstrD[19:15], InstrD[24:20], InstrD[31:27], RdW, FPUResult64W, FRD1D, FRD2D, FRD3D); + + + + + + + + //***************** - // fpregfile D/E pipe registers + // D/E pipe registers //***************** flopenrc #(64) DEReg1(clk, reset, FlushE, ~StallE, FRD1D, FRD1E); flopenrc #(64) DEReg2(clk, reset, FlushE, ~StallE, FRD2D, FRD2E); flopenrc #(64) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E); - - //***************** - // other D/E pipe registers - //***************** - flopenrc #(1) CtrlRegE1(clk, reset, FlushE, ~StallE, FDivStartD, FDivStartE); - flopenrc #(15) CtrlRegE2(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]}, + flopenrc #(1) DECtrlRegE1(clk, reset, FlushE, ~StallE, FDivStartD, FDivStartE); + flopenrc #(15) DECtrlRegE2(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]}, {Adr1E, Adr2E, Adr3E}); - flopenrc #(22) DECtrlReg(clk, reset, FlushE, ~StallE, + flopenrc #(22) DECtrlReg3(clk, reset, FlushE, ~StallE, {FWriteEnD, FResultSelD, FResSelD, FIntResSelD, FrmD, FmtD, InstrD[11:7], FOpCtrlD, FWriteIntD}, {FWriteEnE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, RdE, FOpCtrlE, FWriteIntE}); + + + + + + + + + + + + + //EXECUTION STAGE // Hazard unit for FPU - fpuhazard hazard(.*); + fhazard fhazard(.Adr1E, .Adr2E, .Adr3E, .FWriteEnM, .FWriteEnW, .RdM, .RdW, .FResultSelM, .FStallD, + .ForwardXE, .ForwardYE, .ForwardZE); // forwarding muxs mux3 #(64) fxemux(FRD1E, FPUResult64W, FResM, ForwardXE, SrcXE); @@ -186,7 +195,9 @@ module fpu ( // first of two-stage instance of floating-point fused 
multiply-add unit - fma1 fma1 (.X(SrcXE), .Y(SrcYE), .Z(SrcZE), .FOpCtrlE(FOpCtrlE[2:0]),.*); + fma1 fma1 (.X(SrcXE), .Y(SrcYE), .Z(SrcZE), .FOpCtrlE(FOpCtrlE[2:0]), .FmtE, .ProdManE, .AlignedAddendE, + .ProdExpE, .AddendStickyE, .KillProdE, .XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE, + .XNaNE, .YNaNE, .ZNaNE ); // first and only instance of floating-point divider logic fpdivClk; @@ -204,174 +215,140 @@ module fpu ( .en(~HoldInputs), .clear(FDivSqrtDoneE), .reset(reset), .clk(clk)); - fpdiv fpdivsqrt (.DivOpType(FOpCtrlE[0]), .clk(fpdivClk), .FmtE(~FmtE), .*); + fdivsqrt fdivsqrt (.DivOpType(FOpCtrlE[0]), .clk(fpdivClk), .FmtE(~FmtE), .DivInput1E, .DivInput2E, + .FrmE, .DivOvEn(1'b1), .DivUnEn(1'b1), .FDivStartE, .FDivResultM, .FDivSqrtFlgM, + .FDivSqrtDoneE, .FDivBusyE, .HoldInputs, .reset); // first of two-stage instance of floating-point add/cvt unit - fpuaddcvt1 fpadd1 (.*); + fpuaddcvt1 fpadd1 (.SrcXE, .SrcYE, .FOpCtrlE, .FmtE, .AddFloat1E, .AddFloat2E, .AddExponentE, + .AddExpPostSumE, .AddExp1DenormE, .AddExp2DenormE, .AddSumE, .AddSumTcE, .AddSelInvE, + .AddCorrSignE, .AddSignAE, .AddOp1NormE, .AddOp2NormE, .AddOpANormE, .AddOpBNormE, .AddInvalidE, + .AddDenormInE, .AddConvertE, .AddSwapE, .AddNormOvflowE); - // first of two-stage instance of floating-point comparator - fpucmp1 fpcmp1 (SrcXE, SrcYE, FOpCtrlE[2:0], FmtE, CmpInvalidE, FCmpResultE); + // first and only instance of floating-point comparator + fcmp fcmp (SrcXE, SrcYE, FOpCtrlE[2:0], FmtE, CmpNVE, CmpResE); // first and only instance of floating-point sign converter - fpusgn fpsgn (.SgnOpCodeE(FOpCtrlE[1:0]),.*); + fsgn fsgn (.SgnOpCodeE(FOpCtrlE[1:0]), .SrcXE, .SrcYE, .SgnResE, .SgnNVE); // first and only instance of floating-point classify unit - fpuclassify fpuclass (.*); + fclassify fclassify (.SrcXE, .FmtE, .ClassResE); // output for store instructions assign FWriteDataE = FmtE ? 
SrcYE[63:64-`XLEN] : {{`XLEN-32{1'b0}}, SrcYE[63:32]}; - + //***swap to mux + + + + + + + + + + //***************** - //fpregfile D/E pipe registers + // E/M pipe registers //***************** flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, SrcXE, SrcXM); flopenrc #(64) EMFpReg2(clk, reset, FlushM, ~StallM, SrcYE, SrcYM); flopenrc #(64) EMFpReg3(clk, reset, FlushM, ~StallM, SrcZE, SrcZM); - //***************** - // fma E/M pipe registers - //***************** - flopenrc #(106) EMRegFma3(clk, reset, FlushM, ~StallM, ProdManE, ProdManM); - flopenrc #(162) EMRegFma4(clk, reset, FlushM, ~StallM, AlignedAddendE, AlignedAddendM); - flopenrc #(13) EMRegFma6(clk, reset, FlushM, ~StallM, ProdExpE, ProdExpM); - flopenrc #(1) EMRegFma7(clk, reset, FlushM, ~StallM, AddendStickyE, AddendStickyM); - flopenrc #(1) EMRegFma8(clk, reset, FlushM, ~StallM, KillProdE, KillProdM); - flopenrc #(1) EMRegFma10(clk, reset, FlushM, ~StallM, XZeroE, XZeroM); - flopenrc #(1) EMRegFma11(clk, reset, FlushM, ~StallM, YZeroE, YZeroM); - flopenrc #(1) EMRegFma12(clk, reset, FlushM, ~StallM, ZZeroE, ZZeroM); - flopenrc #(1) EMRegFma16(clk, reset, FlushM, ~StallM, XInfE, XInfM); - flopenrc #(1) EMRegFma17(clk, reset, FlushM, ~StallM, YInfE, YInfM); - flopenrc #(1) EMRegFma18(clk, reset, FlushM, ~StallM, ZInfE, ZInfM); - flopenrc #(1) EMRegFma19(clk, reset, FlushM, ~StallM, XNaNE, XNaNM); - flopenrc #(1) EMRegFma20(clk, reset, FlushM, ~StallM, YNaNE, YNaNM); - flopenrc #(1) EMRegFma21(clk, reset, FlushM, ~StallM, ZNaNE, ZNaNM); + flopenrc #(106) EMRegFma1(clk, reset, FlushM, ~StallM, ProdManE, ProdManM); + flopenrc #(162) EMRegFma2(clk, reset, FlushM, ~StallM, AlignedAddendE, AlignedAddendM); + flopenrc #(13) EMRegFma3(clk, reset, FlushM, ~StallM, ProdExpE, ProdExpM); + flopenrc #(11) EMRegFma4(clk, reset, FlushM, ~StallM, + {AddendStickyE, KillProdE, XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE}, + {AddendStickyM, KillProdM, XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, 
XNaNM, YNaNM, ZNaNM}); - //***************** - // fpadd E/M pipe registers - //***************** flopenrc #(64) EMRegAdd1(clk, reset, FlushM, ~StallM, AddSumE, AddSumM); flopenrc #(64) EMRegAdd2(clk, reset, FlushM, ~StallM, AddSumTcE, AddSumTcM); - flopenrc #(4) EMRegAdd3(clk, reset, FlushM, ~StallM, AddSelInvE, AddSelInvM); - flopenrc #(11) EMRegAdd4(clk, reset, FlushM, ~StallM, AddExpPostSumE, AddExpPostSumM); - flopenrc #(1) EMRegAdd5(clk, reset, FlushM, ~StallM, AddCorrSignE, AddCorrSignM); - flopenrc #(1) EMRegAdd6(clk, reset, FlushM, ~StallM, AddOp1NormE, AddOp1NormM); - flopenrc #(1) EMRegAdd7(clk, reset, FlushM, ~StallM, AddOp2NormE, AddOp2NormM); - flopenrc #(1) EMRegAdd8(clk, reset, FlushM, ~StallM, AddOpANormE, AddOpANormM); - flopenrc #(1) EMRegAdd9(clk, reset, FlushM, ~StallM, AddOpBNormE, AddOpBNormM); - flopenrc #(1) EMRegAdd10(clk, reset, FlushM, ~StallM, AddInvalidE, AddInvalidM); - flopenrc #(1) EMRegAdd11(clk, reset, FlushM, ~StallM, AddDenormInE, AddDenormInM); - flopenrc #(1) EMRegAdd12(clk, reset, FlushM, ~StallM, AddConvertE, AddConvertM); - flopenrc #(1) EMRegAdd13(clk, reset, FlushM, ~StallM, AddSwapE, AddSwapM); - flopenrc #(1) EMRegAdd14(clk, reset, FlushM, ~StallM, AddNormOvflowE, AddNormOvflowM); - flopenrc #(1) EMRegAdd15(clk, reset, FlushM, ~StallM, AddSignAE, AddSignAM); - flopenrc #(64) EMRegAdd16(clk, reset, FlushM, ~StallM, AddFloat1E, AddFloat1M); - flopenrc #(64) EMRegAdd17(clk, reset, FlushM, ~StallM, AddFloat2E, AddFloat2M); - flopenrc #(12) EMRegAdd18(clk, reset, FlushM, ~StallM, AddExp1DenormE, AddExp1DenormM); - flopenrc #(12) EMRegAdd19(clk, reset, FlushM, ~StallM, AddExp2DenormE, AddExp2DenormM); - flopenrc #(11) EMRegAdd20(clk, reset, FlushM, ~StallM, AddExponentE, AddExponentM); - flopenrc #(3) EMRegAdd23(clk, reset, FlushM, ~StallM, AddRmE, AddRmM); - flopenrc #(4) EMRegAdd24(clk, reset, FlushM, ~StallM, AddOpTypeE, AddOpTypeM); - flopenrc #(1) EMRegAdd25(clk, reset, FlushM, ~StallM, AddPE, AddPM); - flopenrc #(1) 
EMRegAdd26(clk, reset, FlushM, ~StallM, AddOvEnE, AddOvEnM); - flopenrc #(1) EMRegAdd27(clk, reset, FlushM, ~StallM, AddUnEnE, AddUnEnM); + flopenrc #(11) EMRegAdd3(clk, reset, FlushM, ~StallM, AddExpPostSumE, AddExpPostSumM); + flopenrc #(64) EMRegAdd4(clk, reset, FlushM, ~StallM, AddFloat1E, AddFloat1M); + flopenrc #(64) EMRegAdd5(clk, reset, FlushM, ~StallM, AddFloat2E, AddFloat2M); + flopenrc #(12) EMRegAdd6(clk, reset, FlushM, ~StallM, AddExp1DenormE, AddExp1DenormM); + flopenrc #(12) EMRegAdd7(clk, reset, FlushM, ~StallM, AddExp2DenormE, AddExp2DenormM); + flopenrc #(11) EMRegAdd8(clk, reset, FlushM, ~StallM, AddExponentE, AddExponentM); + flopenrc #(15) EMRegAdd9(clk, reset, FlushM, ~StallM, + {AddSelInvE, AddCorrSignE, AddOp1NormE, AddOp2NormE, AddOpANormE, AddOpBNormE, AddInvalidE, AddDenormInE, AddConvertE, AddSwapE, AddNormOvflowE, AddSignAE}, + {AddSelInvM, AddCorrSignM, AddOp1NormM, AddOp2NormM, AddOpANormM, AddOpBNormM, AddInvalidM, AddDenormInM, AddConvertM, AddSwapM, AddNormOvflowM, AddSignAM}); + + flopenrc #(1) EMRegCmp1(clk, reset, FlushM, ~StallM, CmpNVE, CmpNVM); + flopenrc #(64) EMRegCmp2(clk, reset, FlushM, ~StallM, CmpResE, CmpResM); - //***************** - // fpcmp E/M pipe registers - //***************** - flopenrc #(1) EMRegCmp1(clk, reset, FlushM, ~StallM, CmpInvalidE, CmpInvalidM); - flopenrc #(64) EMRegCmp3(clk, reset, FlushM, ~StallM, FCmpResultE, FCmpResultM); + flopenrc #(64) EMRegSgn1(clk, reset, FlushM, ~StallM, SgnResE, SgnResM); + flopenrc #(1) EMRegSgn2(clk, reset, FlushM, ~StallM, SgnNVE, SgnNVM); - //***************** - // fpsgn E/M pipe registers - //***************** - flopenrc #(64) EMRegSgn2(clk, reset, FlushM, ~StallM, SgnResultE, SgnResultM); - flopenrc #(5) EMRegSgn3(clk, reset, FlushM, ~StallM, SgnFlagsE, SgnFlagsM); - - //***************** - // other E/M pipe registers - //***************** flopenrc #(22) EMCtrlReg(clk, reset, FlushM, ~StallM, {FWriteEnE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, RdE, FOpCtrlE, 
FWriteIntE}, {FWriteEnM, FResultSelM, FResSelM, FIntResSelM, FrmM, FmtM, RdM, FOpCtrlM, FWriteIntM}); + + flopenrc #(64) EMRegClass(clk, reset, FlushM, ~StallM, ClassResE, ClassResM); - //***************** - // fpuclassify E/M pipe registers - //***************** - flopenrc #(64) EMRegClass(clk, reset, FlushM, ~StallM, ClassResultE, ClassResultM); - + + + + + + + //BEGIN MEMORY STAGE - mux3 #(64) FResMux(AlignedSrcAM, SgnResultM, FCmpResultM, FResSelM, FResM); - assign FFlgM = CmpInvalidM & FResSelM[1]; + mux3 #(64) FResMux(AlignedSrcAM, SgnResM, CmpResM, FResSelM, FResM); + mux3 #(1) FFlgMux(1'b0, SgnNVM, CmpNVM, FResSelM, FFlgM); + //***change to mux assign SrcXMAligned = FmtM ? SrcXM[63:64-`XLEN] : {{`XLEN-32{1'b0}}, SrcXM[63:32]}; - mux3 #(`XLEN) IntResMux(FCmpResultM[`XLEN-1:0], SrcXMAligned, ClassResultM[`XLEN-1:0], FIntResSelM, FIntResM); + mux3 #(`XLEN) IntResMux(CmpResM[`XLEN-1:0], SrcXMAligned, ClassResM[`XLEN-1:0], FIntResSelM, FIntResM); // second instance of two-stage FMA unit - fma2 fma2(.X(SrcXM), .Y(SrcYM), .Z(SrcZM), .FOpCtrlM(FOpCtrlM[2:0]), .*); + fma2 fma2(.X(SrcXM), .Y(SrcYM), .Z(SrcZM), .FOpCtrlM(FOpCtrlM[2:0]), .FrmM, .FmtM, + .ProdManM, .AlignedAddendM, .ProdExpM, .AddendStickyM, .KillProdM, + .XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, .ZInfM, .XNaNM, .YNaNM, .ZNaNM, + .FMAResM, .FMAFlgM); // second instance of two-stage floating-point add/cvt unit - fpuaddcvt2 fpadd2 (.*); + fpuaddcvt2 fpadd2 (.FrmM, .FOpCtrlM, .FmtM, .AddSumM, .AddSumTcM, .AddFloat1M, .AddFloat2M, + .AddExp1DenormM, .AddExp2DenormM, .AddExponentM, .AddExpPostSumM, .AddSelInvM, + .AddOp1NormM, .AddOp2NormM, .AddOpANormM, .AddOpBNormM, .AddInvalidM, .AddDenormInM, + .AddSignAM, .AddCorrSignM, .AddConvertM, .AddSwapM, .FAddResM, .FAddFlgM); // Align SrcA to MSB when single precicion mux2 #(64) SrcAMux({SrcAM[31:0], 32'b0}, {{64-`XLEN{1'b0}}, SrcAM}, FmtM, AlignedSrcAM); - + + + + + + + //***************** - //fpregfile M/W pipe registers + // M/W pipe registers 
//***************** - flopenrc #(64) MWFpReg1(clk, reset, FlushW, ~StallW, SrcXM, SrcXW); - flopenrc #(64) MWFpReg2(clk, reset, FlushW, ~StallW, SrcYM, SrcYW); + flopenrc #(64) MWRegFma1(clk, reset, FlushW, ~StallW, FMAResM, FMAResW); + flopenrc #(5) MWRegFma2(clk, reset, FlushW, ~StallW, FMAFlgM, FMAFlgW); - //***************** - // fma M/W pipe registers - //***************** - flopenrc #(64) MWRegFma1(clk, reset, FlushW, ~StallW, FmaResultM, FmaResultW); - flopenrc #(5) MWRegFma2(clk, reset, FlushW, ~StallW, FmaFlagsM, FmaFlagsW); - - //***************** - // fpdiv M/W pipe registers - //***************** flopenrc #(64) MWRegDiv1(clk, reset, FlushW, ~StallW, FDivResultM, FDivResultW); - flopenrc #(5) MWRegDiv2(clk, reset, FlushW, ~StallW, FDivFlagsM, FDivFlagsW); - flopenrc #(1) MWRegDiv3(clk, reset, FlushW, ~StallW, DivDenormM, DivDenormW); + flopenrc #(5) MWRegDiv2(clk, reset, FlushW, ~StallW, FDivSqrtFlgM, FDivSqrtFlgW); - //***************** - // fpadd M/W pipe registers - //***************** - flopenrc #(64) MWRegAdd1(clk, reset, FlushW, ~StallW, FAddResultM, FAddResultW); - flopenrc #(5) MWRegAdd2(clk, reset, FlushW, ~StallW, FAddFlagsM, FAddFlagsW); + flopenrc #(64) MWRegAdd1(clk, reset, FlushW, ~StallW, FAddResM, FAddResW); + flopenrc #(5) MWRegAdd2(clk, reset, FlushW, ~StallW, FAddFlgM, FAddFlgW); - //***************** - // fpcmp M/W pipe registers - //***************** - flopenrc #(1) MWRegCmp1(clk, reset, FlushW, ~StallW, CmpInvalidM, CmpInvalidW); - // flopenrc #(2) MWRegCmp2(clk, reset, FlushW, ~StallW, CmpFCCM, CmpFCCW); - flopenrc #(64) MWRegCmp3(clk, reset, FlushW, ~StallW, FCmpResultM, FCmpResultW); + flopenrc #(1) MWRegCmp1(clk, reset, FlushW, ~StallW, CmpNVM, CmpNVW); + flopenrc #(64) MWRegCmp3(clk, reset, FlushW, ~StallW, CmpResM, CmpResW); + + flopenrc #(64) MWRegClass2(clk, reset, FlushW, ~StallW, FResM, FResW); + flopenrc #(1) MWRegClass1(clk, reset, FlushW, ~StallW, FFlgM, FFlgW); - //***************** - // fpsgn M/W pipe registers - 
//***************** - flopenrc #(64) MWRegSgn1(clk, reset, FlushW, ~StallW, SgnResultM, SgnResultW); - flopenrc #(5) MWRegSgn2(clk, reset, FlushW, ~StallW, SgnFlagsM, SgnFlagsW); - - //***************** - // other M/W pipe registers - //***************** flopenrc #(11) MWCtrlReg(clk, reset, FlushW, ~StallW, {FWriteEnM, FResultSelM, RdM, FmtM, FWriteIntM}, {FWriteEnW, FResultSelW, RdW, FmtW, FWriteIntW}); - //***************** - // fpuclassify M/W pipe registers - //***************** - flopenrc #(64) MWRegClass(clk, reset, FlushW, ~StallW, ClassResultM, ClassResultW); - flopenrc #(64) MWRegClass2(clk, reset, FlushW, ~StallW, FResM, FResW); - flopenrc #(1) MWRegClass1(clk, reset, FlushW, ~StallW, FFlgM, FFlgW); @@ -385,13 +362,13 @@ module fpu ( - +//***turn into muxs always_comb begin case (FResultSelW) 3'b000 : FPUFlagsW = 5'b0; - 3'b001 : FPUFlagsW = FmaFlagsW; - 3'b010 : FPUFlagsW = FAddFlagsW; - 3'b011 : FPUFlagsW = FDivFlagsW; + 3'b001 : FPUFlagsW = FMAFlgW; + 3'b010 : FPUFlagsW = FAddFlgW; + 3'b011 : FPUFlagsW = FDivSqrtFlgW; 3'b100 : FPUFlagsW = {4'b0,FFlgW}; default : FPUFlagsW = 5'bxxxxx; endcase @@ -400,8 +377,8 @@ module fpu ( always_comb begin case (FResultSelW) 3'b000 : FPUResult64W = FmtW ? {ReadDataW, {64-`XLEN{1'b0}}} : {ReadDataW[31:0], 32'b0}; - 3'b001 : FPUResult64W = FmaResultW; - 3'b010 : FPUResult64W = FAddResultW; + 3'b001 : FPUResult64W = FMAResW; + 3'b010 : FPUResult64W = FAddResW; 3'b011 : FPUResult64W = FDivResultW; 3'b100 : FPUResult64W = FResW; default : FPUResult64W = 64'bxxxxx; @@ -415,7 +392,9 @@ module fpu ( // define offsets for LSB zero extension or truncation always_comb begin // zero extension +//***turn into mux FPUResultW = FmtW ? 
FPUResult64W[63:64-`XLEN] : {{`XLEN-32{1'b0}}, FPUResult64W[63:32]}; + //*** put into mem stage SetFflagsM = FPUFlagsW; end diff --git a/wally-pipelined/src/fpu/fpuaddcvt1.sv b/wally-pipelined/src/fpu/fpuaddcvt1.sv index 8f045dcdb..1b86b1984 100755 --- a/wally-pipelined/src/fpu/fpuaddcvt1.sv +++ b/wally-pipelined/src/fpu/fpuaddcvt1.sv @@ -183,11 +183,11 @@ module fpuaddcvt1 (AddSumE, AddSumTcE, AddSelInvE, AddExpPostSumE, AddCorrSignE, assign AddCorrSignE = ~FOpCtrlE[2]&~FOpCtrlE[1]&FOpCtrlE[0]&AddSwapE; // 64-bit Mantissa Adder/Subtractor - cla64 add1 (AddSumE, mantissaA3, mantissaB3, sub); + cla64 add1 (AddSumE, mantissaA3, mantissaB3, sub); //***adder // 64-bit Mantissa Subtractor - to get the two's complement of the // result when the sign from the adder/subtractor is negative. - cla_sub64 sub1 (AddSumTcE, mantissaB3, mantissaA3); + cla_sub64 sub1 (AddSumTcE, mantissaB3, mantissaA3); //***adder // Finds normal underflow result to determine whether to round final exponent down //***KEP used to be (AddSumE == 16'h0) I am unsure what it's supposed to be diff --git a/wally-pipelined/src/fpu/fpuaddcvt2.sv b/wally-pipelined/src/fpu/fpuaddcvt2.sv index 46eac200f..1fe8ac658 100755 --- a/wally-pipelined/src/fpu/fpuaddcvt2.sv +++ b/wally-pipelined/src/fpu/fpuaddcvt2.sv @@ -27,7 +27,7 @@ // -module fpuaddcvt2 (FAddResultM, FAddFlagsM, AddDenormM, AddSumM, AddSumTcM, AddSelInvM, AddExpPostSumM, AddCorrSignM, AddOp1NormM, AddOp2NormM, AddOpANormM, AddOpBNormM, AddInvalidM, AddDenormInM, AddConvertM, AddSwapM, AddSignAM, AddFloat1M, AddFloat2M, AddExp1DenormM, AddExp2DenormM, AddExponentM, FrmM, FOpCtrlM, FmtM); +module fpuaddcvt2 (FAddResM, FAddFlgM, AddSumM, AddSumTcM, AddSelInvM, AddExpPostSumM, AddCorrSignM, AddOp1NormM, AddOp2NormM, AddOpANormM, AddOpBNormM, AddInvalidM, AddDenormInM, AddConvertM, AddSwapM, AddSignAM, AddFloat1M, AddFloat2M, AddExp1DenormM, AddExp2DenormM, AddExponentM, FrmM, FOpCtrlM, FmtM); input [2:0] FrmM; // Rounding mode - specify values input 
[3:0] FOpCtrlM; // Function opcode @@ -51,9 +51,9 @@ module fpuaddcvt2 (FAddResultM, FAddFlagsM, AddDenormM, AddSumM, AddSumTcM, AddS input AddSwapM; // input AddNormOvflowM; - output [63:0] FAddResultM; // Result of operation - output [4:0] FAddFlagsM; // IEEE exception flags - output AddDenormM; // AddDenormM on input or output + output [63:0] FAddResM; // Result of operation + output [4:0] FAddFlgM; // IEEE exception flags + wire AddDenormM; // AddDenormM on input or output wire P; assign P = ~FmtM | FOpCtrlM[2]; @@ -145,7 +145,7 @@ module fpuaddcvt2 (FAddResultM, FAddFlagsM, AddDenormM, AddSumM, AddSumTcM, AddS // exactly where the rounding point is. The rounding units also // handles special cases and set the exception flags. - // Changed DenormIO -> AddDenormM and FlagsIn -> FAddFlagsM in order to + // Changed DenormIO -> AddDenormM and FlagsIn -> FAddFlgM in order to // help in processor reservation station detection of load/stores. In // other words, the processor would like to know ahead of time that // if the result is an exception then don't load or store. @@ -155,8 +155,8 @@ module fpuaddcvt2 (FAddResultM, FAddFlagsM, AddDenormM, AddSumM, AddSumTcM, AddS AddNormOvflowM, normal_underflow, AddSwapM, FOpCtrlM, AddSumM); // Store the final result and the exception flags in registers. 
- assign FAddResultM = Result; - assign {AddDenormM, FAddFlagsM} = {DenormIO, FlagsIn}; + assign FAddResM = Result; + assign {AddDenormM, FAddFlgM} = {DenormIO, FlagsIn}; endmodule // fpadd diff --git a/wally-pipelined/src/fpu/fpuclassify.sv b/wally-pipelined/src/fpu/fpuclassify.sv deleted file mode 100644 index b320b2f07..000000000 --- a/wally-pipelined/src/fpu/fpuclassify.sv +++ /dev/null @@ -1,50 +0,0 @@ - -`include "wally-config.vh" - -module fpuclassify ( - input logic [63:0] SrcXE, - input logic FmtE, // 0-single 1-double - output logic [63:0] ClassResultE - ); - - logic [31:0] single; - logic [63:0] double; - logic sign; - logic infinity, NaN, zero, normal, subnormal; - logic ExpNotZero, ExpOnes, ManNotZero, ExpZero, ManZero, FirstBitMan; - - // single and double precision layouts - assign single = SrcXE[63:32]; - assign double = SrcXE; - assign sign = SrcXE[63]; - - // basic calculations for readabillity - assign ExpNotZero = FmtE ? |double[62:52] : |single[30:23]; - assign ExpZero = ~ExpNotZero; - assign ExpOnes = FmtE ? &double[62:52] : &single[30:23]; - assign ManNotZero = FmtE ? |double[51:0] : |single[22:0]; - assign ManZero = ~ManNotZero; - assign FirstBitMan = FmtE ? 
double[51] : single[22]; - - // determine the type of number - assign NaN = ExpOnes & ManNotZero; - assign infinity = ExpOnes & ManZero; - assign zero = ExpZero & ManZero; - assign subnormal= ExpZero & ManNotZero; - assign normal = ExpNotZero; - - // determine sub category and combine into the result - // bit 0 - -infinity - // bit 1 - -normal - // bit 2 - -subnormal - // bit 3 - -zero - // bit 4 - +zero - // bit 5 - +subnormal - // bit 6 - +normal - // bit 7 - +infinity - // bit 8 - signaling NaN - // bit 9 - quiet NaN - assign ClassResultE = {{54{1'b0}}, FirstBitMan&NaN, ~FirstBitMan&NaN, ~sign&infinity, ~sign&normal, - ~sign&subnormal, ~sign&zero, sign&zero, sign&subnormal, sign&normal, sign&infinity}; - -endmodule diff --git a/wally-pipelined/src/fpu/fpucmp1.sv b/wally-pipelined/src/fpu/fpucmp1.sv deleted file mode 100755 index 3a8245e63..000000000 --- a/wally-pipelined/src/fpu/fpucmp1.sv +++ /dev/null @@ -1,465 +0,0 @@ - -// -// File name : fpcomp.v -// Title : Floating-Point Comparator -// project : FPU -// Library : fpcomp -// Author(s) : James E. Stine -// Purpose : definition of main unit to floating-point comparator -// notes : -// -// Copyright Oklahoma State University -// -// Floating Point Comparator (Algorithm) -// -// 1.) Performs sign-extension if the inputs are 32-bit integers. -// 2.) Perform a magnitude comparison on the lower 63 bits of the inputs -// 3.) Check for special cases (+0=-0, unordered, and infinite values) -// and correct for sign bits -// -// This module takes 64-bits inputs op1 and op2, VSS, and VDD -// signals, and a 2-bit signal FOpCtrlE that indicates the type of -// operands being compared as indicated below. 
-// FOpCtrlE Description -// 00 double precision numbers -// 01 single precision numbers -// 10 half precision numbers -// 11 (unused) -// -// The comparator produces a 2-bit signal FCC, which -// indicates the result of the comparison: -// -// fcc decscription -// 00 A = B -// 01 A < B -// 10 A > B -// 11 A and B are unordered (i.e., A or B is NaN) -// -// It also produces an invalid operation flag, which is one -// if either of the input operands is a signaling NaN per 754 - -`include "wally-config.vh" -module fpucmp1 ( - input logic [63:0] op1, - input logic [63:0] op2, - input logic [2:0] FOpCtrlE, - input logic FmtE, - - - output logic Invalid, // Invalid Operation - // output logic [1:0] FCC, // Condition Codes - output logic [63:0] FCmpResultE); - // Perform magnitude comparison between the 63 least signficant bits - // of the input operands. Only LT and EQ are returned, since GT can - // be determined from these values. - logic [1:0] FCC; // Condition Codes - logic [7:0] w, x; - logic ANaN, BNaN; - logic Azero, Bzero; - logic LT; // magnitude op1 < magnitude op2 - logic EQ; // magnitude op1 = magnitude op2 - - magcompare64b_1 magcomp1 (w, x, {~op1[63], op1[62:0]}, {~op2[63], op2[62:0]}); - - // Determine final values based on output of magnitude comparison, - // sign bits, and special case testing. - exception_cmp_1 exc1 (ANaN, BNaN, Azero, Bzero, op1, op2, FOpCtrlE); - - // Perform magnitude comparison between the 63 least signficant bits - // of the input operands. Only LT and EQ are returned, since GT can - // be determined from these values. - magcompare64b_2 magcomp2 (LT, EQ, w, x); - - // Determine final values based on output of magnitude comparison, - // sign bits, and special case testing. 
- exception_cmp_2 exc2 (.invalid(Invalid), .fcc(FCC), .LT_mag(LT), .EQ_mag(EQ), .ANaN(ANaN), .BNaN(BNaN), .Azero(Azero), .Bzero(Bzero), .FOpCtrlE(FOpCtrlE), .A(op1), .B(op2), .*); - -endmodule // fpcomp - -// module magcompare2b (LT, GT, A, B); - -// input logic [1:0] A; -// input logic [1:0] B; - -// output logic LT; -// output logic GT; - -// // Determine if A < B using a minimized sum-of-products expression -// assign LT = ~A[1]&B[1] | ~A[1]&~A[0]&B[0] | ~A[0]&B[1]&B[0]; -// // Determine if A > B using a minimized sum-of-products expression -// assign GT = A[1]&~B[1] | A[1]&A[0]&~B[0] | A[0]&~B[1]&~B[0]; - -// endmodule // magcompare2b - -// 2-bit magnitude comparator -// This module compares two 2-bit values A and B. LT is '1' if A < B -// and GT is '1'if A > B. LT and GT are both '0' if A = B. However, -// this version actually incorporates don't cares into the equation to -// simplify the optimization - -module magcompare2c (LT, GT, A, B); - - input logic [1:0] A; - input logic [1:0] B; - - output logic LT; - output logic GT; - - assign LT = B[1] | (!A[1]&B[0]); - assign GT = A[1] | (!B[1]&A[0]); - -endmodule // magcompare2b - -// This module compares two 64-bit values A and B. LT is '1' if A < B -// and EQ is '1'if A = B. LT and GT are both '0' if A > B. -// This structure was modified so -// that it only does a strict magnitdude comparison, and only -// returns flags for less than (LT) and eqaual to (EQ). It uses a tree -// of 63 2-bit magnitude comparators, followed by one OR gates. -// -// J. E. Stine and M. J. Schulte, "A combined two's complement and -// floating-point comparator," 2005 IEEE International Symposium on -// Circuits and Systems, Kobe, 2005, pp. 89-92 Vol. 1. 
-// doi: 10.1109/ISCAS.2005.1464531 - -module magcompare64b_1 (w, x, A, B); - - input logic [63:0] A; - input logic [63:0] B; - - logic [31:0] s; - logic [31:0] t; - logic [15:0] u; - logic [15:0] v; - output logic [7:0] w; - output logic [7:0] x; - - magcompare2b mag1(s[0], t[0], A[1:0], B[1:0]); - magcompare2b mag2(s[1], t[1], A[3:2], B[3:2]); - magcompare2b mag3(s[2], t[2], A[5:4], B[5:4]); - magcompare2b mag4(s[3], t[3], A[7:6], B[7:6]); - magcompare2b mag5(s[4], t[4], A[9:8], B[9:8]); - magcompare2b mag6(s[5], t[5], A[11:10], B[11:10]); - magcompare2b mag7(s[6], t[6], A[13:12], B[13:12]); - magcompare2b mag8(s[7], t[7], A[15:14], B[15:14]); - magcompare2b mag9(s[8], t[8], A[17:16], B[17:16]); - magcompare2b magA(s[9], t[9], A[19:18], B[19:18]); - magcompare2b magB(s[10], t[10], A[21:20], B[21:20]); - magcompare2b magC(s[11], t[11], A[23:22], B[23:22]); - magcompare2b magD(s[12], t[12], A[25:24], B[25:24]); - magcompare2b magE(s[13], t[13], A[27:26], B[27:26]); - magcompare2b magF(s[14], t[14], A[29:28], B[29:28]); - magcompare2b mag10(s[15], t[15], A[31:30], B[31:30]); - magcompare2b mag11(s[16], t[16], A[33:32], B[33:32]); - magcompare2b mag12(s[17], t[17], A[35:34], B[35:34]); - magcompare2b mag13(s[18], t[18], A[37:36], B[37:36]); - magcompare2b mag14(s[19], t[19], A[39:38], B[39:38]); - magcompare2b mag15(s[20], t[20], A[41:40], B[41:40]); - magcompare2b mag16(s[21], t[21], A[43:42], B[43:42]); - magcompare2b mag17(s[22], t[22], A[45:44], B[45:44]); - magcompare2b mag18(s[23], t[23], A[47:46], B[47:46]); - magcompare2b mag19(s[24], t[24], A[49:48], B[49:48]); - magcompare2b mag1A(s[25], t[25], A[51:50], B[51:50]); - magcompare2b mag1B(s[26], t[26], A[53:52], B[53:52]); - magcompare2b mag1C(s[27], t[27], A[55:54], B[55:54]); - magcompare2b mag1D(s[28], t[28], A[57:56], B[57:56]); - magcompare2b mag1E(s[29], t[29], A[59:58], B[59:58]); - magcompare2b mag1F(s[30], t[30], A[61:60], B[61:60]); - magcompare2b mag20(s[31], t[31], A[63:62], B[63:62]); - - 
magcompare2c mag21(u[0], v[0], t[1:0], s[1:0]); - magcompare2c mag22(u[1], v[1], t[3:2], s[3:2]); - magcompare2c mag23(u[2], v[2], t[5:4], s[5:4]); - magcompare2c mag24(u[3], v[3], t[7:6], s[7:6]); - magcompare2c mag25(u[4], v[4], t[9:8], s[9:8]); - magcompare2c mag26(u[5], v[5], t[11:10], s[11:10]); - magcompare2c mag27(u[6], v[6], t[13:12], s[13:12]); - magcompare2c mag28(u[7], v[7], t[15:14], s[15:14]); - magcompare2c mag29(u[8], v[8], t[17:16], s[17:16]); - magcompare2c mag2A(u[9], v[9], t[19:18], s[19:18]); - magcompare2c mag2B(u[10], v[10], t[21:20], s[21:20]); - magcompare2c mag2C(u[11], v[11], t[23:22], s[23:22]); - magcompare2c mag2D(u[12], v[12], t[25:24], s[25:24]); - magcompare2c mag2E(u[13], v[13], t[27:26], s[27:26]); - magcompare2c mag2F(u[14], v[14], t[29:28], s[29:28]); - magcompare2c mag30(u[15], v[15], t[31:30], s[31:30]); - - magcompare2c mag31(w[0], x[0], v[1:0], u[1:0]); - magcompare2c mag32(w[1], x[1], v[3:2], u[3:2]); - magcompare2c mag33(w[2], x[2], v[5:4], u[5:4]); - magcompare2c mag34(w[3], x[3], v[7:6], u[7:6]); - magcompare2c mag35(w[4], x[4], v[9:8], u[9:8]); - magcompare2c mag36(w[5], x[5], v[11:10], u[11:10]); - magcompare2c mag37(w[6], x[6], v[13:12], u[13:12]); - magcompare2c mag38(w[7], x[7], v[15:14], u[15:14]); - -endmodule // magcompare64b - -// This module takes 64-bits inputs A and B, two magnitude comparison -// flags LT_mag and EQ_mag, and a 2-bit signal FOpCtrlE that indicates the type of -// operands being compared as indicated below. -// FOpCtrlE Description -// 00 double precision numbers -// 01 single precision numbers -// 10 half precision numbers -// 11 bfloat precision numbers -// -// The comparator produces a 2-bit signal fcc, which -// indicates the result of the comparison as follows: -// fcc decscription -// 00 A = B -// 01 A < B -// 10 A > B -// 11 A and B are unordered (i.e., A or B is NaN) -// It also produces a invalid operation flag, which is one -// if either of the input operands is a signaling NaN. 
- -module exception_cmp_1 (ANaN, BNaN, Azero, Bzero, A, B, FOpCtrlE); - - input logic [63:0] A; - input logic [63:0] B; - input logic [2:0] FOpCtrlE; - - logic dp, sp, hp; - - output logic ANaN; - output logic BNaN; - output logic Azero; - output logic Bzero; - - assign dp = !FOpCtrlE[1]&!FOpCtrlE[0]; - assign sp = !FOpCtrlE[1]&FOpCtrlE[0]; - assign hp = FOpCtrlE[1]&!FOpCtrlE[0]; - - // Test if A or B is NaN. - assign ANaN = (A[62]&A[61]&A[60]&A[59]&A[58]) & - ((sp&A[57]&A[56]&A[55]&(A[54]|A[53])) | - (dp&A[57]&A[56]&A[55]&A[54]&A[53]&A[52]&(A[51]|A[50])) | - (hp&(A[57]|A[56]))); - - assign BNaN = (B[62]&B[61]&B[60]&B[59]&B[58]) & - ((sp&B[57]&B[56]&B[55]&(B[54]|B[53])) | - (dp&B[57]&B[56]&B[55]&B[54]&B[53]&B[52]&(B[51]|B[50])) | - (hp&(B[57]|B[56]))); - - // Test if A is +0 or -0 when viewed as a floating point number (i.e, - // the 63 least siginficant bits of A are zero). - // Depending on how this synthesizes, it may work better to replace - // this with assign Azero = ~(A[62] | A[61] | ... | A[0]) - assign Azero = (A[62:0] == 63'h0); - assign Bzero = (B[62:0] == 63'h0); - -endmodule // exception_cmp -// -// File name : fpcomp.v -// Title : Floating-Point Comparator -// project : FPU -// Library : fpcomp -// Author(s) : James E. Stine -// Purpose : definition of main unit to floating-point comparator -// notes : -// -// Copyright Oklahoma State University -// -// Floating Point Comparator (Algorithm) -// -// 1.) Performs sign-extension if the inputs are 32-bit integers. -// 2.) Perform a magnitude comparison on the lower 63 bits of the inputs -// 3.) Check for special cases (+0=-0, unordered, and infinite values) -// and correct for sign bits -// -// This module takes 64-bits inputs op1 and op2, VSS, and VDD -// signals, and a 2-bit signal FOpCtrlE that indicates the type of -// operands being compared as indicated below. 
-// FOpCtrlE Description -// 00 double precision numbers -// 01 single precision numbers -// 10 half precision numbers -// 11 (unused) -// -// The comparator produces a 2-bit signal FCC, which -// indicates the result of the comparison: -// -// fcc decscription -// 00 A = B -// 01 A < B -// 10 A > B -// 11 A and B are unordered (i.e., A or B is NaN) -// -// It also produces an invalid operation flag, which is one -// if either of the input operands is a signaling NaN per 754 - - -/*module magcompare2b (LT, GT, A, B); - - input logic [1:0] A; - input logic [1:0] B; - - output logic LT; - output logic GT; - - // Determine if A < B using a minimized sum-of-products expression - assign LT = ~A[1]&B[1] | ~A[1]&~A[0]&B[0] | ~A[0]&B[1]&B[0]; - // Determine if A > B using a minimized sum-of-products expression - assign GT = A[1]&~B[1] | A[1]&A[0]&~B[0] | A[0]&~B[1]&~B[0]; - -endmodule*/ // magcompare2b - -// 2-bit magnitude comparator -// This module compares two 2-bit values A and B. LT is '1' if A < B -// and GT is '1'if A > B. LT and GT are both '0' if A = B. However, -// this version actually incorporates don't cares into the equation to -// simplify the optimization - -// module magcompare2c (LT, GT, A, B); - -// input logic [1:0] A; -// input logic [1:0] B; - -// output logic LT; -// output logic GT; - -// assign LT = B[1] | (!A[1]&B[0]); -// assign GT = A[1] | (!B[1]&A[0]); - -// endmodule // magcompare2b - -// This module compares two 64-bit values A and B. LT is '1' if A < B -// and EQ is '1'if A = B. LT and GT are both '0' if A > B. -// This structure was modified so -// that it only does a strict magnitdude comparison, and only -// returns flags for less than (LT) and eqaual to (EQ). It uses a tree -// of 63 2-bit magnitude comparators, followed by one OR gates. -// -// J. E. Stine and M. J. Schulte, "A combined two's complement and -// floating-point comparator," 2005 IEEE International Symposium on -// Circuits and Systems, Kobe, 2005, pp. 89-92 Vol. 1. 
-// doi: 10.1109/ISCAS.2005.1464531 - -module magcompare64b_2 (LT, EQ, w, x); - - input logic [7:0] w; - input logic [7:0] x; - logic [3:0] y; - logic [3:0] z; - logic [1:0] a; - logic [1:0] b; - logic GT; - - output logic LT; - output logic EQ; - - magcompare2c mag39(y[0], z[0], x[1:0], w[1:0]); - magcompare2c mag3A(y[1], z[1], x[3:2], w[3:2]); - magcompare2c mag3B(y[2], z[2], x[5:4], w[5:4]); - magcompare2c mag3C(y[3], z[3], x[7:6], w[7:6]); - - magcompare2c mag3D(a[0], b[0], z[1:0], y[1:0]); - magcompare2c mag3E(a[1], b[1], z[3:2], y[3:2]); - - magcompare2c mag3F(LT, GT, b[1:0], a[1:0]); - - assign EQ = ~(LT | GT); - -endmodule // magcompare64b - -// This module takes 64-bits inputs A and B, two magnitude comparison -// flags LT_mag and EQ_mag, and a 2-bit signal FOpCtrlE that indicates the type of -// operands being compared as indicated below. -// FOpCtrlE Description -// 00 double precision numbers -// 01 single precision numbers -// 10 half precision numbers -// 11 bfloat precision numbers -// -// The comparator produces a 2-bit signal fcc, which -// indicates the result of the comparison as follows: -// fcc decscription -// 00 A = B -// 01 A < B -// 10 A > B -// 11 A and B are unordered (i.e., A or B is NaN) -// It also produces a invalid operation flag, which is one -// if either of the input operands is a signaling NaN. 
- -module exception_cmp_2 ( - input logic [63:0] A, - input logic [63:0] B, - input logic FmtE, - input logic LT_mag, - input logic EQ_mag, - input logic [2:0] FOpCtrlE, - - output logic invalid, - output logic [1:0] fcc, - output logic [63:0] FCmpResultE, - - input logic Azero, - input logic Bzero, - input logic ANaN, - input logic BNaN); - - logic dp; - logic sp; - logic hp; - logic ASNaN; - logic BSNaN; - logic UO; - logic GT; - logic LT; - logic EQ; - logic [62:0] sixtythreezeros = 63'h0; - - assign dp = !FOpCtrlE[1]&!FOpCtrlE[0]; - assign sp = !FOpCtrlE[1]&FOpCtrlE[0]; - assign hp = FOpCtrlE[1]&!FOpCtrlE[0]; - - // Values are unordered if ((A is NaN) OR (B is NaN)) AND (a floating - // point comparison is being performed. - assign UO = (ANaN | BNaN); - - // Test if A or B is a signaling NaN. - assign ASNaN = ANaN & (sp&~A[53] | dp&~A[50] | hp&~A[56]); - assign BSNaN = BNaN & (sp&~B[53] | dp&~B[50] | hp&~B[56]); - - // If either A or B is a signaling NaN the "Invalid Operation" - // exception flag is set to one; otherwise it is zero. - assign invalid = (ASNaN | BSNaN); - - // A and B are equal if (their magnitudes are equal) AND ((their signs are - // equal) or (their magnitudes are zero AND they are floating point - // numbers)). Also, A and B are not equal if they are unordered. - assign EQ = (EQ_mag | (Azero&Bzero)) & (~UO); - - // A is less than B if (A is negative and B is posiive) OR - // (A and B are positive and the magnitude of A is less than - // the magnitude of B) or (A and B are negative integers and - // the magnitude of A is less than the magnitude of B) or - // (A and B are negative floating point numbers and - // the magnitude of A is greater than the magnitude of B). - // Also, A is not less than B if A and B are equal or unordered. - assign LT = ((~LT_mag & A[63] & B[63]) | - (LT_mag & ~(A[63] & B[63])))&~EQ&~UO; - - // A is greater than B when LT, EQ, and UO are are false. 
- assign GT = ~(LT | EQ | UO); - - // Note: it may be possible to optimize the setting of fcc - // a little more, but it is probably not worth the effort. - - // Set the bits of fcc based on LT, GT, EQ, and UO - assign fcc[0] = LT | UO; - assign fcc[1] = GT | UO; - - always_comb begin - case (FOpCtrlE[2:0]) - 3'b111: FCmpResultE = LT ? A : B;//min - 3'b101: FCmpResultE = GT ? A : B;//max - 3'b010: FCmpResultE = {63'b0, EQ};//equal - 3'b001: FCmpResultE = {63'b0, LT};//less than - 3'b011: FCmpResultE = {63'b0, LT|EQ};//less than or equal - default: FCmpResultE = 64'b0; - endcase - end - -endmodule // exception_cmp diff --git a/wally-pipelined/src/fpu/fpucmp2.sv b/wally-pipelined/src/fpu/fpucmp2.sv deleted file mode 100755 index ee14afb94..000000000 --- a/wally-pipelined/src/fpu/fpucmp2.sv +++ /dev/null @@ -1,243 +0,0 @@ -// // -// // File name : fpcomp.v -// // Title : Floating-Point Comparator -// // project : FPU -// // Library : fpcomp -// // Author(s) : James E. Stine -// // Purpose : definition of main unit to floating-point comparator -// // notes : -// // -// // Copyright Oklahoma State University -// // -// // Floating Point Comparator (Algorithm) -// // -// // 1.) Performs sign-extension if the inputs are 32-bit integers. -// // 2.) Perform a magnitude comparison on the lower 63 bits of the inputs -// // 3.) Check for special cases (+0=-0, unordered, and infinite values) -// // and correct for sign bits -// // -// // This module takes 64-bits inputs op1 and op2, VSS, and VDD -// // signals, and a 2-bit signal Sel that indicates the type of -// // operands being compared as indicated below. 
-// // Sel Description -// // 00 double precision numbers -// // 01 single precision numbers -// // 10 half precision numbers -// // 11 (unused) -// // -// // The comparator produces a 2-bit signal FCC, which -// // indicates the result of the comparison: -// // -// // fcc decscription -// // 00 A = B -// // 01 A < B -// // 10 A > B -// // 11 A and B are unordered (i.e., A or B is NaN) -// // -// // It also produces an invalid operation flag, which is one -// // if either of the input operands is a signaling NaN per 754 - -// module fpucmp2 ( -// input logic [63:0] op1, -// input logic [63:0] op2, -// input logic [1:0] Sel, -// input logic [7:0] w, x, -// input logic ANaN, BNaN, -// input logic Azero, Bzero, -// input logic [3:0] FOpCtrlM, -// input logic FmtM, - -// output logic Invalid, // Invalid Operation -// output logic [1:0] FCC, // Condition Codes -// output logic [63:0] FCmpResultM); - -// logic LT; // magnitude op1 < magnitude op2 -// logic EQ; // magnitude op1 = magnitude op2 - -// // Perform magnitude comparison between the 63 least signficant bits -// // of the input operands. Only LT and EQ are returned, since GT can -// // be determined from these values. -// magcompare64b_2 magcomp2 (LT, EQ, w, x); - -// // Determine final values based on output of magnitude comparison, -// // sign bits, and special case testing. 
-// exception_cmp_2 exc2 (.invalid(Invalid), .fcc(FCC), .LT_mag(LT), .EQ_mag(EQ), .ANaN(ANaN), .BNaN(BNaN), .Azero(Azero), .Bzero(Bzero), .Sel(Sel), .A(op1), .B(op2), .*); - - -// endmodule // fpcomp - -// /*module magcompare2b (LT, GT, A, B); - -// input logic [1:0] A; -// input logic [1:0] B; - -// output logic LT; -// output logic GT; - -// // Determine if A < B using a minimized sum-of-products expression -// assign LT = ~A[1]&B[1] | ~A[1]&~A[0]&B[0] | ~A[0]&B[1]&B[0]; -// // Determine if A > B using a minimized sum-of-products expression -// assign GT = A[1]&~B[1] | A[1]&A[0]&~B[0] | A[0]&~B[1]&~B[0]; - -// endmodule*/ // magcompare2b - -// // 2-bit magnitude comparator -// // This module compares two 2-bit values A and B. LT is '1' if A < B -// // and GT is '1'if A > B. LT and GT are both '0' if A = B. However, -// // this version actually incorporates don't cares into the equation to -// // simplify the optimization - -// // module magcompare2c (LT, GT, A, B); - -// // input logic [1:0] A; -// // input logic [1:0] B; - -// // output logic LT; -// // output logic GT; - -// // assign LT = B[1] | (!A[1]&B[0]); -// // assign GT = A[1] | (!B[1]&A[0]); - -// // endmodule // magcompare2b - -// // This module compares two 64-bit values A and B. LT is '1' if A < B -// // and EQ is '1'if A = B. LT and GT are both '0' if A > B. -// // This structure was modified so -// // that it only does a strict magnitdude comparison, and only -// // returns flags for less than (LT) and eqaual to (EQ). It uses a tree -// // of 63 2-bit magnitude comparators, followed by one OR gates. -// // -// // J. E. Stine and M. J. Schulte, "A combined two's complement and -// // floating-point comparator," 2005 IEEE International Symposium on -// // Circuits and Systems, Kobe, 2005, pp. 89-92 Vol. 1. 
-// // doi: 10.1109/ISCAS.2005.1464531 - -// module magcompare64b_2 (LT, EQ, w, x); - -// input logic [7:0] w; -// input logic [7:0] x; -// logic [3:0] y; -// logic [3:0] z; -// logic [1:0] a; -// logic [1:0] b; -// logic GT; - -// output logic LT; -// output logic EQ; - -// magcompare2c mag39(y[0], z[0], x[1:0], w[1:0]); -// magcompare2c mag3A(y[1], z[1], x[3:2], w[3:2]); -// magcompare2c mag3B(y[2], z[2], x[5:4], w[5:4]); -// magcompare2c mag3C(y[3], z[3], x[7:6], w[7:6]); - -// magcompare2c mag3D(a[0], b[0], z[1:0], y[1:0]); -// magcompare2c mag3E(a[1], b[1], z[3:2], y[3:2]); - -// magcompare2c mag3F(LT, GT, b[1:0], a[1:0]); - -// assign EQ = ~(LT | GT); - -// endmodule // magcompare64b - -// // This module takes 64-bits inputs A and B, two magnitude comparison -// // flags LT_mag and EQ_mag, and a 2-bit signal Sel that indicates the type of -// // operands being compared as indicated below. -// // Sel Description -// // 00 double precision numbers -// // 01 single precision numbers -// // 10 half precision numbers -// // 11 bfloat precision numbers -// // -// // The comparator produces a 2-bit signal fcc, which -// // indicates the result of the comparison as follows: -// // fcc decscription -// // 00 A = B -// // 01 A < B -// // 10 A > B -// // 11 A and B are unordered (i.e., A or B is NaN) -// // It also produces a invalid operation flag, which is one -// // if either of the input operands is a signaling NaN. 
- -// module exception_cmp_2 ( -// input logic [63:0] A, -// input logic [63:0] B, -// input logic FmtM, -// input logic LT_mag, -// input logic EQ_mag, -// input logic [1:0] Sel, -// input logic [3:0] FOpCtrlM, - -// output logic invalid, -// output logic [1:0] fcc, -// output logic [63:0] FCmpResultM, - -// input logic Azero, -// input logic Bzero, -// input logic ANaN, -// input logic BNaN); - -// logic dp; -// logic sp; -// logic hp; -// logic ASNaN; -// logic BSNaN; -// logic UO; -// logic GT; -// logic LT; -// logic EQ; -// logic [62:0] sixtythreezeros = 63'h0; - -// assign dp = !Sel[1]&!Sel[0]; -// assign sp = !Sel[1]&Sel[0]; -// assign hp = Sel[1]&!Sel[0]; - -// // Values are unordered if ((A is NaN) OR (B is NaN)) AND (a floating -// // point comparison is being performed. -// assign UO = (ANaN | BNaN); - -// // Test if A or B is a signaling NaN. -// assign ASNaN = ANaN & (sp&~A[53] | dp&~A[50] | hp&~A[56]); -// assign BSNaN = BNaN & (sp&~B[53] | dp&~B[50] | hp&~B[56]); - -// // If either A or B is a signaling NaN the "Invalid Operation" -// // exception flag is set to one; otherwise it is zero. -// assign invalid = (ASNaN | BSNaN); - -// // A and B are equal if (their magnitudes are equal) AND ((their signs are -// // equal) or (their magnitudes are zero AND they are floating point -// // numbers)). Also, A and B are not equal if they are unordered. -// assign EQ = (EQ_mag | (Azero&Bzero)) & (~UO); - -// // A is less than B if (A is negative and B is posiive) OR -// // (A and B are positive and the magnitude of A is less than -// // the magnitude of B) or (A and B are negative integers and -// // the magnitude of A is less than the magnitude of B) or -// // (A and B are negative floating point numbers and -// // the magnitude of A is greater than the magnitude of B). -// // Also, A is not less than B if A and B are equal or unordered. 
-// assign LT = ((~LT_mag & A[63] & B[63]) | -// (LT_mag & ~(A[63] & B[63])))&~EQ&~UO; - -// // A is greater than B when LT, EQ, and UO are are false. -// assign GT = ~(LT | EQ | UO); - -// // Note: it may be possible to optimize the setting of fcc -// // a little more, but it is probably not worth the effort. - -// // Set the bits of fcc based on LT, GT, EQ, and UO -// assign fcc[0] = LT | UO; -// assign fcc[1] = GT | UO; - -// always_comb begin -// case (FOpCtrlM[2:0]) -// 3'b111: FCmpResultM = LT ? A : B;//min -// 3'b101: FCmpResultM = GT ? A : B;//max -// 3'b010: FCmpResultM = FmtM ? {63'b0, EQ} : {31'b0, EQ, 32'b0};//equal -// 3'b001: FCmpResultM = FmtM ? {63'b0, LT} : {31'b0, LT, 32'b0};//less than -// 3'b011: FCmpResultM = FmtM ? {63'b0, LT|EQ} : {31'b0, LT|EQ, 32'b0};//less than or equal -// default: FCmpResultM = 64'b0; -// endcase -// end - - -// endmodule // exception_cmp diff --git a/wally-pipelined/src/fpu/fpuhazard.sv b/wally-pipelined/src/fpu/fpuhazard.sv deleted file mode 100644 index 4d0895a77..000000000 --- a/wally-pipelined/src/fpu/fpuhazard.sv +++ /dev/null @@ -1,67 +0,0 @@ -/////////////////////////////////////////// -// fpuhazard.sv -// -// Written: me@KatherineParry.com 19 May 2021 -// Modified: -// -// Purpose: Determine forwarding, stalls and flushes for the FPU -// -// A component of the Wally configurable RISC-V project. 
-// -// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University -// -// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, -// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software -// is furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT -// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-/////////////////////////////////////////// - -`include "wally-config.vh" - -module fpuhazard( - input logic [4:0] Adr1E, Adr2E, Adr3E, - input logic FWriteEnM, FWriteEnW, - input logic [4:0] RdM, RdW, - input logic [2:0] FResultSelM, - output logic FStallD, - output logic [1:0] ForwardXE, ForwardYE, ForwardZE -); - - - always_comb begin - // set ReadData as default - ForwardXE = 2'b00; // choose FRD1E - ForwardYE = 2'b00; // choose FRD2E - ForwardZE = 2'b00; // choose FRD3E - FStallD = 0; - - if ((Adr1E == RdM) & FWriteEnM) - // if the result will be FResM - if(FResultSelM == 3'b100) ForwardXE = 2'b10; // choose FResM - else FStallD = 1; // if the result won't be ready stall - else if ((Adr1E == RdW) & FWriteEnW) ForwardXE = 2'b01; // choose FPUResult64W - - - if ((Adr2E == RdM) & FWriteEnM) - // if the result will be FResM - if(FResultSelM == 3'b100) ForwardYE = 2'b10; // choose FResM - else FStallD = 1; // if the result won't be ready stall - else if ((Adr2E == RdW) & FWriteEnW) ForwardYE = 2'b01; // choose FPUResult64W - - - if ((Adr3E == RdM) & FWriteEnM) - // if the result will be FResM - if(FResultSelM == 3'b100) ForwardZE = 2'b10; // choose FResM - else FStallD = 1; // if the result won't be ready stall - else if ((Adr3E == RdW) & FWriteEnW) ForwardZE = 2'b01; // choose FPUResult64W - - end - -endmodule diff --git a/wally-pipelined/src/fpu/freg.sv b/wally-pipelined/src/fpu/freg.sv deleted file mode 100755 index b7e167131..000000000 --- a/wally-pipelined/src/fpu/freg.sv +++ /dev/null @@ -1,515 +0,0 @@ - -`include "wally-config.vh" -// `include "../../config/rv64icfd/wally-config.vh" //debug - -module freg1adr ( - input logic FmtW, - input logic reset, - input logic clear, - input logic clk, - input logic [4:0] rd, - input logic write, - input logic [4:0] adr1, - input logic [`XLEN-1:0] writeData, - output logic [`XLEN-1:0] readData); - - //note - not word aligning based on precision of - //operation (FmtW) - - //reg number should remain static, but it 
doesn't hurt - //to parameterize - parameter numRegs = 32; - - //intermediary signals - useful for debugging - //and easy instatiation of generated modules - logic [`XLEN-1:0] [numRegs-1:0] regInput; - logic [`XLEN-1:0] [numRegs-1:0] regOutput; - - //generate fp registers themselves - genvar i; - generate - for (i = 0; i < numRegs; i = i + 1) begin:register - - floprc #(`XLEN) freg[i](.clk(clk), .reset(reset), .clear(clear), .d(regInput[i][`XLEN-1:0]), .q(regOutput[i][`XLEN-1:0])); - end - - endgenerate - - //this could be done with: - // - //assign readData = regOutput[adr1]; - // - //but always_comb allows for finer control - - - //address decoder - //only 1 for this fp register set - //used with fpsign - //defaults to outputting zeroes - always_comb begin - case(adr1) - 5'b00000 : readData = regOutput[0]; - 5'b00001 : readData = regOutput[1]; - 5'b00010 : readData = regOutput[2]; - 5'b00011 : readData = regOutput[3]; - 5'b00100 : readData = regOutput[4]; - 5'b00101 : readData = regOutput[5]; - 5'b00110 : readData = regOutput[6]; - 5'b00111 : readData = regOutput[7]; - 5'b01000 : readData = regOutput[8]; - 5'b01001 : readData = regOutput[9]; - 5'b01010 : readData = regOutput[10]; - 5'b01011 : readData = regOutput[11]; - 5'b01100 : readData = regOutput[12]; - 5'b01101 : readData = regOutput[13]; - 5'b01110 : readData = regOutput[14]; - 5'b01111 : readData = regOutput[15]; - 5'b10000 : readData = regOutput[16]; - 5'b10001 : readData = regOutput[17]; - 5'b10010 : readData = regOutput[18]; - 5'b10011 : readData = regOutput[19]; - 5'b10100 : readData = regOutput[20]; - 5'b10101 : readData = regOutput[21]; - 5'b10110 : readData = regOutput[22]; - 5'b10111 : readData = regOutput[23]; - 5'b11000 : readData = regOutput[24]; - 5'b11001 : readData = regOutput[25]; - 5'b11010 : readData = regOutput[26]; - 5'b11011 : readData = regOutput[27]; - 5'b11100 : readData = regOutput[28]; - 5'b11101 : readData = regOutput[29]; - 5'b11110 : readData = regOutput[30]; - 5'b11111 : 
readData = regOutput[31]; - default : readData = `XLEN'h0; - endcase - end - - //destination register decoder - //only change input values on write - //defaults to undefined with invalid address - // - //note - this is an intermediary signal, so - //this is not asynch assignment. FF in flopr - //will not update data until clk pulse - always_comb begin - if(write) begin - case(rd) - 5'b00000 : regInput[0] = writeData; - 5'b00001 : regInput[1] = writeData; - 5'b00010 : regInput[2] = writeData; - 5'b00011 : regInput[3] = writeData; - 5'b00100 : regInput[4] = writeData; - 5'b00101 : regInput[5] = writeData; - 5'b00110 : regInput[6] = writeData; - 5'b00111 : regInput[7] = writeData; - 5'b01000 : regInput[8] = writeData; - 5'b01000 : regInput[9] = writeData; - 5'b01001 : regInput[10] = writeData; - 5'b01010 : regInput[11] = writeData; - 5'b01111 : regInput[12] = writeData; - 5'b01101 : regInput[13] = writeData; - 5'b01110 : regInput[14] = writeData; - 5'b01111 : regInput[15] = writeData; - 5'b10000 : regInput[16] = writeData; - 5'b10001 : regInput[17] = writeData; - 5'b10010 : regInput[18] = writeData; - 5'b10011 : regInput[19] = writeData; - 5'b10100 : regInput[20] = writeData; - 5'b10101 : regInput[21] = writeData; - 5'b10110 : regInput[22] = writeData; - 5'b10111 : regInput[23] = writeData; - 5'b11000 : regInput[24] = writeData; - 5'b11000 : regInput[25] = writeData; - 5'b11001 : regInput[26] = writeData; - 5'b11010 : regInput[27] = writeData; - 5'b11111 : regInput[28] = writeData; - 5'b11101 : regInput[29] = writeData; - 5'b11110 : regInput[30] = writeData; - 5'b11111 : regInput[31] = writeData; - default : regInput[0] = `XLEN'hx; - endcase - end - end - -endmodule - -////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//******** -//formatting separation -//******** -////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -module 
freg2adr ( - input logic FmtW, - input logic reset, - input logic clear, - input logic clk, - input logic [4:0] rd, - input logic write, - input logic [4:0] adr1, - input logic [4:0] adr2, - input logic [`XLEN-1:0] writeData, - output logic [`XLEN-1:0] readData1, - output logic [`XLEN-1:0] readData2); - - //note - not word aligning based on precision of - //operation (FmtW) - - //reg number should remain static, but it doesn't hurt - //to parameterize - parameter numRegs = 32; - - //intermediary signals - useful for debugging - //and easy instatiation of generated modules - logic [`XLEN-1:0] [numRegs-1:0] regInput; - logic [`XLEN-1:0] [numRegs-1:0] regOutput; - - //generate fp registers themselves - genvar i; - generate - for (i = 0; i < numRegs; i = i + 1) begin:register - - floprc #(`XLEN) freg[i](.clk(clk), .reset(reset), .clear(clear), .d(regInput[i][`XLEN-1:0]), .q(regOutput[i][`XLEN-1:0])); - end - - endgenerate - - //address decoder - //2 are used for this fp register set - //used with fpadd/cvt, fpdiv/sqrt, and fpcmp - //defaults to outputting zeroes - always_comb begin - - //adderss 1 decoder - case(adr1) - 5'b00000 : readData1 = regOutput[0]; - 5'b00001 : readData1 = regOutput[1]; - 5'b00010 : readData1 = regOutput[2]; - 5'b00011 : readData1 = regOutput[3]; - 5'b00100 : readData1 = regOutput[4]; - 5'b00101 : readData1 = regOutput[5]; - 5'b00110 : readData1 = regOutput[6]; - 5'b00111 : readData1 = regOutput[7]; - 5'b01000 : readData1 = regOutput[8]; - 5'b01001 : readData1 = regOutput[9]; - 5'b01010 : readData1 = regOutput[10]; - 5'b01011 : readData1 = regOutput[11]; - 5'b01100 : readData1 = regOutput[12]; - 5'b01101 : readData1 = regOutput[13]; - 5'b01110 : readData1 = regOutput[14]; - 5'b01111 : readData1 = regOutput[15]; - 5'b10000 : readData1 = regOutput[16]; - 5'b10001 : readData1 = regOutput[17]; - 5'b10010 : readData1 = regOutput[18]; - 5'b10011 : readData1 = regOutput[19]; - 5'b10100 : readData1 = regOutput[20]; - 5'b10101 : readData1 = 
regOutput[21]; - 5'b10110 : readData1 = regOutput[22]; - 5'b10111 : readData1 = regOutput[23]; - 5'b11000 : readData1 = regOutput[24]; - 5'b11001 : readData1 = regOutput[25]; - 5'b11010 : readData1 = regOutput[26]; - 5'b11011 : readData1 = regOutput[27]; - 5'b11100 : readData1 = regOutput[28]; - 5'b11101 : readData1 = regOutput[29]; - 5'b11110 : readData1 = regOutput[30]; - 5'b11111 : readData1 = regOutput[31]; - default : readData1 = `XLEN'h0; - endcase - - //address 2 decoder - case(adr2) - 5'b00000 : readData2 = regOutput[0]; - 5'b00001 : readData2 = regOutput[1]; - 5'b00010 : readData2 = regOutput[2]; - 5'b00011 : readData2 = regOutput[3]; - 5'b00100 : readData2 = regOutput[4]; - 5'b00101 : readData2 = regOutput[5]; - 5'b00110 : readData2 = regOutput[6]; - 5'b00111 : readData2 = regOutput[7]; - 5'b01000 : readData2 = regOutput[8]; - 5'b01001 : readData2 = regOutput[9]; - 5'b01010 : readData2 = regOutput[10]; - 5'b01011 : readData2 = regOutput[11]; - 5'b01100 : readData2 = regOutput[12]; - 5'b01101 : readData2 = regOutput[13]; - 5'b01110 : readData2 = regOutput[14]; - 5'b01111 : readData2 = regOutput[15]; - 5'b10000 : readData2 = regOutput[16]; - 5'b10001 : readData2 = regOutput[17]; - 5'b10010 : readData2 = regOutput[18]; - 5'b10011 : readData2 = regOutput[19]; - 5'b10100 : readData2 = regOutput[20]; - 5'b10101 : readData2 = regOutput[21]; - 5'b10110 : readData2 = regOutput[22]; - 5'b10111 : readData2 = regOutput[23]; - 5'b11000 : readData2 = regOutput[24]; - 5'b11001 : readData2 = regOutput[25]; - 5'b11010 : readData2 = regOutput[26]; - 5'b11011 : readData2 = regOutput[27]; - 5'b11100 : readData2 = regOutput[28]; - 5'b11101 : readData2 = regOutput[29]; - 5'b11110 : readData2 = regOutput[30]; - 5'b11111 : readData2 = regOutput[31]; - default : readData2 = `XLEN'h0; - endcase - end - - //destination register decoder - //only change input values on write - //defaults to undefined with invalid address - // - //note - this is an intermediary signal, so - //this is 
not asynch assignment. FF in flopr - //will not update data until clk pulse - always_comb begin - if(write) begin - case(rd) - 5'b00000 : regInput[0] = writeData; - 5'b00001 : regInput[1] = writeData; - 5'b00010 : regInput[2] = writeData; - 5'b00011 : regInput[3] = writeData; - 5'b00100 : regInput[4] = writeData; - 5'b00101 : regInput[5] = writeData; - 5'b00110 : regInput[6] = writeData; - 5'b00111 : regInput[7] = writeData; - 5'b01000 : regInput[8] = writeData; - 5'b01000 : regInput[9] = writeData; - 5'b01001 : regInput[10] = writeData; - 5'b01010 : regInput[11] = writeData; - 5'b01111 : regInput[12] = writeData; - 5'b01101 : regInput[13] = writeData; - 5'b01110 : regInput[14] = writeData; - 5'b01111 : regInput[15] = writeData; - 5'b10000 : regInput[16] = writeData; - 5'b10001 : regInput[17] = writeData; - 5'b10010 : regInput[18] = writeData; - 5'b10011 : regInput[19] = writeData; - 5'b10100 : regInput[20] = writeData; - 5'b10101 : regInput[21] = writeData; - 5'b10110 : regInput[22] = writeData; - 5'b10111 : regInput[23] = writeData; - 5'b11000 : regInput[24] = writeData; - 5'b11000 : regInput[25] = writeData; - 5'b11001 : regInput[26] = writeData; - 5'b11010 : regInput[27] = writeData; - 5'b11111 : regInput[28] = writeData; - 5'b11101 : regInput[29] = writeData; - 5'b11110 : regInput[30] = writeData; - 5'b11111 : regInput[31] = writeData; - default : regInput[0] = `XLEN'hx; - endcase - end - end - -endmodule - -////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//******** -//formatting separation -//******** -///////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -module freg3adr ( - input logic FmtW, - input logic reset, - input logic clear, - input logic clk, - input logic [4:0] rd, - input logic write, - input logic [4:0] adr1, - input logic [4:0] adr2, - input logic [4:0] adr3, - input logic [`XLEN-1:0] writeData, - output 
logic [`XLEN-1:0] readData1, - output logic [`XLEN-1:0] readData2, - output logic [`XLEN-1:0] readData3); - - //note - not word aligning based on precision of - //operation (FmtW) - - //reg number should remain static, but it doesn't hurt - //to parameterize - parameter numRegs = 32; - - //intermediary signals - useful for debugging - //and easy instatiation of generated modules - logic [numRegs-1:0] [`XLEN-1:0] regInput; - logic [numRegs-1:0] [`XLEN-1:0] regOutput; - - //generate fp registers themselves - genvar i; - generate - for (i = 0; i < numRegs; i = i + 1) begin:register - - floprc #(`XLEN) freg(.clk(clk), .reset(reset), .clear(clear), .d(regInput[i][`XLEN-1:0]), .q(regOutput[i][`XLEN-1:0])); - end - - endgenerate - - //address decoder - //3 are used for this fp register set - //used exclusively for fma - //defaults to outputting zeroes - always_comb begin - - //adderss 1 decoder - case(adr1) - 5'b00000 : readData1 = regOutput[0]; - 5'b00001 : readData1 = regOutput[1]; - 5'b00010 : readData1 = regOutput[2]; - 5'b00011 : readData1 = regOutput[3]; - 5'b00100 : readData1 = regOutput[4]; - 5'b00101 : readData1 = regOutput[5]; - 5'b00110 : readData1 = regOutput[6]; - 5'b00111 : readData1 = regOutput[7]; - 5'b01000 : readData1 = regOutput[8]; - 5'b01001 : readData1 = regOutput[9]; - 5'b01010 : readData1 = regOutput[10]; - 5'b01011 : readData1 = regOutput[11]; - 5'b01100 : readData1 = regOutput[12]; - 5'b01101 : readData1 = regOutput[13]; - 5'b01110 : readData1 = regOutput[14]; - 5'b01111 : readData1 = regOutput[15]; - 5'b10000 : readData1 = regOutput[16]; - 5'b10001 : readData1 = regOutput[17]; - 5'b10010 : readData1 = regOutput[18]; - 5'b10011 : readData1 = regOutput[19]; - 5'b10100 : readData1 = regOutput[20]; - 5'b10101 : readData1 = regOutput[21]; - 5'b10110 : readData1 = regOutput[22]; - 5'b10111 : readData1 = regOutput[23]; - 5'b11000 : readData1 = regOutput[24]; - 5'b11001 : readData1 = regOutput[25]; - 5'b11010 : readData1 = regOutput[26]; - 5'b11011 : 
readData1 = regOutput[27]; - 5'b11100 : readData1 = regOutput[28]; - 5'b11101 : readData1 = regOutput[29]; - 5'b11110 : readData1 = regOutput[30]; - 5'b11111 : readData1 = regOutput[31]; - default : readData1 = `XLEN'h0; - endcase - - //address 2 decoder - case(adr2) - 5'b00000 : readData2 = regOutput[0]; - 5'b00001 : readData2 = regOutput[1]; - 5'b00010 : readData2 = regOutput[2]; - 5'b00011 : readData2 = regOutput[3]; - 5'b00100 : readData2 = regOutput[4]; - 5'b00101 : readData2 = regOutput[5]; - 5'b00110 : readData2 = regOutput[6]; - 5'b00111 : readData2 = regOutput[7]; - 5'b01000 : readData2 = regOutput[8]; - 5'b01001 : readData2 = regOutput[9]; - 5'b01010 : readData2 = regOutput[10]; - 5'b01011 : readData2 = regOutput[11]; - 5'b01100 : readData2 = regOutput[12]; - 5'b01101 : readData2 = regOutput[13]; - 5'b01110 : readData2 = regOutput[14]; - 5'b01111 : readData2 = regOutput[15]; - 5'b10000 : readData2 = regOutput[16]; - 5'b10001 : readData2 = regOutput[17]; - 5'b10010 : readData2 = regOutput[18]; - 5'b10011 : readData2 = regOutput[19]; - 5'b10100 : readData2 = regOutput[20]; - 5'b10101 : readData2 = regOutput[21]; - 5'b10110 : readData2 = regOutput[22]; - 5'b10111 : readData2 = regOutput[23]; - 5'b11000 : readData2 = regOutput[24]; - 5'b11001 : readData2 = regOutput[25]; - 5'b11010 : readData2 = regOutput[26]; - 5'b11011 : readData2 = regOutput[27]; - 5'b11100 : readData2 = regOutput[28]; - 5'b11101 : readData2 = regOutput[29]; - 5'b11110 : readData2 = regOutput[30]; - 5'b11111 : readData2 = regOutput[31]; - default : readData2 = `XLEN'h0; - endcase - - //address 3 decoder - case(adr3) - 5'b00000 : readData3 = regOutput[0]; - 5'b00001 : readData3 = regOutput[1]; - 5'b00010 : readData3 = regOutput[2]; - 5'b00011 : readData3 = regOutput[3]; - 5'b00100 : readData3 = regOutput[4]; - 5'b00101 : readData3 = regOutput[5]; - 5'b00110 : readData3 = regOutput[6]; - 5'b00111 : readData3 = regOutput[7]; - 5'b01000 : readData3 = regOutput[8]; - 5'b01001 : readData3 = 
regOutput[9]; - 5'b01010 : readData3 = regOutput[10]; - 5'b01011 : readData3 = regOutput[11]; - 5'b01100 : readData3 = regOutput[12]; - 5'b01101 : readData3 = regOutput[13]; - 5'b01110 : readData3 = regOutput[14]; - 5'b01111 : readData3 = regOutput[15]; - 5'b10000 : readData3 = regOutput[16]; - 5'b10001 : readData3 = regOutput[17]; - 5'b10010 : readData3 = regOutput[18]; - 5'b10011 : readData3 = regOutput[19]; - 5'b10100 : readData3 = regOutput[20]; - 5'b10101 : readData3 = regOutput[21]; - 5'b10110 : readData3 = regOutput[22]; - 5'b10111 : readData3 = regOutput[23]; - 5'b11000 : readData3 = regOutput[24]; - 5'b11001 : readData3 = regOutput[25]; - 5'b11010 : readData3 = regOutput[26]; - 5'b11011 : readData3 = regOutput[27]; - 5'b11100 : readData3 = regOutput[28]; - 5'b11101 : readData3 = regOutput[29]; - 5'b11110 : readData3 = regOutput[30]; - 5'b11111 : readData3 = regOutput[31]; - default : readData3 = `XLEN'h0; - endcase - end - - //destination register decoder - //only change input values on write - //defaults to undefined with invalid address - // - //note - this is an intermediary signal, so - //this is not asynch assignment. 
FF in flopr - //will not update data until clk pulse - always_comb begin - if(write) begin - case(rd) - 5'b00000 : regInput[0] = writeData; - 5'b00001 : regInput[1] = writeData; - 5'b00010 : regInput[2] = writeData; - 5'b00011 : regInput[3] = writeData; - 5'b00100 : regInput[4] = writeData; - 5'b00101 : regInput[5] = writeData; - 5'b00110 : regInput[6] = writeData; - 5'b00111 : regInput[7] = writeData; - 5'b01000 : regInput[8] = writeData; - 5'b01001 : regInput[9] = writeData; - 5'b01010 : regInput[10] = writeData; - 5'b01011 : regInput[11] = writeData; - 5'b01100 : regInput[12] = writeData; - 5'b01101 : regInput[13] = writeData; - 5'b01110 : regInput[14] = writeData; - 5'b01111 : regInput[15] = writeData; - 5'b10000 : regInput[16] = writeData; - 5'b10001 : regInput[17] = writeData; - 5'b10010 : regInput[18] = writeData; - 5'b10011 : regInput[19] = writeData; - 5'b10100 : regInput[20] = writeData; - 5'b10101 : regInput[21] = writeData; - 5'b10110 : regInput[22] = writeData; - 5'b10111 : regInput[23] = writeData; - 5'b11000 : regInput[24] = writeData; - 5'b11001 : regInput[25] = writeData; - 5'b11010 : regInput[26] = writeData; - 5'b11011 : regInput[27] = writeData; - 5'b11100 : regInput[28] = writeData; - 5'b11101 : regInput[29] = writeData; - 5'b11110 : regInput[30] = writeData; - 5'b11111 : regInput[31] = writeData; - default : regInput[0] = `XLEN'hx; - endcase - end - end - -endmodule diff --git a/wally-pipelined/src/fpu/fsgn.sv b/wally-pipelined/src/fpu/fsgn.sv index 62d0e7d7c..7df9386c7 100755 --- a/wally-pipelined/src/fpu/fsgn.sv +++ b/wally-pipelined/src/fpu/fsgn.sv @@ -1,13 +1,12 @@ //performs the fsgnj/fsgnjn/fsgnjx RISCV instructions -module fpusgn (SgnOpCodeE, SgnResultE, SgnFlagsE, SrcXE, SrcYE); +module fsgn ( + input logic [63:0] SrcXE, SrcYE, + input logic [1:0] SgnOpCodeE, + output logic [63:0] SgnResE, + output logic SgnNVE); - input [63:0] SrcXE, SrcYE; - input [1:0] SgnOpCodeE; - output [63:0] SgnResultE; - output [4:0] SgnFlagsE; - - wire 
AonesExp; + logic AonesExp; //op code designation: // @@ -16,8 +15,8 @@ module fpusgn (SgnOpCodeE, SgnResultE, SgnFlagsE, SrcXE, SrcYE); //10 - fsgnjx - XOR sign values of SrcXE & SrcYE // - assign SgnResultE[63] = SgnOpCodeE[1] ? (SrcXE[63] ^ SrcYE[63]) : (SrcYE[63] ^ SgnOpCodeE[0]); - assign SgnResultE[62:0] = SrcXE[62:0]; + assign SgnResE[63] = SgnOpCodeE[1] ? (SrcXE[63] ^ SrcYE[63]) : (SrcYE[63] ^ SgnOpCodeE[0]); + assign SgnResE[62:0] = SrcXE[62:0]; //If the exponent is all ones, then the value is either Inf or NaN, //both of which will produce a QNaN/SNaN value of some sort. This will @@ -26,6 +25,6 @@ module fpusgn (SgnOpCodeE, SgnResultE, SgnFlagsE, SrcXE, SrcYE); //the only flag that can occur during this operation is invalid //due to changing sign on already existing NaN - assign SgnFlagsE = {AonesExp & SgnResultE[63], 1'b0, 1'b0, 1'b0, 1'b0}; + assign SgnNVE = AonesExp & SgnResE[63]; endmodule diff --git a/wally-pipelined/src/fpu/ling_bk13.sv b/wally-pipelined/src/fpu/ling_bk13.sv deleted file mode 100755 index a35c7a8f7..000000000 --- a/wally-pipelined/src/fpu/ling_bk13.sv +++ /dev/null @@ -1,89 +0,0 @@ -// Brent-Kung Prefix Adder - -module ling_bk13 (cout, sum, a, b, cin); - input [12:0] a, b; - input cin; - output [12:0] sum; - output cout; - - wire [13:0] p,g; - wire [13:1] h,c; - -// pre-computation - assign p={a|b,1'b1}; - assign g={a&b, cin}; - -// prefix tree - ling_brent_kung prefix_tree(h, c, p[12:0], g[12:0]); - -// post-computation - assign h[13]=g[13]|c[13]; - assign sum=p[13:1]^h|g[13:1]&c; - assign cout=p[13]&h[13]; - -endmodule - -module ling_brent_kung (h, c, p, g); - - input [12:0] p; - input [13:0] g; - output [13:1] h; - output [13:1] c; - - - // parallel-prefix, Brent-Kung - - // Stage 1: Generates H/I pairs that span 1 bits - rgry g_1_0 (H_1_0, {g[1],g[0]}); - rblk b_3_2 (H_3_2, I_3_2, {g[3],g[2]}, {p[2],p[1]}); - rblk b_5_4 (H_5_4, I_5_4, {g[5],g[4]}, {p[4],p[3]}); - rblk b_7_6 (H_7_6, I_7_6, {g[7],g[6]}, {p[6],p[5]}); - rblk b_9_8 
(H_9_8, I_9_8, {g[9],g[8]}, {p[8],p[7]}); - rblk b_11_10 (H_11_10, I_11_10, {g[11],g[10]}, {p[10],p[9]}); - rblk b_13_12 (H_13_12, I_13_12, {g[13],g[12]}, {p[12],p[11]}); - - // Stage 2: Generates H/I pairs that span 2 bits - grey g_3_0 (H_3_0, {H_3_2,H_1_0}, I_3_2); - black b_7_4 (H_7_4, I_7_4, {H_7_6,H_5_4}, {I_7_6,I_5_4}); - black b_11_8 (H_11_8, I_11_8, {H_11_10,H_9_8}, {I_11_10,I_9_8}); - - // Stage 3: Generates H/I pairs that span 4 bits - grey g_7_0 (H_7_0, {H_7_4,H_3_0}, I_7_4); - - // Stage 4: Generates H/I pairs that span 8 bits - - // Stage 5: Generates H/I pairs that span 4 bits - grey g_11_0 (H_11_0, {H_11_8,H_7_0}, I_11_8); - - // Stage 6: Generates H/I pairs that span 2 bits - grey g_5_0 (H_5_0, {H_5_4,H_3_0}, I_5_4); - grey g_9_0 (H_9_0, {H_9_8,H_7_0}, I_9_8); - - // Last grey cell stage - grey g_2_0 (H_2_0, {g[2],H_1_0}, p[1]); - grey g_4_0 (H_4_0, {g[4],H_3_0}, p[3]); - grey g_6_0 (H_6_0, {g[6],H_5_0}, p[5]); - grey g_8_0 (H_8_0, {g[8],H_7_0}, p[7]); - grey g_10_0 (H_10_0, {g[10],H_9_0}, p[9]); - grey g_12_0 (H_12_0, {g[12],H_11_0}, p[11]); - - // Final Stage: Apply c_k+1=p_k&H_k_0 - assign c[1]=g[0]; - - assign h[1]=H_1_0; assign c[2]=p[1]&H_1_0; - assign h[2]=H_2_0; assign c[3]=p[2]&H_2_0; - assign h[3]=H_3_0; assign c[4]=p[3]&H_3_0; - assign h[4]=H_4_0; assign c[5]=p[4]&H_4_0; - assign h[5]=H_5_0; assign c[6]=p[5]&H_5_0; - assign h[6]=H_6_0; assign c[7]=p[6]&H_6_0; - assign h[7]=H_7_0; assign c[8]=p[7]&H_7_0; - assign h[8]=H_8_0; assign c[9]=p[8]&H_8_0; - - assign h[9]=H_9_0; assign c[10]=p[9]&H_9_0; - assign h[10]=H_10_0; assign c[11]=p[10]&H_10_0; - assign h[11]=H_11_0; assign c[12]=p[11]&H_11_0; - assign h[12]=H_12_0; assign c[13]=p[12]&H_12_0; - -endmodule - - diff --git a/wally-pipelined/src/fpu/lzd_denorm.sv b/wally-pipelined/src/fpu/lzd_denorm.sv index 21efbf5fc..860a33817 100755 --- a/wally-pipelined/src/fpu/lzd_denorm.sv +++ b/wally-pipelined/src/fpu/lzd_denorm.sv @@ -168,3 +168,4 @@ module lz52 (ZP, ZV, B); endmodule // lz52 + diff 
--git a/wally-pipelined/src/fpu/mult_R4_64_64_cs.sv b/wally-pipelined/src/fpu/mult_R4_64_64_cs.sv old mode 100755 new mode 100644 diff --git a/wally-pipelined/src/fpu/rounder_denorm.sv b/wally-pipelined/src/fpu/rounder_denorm.sv index 70df0656b..b6793594c 100755 --- a/wally-pipelined/src/fpu/rounder_denorm.sv +++ b/wally-pipelined/src/fpu/rounder_denorm.sv @@ -115,11 +115,11 @@ module rounder (Result, DenormIO, Flags, rm, P, OvEn, assign B_12_overflow = {8'h0, 3'b0, normal_overflow}; assign B_12_underflow = {8'h0, 3'b0, normal_underflow}; - cla52 add1(Tmant, Cout, A[62:11], B); + cla52 add1(Tmant, Cout, A[62:11], B); //***adder - cla12 add1_exp(Texp_addone, Cout_overflow, Texp, B_12_overflow); + cla12 add1_exp(Texp_addone, Cout_overflow, Texp, B_12_overflow); //***adder - cla_sub12 sub1_exp(Texp_subone, Texp, B_12_underflow); + cla_sub12 sub1_exp(Texp_subone, Texp, B_12_underflow); //***adder // Now that rounding is done, we compute the final exponent // and test for special cases. diff --git a/wally-pipelined/src/fpu/sbtm_a4.sv b/wally-pipelined/src/fpu/sbtm_a4.sv deleted file mode 100755 index 7ffe4c617..000000000 --- a/wally-pipelined/src/fpu/sbtm_a4.sv +++ /dev/null @@ -1,204 +0,0 @@ -module sbtm_a4 (input logic [7:0] a, - output logic [13:0] y); - always_comb - case(a) - 8'b01000000: y = 14'b10110100010111; - 8'b01000001: y = 14'b10110010111111; - 8'b01000010: y = 14'b10110001101000; - 8'b01000011: y = 14'b10110000010011; - 8'b01000100: y = 14'b10101111000001; - 8'b01000101: y = 14'b10101101110000; - 8'b01000110: y = 14'b10101100100001; - 8'b01000111: y = 14'b10101011010011; - 8'b01001000: y = 14'b10101010000111; - 8'b01001001: y = 14'b10101000111101; - 8'b01001010: y = 14'b10100111110100; - 8'b01001011: y = 14'b10100110101101; - 8'b01001100: y = 14'b10100101100111; - 8'b01001101: y = 14'b10100100100010; - 8'b01001110: y = 14'b10100011011111; - 8'b01001111: y = 14'b10100010011101; - 8'b01010000: y = 14'b10100001011100; - 8'b01010001: y = 14'b10100000011100; - 
8'b01010010: y = 14'b10011111011110; - 8'b01010011: y = 14'b10011110100001; - 8'b01010100: y = 14'b10011101100100; - 8'b01010101: y = 14'b10011100101001; - 8'b01010110: y = 14'b10011011101111; - 8'b01010111: y = 14'b10011010110110; - 8'b01011000: y = 14'b10011001111110; - 8'b01011001: y = 14'b10011001000110; - 8'b01011010: y = 14'b10011000010000; - 8'b01011011: y = 14'b10010111011011; - 8'b01011100: y = 14'b10010110100110; - 8'b01011101: y = 14'b10010101110011; - 8'b01011110: y = 14'b10010101000000; - 8'b01011111: y = 14'b10010100001110; - 8'b01100000: y = 14'b10010011011100; - 8'b01100001: y = 14'b10010010101100; - 8'b01100010: y = 14'b10010001111100; - 8'b01100011: y = 14'b10010001001101; - 8'b01100100: y = 14'b10010000011111; - 8'b01100101: y = 14'b10001111110001; - 8'b01100110: y = 14'b10001111000100; - 8'b01100111: y = 14'b10001110011000; - 8'b01101000: y = 14'b10001101101100; - 8'b01101001: y = 14'b10001101000001; - 8'b01101010: y = 14'b10001100010110; - 8'b01101011: y = 14'b10001011101100; - 8'b01101100: y = 14'b10001011000011; - 8'b01101101: y = 14'b10001010011010; - 8'b01101110: y = 14'b10001001110010; - 8'b01101111: y = 14'b10001001001010; - 8'b01110000: y = 14'b10001000100011; - 8'b01110001: y = 14'b10000111111101; - 8'b01110010: y = 14'b10000111010111; - 8'b01110011: y = 14'b10000110110001; - 8'b01110100: y = 14'b10000110001100; - 8'b01110101: y = 14'b10000101100111; - 8'b01110110: y = 14'b10000101000011; - 8'b01110111: y = 14'b10000100011111; - 8'b01111000: y = 14'b10000011111100; - 8'b01111001: y = 14'b10000011011001; - 8'b01111010: y = 14'b10000010110111; - 8'b01111011: y = 14'b10000010010101; - 8'b01111100: y = 14'b10000001110011; - 8'b01111101: y = 14'b10000001010010; - 8'b01111110: y = 14'b10000000110001; - 8'b01111111: y = 14'b10000000010001; - 8'b10000000: y = 14'b01111111110001; - 8'b10000001: y = 14'b01111111010001; - 8'b10000010: y = 14'b01111110110010; - 8'b10000011: y = 14'b01111110010011; - 8'b10000100: y = 14'b01111101110101; - 
8'b10000101: y = 14'b01111101010110; - 8'b10000110: y = 14'b01111100111001; - 8'b10000111: y = 14'b01111100011011; - 8'b10001000: y = 14'b01111011111110; - 8'b10001001: y = 14'b01111011100001; - 8'b10001010: y = 14'b01111011000100; - 8'b10001011: y = 14'b01111010101000; - 8'b10001100: y = 14'b01111010001100; - 8'b10001101: y = 14'b01111001110000; - 8'b10001110: y = 14'b01111001010101; - 8'b10001111: y = 14'b01111000111010; - 8'b10010000: y = 14'b01111000011111; - 8'b10010001: y = 14'b01111000000100; - 8'b10010010: y = 14'b01110111101010; - 8'b10010011: y = 14'b01110111010000; - 8'b10010100: y = 14'b01110110110110; - 8'b10010101: y = 14'b01110110011101; - 8'b10010110: y = 14'b01110110000100; - 8'b10010111: y = 14'b01110101101011; - 8'b10011000: y = 14'b01110101010010; - 8'b10011001: y = 14'b01110100111001; - 8'b10011010: y = 14'b01110100100001; - 8'b10011011: y = 14'b01110100001001; - 8'b10011100: y = 14'b01110011110001; - 8'b10011101: y = 14'b01110011011010; - 8'b10011110: y = 14'b01110011000010; - 8'b10011111: y = 14'b01110010101011; - 8'b10100000: y = 14'b01110010010100; - 8'b10100001: y = 14'b01110001111110; - 8'b10100010: y = 14'b01110001100111; - 8'b10100011: y = 14'b01110001010001; - 8'b10100100: y = 14'b01110000111011; - 8'b10100101: y = 14'b01110000100101; - 8'b10100110: y = 14'b01110000001111; - 8'b10100111: y = 14'b01101111111010; - 8'b10101000: y = 14'b01101111100101; - 8'b10101001: y = 14'b01101111010000; - 8'b10101010: y = 14'b01101110111011; - 8'b10101011: y = 14'b01101110100110; - 8'b10101100: y = 14'b01101110010001; - 8'b10101101: y = 14'b01101101111101; - 8'b10101110: y = 14'b01101101101001; - 8'b10101111: y = 14'b01101101010101; - 8'b10110000: y = 14'b01101101000001; - 8'b10110001: y = 14'b01101100101101; - 8'b10110010: y = 14'b01101100011010; - 8'b10110011: y = 14'b01101100000110; - 8'b10110100: y = 14'b01101011110011; - 8'b10110101: y = 14'b01101011100000; - 8'b10110110: y = 14'b01101011001101; - 8'b10110111: y = 14'b01101010111010; - 
8'b10111000: y = 14'b01101010101000; - 8'b10111001: y = 14'b01101010010101; - 8'b10111010: y = 14'b01101010000011; - 8'b10111011: y = 14'b01101001110001; - 8'b10111100: y = 14'b01101001011111; - 8'b10111101: y = 14'b01101001001101; - 8'b10111110: y = 14'b01101000111100; - 8'b10111111: y = 14'b01101000101010; - 8'b11000000: y = 14'b01101000011001; - 8'b11000001: y = 14'b01101000000111; - 8'b11000010: y = 14'b01100111110110; - 8'b11000011: y = 14'b01100111100101; - 8'b11000100: y = 14'b01100111010100; - 8'b11000101: y = 14'b01100111000011; - 8'b11000110: y = 14'b01100110110011; - 8'b11000111: y = 14'b01100110100010; - 8'b11001000: y = 14'b01100110010010; - 8'b11001001: y = 14'b01100110000010; - 8'b11001010: y = 14'b01100101110010; - 8'b11001011: y = 14'b01100101100001; - 8'b11001100: y = 14'b01100101010010; - 8'b11001101: y = 14'b01100101000010; - 8'b11001110: y = 14'b01100100110010; - 8'b11001111: y = 14'b01100100100011; - 8'b11010000: y = 14'b01100100010011; - 8'b11010001: y = 14'b01100100000100; - 8'b11010010: y = 14'b01100011110101; - 8'b11010011: y = 14'b01100011100101; - 8'b11010100: y = 14'b01100011010110; - 8'b11010101: y = 14'b01100011000111; - 8'b11010110: y = 14'b01100010111001; - 8'b11010111: y = 14'b01100010101010; - 8'b11011000: y = 14'b01100010011011; - 8'b11011001: y = 14'b01100010001101; - 8'b11011010: y = 14'b01100001111110; - 8'b11011011: y = 14'b01100001110000; - 8'b11011100: y = 14'b01100001100010; - 8'b11011101: y = 14'b01100001010100; - 8'b11011110: y = 14'b01100001000110; - 8'b11011111: y = 14'b01100000111000; - 8'b11100000: y = 14'b01100000101010; - 8'b11100001: y = 14'b01100000011100; - 8'b11100010: y = 14'b01100000001111; - 8'b11100011: y = 14'b01100000000001; - 8'b11100100: y = 14'b01011111110100; - 8'b11100101: y = 14'b01011111100110; - 8'b11100110: y = 14'b01011111011001; - 8'b11100111: y = 14'b01011111001100; - 8'b11101000: y = 14'b01011110111111; - 8'b11101001: y = 14'b01011110110010; - 8'b11101010: y = 14'b01011110100101; - 
8'b11101011: y = 14'b01011110011000; - 8'b11101100: y = 14'b01011110001011; - 8'b11101101: y = 14'b01011101111110; - 8'b11101110: y = 14'b01011101110010; - 8'b11101111: y = 14'b01011101100101; - 8'b11110000: y = 14'b01011101011001; - 8'b11110001: y = 14'b01011101001100; - 8'b11110010: y = 14'b01011101000000; - 8'b11110011: y = 14'b01011100110100; - 8'b11110100: y = 14'b01011100101000; - 8'b11110101: y = 14'b01011100011100; - 8'b11110110: y = 14'b01011100010000; - 8'b11110111: y = 14'b01011100000100; - 8'b11111000: y = 14'b01011011111000; - 8'b11111001: y = 14'b01011011101100; - 8'b11111010: y = 14'b01011011100000; - 8'b11111011: y = 14'b01011011010101; - 8'b11111100: y = 14'b01011011001001; - 8'b11111101: y = 14'b01011010111101; - 8'b11111110: y = 14'b01011010110010; - 8'b11111111: y = 14'b01011010100111; - default: y = 14'bxxxxxxxxxxxxxx; - endcase // case (a) - -endmodule // sbtm_a0 - - - - \ No newline at end of file diff --git a/wally-pipelined/src/fpu/sk14.sv b/wally-pipelined/src/fpu/sk14.sv deleted file mode 100755 index 8d6aadb59..000000000 --- a/wally-pipelined/src/fpu/sk14.sv +++ /dev/null @@ -1,90 +0,0 @@ -// Sklansky Prefix Adder - -module sk14 (cout, sum, a, b, cin); - input [13:0] a, b; - input cin; - output [13:0] sum; - output cout; - - wire [14:0] p,g; - wire [13:0] c; - -// pre-computation - assign p={a^b,1'b0}; - assign g={a&b, cin}; - -// prefix tree - sklansky prefix_tree(c, p[13:0], g[13:0]); - -// post-computation - assign sum=p[14:1]^c; - assign cout=g[14]|(p[14]&c[13]); - -endmodule - -module sklansky (c, p, g); - - input [14:0] p; - input [14:0] g; - output [14:1] c; - - - // parallel-prefix, Sklansky - // Stage 1: Generates G/P pairs that span 1 bits - grey b_1_0 (G_1_0, {g[1],g[0]}, p[1]); - black b_3_2 (G_3_2, P_3_2, {g[3],g[2]}, {p[3],p[2]}); - black b_5_4 (G_5_4, P_5_4, {g[5],g[4]}, {p[5],p[4]}); - black b_7_6 (G_7_6, P_7_6, {g[7],g[6]}, {p[7],p[6]}); - black b_9_8 (G_9_8, P_9_8, {g[9],g[8]}, {p[9],p[8]}); - black b_11_10 (G_11_10, 
P_11_10, {g[11],g[10]}, {p[11],p[10]}); - black b_13_12 (G_13_12, P_13_12, {g[13],g[12]}, {p[13],p[12]}); - // Stage 2: Generates G/P pairs that span 2 bits - grey g_2_0 (G_2_0, {g[2],G_1_0}, p[2]); - grey g_3_0 (G_3_0, {G_3_2,G_1_0}, P_3_2); - black b_6_4 (G_6_4, P_6_4, {g[6],G_5_4}, {p[6],P_5_4}); - black b_7_4 (G_7_4, P_7_4, {G_7_6,G_5_4}, {P_7_6,P_5_4}); - black b_10_8 (G_10_8, P_10_8, {g[10],G_9_8}, {p[10],P_9_8}); - black b_11_8 (G_11_8, P_11_8, {G_11_10,G_9_8}, {P_11_10,P_9_8}); - black b_14_12 (G_14_12, P_14_12, {g[14],G_13_12}, {p[14],P_13_12}); - black b_15_12 (G_15_12, P_15_12, {G_15_14,G_13_12}, {P_15_14,P_13_12}); - - // Stage 3: Generates G/P pairs that span 4 bits - grey g_4_0 (G_4_0, {g[4],G_3_0}, p[4]); - grey g_5_0 (G_5_0, {G_5_4,G_3_0}, P_5_4); - grey g_6_0 (G_6_0, {G_6_4,G_3_0}, P_6_4); - grey g_7_0 (G_7_0, {G_7_4,G_3_0}, P_7_4); - black b_12_8 (G_12_8, P_12_8, {g[12],G_11_8}, {p[12],P_11_8}); - black b_13_8 (G_13_8, P_13_8, {G_13_12,G_11_8}, {P_13_12,P_11_8}); - black b_14_8 (G_14_8, P_14_8, {G_14_12,G_11_8}, {P_14_12,P_11_8}); - black b_15_8 (G_15_8, P_15_8, {G_15_12,G_11_8}, {P_15_12,P_11_8}); - - // Stage 4: Generates G/P pairs that span 8 bits - grey g_8_0 (G_8_0, {g[8],G_7_0}, p[8]); - grey g_9_0 (G_9_0, {G_9_8,G_7_0}, P_9_8); - grey g_10_0 (G_10_0, {G_10_8,G_7_0}, P_10_8); - grey g_11_0 (G_11_0, {G_11_8,G_7_0}, P_11_8); - grey g_12_0 (G_12_0, {G_12_8,G_7_0}, P_12_8); - grey g_13_0 (G_13_0, {G_13_8,G_7_0}, P_13_8); - grey g_14_0 (G_14_0, {G_14_8,G_7_0}, P_14_8); - grey g_15_0 (G_15_0, {G_15_8,G_7_0}, P_15_8); - - - // Final Stage: Apply c_k+1=G_k_0 - assign c[1]=g[0]; - assign c[2]=G_1_0; - assign c[3]=G_2_0; - assign c[4]=G_3_0; - assign c[5]=G_4_0; - assign c[6]=G_5_0; - assign c[7]=G_6_0; - assign c[8]=G_7_0; - assign c[9]=G_8_0; - - assign c[10]=G_9_0; - assign c[11]=G_10_0; - assign c[12]=G_11_0; - assign c[13]=G_12_0; - assign c[14]=G_13_0; - -endmodule - From 72406b8a88139c67358fe93bf561e9832d812099 Mon Sep 17 00:00:00 2001 From: 
Katherine Parry
Date: Fri, 2 Jul 2021 12:53:05 -0400
Subject: [PATCH 4/4] FPU update - missing files

---
 wally-pipelined/src/fpu/fclassify.sv |  62 ++++
 wally-pipelined/src/fpu/fcmp.sv      | 465 +++++++++++++++++++++++++++
 wally-pipelined/src/fpu/fdivsqrt.sv  | 256 +++++++++++++++
 wally-pipelined/src/fpu/fhazard.sv   |  67 ++++
 wally-pipelined/src/fpu/fregfile.sv  |  54 ++++
 5 files changed, 904 insertions(+)
 create mode 100644 wally-pipelined/src/fpu/fclassify.sv
 create mode 100755 wally-pipelined/src/fpu/fcmp.sv
 create mode 100755 wally-pipelined/src/fpu/fdivsqrt.sv
 create mode 100644 wally-pipelined/src/fpu/fhazard.sv
 create mode 100644 wally-pipelined/src/fpu/fregfile.sv

diff --git a/wally-pipelined/src/fpu/fclassify.sv b/wally-pipelined/src/fpu/fclassify.sv
new file mode 100644
index 000000000..a15edcb4a
--- /dev/null
+++ b/wally-pipelined/src/fpu/fclassify.sv
@@ -0,0 +1,62 @@
+// fclassify: sorts a floating-point operand into one of ten classes and reports it as a bit mask.
+`include "wally-config.vh"
+// Exactly one of ClassResE[9:0] is set for any input; single-precision operands are read from SrcXE[63:32].
+module fclassify (
+   input  logic [63:0] SrcXE,      // operand to classify
+   input  logic        FmtE,       // 0-Single 1-Double
+   output logic [63:0] ClassResE   // class mask in bits [9:0], zero-extended to 64 bits
+   );
+
+   logic [31:0] Single;
+   logic [63:0] Double;
+   logic        Sgn;
+   logic        Inf, NaN, Zero, Norm, Denorm;
+   logic        PInf, QNaN, PZero, PNorm, PDenorm;
+   logic        NInf, SNaN, NZero, NNorm, NDenorm;
+   logic        MaxExp, ExpZero, ManZero, FirstBitFrac;
+
+   // Single and Double precision layouts
+   assign Single = SrcXE[63:32];
+   assign Double = SrcXE;
+   assign Sgn    = SrcXE[63];     // sign is bit 63 in both layouts
+
+   // basic calculations for readability
+   // (exponent all zeros / all ones, fraction all zeros, top fraction bit)
+   assign ExpZero      = FmtE ? ~|Double[62:52] : ~|Single[30:23];
+   assign MaxExp       = FmtE ? &Double[62:52] : &Single[30:23];
+   assign ManZero      = FmtE ? ~|Double[51:0] : ~|Single[22:0];
+   assign FirstBitFrac = FmtE ? Double[51] : Single[22];
+
+   // determine the type of number
+   assign NaN   = MaxExp & ~ManZero;
+   assign Inf   = MaxExp & ManZero;
+   assign Zero  = ExpZero & ManZero;
+   assign Denorm= ExpZero & ~ManZero;
+   assign Norm  = ~ExpZero & ~MaxExp;   // an all-ones exponent is Inf/NaN, never a normal number
+
+   // determine the sub categories
+   assign QNaN    = FirstBitFrac&NaN;
+   assign SNaN    = ~FirstBitFrac&NaN;
+   assign PInf    = ~Sgn&Inf;
+   assign NInf    = Sgn&Inf;
+   assign PNorm   = ~Sgn&Norm;
+   assign NNorm   = Sgn&Norm;
+   assign PDenorm = ~Sgn&Denorm;
+   assign NDenorm = Sgn&Denorm;
+   assign PZero   = ~Sgn&Zero;
+   assign NZero   = Sgn&Zero;
+
+   // determine sub category and combine into the result
+   // bit 0 - -Inf
+   // bit 1 - -Norm
+   // bit 2 - -Denorm
+   // bit 3 - -Zero
+   // bit 4 - +Zero
+   // bit 5 - +Denorm
+   // bit 6 - +Norm
+   // bit 7 - +Inf
+   // bit 8 - signaling NaN
+   // bit 9 - quiet NaN
+   assign ClassResE = {{54{1'b0}}, QNaN, SNaN, PInf, PNorm, PDenorm, PZero, NZero, NDenorm, NNorm, NInf};
+
+endmodule
diff --git a/wally-pipelined/src/fpu/fcmp.sv b/wally-pipelined/src/fpu/fcmp.sv
new file mode 100755
index 000000000..f47d7c9ef
--- /dev/null
+++ b/wally-pipelined/src/fpu/fcmp.sv
@@ -0,0 +1,465 @@
+
+//
+// File name : fpcomp.v
+// Title : Floating-Point Comparator
+// project : FPU
+// Library : fpcomp
+// Author(s) : James E. Stine
+// Purpose : definition of main unit to floating-point comparator
+// notes :
+//
+// Copyright Oklahoma State University
+//
+// Floating Point Comparator (Algorithm)
+//
+// 1.) Performs sign-extension if the inputs are 32-bit integers.
+// 2.) Perform a magnitude comparison on the lower 63 bits of the inputs
+// 3.) Check for special cases (+0=-0, unordered, and infinite values)
+// and correct for sign bits
+//
+// This module takes 64-bits inputs op1 and op2, VSS, and VDD
+// signals, and a 2-bit signal FOpCtrlE that indicates the type of
+// operands being compared as indicated below.
+// FOpCtrlE Description
+// 00 double precision numbers
+// 01 single precision numbers
+// 10 half precision numbers
+// 11 (unused)
+//
+// The comparator produces a 2-bit signal FCC, which
+// indicates the result of the comparison:
+//
+// fcc description
+// 00 A = B
+// 01 A < B
+// 10 A > B
+// 11 A and B are unordered (i.e., A or B is NaN)
+//
+// It also produces an invalid operation flag, which is one
+// if either of the input operands is a signaling NaN per 754
+
+`include "wally-config.vh"
+module fcmp (
+   input  logic [63:0] op1,         // first operand (A)
+   input  logic [63:0] op2,         // second operand (B)
+   input  logic [2:0]  FOpCtrlE,    // passed to both exception stages
+   input  logic        FmtE,        // passed to exception_cmp_2
+
+
+   output logic        Invalid,     // Invalid Operation
+   // output logic [1:0] FCC, // Condition Codes
+   output logic [63:0] CmpResE);
+   // Internal nets. FCC is the condition code driven by exception_cmp_2;
+   // w and x are the 8-bit partial comparison vectors handed from
+   // comparator stage 1 to stage 2; the flags below come from the submodules.
+   logic [1:0] FCC;                 // Condition Codes
+   logic [7:0] w, x;
+   logic       ANaN, BNaN;
+   logic       Azero, Bzero;
+   logic       LT;                  // magnitude op1 < magnitude op2
+   logic       EQ;                  // magnitude op1 = magnitude op2
+
+   magcompare64b_1 magcomp1 (.w(w), .x(x), .A({~op1[63], op1[62:0]}), .B({~op2[63], op2[62:0]}));
+
+   // Special-case detection, first half: NaN and zero flags for each
+   // operand, computed directly from op1 and op2.
+   exception_cmp_1 exc1 (.ANaN(ANaN), .BNaN(BNaN), .Azero(Azero), .Bzero(Bzero), .A(op1), .B(op2), .FOpCtrlE(FOpCtrlE));
+
+   // Comparator stage 2: collapse the w/x vectors from stage 1 into the
+   // final LT and EQ flags. GT is not exported, since it can be
+   // determined from these values.
+   magcompare64b_2 magcomp2 (.LT(LT), .EQ(EQ), .w(w), .x(x));
+
+   // Special-case handling, second half: merge the magnitude flags with
+   // the NaN/zero flags to drive Invalid, FCC and CmpResE.
+   exception_cmp_2 exc2 (.A(op1), .B(op2), .FmtE(FmtE), .LT_mag(LT), .EQ_mag(EQ), .FOpCtrlE(FOpCtrlE), .invalid(Invalid), .fcc(FCC), .CmpResE(CmpResE), .Azero(Azero), .Bzero(Bzero), .ANaN(ANaN), .BNaN(BNaN));
+
+endmodule // fcmp
+
+// module magcompare2b (LT, GT, A, B);
+
+// input logic [1:0] A;
+// input logic [1:0] B;
+
+// output logic LT;
+// output logic GT;
+
+// // Determine if A < B using a minimized sum-of-products expression
+// assign LT = ~A[1]&B[1] | ~A[1]&~A[0]&B[0] | ~A[0]&B[1]&B[0];
+// // Determine if A > B using a minimized sum-of-products expression
+// assign GT = A[1]&~B[1] | A[1]&A[0]&~B[0] | A[0]&~B[1]&~B[0];
+
+// endmodule // magcompare2b
+
+// 2-bit magnitude comparator
+// Compares two 2-bit values A and B: LT is '1' if A < B and GT is '1'
+// if A > B; both are '0' if A = B. Unlike the sum-of-products version
+// above, these equations incorporate don't cares, which reduces each
+// output to one OR of a bit with one gated bit.
+
+module magcompare2c (LT, GT, A, B);
+
+   input logic [1:0] A;
+   input logic [1:0] B;
+
+   output logic LT;
+   output logic GT;
+
+   assign LT = B[1] | (B[0] & ~A[1]);
+   assign GT = A[1] | (A[0] & ~B[1]);
+
+endmodule // magcompare2c
+
+// This module compares two 64-bit values A and B. LT is '1' if A < B
+// and EQ is '1' if A = B. LT and EQ are both '0' if A > B.
+// This structure was modified so
+// that it only does a strict magnitude comparison, and only
+// returns flags for less than (LT) and equal to (EQ). It uses a tree
+// of 63 2-bit magnitude comparators, followed by one OR gate.
+//
+// J. E. Stine and M. J. Schulte, "A combined two's complement and
+// floating-point comparator," 2005 IEEE International Symposium on
+// Circuits and Systems, Kobe, 2005, pp. 89-92 Vol. 1.
+// doi: 10.1109/ISCAS.2005.1464531
+
+module magcompare64b_1 (w, x, A, B);
+   // First three levels of the 64-bit comparator tree: 32 -> 16 -> 8 pairs of comparison flags.
+   input logic [63:0] A;
+   input logic [63:0] B;
+   // s/t: level-1 results, u/v: level-2 results, w/x (outputs): level-3 results.
+   logic [31:0] s;
+   logic [31:0] t;
+   logic [15:0] u;
+   logic [15:0] v;
+   output logic [7:0] w;
+   output logic [7:0] x;
+   // Level 1: one magcompare2b per 2-bit slice. NOTE(review): magcompare2b exists in this file only as commented-out code - confirm it is defined elsewhere.
+   magcompare2b mag1(s[0], t[0], A[1:0], B[1:0]);
+   magcompare2b mag2(s[1], t[1], A[3:2], B[3:2]);
+   magcompare2b mag3(s[2], t[2], A[5:4], B[5:4]);
+   magcompare2b mag4(s[3], t[3], A[7:6], B[7:6]);
+   magcompare2b mag5(s[4], t[4], A[9:8], B[9:8]);
+   magcompare2b mag6(s[5], t[5], A[11:10], B[11:10]);
+   magcompare2b mag7(s[6], t[6], A[13:12], B[13:12]);
+   magcompare2b mag8(s[7], t[7], A[15:14], B[15:14]);
+   magcompare2b mag9(s[8], t[8], A[17:16], B[17:16]);
+   magcompare2b magA(s[9], t[9], A[19:18], B[19:18]);
+   magcompare2b magB(s[10], t[10], A[21:20], B[21:20]);
+   magcompare2b magC(s[11], t[11], A[23:22], B[23:22]);
+   magcompare2b magD(s[12], t[12], A[25:24], B[25:24]);
+   magcompare2b magE(s[13], t[13], A[27:26], B[27:26]);
+   magcompare2b magF(s[14], t[14], A[29:28], B[29:28]);
+   magcompare2b mag10(s[15], t[15], A[31:30], B[31:30]);
+   magcompare2b mag11(s[16], t[16], A[33:32], B[33:32]);
+   magcompare2b mag12(s[17], t[17], A[35:34], B[35:34]);
+   magcompare2b mag13(s[18], t[18], A[37:36], B[37:36]);
+   magcompare2b mag14(s[19], t[19], A[39:38], B[39:38]);
+   magcompare2b mag15(s[20], t[20], A[41:40], B[41:40]);
+   magcompare2b mag16(s[21], t[21], A[43:42], B[43:42]);
+   magcompare2b mag17(s[22], t[22], A[45:44], B[45:44]);
+   magcompare2b mag18(s[23], t[23], A[47:46], B[47:46]);
+   magcompare2b mag19(s[24], t[24], A[49:48], B[49:48]);
+   magcompare2b mag1A(s[25], t[25], A[51:50], B[51:50]);
+   magcompare2b mag1B(s[26], t[26], A[53:52], B[53:52]);
+   magcompare2b mag1C(s[27], t[27], A[55:54], B[55:54]);
+   magcompare2b mag1D(s[28], t[28], A[57:56], B[57:56]);
+   magcompare2b mag1E(s[29], t[29], A[59:58], B[59:58]);
+   magcompare2b mag1F(s[30], t[30], A[61:60], B[61:60]);
+   magcompare2b mag20(s[31], t[31], A[63:62], B[63:62]);
+   // Level 2: each magcompare2c merges two adjacent level-1 results (t feeds port A, s feeds port B).
+   magcompare2c mag21(u[0], v[0], t[1:0], s[1:0]);
+   magcompare2c mag22(u[1], v[1], t[3:2], s[3:2]);
+   magcompare2c mag23(u[2], v[2], t[5:4], s[5:4]);
+   magcompare2c mag24(u[3], v[3], t[7:6], s[7:6]);
+   magcompare2c mag25(u[4], v[4], t[9:8], s[9:8]);
+   magcompare2c mag26(u[5], v[5], t[11:10], s[11:10]);
+   magcompare2c mag27(u[6], v[6], t[13:12], s[13:12]);
+   magcompare2c mag28(u[7], v[7], t[15:14], s[15:14]);
+   magcompare2c mag29(u[8], v[8], t[17:16], s[17:16]);
+   magcompare2c mag2A(u[9], v[9], t[19:18], s[19:18]);
+   magcompare2c mag2B(u[10], v[10], t[21:20], s[21:20]);
+   magcompare2c mag2C(u[11], v[11], t[23:22], s[23:22]);
+   magcompare2c mag2D(u[12], v[12], t[25:24], s[25:24]);
+   magcompare2c mag2E(u[13], v[13], t[27:26], s[27:26]);
+   magcompare2c mag2F(u[14], v[14], t[29:28], s[29:28]);
+   magcompare2c mag30(u[15], v[15], t[31:30], s[31:30]);
+   // Level 3: same merge on the level-2 results (v feeds port A, u feeds port B); w takes each LT port, x each GT port.
+   magcompare2c mag31(w[0], x[0], v[1:0], u[1:0]);
+   magcompare2c mag32(w[1], x[1], v[3:2], u[3:2]);
+   magcompare2c mag33(w[2], x[2], v[5:4], u[5:4]);
+   magcompare2c mag34(w[3], x[3], v[7:6], u[7:6]);
+   magcompare2c mag35(w[4], x[4], v[9:8], u[9:8]);
+   magcompare2c mag36(w[5], x[5], v[11:10], u[11:10]);
+   magcompare2c mag37(w[6], x[6], v[13:12], u[13:12]);
+   magcompare2c mag38(w[7], x[7], v[15:14], u[15:14]);
+
+endmodule // magcompare64b_1
+
+// exception_cmp_1 takes 64-bit inputs A and B and the control signal
+// FOpCtrlE, whose low two bits select the operand format used for the
+// NaN test as indicated below.
+// FOpCtrlE[1:0] Description
+// 00 double precision numbers
+// 01 single precision numbers
+// 10 half precision numbers
+// 11 no format selected (ANaN and BNaN stay 0)
+//
+// It produces four flags:
+// ANaN, BNaN - the operand matches the NaN pattern of the selected format
+// Azero, Bzero - the low 63 bits of the operand are all zero (+0 or -0)
+//
+// The magnitude flags (LT_mag, EQ_mag), the 2-bit fcc result
+// (00 A = B, 01 A < B, 10 A > B, 11 unordered) and the invalid
+// operation flag for signaling NaNs are handled by exception_cmp_2,
+// not by this module.
+//
+
+module exception_cmp_1 (ANaN, BNaN, Azero, Bzero, A, B, FOpCtrlE);
+   // Flags NaN and zero operands for the comparator; the operand format is decoded from FOpCtrlE[1:0].
+   input logic [63:0] A;
+   input logic [63:0] B;
+   input logic [2:0] FOpCtrlE;
+   // Format selects: 00 = double, 01 = single, 10 = half; 11 selects none, so both NaN flags stay 0.
+   logic dp, sp, hp;
+
+   output logic ANaN;
+   output logic BNaN;
+   output logic Azero;
+   output logic Bzero;
+
+   assign dp = !FOpCtrlE[1]&!FOpCtrlE[0];
+   assign sp = !FOpCtrlE[1]&FOpCtrlE[0];
+   assign hp = FOpCtrlE[1]&!FOpCtrlE[0];
+   // NaN = all-ones exponent AND nonzero fraction. The fraction is OR-reduced over its whole field
+   // (sp [54:32], dp [51:0], hp [57:48]) so NaNs with a payload only in the low bits are caught too.
+   assign ANaN = (A[62]&A[61]&A[60]&A[59]&A[58]) &
+                 ((sp&A[57]&A[56]&A[55]&(|A[54:32])) |
+                  (dp&A[57]&A[56]&A[55]&A[54]&A[53]&A[52]&(|A[51:0])) |
+                  (hp&(|A[57:48])));
+
+   assign BNaN = (B[62]&B[61]&B[60]&B[59]&B[58]) &
+                 ((sp&B[57]&B[56]&B[55]&(|B[54:32])) |
+                  (dp&B[57]&B[56]&B[55]&B[54]&B[53]&B[52]&(|B[51:0])) |
+                  (hp&(|B[57:48])));
+
+   // Test if A is +0 or -0 when viewed as a floating point number (i.e,
+   // the 63 least significant bits of A are zero).
+   // Depending on how this synthesizes, it may work better to replace
+   // this with assign Azero = ~(A[62] | A[61] | ... | A[0])
+   assign Azero = (A[62:0] == 63'h0);
+   assign Bzero = (B[62:0] == 63'h0);
+
+endmodule // exception_cmp_1
+//
+// File name : fpcomp.v
+// Title : Floating-Point Comparator
+// project : FPU
+// Library : fpcomp
+// Author(s) : James E. Stine
+// Purpose : definition of main unit to floating-point comparator
+// notes :
+//
+// Copyright Oklahoma State University
+//
+// Floating Point Comparator (Algorithm)
+//
+// 1.) Performs sign-extension if the inputs are 32-bit integers.
+// 2.) Perform a magnitude comparison on the lower 63 bits of the inputs
+// 3.) Check for special cases (+0=-0, unordered, and infinite values)
+// and correct for sign bits
+//
+// This module takes 64-bits inputs op1 and op2, VSS, and VDD
+// signals, and a 2-bit signal FOpCtrlE that indicates the type of
+// operands being compared as indicated below.
+// FOpCtrlE Description +// 00 double precision numbers +// 01 single precision numbers +// 10 half precision numbers +// 11 (unused) +// +// The comparator produces a 2-bit signal FCC, which +// indicates the result of the comparison: +// +// fcc decscription +// 00 A = B +// 01 A < B +// 10 A > B +// 11 A and B are unordered (i.e., A or B is NaN) +// +// It also produces an invalid operation flag, which is one +// if either of the input operands is a signaling NaN per 754 + + +/*module magcompare2b (LT, GT, A, B); + + input logic [1:0] A; + input logic [1:0] B; + + output logic LT; + output logic GT; + + // Determine if A < B using a minimized sum-of-products expression + assign LT = ~A[1]&B[1] | ~A[1]&~A[0]&B[0] | ~A[0]&B[1]&B[0]; + // Determine if A > B using a minimized sum-of-products expression + assign GT = A[1]&~B[1] | A[1]&A[0]&~B[0] | A[0]&~B[1]&~B[0]; + +endmodule*/ // magcompare2b + +// 2-bit magnitude comparator +// This module compares two 2-bit values A and B. LT is '1' if A < B +// and GT is '1'if A > B. LT and GT are both '0' if A = B. However, +// this version actually incorporates don't cares into the equation to +// simplify the optimization + +// module magcompare2c (LT, GT, A, B); + +// input logic [1:0] A; +// input logic [1:0] B; + +// output logic LT; +// output logic GT; + +// assign LT = B[1] | (!A[1]&B[0]); +// assign GT = A[1] | (!B[1]&A[0]); + +// endmodule // magcompare2b + +// This module compares two 64-bit values A and B. LT is '1' if A < B +// and EQ is '1'if A = B. LT and GT are both '0' if A > B. +// This structure was modified so +// that it only does a strict magnitdude comparison, and only +// returns flags for less than (LT) and eqaual to (EQ). It uses a tree +// of 63 2-bit magnitude comparators, followed by one OR gates. +// +// J. E. Stine and M. J. Schulte, "A combined two's complement and +// floating-point comparator," 2005 IEEE International Symposium on +// Circuits and Systems, Kobe, 2005, pp. 89-92 Vol. 1. 
+// doi: 10.1109/ISCAS.2005.1464531
+
+// magcompare64b_2: three-level tree of 2-bit comparator cells that reduces the
+// 8-bit inputs w and x to a "less than" flag (LT) and an "equal" flag (EQ).
+// Ports are connected by position, in the order (LT, EQ, w, x).
+// NOTE(review): despite the "64b" in the name, w and x are declared 8 bits wide.
+// NOTE(review): the only magcompare2c visible in this file is a commented-out
+// copy with ports (LT, GT, A, B); the comments below assume the live
+// definition uses that same port order - confirm.
+module magcompare64b_2 (LT, EQ, w, x);
+
+   input logic [7:0]  w;
+   input logic [7:0]  x;
+   logic [3:0] 	      y;    // level-1 first outputs, one per 2-bit slice
+   logic [3:0] 	      z;    // level-1 second outputs, one per 2-bit slice
+   logic [1:0] 	      a;    // level-2 first outputs
+   logic [1:0] 	      b;    // level-2 second outputs
+   logic 	      GT;   // internal only; used to derive EQ
+
+   output logic       LT;
+   output logic       EQ;
+
+   // Level 1: compare each 2-bit slice of x against the same slice of w.
+   magcompare2c mag39(y[0], z[0], x[1:0], w[1:0]);
+   magcompare2c mag3A(y[1], z[1], x[3:2], w[3:2]);
+   magcompare2c mag3B(y[2], z[2], x[5:4], w[5:4]);
+   magcompare2c mag3C(y[3], z[3], x[7:6], w[7:6]);
+
+   // Level 2: combine pairs of level-1 results (note the z/y operand order).
+   magcompare2c mag3D(a[0], b[0], z[1:0], y[1:0]);
+   magcompare2c mag3E(a[1], b[1], z[3:2], y[3:2]);
+
+   // Level 3: final combine produces LT and the internal GT.
+   magcompare2c mag3F(LT, GT, b[1:0], a[1:0]);
+
+   // Equal when neither "less than" nor "greater than" is asserted.
+   assign EQ = ~(LT | GT);
+
+endmodule // magcompare64b_2
+
+// This module takes 64-bits inputs A and B, two magnitude comparison
+// flags LT_mag and EQ_mag, and a 2-bit signal FOpCtrlE that indicates the type of
+// operands being compared as indicated below.
+//	FOpCtrlE  Description
+//	00    double precision numbers
+//	01    single precision numbers
+//	10    half precision numbers
+//	11    bfloat precision numbers
+//
+// The comparator produces a 2-bit signal fcc, which
+// indicates the result of the comparison as follows:
+//     fcc 	description
+//      00	A = B
+//      01	A < B
+//      10	A > B
+//      11	A and B	are unordered (i.e., A or B is NaN)
+// It also produces an invalid operation flag, which is one
+// if either of the input operands is a signaling NaN.
+
+module exception_cmp_2 (
+   input logic [63:0] A,          // first operand
+   input logic [63:0] B,          // second operand
+   input logic        FmtE,       // not referenced inside this module
+   input logic        LT_mag,     // "less than" flag from the magnitude comparator
+   input logic        EQ_mag,     // "equal" flag from the magnitude comparator
+   input logic [2:0]  FOpCtrlE,   // selects CmpResE (see case below); bits [1:0] also decode dp/sp/hp
+
+   output logic       invalid,    // 1 when either operand is a signaling NaN
+   output logic [1:0] fcc,        // 00 A=B, 01 A<B, 10 A>B, 11 unordered
+   output logic [63:0] CmpResE,   // result selected by FOpCtrlE
+
+   input logic        Azero,      // A is +0 or -0
+   input logic        Bzero,      // B is +0 or -0
+   input logic        ANaN,       // A is a NaN
+   input logic        BNaN);      // B is a NaN
+
+   // NOTE(review): FOpCtrlE[1:0] is decoded as the operand precision here while
+   // the full FOpCtrlE[2:0] is also the operation select in the case statement
+   // below (e.g. 3'b010 "equal" decodes as hp, 3'b111 "min" decodes as no
+   // precision).  FmtE is an input but is never used.  Confirm the intended
+   // source of the precision select.
+   logic dp;
+   logic sp;
+   logic hp;
+   logic ASNaN;
+   logic BSNaN;
+   logic UO;
+   logic GT;
+   logic LT;
+   logic EQ;
+
+   assign dp = !FOpCtrlE[1]&!FOpCtrlE[0];
+   assign sp = !FOpCtrlE[1]&FOpCtrlE[0];
+   assign hp = FOpCtrlE[1]&!FOpCtrlE[0];
+
+   // Values are unordered if (A is NaN) OR (B is NaN).
+   assign UO = (ANaN | BNaN);
+
+   // Test if A or B is a signaling NaN.  A NaN is signaling when its quiet bit,
+   // the most-significant fraction bit, is 0.  With the exponent occupying
+   // bits 62:52 (dp), 62:55 (sp) or 62:58 (hp), the quiet bit is bit 51, 54 or
+   // 57 respectively.  (Testing the next bit down, as was done before,
+   // reports the quiet NaN 64'h7FF8_0000_0000_0000 as signaling.)
+   assign ASNaN = ANaN & (sp&~A[54] | dp&~A[51] | hp&~A[57]);
+   assign BSNaN = BNaN & (sp&~B[54] | dp&~B[51] | hp&~B[57]);
+
+   // If either A or B is a signaling NaN the "Invalid Operation"
+   // exception flag is set to one; otherwise it is zero.
+   assign invalid = (ASNaN | BSNaN);
+
+   // A and B are equal if (their magnitudes are equal) OR (both are zero,
+   // so that +0 == -0).  Also, A and B are not equal if they are unordered.
+   assign EQ = (EQ_mag | (Azero&Bzero)) & (~UO);
+
+   // A is less than B if
+   //   (both sign bits are set and LT_mag is 0), or
+   //   (LT_mag is 1 and the sign bits are not both set),
+   // and A and B are neither equal nor unordered.
+   // NOTE(review): this relies on how LT_mag treats the sign bits, which is
+   // decided by the comparator instance outside this module - confirm.
+   assign LT = ((~LT_mag & A[63] & B[63]) |
+		(LT_mag & ~(A[63] & B[63])))&~EQ&~UO;
+
+   // A is greater than B when LT, EQ, and UO are all false.
+   assign GT = ~(LT | EQ | UO);
+
+   // Note: it may be possible to optimize the setting of fcc
+   // a little more, but it is probably not worth the effort.
+
+   // Set the bits of fcc based on LT, GT, EQ, and UO
+   assign fcc[0] = LT | UO;
+   assign fcc[1] = GT | UO;
+
+   // Result select.
+   // NOTE(review): when the operands are unordered both LT and GT are 0, so
+   // min and max return B even if B is the NaN operand - confirm this is the
+   // intended NaN handling.
+   always_comb begin
+      case (FOpCtrlE[2:0])
+         3'b111: CmpResE = LT ? A : B;//min
+         3'b101: CmpResE = GT ? A : B;//max
+         3'b010: CmpResE = {63'b0, EQ};//equal
+         3'b001: CmpResE = {63'b0, LT};//less than
+         3'b011: CmpResE = {63'b0, LT|EQ};//less than or equal
+         default: CmpResE = 64'b0;
+      endcase
+   end
+
+endmodule // exception_cmp_2
diff --git a/wally-pipelined/src/fpu/fdivsqrt.sv b/wally-pipelined/src/fpu/fdivsqrt.sv
new file mode 100755
index 000000000..6d8da23f2
--- /dev/null
+++ b/wally-pipelined/src/fpu/fdivsqrt.sv
@@ -0,0 +1,256 @@
+//
+// File name   : fpdiv
+// Title       : Floating-Point Divider/Square-Root
+// project     : FPU
+// Library     : fpdiv
+// Author(s)   : James E. Stine, Jr.
+// Purpose     : definition of main unit to floating-point div/sqrt
+// notes       :
+//
+// Copyright Oklahoma State University
+//
+// Basic Operations
+//
+// Step 1: Load operands, set flags, and convert SP to DP
+// Step 2: Check for special inputs ( +/- Infinity,  NaN)
+// Step 3: Exponent Logic
+// Step 4: Divide/Sqrt using Goldschmidt
+// Step 5: Normalize the result.//
+//   Shift left until normalized.  Normalized when the value to the
+//   left of the binary point is 1.
+// Step 6: Round the result.//
+// Step 7: Put quotient/remainder onto output.
+//
+
+// fdivsqrt: top level of the floating-point divide / square-root unit.
+// It wires together operand conversion (convert_inputs_div), exception
+// detection (exception_div), exponent arithmetic (csa + exp_add), the iterative
+// datapath (divconv), its controller (fsm) and the rounder (rounder_div), then
+// registers the result and flags.  All sub-modules are connected by position;
+// their definitions are not part of this module.
+// DivOpType selects the square-root exponent/sign path when 1 and the divide
+// path when 0 (see expF and signResult below).
+// NOTE(review): many of the wires declared below (e.g. IntValue, exponent,
+// sum, align_shift, norm_shift, swap, sub, donev ...) are never referenced
+// in this module.
+// `timescale 1ps/1ps
+module fdivsqrt (FDivSqrtDoneE, FDivResultM, FDivSqrtFlgM, DivInput1E, DivInput2E, FrmE, DivOpType, FmtE, DivOvEn, DivUnEn,
+	    FDivStartE, reset, clk, FDivBusyE, HoldInputs);
+
+   input [63:0] DivInput1E;		// 1st input operand (A)
+   input [63:0] DivInput2E;		// 2nd input operand (B)
+   input [2:0] 	FrmE;		// Rounding mode - specify values
+   input 	DivOpType;	// Function opcode
+   input 	FmtE;   		// Result Precision (0 for double, 1 for single) //***will need to swap this
+   input 	DivOvEn;		// Overflow trap enabled
+   input 	DivUnEn;   	// Underflow trap enabled
+
+   input 	FDivStartE;
+   input 	reset;
+   input 	clk;
+
+   output [63:0] FDivResultM;	// Result of operation
+   output [4:0]  FDivSqrtFlgM;   	// IEEE exception flags
+   output 	 FDivSqrtDoneE;
+   output FDivBusyE, HoldInputs;
+
+   supply1 	  vdd;
+   supply0 	  vss;
+
+   wire [63:0] 	 Float1;
+   wire [63:0] 	 Float2;
+   wire [63:0] 	 IntValue;
+
+   wire 	 DivDenormM;    	// DivDenormM on input or output
+   wire [12:0] 	 exp1, exp2, expF;
+   wire [12:0] 	 exp_diff, bias;
+   wire [13:0] 	 exp_sqrt;
+   wire [12:0] 	 exp_s;
+   wire [12:0] 	 exp_c;
+
+   wire [10:0] 	 exponent, exp_pre;
+   wire [63:0] 	 Result;
+   wire [52:0] 	 mantissaA;
+   wire [52:0] 	 mantissaB;
+   wire [63:0] 	 sum, sum_tc, sum_corr, sum_norm;
+
+   wire [5:0] 	 align_shift;
+   wire [5:0] 	 norm_shift;
+   wire [2:0] 	 sel_inv;
+   wire		 op1_Norm, op2_Norm;
+   wire		 opA_Norm, opB_Norm;
+   wire		 Invalid;
+   wire 	 DenormIn, DenormIO;
+   wire [4:0] 	 FlagsIn;
+   wire 	 exp_gt63;
+   wire 	 Sticky_out;
+   wire 	 signResult, sign_corr;
+   wire 	 corr_sign;
+   wire 	 zeroB;
+   wire 	 convert;
+   wire 	 swap;
+   wire 	 sub;
+
+   wire [63:0] 	 q1, qm1, qp1, q0, qm0, qp0;
+   wire [63:0] 	 rega_out, regb_out, regc_out, regd_out;
+   wire [127:0]  regr_out;
+   wire [2:0] 	 sel_muxa, sel_muxb;
+   wire 	 sel_muxr;
+   wire 	 load_rega, load_regb, load_regc, load_regd, load_regr, load_regs;
+
+   wire 	 donev, sel_muxrv, sel_muxsv;
+   wire [1:0] 	 sel_muxav, sel_muxbv;
+   wire 	 load_regav, load_regbv, load_regcv;
+   wire 	 load_regrv, load_regsv;
+
+   // "open" only absorbs the unused top sum bit of explogic1 below.
+   logic exp_cout1, exp_cout2, exp_odd, open;
+
+
+   // Convert the input operands to their appropriate forms based on
+   // the original operands, the DivOpType , and their precision FmtE.
+   // Single precision inputs are converted to double precision
+   // and the sign of the first operand is set appropriately based on
+   // if the operation is absolute value or negation.
+   convert_inputs_div divconv1 (Float1, Float2, DivInput1E, DivInput2E, DivOpType, FmtE);
+
+   // Test for exceptions and return the "Invalid Operation" and
+   // "Denormalized" Input FDivSqrtFlgM. The "sel_inv" is used in
+   // the third pipeline stage to select the result. Also, op1_Norm
+   // and op2_Norm are one if DivInput1E and DivInput2E are not zero or denormalized.
+   // sub is one if the effective operation is subtraction.
+   exception_div divexc1 (sel_inv, Invalid, DenormIn, op1_Norm, op2_Norm,
+		      Float1, Float2, DivOpType);
+
+   // Determine Sign/Mantissa
+   // Sign is the XOR of the operand signs when DivOpType is 0, and the sign of
+   // the first operand when DivOpType is 1.  The mantissas get an explicit
+   // leading one (vdd) prepended to the 52 fraction bits.
+   assign signResult = ((Float1[63]^Float2[63])&~DivOpType) | Float1[63]&DivOpType;
+   assign mantissaA = {vdd, Float1[51:0]};
+   assign mantissaB = {vdd, Float2[51:0]};
+   // Perform Exponent Subtraction - expA - expB + Bias
+   assign exp1 = {2'b0, Float1[62:52]};
+   assign exp2 = {2'b0, Float2[62:52]};
+   // bias : DP = 2^{11-1}-1 = 1023
+   assign bias = {3'h0, 10'h3FF};
+   // Divide exponent
+   // exp1 + ~exp2 + bias is compressed by the carry-save adder and then summed
+   // by exp_add with carry-in 1 (completing the two's-complement negation of exp2).
+   csa #(13) csa1 (exp1, ~exp2, bias, exp_s, exp_c); //***adder
+   exp_add explogic1 (exp_cout1, {open, exp_diff}, //***adder?
+		      {vss, exp_s}, {vss, exp_c}, 1'b1);
+   // Sqrt exponent (check if exponent is odd)
+   // exp_odd is 1 when bit 52 (the exponent LSB) is 0; it is the carry-in of
+   // the exp1 + 1023 addition and is also passed to divconv.
+   assign exp_odd = Float1[52] ? vss : vdd;
+   exp_add explogic2 (exp_cout2, exp_sqrt, //***adder?
+		      {vss, exp1}, {4'h0, 10'h3ff}, exp_odd);
+   // Choose correct exponent
+   // (the sqrt path uses the 14-bit sum shifted right by one, i.e. halved)
+   assign expF = DivOpType ? exp_sqrt[13:1] : exp_diff;
+
+   // Main Goldschmidt/Division Routine
+   divconv goldy (q1, qm1, qp1, q0, qm0, qp0,
+		  rega_out, regb_out, regc_out, regd_out,
+		  regr_out, mantissaB, mantissaA,
+		  sel_muxa, sel_muxb, sel_muxr,
+		  reset, clk,
+		  load_rega, load_regb, load_regc, load_regd,
+		  load_regr, load_regs, FmtE, DivOpType, exp_odd);
+
+   // FSM : control divider
+   fsm control (FDivSqrtDoneE, load_rega, load_regb, load_regc, load_regd,
+		load_regr, load_regs, sel_muxa, sel_muxb, sel_muxr,
+		clk, reset, FDivStartE, DivOpType, FDivBusyE, HoldInputs);
+
+   // Round the mantissa to a 52-bit value, with the leading one
+   // removed. The rounding unit also handles special cases and
+   // sets the exception flags.
+   //***add max magnitude and swap negative and positive infinity
+   rounder_div divround1 (Result, DenormIO, FlagsIn,
+			  FrmE, FmtE, DivOvEn, DivUnEn, expF,
+   			  sel_inv, Invalid, DenormIn, signResult,
+			  q1, qm1, qp1, q0, qm0, qp0, regr_out);
+
+   // Store the final result and the exception flags in registers.
+   // FDivSqrtDoneE is passed in the third port position of each flopenr
+   // (presumably the load enable - confirm against the flopenr definition).
+   flopenr #(64) rega (clk, reset, FDivSqrtDoneE, Result, FDivResultM);
+   flopenr #(1) regb (clk, reset, FDivSqrtDoneE, DenormIO, DivDenormM);
+   flopenr #(5) regc (clk, reset, FDivSqrtDoneE, FlagsIn, FDivSqrtFlgM);
+
+endmodule // fdivsqrt
+
+//
+// Brent-Kung Prefix Adder
+//   (yes, it is 14 bits as my generator is broken for 13 bits :(
+//    assume, synthesizer will delete stuff not needed )
+//
+// exp_add: 14-bit adder, {cout, sum} = a + b + cin.
+// The carry-in is folded into the prefix tree as the generate bit of an extra
+// least-significant position (index 0), so operand bit k sits at prefix
+// position k+1.
+module exp_add (cout, sum, a, b, cin);
+
+   input [13:0] a, b;
+   input 	cin;
+
+   output [13:0] sum;
+   output 	 cout;
+
+   wire [14:0] 	 p,g;
+   wire [13:0] 	 c;     // c[k] = carry into operand bit k (brent_kung c[k+1])
+
+   // pre-computation
+   // propagate / generate per bit, shifted up by one to make room for cin
+   assign p={a^b,1'b0};
+   assign g={a&b, cin};
+
+   // prefix tree
+   brent_kung prefix_tree(c, p[13:0], g[13:0]);
+
+   // post-computation
+   // sum bit = propagate XOR incoming carry; cout is one more carry step from
+   // the top position, which is outside the 14-position prefix tree.
+   assign sum=p[14:1]^c;
+   assign cout=g[14]|(p[14]&c[13]);
+
+endmodule // exp_add
+
+// brent_kung: 14-position parallel-prefix carry network.
+//   p, g : per-position propagate / generate inputs
+//   c    : c[k+1] is the group generate of positions k..0 (G_k_0)
+// Built from "grey" cells (instantiated with a group-generate output only) and
+// "black" cells (group-generate and group-propagate outputs); both cell
+// types are defined outside this module.
+module brent_kung (c, p, g);
+
+   input [13:0] p;
+   input [13:0] g;
+   output [14:1] c;
+
+   // Naming: G_i_j / P_i_j = group generate / propagate spanning positions i down to j.
+   logic G_1_0, G_3_2,G_5_4,G_7_6,G_9_8,G_11_10,G_13_12,G_3_0,G_7_4,G_11_8;
+   logic P_3_2,P_5_4,P_7_6,P_9_8,P_11_10,P_13_12,P_7_4,P_11_8;
+   logic G_7_0,G_11_0,G_5_0,G_9_0,G_13_0,G_2_0,G_4_0,G_6_0,G_8_0,G_10_0,G_12_0;
+    // parallel-prefix, Brent-Kung
+
+   // Stage 1: Generates G/P pairs that span 1 bits
+   grey b_1_0 (G_1_0, {g[1],g[0]}, p[1]);
+   black b_3_2 (G_3_2, P_3_2, {g[3],g[2]}, {p[3],p[2]});
+   black b_5_4 (G_5_4, P_5_4, {g[5],g[4]}, {p[5],p[4]});
+   black b_7_6 (G_7_6, P_7_6, {g[7],g[6]}, {p[7],p[6]});
+   black b_9_8 (G_9_8, P_9_8, {g[9],g[8]}, {p[9],p[8]});
+   black b_11_10 (G_11_10, P_11_10, {g[11],g[10]}, {p[11],p[10]});
+   black b_13_12 (G_13_12, P_13_12, {g[13],g[12]}, {p[13],p[12]});
+
+   // Stage 2: Generates G/P pairs that span 2 bits
+   grey g_3_0 (G_3_0, {G_3_2,G_1_0}, P_3_2);
+   black b_7_4 (G_7_4, P_7_4, {G_7_6,G_5_4}, {P_7_6,P_5_4});
+   black b_11_8 (G_11_8, P_11_8, {G_11_10,G_9_8}, {P_11_10,P_9_8});
+
+   // Stage 3: Generates G/P pairs that span 4 bits
+   grey g_7_0 (G_7_0, {G_7_4,G_3_0}, P_7_4);
+
+   // Stage 4: Generates G/P pairs that span 8 bits
+   // (no cells are instantiated for this stage)
+
+   // Stage 5: Generates G/P pairs that span 4 bits
+   grey g_11_0 (G_11_0, {G_11_8,G_7_0}, P_11_8);
+
+   // Stage 6: Generates G/P pairs that span 2 bits
+   grey g_5_0 (G_5_0, {G_5_4,G_3_0}, P_5_4);
+   grey g_9_0 (G_9_0, {G_9_8,G_7_0}, P_9_8);
+   grey g_13_0 (G_13_0, {G_13_12,G_11_0}, P_13_12);
+
+   // Last grey cell stage
+   // fills in the even positions from the neighbouring odd-position prefixes
+   grey g_2_0 (G_2_0, {g[2],G_1_0}, p[2]);
+   grey g_4_0 (G_4_0, {g[4],G_3_0}, p[4]);
+   grey g_6_0 (G_6_0, {g[6],G_5_0}, p[6]);
+   grey g_8_0 (G_8_0, {g[8],G_7_0}, p[8]);
+   grey g_10_0 (G_10_0, {g[10],G_9_0}, p[10]);
+   grey g_12_0 (G_12_0, {g[12],G_11_0}, p[12]);
+
+   // Final Stage: Apply c_k+1=G_k_0
+   assign c[1]=g[0];
+   assign c[2]=G_1_0;
+   assign c[3]=G_2_0;
+   assign c[4]=G_3_0;
+   assign c[5]=G_4_0;
+   assign c[6]=G_5_0;
+   assign c[7]=G_6_0;
+   assign c[8]=G_7_0;
+   assign c[9]=G_8_0;
+
+   assign c[10]=G_9_0;
+   assign c[11]=G_10_0;
+   assign c[12]=G_11_0;
+   assign c[13]=G_12_0;
+   assign c[14]=G_13_0;
+
+endmodule // brent_kung
+
diff --git a/wally-pipelined/src/fpu/fhazard.sv b/wally-pipelined/src/fpu/fhazard.sv
new file mode 100644
index 000000000..53f7dde2c
--- /dev/null
+++ b/wally-pipelined/src/fpu/fhazard.sv
@@ -0,0 +1,67 @@
+///////////////////////////////////////////
+// fpuhazard.sv
+//
+// Written: me@KatherineParry.com 19 May 2021
+// Modified:
+//
+// Purpose: Determine forwarding, stalls and flushes for the FPU
+//
+// A component of the Wally configurable RISC-V project.
+// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, +// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT +// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+///////////////////////////////////////////
+
+`include "wally-config.vh"
+
+// fhazard: forwarding and stall decisions for the three FPU source operands.
+//
+// For each source address (Adr1E/Adr2E/Adr3E -> ForwardXE/YE/ZE):
+//   * matches RdM while FWriteEnM is set:
+//       - FResultSelM == 3'b100 : forward FResM            (select 2'b10)
+//       - otherwise             : result not ready, request a stall
+//   * else matches RdW while FWriteEnW is set:
+//       forward FPUResult64W                                (select 2'b01)
+//   * else use the register-file read data                  (select 2'b00)
+// FStallD is asserted when any of the three operands requests a stall.
+module fhazard(
+  input  logic [4:0] Adr1E, Adr2E, Adr3E,
+  input  logic       FWriteEnM, FWriteEnW,
+  input  logic [4:0] RdM, RdW,
+  input  logic [2:0] FResultSelM,
+  output logic       FStallD,
+  output logic [1:0] ForwardXE, ForwardYE, ForwardZE
+);
+
+  // One stall request per source operand.
+  logic StallX, StallY, StallZ;
+
+  // Resolve a single source operand.
+  // Returns {stall request, 2-bit forward select}.
+  function automatic logic [2:0] resolve_operand(
+    input logic [4:0] SrcAdr,
+    input logic [4:0] DstM, DstW,
+    input logic       WriteEnM, WriteEnW,
+    input logic [2:0] ResultSelM);
+    if ((SrcAdr == DstM) & WriteEnM) begin
+      // memory-stage producer: forward only when its result is FResM
+      if (ResultSelM == 3'b100) return 3'b010;
+      else                      return 3'b100;
+    end else if ((SrcAdr == DstW) & WriteEnW) begin
+      // writeback-stage producer
+      return 3'b001;
+    end else begin
+      // no hazard: keep the register-file read data
+      return 3'b000;
+    end
+  endfunction
+
+  always_comb begin
+    {StallX, ForwardXE} = resolve_operand(Adr1E, RdM, RdW, FWriteEnM, FWriteEnW, FResultSelM);
+    {StallY, ForwardYE} = resolve_operand(Adr2E, RdM, RdW, FWriteEnM, FWriteEnW, FResultSelM);
+    {StallZ, ForwardZE} = resolve_operand(Adr3E, RdM, RdW, FWriteEnM, FWriteEnW, FResultSelM);
+
+    FStallD = StallX | StallY | StallZ;
+  end
+
+endmodule
diff --git a/wally-pipelined/src/fpu/fregfile.sv b/wally-pipelined/src/fpu/fregfile.sv
new file mode 100644
index 000000000..78c24b3e6
--- /dev/null
+++ b/wally-pipelined/src/fpu/fregfile.sv
@@ -0,0 +1,54 @@
+///////////////////////////////////////////
+// regfile.sv
+//
+// Written: David_Harris@hmc.edu 9 January 2021
+// Modified:
+//
+// Purpose: 4-port register file
+//
+// A component of the Wally configurable RISC-V project.
+// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, +// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT +// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+///////////////////////////////////////////
+
+`include "wally-config.vh"
+
+// fregfile: 32-entry x 64-bit floating-point register file.
+//   * three combinational read ports (a1/rd1, a2/rd2, a3/rd3), each modelled
+//     with a #2 delay
+//   * one write port (a4/wd4/we4), written on the FALLING edge of clk
+//   * asynchronous reset clears every entry (intended for simulation only,
+//     not synthesis)
+module fregfile (
+  input  logic        clk, reset,
+  input  logic        we4,
+  input  logic [ 4:0] a1, a2, a3, a4,
+  input  logic [63:0] wd4, //KEP `XLEN-1 changed to 63 (lint warning) *** figure out if double can be supported when XLEN = 32
+  output logic [63:0] rd1, rd2, rd3);
+
+  logic [63:0] rf[31:0];
+
+  // Write port: reset has priority over a write.
+  always_ff @(negedge clk or posedge reset) begin
+    if (reset) begin
+      for (int idx = 0; idx < 32; idx++) rf[idx] <= 0;
+    end else if (we4) begin
+      rf[a4] <= wd4;
+    end
+  end
+
+  // Read ports.
+  assign #2 rd1 = rf[a1];
+  assign #2 rd2 = rf[a2];
+  assign #2 rd3 = rf[a3];
+
+endmodule // fregfile
+