From 5bcae393c93b11c83420ba1e05ea51d40c474b3a Mon Sep 17 00:00:00 2001 From: Skylar Litz Date: Mon, 4 Oct 2021 18:23:31 -0400 Subject: [PATCH 1/3] added delayed MIP signal --- wally-pipelined/testbench/testbench-linux.sv | 22 ++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/wally-pipelined/testbench/testbench-linux.sv b/wally-pipelined/testbench/testbench-linux.sv index 76a1841b..73077e7d 100644 --- a/wally-pipelined/testbench/testbench-linux.sv +++ b/wally-pipelined/testbench/testbench-linux.sv @@ -38,7 +38,7 @@ module testbench(); - parameter waveOnICount = `BUSYBEAR*140000 + `BUILDROOT*8700000; // # of instructions at which to turn on waves in graphical sim + parameter waveOnICount = `BUSYBEAR*140000 + `BUILDROOT*3100000; // # of instructions at which to turn on waves in graphical sim string ProgramAddrMapFile, ProgramLabelMapFile; /////////////////////////////////////////////////////////////////////////////// @@ -137,6 +137,7 @@ module testbench(); integer NumCSRWIndex; integer NumCSRPostWIndex; logic [`XLEN-1:0] InstrCountW; + integer RequestDelayedMIP; // ------ // Macros @@ -246,9 +247,16 @@ module testbench(); MarkerIndex += 2; // match MIP to QEMU's because interrupts are imprecise if(ExpectedCSRArrayM[NumCSRM].substr(0, 2) == "mip") begin - $display("%tns: Updating MIP to %x",$time,ExpectedCSRArrayValueM[NumCSRM]); - MIPexpected = ExpectedCSRArrayValueM[NumCSRM]; - force dut.hart.priv.csr.genblk1.csri.MIP_REGW = MIPexpected; + $display("%tn: ExpectedCSRArrayM[7] (MEPC) = %x",$time,ExpectedCSRArrayM[7]); + $display("%tn: ExpectedPCM = %x",$time,ExpectedPCM); + // if PC does not equal MEPC, request delayed MIP is True + if(ExpectedPCM != ExpectedCSRArrayM[7]) begin + RequestDelayedMIP = 1; + end else begin + $display("%tns: Updating MIP to %x",$time,ExpectedCSRArrayValueM[NumCSRM]); + MIPexpected = ExpectedCSRArrayValueM[NumCSRM]; + force dut.hart.priv.csr.genblk1.csri.MIP_REGW = MIPexpected; + end end NumCSRM++; end @@ 
-326,6 +334,12 @@ module testbench(); // step2: make all checks in the write back stage. always @(negedge clk) begin + if(RequestDelayedMIP) begin + $display("%tns: Updating MIP to %x",$time,ExpectedCSRArrayValueW[NumCSRM]); + MIPexpected = ExpectedCSRArrayValueW[NumCSRM]; + force dut.hart.priv.csr.genblk1.csri.MIP_REGW = MIPexpected; + RequestDelayedMIP = 0; + end // always check PC, instruction bits if (checkInstrW) begin InstrCountW += 1; From a91c0c8fc714017f25d0aa6a4144e68ee4028efd Mon Sep 17 00:00:00 2001 From: "James E. Stine" Date: Wed, 6 Oct 2021 08:26:09 -0500 Subject: [PATCH 2/3] Make changes to fpdiv - still working on clock issue with fsm that was changed from posedge to negedge - also updated fpdivsqrt rounding to handle testfloat --- wally-pipelined/src/fpu/convert_inputs.sv | 29 +- wally-pipelined/src/fpu/exception_div.sv | 27 +- wally-pipelined/src/fpu/fpdiv.sv | 155 +++--- wally-pipelined/src/fpu/fpu.sv | 613 ++++++++++------------ wally-pipelined/src/fpu/fregfile.sv | 33 +- wally-pipelined/src/fpu/fsm.sv | 146 +++--- wally-pipelined/src/fpu/rounder_div.sv | 109 ++-- wally-pipelined/src/fpu/sbtm_a0.sv | 29 +- wally-pipelined/src/fpu/sbtm_a1.sv | 29 +- wally-pipelined/src/fpu/sbtm_a2.sv | 29 +- wally-pipelined/src/fpu/sbtm_a3.sv | 27 +- wally-pipelined/src/fpu/sbtm_div.sv | 24 + wally-pipelined/src/fpu/sbtm_sqrt.sv | 24 + 13 files changed, 698 insertions(+), 576 deletions(-) diff --git a/wally-pipelined/src/fpu/convert_inputs.sv b/wally-pipelined/src/fpu/convert_inputs.sv index bf56cb00..9a0584ba 100755 --- a/wally-pipelined/src/fpu/convert_inputs.sv +++ b/wally-pipelined/src/fpu/convert_inputs.sv @@ -1,9 +1,26 @@ -// This module takes as inputs two operands (op1 and op2) -// the operation type (op_type) and the result precision (P). -// Based on the operation and precision , it conditionally -// converts single precision values to double precision values -// and modifies the sign of op1. The converted operands are Float1 -// and Float2. 
+/////////////////////////////////////////// +// +// Written: James Stine +// Modified: 8/1/2018 +// +// Purpose: Conditionally converts single-precision input operands to double precision (based on op_type and P) +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, +// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT +// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+/////////////////////////////////////////// module convert_inputs( input [63:0] op1, // 1st input operand (A) diff --git a/wally-pipelined/src/fpu/exception_div.sv b/wally-pipelined/src/fpu/exception_div.sv index 37432068..3e701d2f 100755 --- a/wally-pipelined/src/fpu/exception_div.sv +++ b/wally-pipelined/src/fpu/exception_div.sv @@ -23,9 +23,10 @@ module exception_div ( logic BNaN; // '1' if B is a not-a-number logic ASNaN; // '1' if A is a signalling not-a-number logic BSNaN; // '1' if B is a signalling not-a-number - logic ZQNaN; // '1' if result Z is a quiet NaN + logic ZSNaN; // '1' if result Z is a NaN (Invalid, or A or B is a NaN) logic ZInf; // '1' if result Z is an infnity - logic Zero; // '1' if result is zero + logic Zero; // '1' if result is zero + logic NegSqrt; // '1' if sqrt and operand is negative //***take this module out and add more registers or just recalculate it all // Determine if mantissas are all zeros @@ -48,32 +49,34 @@ module exception_div ( assign AZero = AzeroE & AzeroM; assign BZero = BzeroE & BzeroE; + // Result is NaN if the operation is a sqrt and the operand is negative and nonzero + assign NegSqrt = (A[63] & op_type & ~AZero); + // An "Invalid Operation" exception occurs if (A or B is a signalling NaN) // or (A and B are both Infinite) assign Invalid = ASNaN | BSNaN | (((AInf & BInf) | (AZero & BZero))&~op_type) | - (A[63] & op_type); - + NegSqrt; // The result is a quiet NaN if (an "Invalid Operation" exception occurs) // or (A is a NaN) or (B is a NaN). - assign ZQNaN = Invalid | ANaN | BNaN; + assign ZSNaN = Invalid | ANaN | BNaN; // The result is zero assign Zero = (AZero | BInf)&~op_type | AZero&op_type; // The result is +Inf if ((A is Inf) or (B is 0)) and (the // result is not a quiet NaN). 
+ assign ZInf = (AInf | BZero)&~ZSNaN&~op_type | AInf&op_type&~ZSNaN; // Set the type of the result as follows: // Ztype Result // 000 Normal - // 001 Quiet NaN // 010 Infinity // 011 Zero - // 110 DivZero - assign Ztype[0] = ZQNaN | Zero; - assign Ztype[1] = ZInf | Zero; - assign Ztype[2] = BZero&~op_type; - + // 110 Div by 0 (not produced by the assigns below) + // 111 NaN result (ZSNaN set) + assign Ztype[2] = (ZSNaN); + assign Ztype[1] = (ZSNaN) | (Zero) | (ZInf); + assign Ztype[0] = (ZSNaN) | (Zero); + endmodule // exception diff --git a/wally-pipelined/src/fpu/fpdiv.sv b/wally-pipelined/src/fpu/fpdiv.sv index a2534149..0a937b5b 100755 --- a/wally-pipelined/src/fpu/fpdiv.sv +++ b/wally-pipelined/src/fpu/fpdiv.sv @@ -1,92 +1,86 @@ +/////////////////////////////////////////// // -// File name : fpdiv -// Title : Floating-Point Divider/Square-Root -// project : FPU -// Library : fpdiv -// Author(s) : James E. Stine, Jr. -// Purpose : definition of main unit to floating-point div/sqrt -// notes : +// Written: James Stine +// Modified: 8/1/2018 // -// Copyright Oklahoma State University +// Purpose: Floating point divider/square root top unit (Goldschmidt) +// +// A component of the Wally configurable RISC-V project. 
+// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University // -// Basic Operations +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, +// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: // -// Step 1: Load operands, set flags, and convert SP to DP -// Step 2: Check for special inputs ( +/- Infinity, NaN) -// Step 3: Exponent Logic -// Step 4: Divide/Sqrt using Goldschmidt -// Step 5: Normalize the result.// -// Shift left until normalized. Normalized when the value to the -// left of the binrary point is 1. -// Step 6: Round the result.// -// Step 7: Put quotient/remainder onto output. +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. // +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT +// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+/////////////////////////////////////////// // `timescale 1ps/1ps module fpdiv ( - input logic clk, - input logic reset, - input logic start, - input logic [63:0] op1, // 1st input operand (A) - input logic [63:0] op2, // 2nd input operand (B) - input logic [1:0] rm, // Rounding mode - specify values - input logic op_type, // Function opcode - input logic P, // Result Precision (0 for double, 1 for single) - input logic OvEn, // Overflow trap enabled - input logic UnEn, // Underflow trap enabled - output logic done, - output logic FDivBusyE, - output logic [63:0] AS_Result, // Result of operation - output logic [4:0] Flags); // IEEE exception flags + input logic clk, + input logic reset, + input logic start, + input logic [63:0] op1, + input logic [63:0] op2, + input logic [1:0] rm, + input logic op_type, + input logic P, + input logic OvEn, + input logic UnEn, + input logic XNaNQ, + input logic YNaNQ, + input logic XZeroQ, + input logic YZeroQ, + input logic XInfQ, + input logic YInfQ, - - logic [63:0] Float1; - logic [63:0] Float2; + output logic done, + output logic FDivBusyE, + output logic [63:0] AS_Result, + output logic [4:0] Flags); - logic [12:0] exp1, exp2, expF; - logic [12:0] exp_diff, bias; - logic [13:0] exp_sqrt; - logic [12:0] exp_s; - logic [12:0] exp_c; + logic [63:0] Float1; + logic [63:0] Float2; - logic [10:0] exponent; - logic [63:0] Result; - logic [52:0] mantissaA; - logic [52:0] mantissaB; + logic [12:0] exp1, exp2, expF; + logic [12:0] exp_diff, bias; + logic [13:0] exp_sqrt; + logic [63:0] Result; + logic [52:0] mantissaA; + logic [52:0] mantissaB; - logic [2:0] sel_inv; - logic Invalid; - logic [4:0] FlagsIn; + logic [2:0] sel_inv; + logic Invalid; + logic [4:0] FlagsIn; logic signResult; logic convert; - logic sub; + logic sub; - logic [63:0] q1, qm1, qp1, q0, qm0, qp0; - logic [63:0] rega_out, regb_out, regc_out, regd_out; - logic [127:0] regr_out; - logic [2:0] sel_muxa, sel_muxb; + logic [63:0] q1, qm1, qp1, q0, qm0, qp0; + logic 
[63:0] rega_out, regb_out, regc_out, regd_out; + logic [127:0] regr_out; + logic [2:0] sel_muxa, sel_muxb; logic sel_muxr; logic load_rega, load_regb, load_regc, load_regd, load_regr; - - logic load_regs; - logic exp_cout1, exp_cout2; - logic exp_odd, open; - // div/sqrt - // fdiv = 0 - // fsqrt = 1 + logic load_regs; + logic exp_cout1, exp_cout2; + logic exp_odd, open; + + // op_type : fdiv=0, fsqrt=1 assign Float1 = op1; assign Float2 = op_type ? op1 : op2; - - // Test for exceptions and return the "Invalid Operation" and - // "Denormalized" Input Flags. The "sel_inv" is used in - // the third pipeline stage to select the result. Also, op1_Norm - // and op2_Norm are one if op1 and op2 are not zero or denormalized. - // sub is one if the effective operation is subtaction. - exception_div exc1 (.A(Float1), .B(Float2), .op_type, - // output: - .Ztype(sel_inv), .Invalid); - + + // Exception detection + exception_div exc1 (.A(Float1), .B(Float2), .op_type, .Ztype(sel_inv), .Invalid); + // Determine Sign/Mantissa assign signResult = (Float1[63]^Float2[63]); assign mantissaA = {1'b1, Float1[51:0]}; @@ -103,29 +97,30 @@ module fpdiv ( assign {exp_cout2, exp_sqrt} = {1'b0, exp1} + {4'h0, 10'h3ff} + exp_odd; // Choose correct exponent assign expF = op_type ? 
exp_sqrt[13:1] : exp_diff; - + // Main Goldschmidt/Division Routine divconv goldy (.q1, .qm1, .qp1, .q0, .qm0, .qp0, .rega_out, .regb_out, .regc_out, .regd_out, .regr_out, .d(mantissaB), .n(mantissaA), .sel_muxa, .sel_muxb, .sel_muxr, .reset, .clk, .load_rega, .load_regb, .load_regc, .load_regd, .load_regr, .load_regs, .P, .op_type, .exp_odd); - + // FSM : control divider fsm control (.clk, .reset, .start, .op_type, - // outputs: - .done, .load_rega, .load_regb, .load_regc, .load_regd, - .load_regr, .load_regs, .sel_muxa, .sel_muxb, .sel_muxr, - .divBusy(FDivBusyE)); + .done, .load_rega, .load_regb, .load_regc, .load_regd, + .load_regr, .load_regs, .sel_muxa, .sel_muxb, .sel_muxr, + .divBusy(FDivBusyE)); // Round the mantissa to a 52-bit value, with the leading one // removed. The rounding units also handles special cases and // set the exception flags. rounder_div round1 (.rm, .P, .OvEn, .UnEn, .exp_diff(expF), - .sel_inv, .Invalid, .SignR(signResult), - .q1, .qm1, .qp1, .q0, .qm0, .qp0, .regr_out, - // outputs: - .Result, .Flags(FlagsIn)); - + .sel_inv, .Invalid, .SignR(signResult), + .Float1(op1), .Float2(op2), + .XNaNQ, .YNaNQ, .XZeroQ, .YZeroQ, + .XInfQ, .YInfQ, .op_type, + .q1, .qm1, .qp1, .q0, .qm0, .qp0, .regr_out, + .Result, .Flags(FlagsIn)); + // Store the final result and the exception flags in registers. 
flopenr #(64) rega (clk, reset, done, Result, AS_Result); flopenr #(5) regc (clk, reset, done, FlagsIn, Flags); diff --git a/wally-pipelined/src/fpu/fpu.sv b/wally-pipelined/src/fpu/fpu.sv index cadfafae..34aa3edd 100755 --- a/wally-pipelined/src/fpu/fpu.sv +++ b/wally-pipelined/src/fpu/fpu.sv @@ -1,6 +1,6 @@ /////////////////////////////////////////// // -// Written: Katherine Parry, Bret Mathis +// Written: Katherine Parry, James Stine, Brett Mathis // Modified: 6/23/2021 // // Purpose: FPU @@ -25,24 +25,24 @@ `include "wally-config.vh" module fpu ( - input logic clk, - input logic reset, - input logic [2:0] FRM_REGW, // Rounding mode from CSR - input logic [31:0] InstrD, // instruction from IFU - input logic [`XLEN-1:0] ReadDataW,// Read data from memory - input logic [`XLEN-1:0] SrcAE, // Integer input being processed (from IEU) - input logic [`XLEN-1:0] SrcAM, // Integer input being written into fpreg (from IEU) - input logic StallE, StallM, StallW, // stall signals from HZU - input logic FlushE, FlushM, FlushW, // flush signals from HZU - input logic [4:0] RdE, RdM, RdW, // which FP register to write to (from IEU) - output logic FRegWriteM, // FP register write enable - output logic FStallD, // Stall the decode stage - output logic FWriteIntE, FWriteIntM, FWriteIntW, // integer register write enable - output logic [`XLEN-1:0] FWriteDataE, // Data to be written to memory - output logic [`XLEN-1:0] FIntResM, // data to be written to integer register - output logic FDivBusyE, // Is the divide/sqrt unit busy (stall execute stage) - output logic IllegalFPUInstrD, // Is the instruction an illegal fpu instruction - output logic [4:0] SetFflagsM // FMA flags (to privileged unit) + input logic clk, + input logic reset, + input logic [2:0] FRM_REGW, // Rounding mode from CSR + input logic [31:0] InstrD, // instruction from IFU + input logic [`XLEN-1:0] ReadDataW,// Read data from memory + input logic [`XLEN-1:0] SrcAE, // Integer input being processed (from IEU) + 
input logic [`XLEN-1:0] SrcAM, // Integer input being written into fpreg (from IEU) + input logic StallE, StallM, StallW, // stall signals from HZU + input logic FlushE, FlushM, FlushW, // flush signals from HZU + input logic [4:0] RdE, RdM, RdW, // which FP register to write to (from IEU) + output logic FRegWriteM, // FP register write enable + output logic FStallD, // Stall the decode stage + output logic FWriteIntE, FWriteIntM, FWriteIntW, // integer register write enable + output logic [`XLEN-1:0] FWriteDataE, // Data to be written to memory + output logic [`XLEN-1:0] FIntResM, // data to be written to integer register + output logic FDivBusyE, // Is the divide/sqrt unit busy (stall execute stage) + output logic IllegalFPUInstrD, // Is the instruction an illegal fpu instruction + output logic [4:0] SetFflagsM // FMA flags (to privileged unit) ); //*** make everything FLEN at some point @@ -59,338 +59,257 @@ module fpu ( generate if (`F_SUPPORTED | `D_SUPPORTED) begin : fpu - // control signals - logic FRegWriteD, FRegWriteE, FRegWriteW; // FP register write enable - logic [2:0] FrmD, FrmE, FrmM; // FP rounding mode - logic FmtD, FmtE, FmtM, FmtW; // FP precision 0-single 1-double - logic FDivStartD, FDivStartE; // Start division or squareroot - logic FWriteIntD; // Write to integer register - logic [1:0] FForwardXE, FForwardYE, FForwardZE; // forwarding mux control signals - logic [1:0] FResultSelD, FResultSelE, FResultSelM, FResultSelW; // Select the result written to FP register - logic [2:0] FOpCtrlD, FOpCtrlE, FOpCtrlM; // Select which opperation to do in each component - logic [2:0] FResSelD, FResSelE, FResSelM; // Select one of the results that finish in the memory stage - logic [1:0] FIntResSelD, FIntResSelE, FIntResSelM; // Select the result written to the integer resister - logic [4:0] Adr1E, Adr2E, Adr3E; // adresses of each input + // control signals + logic FRegWriteD, FRegWriteE, FRegWriteW; // FP register write enable + logic [2:0] FrmD, FrmE, 
FrmM; // FP rounding mode + logic FmtD, FmtE, FmtM, FmtW; // FP precision 0-single 1-double + logic FDivStartD, FDivStartE; // Start division or squareroot + logic FWriteIntD; // Write to integer register + logic [1:0] FForwardXE, FForwardYE, FForwardZE; // forwarding mux control signals + logic [1:0] FResultSelD, FResultSelE, FResultSelM, FResultSelW; // Select the result written to FP register + logic [2:0] FOpCtrlD, FOpCtrlE, FOpCtrlM; // Select which opperation to do in each component + logic [2:0] FResSelD, FResSelE, FResSelM; // Select one of the results that finish in the memory stage + logic [1:0] FIntResSelD, FIntResSelE, FIntResSelM; // Select the result written to the integer resister + logic [4:0] Adr1E, Adr2E, Adr3E; // adresses of each input + + // regfile signals + logic [63:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage + logic [63:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage + logic [63:0] FSrcXE, FSrcXM; // Input 1 to the various units (after forwarding) + logic [63:0] FPreSrcYE, FSrcYE; // Input 2 to the various units (after forwarding) + logic [63:0] FPreSrcZE, FSrcZE; // Input 3 to the various units (after forwarding) + + // unpacking signals + logic XSgnE, YSgnE, ZSgnE; // input's sign - execute stage + logic XSgnM, YSgnM; // input's sign - memory stage + logic [10:0] XExpE, YExpE, ZExpE; // input's exponent - execute stage + logic [10:0] XExpM, YExpM, ZExpM; // input's exponent - memory stage + logic [52:0] XManE, YManE, ZManE; // input's fraction - execute stage + logic [52:0] XManM, YManM, ZManM; // input's fraction - memory stage + logic [10:0] BiasE; // bias based on precision (single=7f double=3ff - max expoent/2) + logic XNaNE, YNaNE, ZNaNE; // is the input a NaN - execute stage + logic XNaNM, YNaNM, ZNaNM; // is the input a NaN - memory stage + logic XSNaNE, YSNaNE, ZSNaNE; // is the input a signaling NaN - execute stage + logic XSNaNM, YSNaNM, ZSNaNM; // is the input a signaling NaN - 
memory stage + logic XDenormE, YDenormE, ZDenormE; // is the input denormalized + logic XZeroE, YZeroE, ZZeroE; // is the input zero - execute stage + logic XZeroM, YZeroM, ZZeroM; // is the input zero - memory stage + logic XInfE, YInfE, ZInfE; // is the input infinity - execute stage + logic XInfM, YInfM, ZInfM; // is the input infinity - memory stage + logic XExpMaxE; // is the exponent all ones (max value) + logic XNormE; // is normal + + // result and flag signals + logic [63:0] FDivResM, FDivResW; // divide/squareroot result + logic [4:0] FDivFlgM, FDivFlgW; // divide/squareroot flags + logic [63:0] FMAResM, FMAResW; // FMA/multiply result + logic [4:0] FMAFlgM, FMAFlgW; // FMA/multiply result + logic [63:0] ReadResW; // read result (load instruction) + logic [63:0] CvtFpResE, CvtFpResM, CvtFpResW; // add/FP -> FP convert result + logic [4:0] CvtFpFlgE, CvtFpFlgM, CvtFpFlgW; // add/FP -> FP convert flags + logic [63:0] CvtResE, CvtResM; // FP <-> int convert result + logic [4:0] CvtFlgE, CvtFlgM; // FP <-> int convert flags //*** trim this + logic [63:0] ClassResE, ClassResM; // classify result + logic [63:0] CmpResE, CmpResM; // compare result + logic CmpNVE, CmpNVM; // compare invalid flag (Not Valid) + logic [63:0] SgnResE, SgnResM; // sign injection result + logic SgnNVE, SgnNVM; // sign injection invalid flag (Not Valid) + logic [63:0] FResE, FResM, FResW; // selected result that is ready in the memory stage + logic [4:0] FFlgE, FFlgM; // selected flag that is ready in the memory stage + logic [`XLEN-1:0] FIntResE; + logic [63:0] FPUResultW; // final FP result being written to the FP register + + // other signals + logic FDivSqrtDoneE; // is divide done + logic [63:0] DivInput1E, DivInput2E; // inputs to divide/squareroot unit + logic FDivClk; // clock for divide/squareroot unit + logic [63:0] AlignedSrcAE; // align SrcA to the floating point format + + // DECODE STAGE + // calculate FP control signals + fctrl fctrl (.Funct7D(InstrD[31:25]), 
.OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .FRM_REGW, + .IllegalFPUInstrD, .FRegWriteD, .FDivStartD, .FResultSelD, .FOpCtrlD, .FResSelD, + .FIntResSelD, .FmtD, .FrmD, .FWriteIntD); - // regfile signals - logic [63:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage - logic [63:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage - logic [63:0] FSrcXE, FSrcXM; // Input 1 to the various units (after forwarding) - logic [63:0] FPreSrcYE, FSrcYE; // Input 2 to the various units (after forwarding) - logic [63:0] FPreSrcZE, FSrcZE; // Input 3 to the various units (after forwarding) - - // unpacking signals - logic XSgnE, YSgnE, ZSgnE; // input's sign - execute stage - logic XSgnM, YSgnM; // input's sign - memory stage - logic [10:0] XExpE, YExpE, ZExpE; // input's exponent - execute stage - logic [10:0] XExpM, YExpM, ZExpM; // input's exponent - memory stage - logic [52:0] XManE, YManE, ZManE; // input's fraction - execute stage - logic [52:0] XManM, YManM, ZManM; // input's fraction - memory stage - logic [10:0] BiasE; // bias based on precision (single=7f double=3ff - max expoent/2) - logic XNaNE, YNaNE, ZNaNE; // is the input a NaN - execute stage - logic XNaNM, YNaNM, ZNaNM; // is the input a NaN - memory stage - logic XSNaNE, YSNaNE, ZSNaNE; // is the input a signaling NaN - execute stage - logic XSNaNM, YSNaNM, ZSNaNM; // is the input a signaling NaN - memory stage - logic XDenormE, YDenormE, ZDenormE; // is the input denormalized - logic XZeroE, YZeroE, ZZeroE; // is the input zero - execute stage - logic XZeroM, YZeroM, ZZeroM; // is the input zero - memory stage - logic XInfE, YInfE, ZInfE; // is the input infinity - execute stage - logic XInfM, YInfM, ZInfM; // is the input infinity - memory stage - logic XExpMaxE; // is the exponent all ones (max value) - logic XNormE; // is normal - - - // result and flag signals - logic [63:0] FDivResM, FDivResW; // divide/squareroot result - logic [4:0] FDivFlgM, 
FDivFlgW; // divide/squareroot flags + // FP register file + // - can read 3 registers and write 1 register every cycle + fregfile fregfile (.clk, .reset, .we4(FRegWriteW), + .a1(InstrD[19:15]), .a2(InstrD[24:20]), .a3(InstrD[31:27]), .a4(RdW), + .wd4(FPUResultW), + .rd1(FRD1D), .rd2(FRD2D), .rd3(FRD3D)); + + // D/E pipeline registers + flopenrc #(64) DEReg1(clk, reset, FlushE, ~StallE, FRD1D, FRD1E); + flopenrc #(64) DEReg2(clk, reset, FlushE, ~StallE, FRD2D, FRD2E); + flopenrc #(64) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E); + flopenrc #(15) DEAdrReg(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]}, + {Adr1E, Adr2E, Adr3E}); + flopenrc #(17) DECtrlReg3(clk, reset, FlushE, ~StallE, + {FRegWriteD, FResultSelD, FResSelD, FIntResSelD, FrmD, FmtD, FOpCtrlD, FWriteIntD, FDivStartD}, + {FRegWriteE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE, FDivStartE}); + + // EXECUTION STAGE + // Hazard unit for FPU + // - determines if any forwarding or stalls are needed + fhazard fhazard(.Adr1E, .Adr2E, .Adr3E, .FRegWriteM, .FRegWriteW, .RdM, .RdW, .FResultSelM, + .FStallD, .FForwardXE, .FForwardYE, .FForwardZE); + + // forwarding muxs + mux3 #(64) fxemux(FRD1E, FPUResultW, FResM, FForwardXE, FSrcXE); + mux3 #(64) fyemux(FRD2E, FPUResultW, FResM, FForwardYE, FPreSrcYE); + mux3 #(64) fzemux(FRD3E, FPUResultW, FResM, FForwardZE, FPreSrcZE); + mux3 #(64) fyaddmux(FPreSrcYE, {{32{1'b1}}, 2'b0, {7{1'b1}}, 23'b0}, + {2'b0, {10{1'b1}}, 52'b0}, + {FmtE&FOpCtrlE[2]&FOpCtrlE[1]&(FResultSelE==3'b01), ~FmtE&FOpCtrlE[2]&FOpCtrlE[1]&(FResultSelE==3'b01)}, + FSrcYE); // Force Z to be 0 for multiply instructions + // Force Z to be 0 for multiply instructions + mux3 #(64) fzmulmux(FPreSrcZE, 64'b0, FPreSrcYE, {FOpCtrlE[2]&FOpCtrlE[1], FOpCtrlE[2]&~FOpCtrlE[1]}, FSrcZE); + + // unpacking unit + // - splits FP inputs into their various parts + // - does some classifications (SNaN, NaN, Denorm, Norm, Zero, Infifnity) + unpacking 
unpacking(.X(FSrcXE), .Y(FSrcYE), .Z(FSrcZE), .FOpCtrlE, .FmtE, + .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE, + .XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE, .XDenormE, .YDenormE, .ZDenormE, + .XZeroE, .YZeroE, .ZZeroE, .BiasE, .XInfE, .YInfE, .ZInfE, .XExpMaxE, .XNormE); + + // FMA + // - two stage FMA + // - execute stage - multiplication and addend shifting + // - memory stage - addition and rounding + // - handles FMA and multiply instructions + fma fma (.clk, .reset, .FlushM, .StallM, + .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE, + .XDenormE, .YDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, .BiasE, + .XSgnM, .YSgnM, .XExpM, .YExpM, .ZExpM, .XManM, .YManM, .ZManM, + .XNaNM, .YNaNM, .ZNaNM, .XZeroM, .YZeroM, .ZZeroM, + .XInfM, .YInfM, .ZInfM, .XSNaNM, .YSNaNM, .ZSNaNM, + .FOpCtrlE, + .FmtE, .FmtM, .FrmM, + .FMAFlgM, .FMAResM); + + // clock gater + // - creates a clock that only runs durring divide/sqrt instructions + // - using the seperate clock gives the divide/sqrt unit some to get set up + // *** the module says not to use in synthisis + clockgater fpdivclkg(.E(FDivStartE), + .SE(1'b0), + .CLK(clk), + .ECLK(FDivClk)); + + // capture the inputs for divide/sqrt + // - if not captured any forwarded inputs will change durring computation + // - this problem is caused by stalling the execute stage + // - the other units don't have this problem, only div/sqrt stalls the execute stage + flopenrc #(64) reg_input1 (.d({XSgnE, XExpE, XManE[51:0]}), .q(DivInput1E), + .en(1'b1), .clear(FDivSqrtDoneE), + .reset(reset), .clk(FDivBusyE)); + flopenrc #(64) reg_input2 (.d({YSgnE, YExpE, YManE[51:0]}), .q(DivInput2E), + .en(1'b1), .clear(FDivSqrtDoneE), + .reset(reset), .clk(FDivBusyE)); + flopenrc #(6) reg_input3 (.d({XNaNE, YNaNE, XInfE, YInfE, XZeroE, YZeroE}), + .q({XNaNQ, YNaNQ, XInfQ, YInfQ, XZeroQ, YZeroQ}), + .en(1'b1), .clear(FDivSqrtDoneE), + .reset(reset), .clk(FDivBusyE)); + + // fpdivsqrt using 
Goldschmidt's iteration + fpdiv fdivsqrt (.op1(DivInput1E), .op2(DivInput2E), .rm(FrmE[1:0]), .op_type(FOpCtrlE[0]), + .reset, .clk(FDivClk), .start(FDivStartE), .P(~FmtE), .OvEn(1'b1), .UnEn(1'b1), + .XNaNQ, .YNaNQ, .XInfQ, .YInfQ, .XZeroQ, .YZeroQ, + .FDivBusyE, .done(FDivSqrtDoneE), .AS_Result(FDivResM), .Flags(FDivFlgM)); + + // convert from signle to double and vice versa + cvtfp cvtfp (.XExpE, .XManE, .XSgnE, .XZeroE, .XDenormE, .XInfE, .XNaNE, .XSNaNE, .FrmE, .FmtE, .CvtFpResE, .CvtFpFlgE); + + // compare unit + // - computation is done in one stage + // - writes to FP file durring min/max instructions + // - other comparisons write a 1 or 0 to the integer register + fcmp fcmp (.op1({XSgnE,XExpE,XManE[`NF-1:0]}), .op2({YSgnE,YExpE,YManE[`NF-1:0]}), + .FSrcXE, .FSrcYE, .FOpCtrlE, + .FmtE, .XNaNE, .YNaNE, .XZeroE, .YZeroE, + .Invalid(CmpNVE), .CmpResE); + + // sign injection unit + // - computation is done in one stage + fsgn fsgn (.SgnOpCodeE(FOpCtrlE[1:0]), .XSgnE, .YSgnE, .FSrcXE, .FmtE, .XExpMaxE, + .SgnNVE, .SgnResE); + + // classify + // - computation is done in one stage + // - most of the work is done in the unpacking unit + // - result is written to the integer register + fclassify fclassify (.XSgnE, .XDenormE, .XZeroE, .XNaNE, .XInfE, .XNormE, + .XSNaNE, .ClassResE); + + fcvt fcvt (.XSgnE, .XExpE, .XManE, .XZeroE, .XNaNE, .XInfE, .XDenormE, .BiasE, .SrcAE, .FOpCtrlE, .FmtE, .FrmE, + .CvtResE, .CvtFlgE); + + // data to be stored in memory - to IEU + // - FP uses NaN-blocking format + // - if there are any unsused bits the most significant bits are filled with 1s + assign FWriteDataE = FSrcYE[`XLEN-1:0]; + + // Align SrcA to MSB when single precicion + mux2 #(64) SrcAMux({{32{1'b1}}, SrcAE[31:0]}, {{64-`XLEN{1'b1}}, SrcAE}, FmtE, AlignedSrcAE); + + // select a result that may be written to the FP register + mux5 #(64) FResMux(AlignedSrcAE, SgnResE, CmpResE, CvtResE, CvtFpResE, FResSelE, FResE); + mux5 #(5) FFlgMux(5'b0, {4'b0, SgnNVE}, {4'b0, CmpNVE}, 
CvtFlgE, CvtFpFlgE, FResSelE, FFlgE); + + // select the result that may be written to the integer register - to IEU + mux4 #(`XLEN) IntResMux(CmpResE[`XLEN-1:0], FSrcXE[`XLEN-1:0], ClassResE[`XLEN-1:0], + CvtResE[`XLEN-1:0], FIntResSelE, FIntResE); + + // E/M pipe registers + + // flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, FSrcXE, FSrcXM); + flopenrc #(65) EMFpReg2(clk, reset, FlushM, ~StallM, {XSgnE,XExpE,XManE}, {XSgnM,XExpM,XManM}); + flopenrc #(65) EMFpReg3(clk, reset, FlushM, ~StallM, {YSgnE,YExpE,YManE}, {YSgnM,YExpM,YManM}); + flopenrc #(64) EMFpReg4(clk, reset, FlushM, ~StallM, {ZExpE,ZManE}, {ZExpM,ZManM}); + flopenrc #(12) EMFpReg5(clk, reset, FlushM, ~StallM, + {XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE}, + {XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM}); + flopenrc #(64) EMRegCmpRes(clk, reset, FlushM, ~StallM, FResE, FResM); + flopenrc #(5) EMRegCmpFlg(clk, reset, FlushM, ~StallM, FFlgE, FFlgM); + flopenrc #(`XLEN) EMRegSgnRes(clk, reset, FlushM, ~StallM, FIntResE, FIntResM); + flopenrc #(11) EMCtrlReg(clk, reset, FlushM, ~StallM, + {FRegWriteE, FResultSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE}, + {FRegWriteM, FResultSelM, FrmM, FmtM, FOpCtrlM, FWriteIntM}); + + // BEGIN MEMORY STAGE + // FPU flag selection - to privileged + mux4 #(5) FPUFlgMux(5'b0, FMAFlgM, FDivFlgM, FFlgM, FResultSelW, SetFflagsM); - logic [63:0] FMAResM, FMAResW; // FMA/multiply result - logic [4:0] FMAFlgM, FMAFlgW; // FMA/multiply result - - logic [63:0] ReadResW; // read result (load instruction) + // M/W pipe registers + flopenrc #(64) MWRegFma(clk, reset, FlushW, ~StallW, FMAResM, FMAResW); + flopenrc #(64) MWRegDiv(clk, reset, FlushW, ~StallW, FDivResM, FDivResW); + flopenrc #(64) MWRegAdd(clk, reset, FlushW, ~StallW, CvtFpResM, CvtFpResW); + flopenrc #(64) MWRegClass(clk, reset, FlushW, ~StallW, FResM, FResW); + flopenrc #(5) MWCtrlReg(clk, reset, FlushW, ~StallW, + {FRegWriteM, 
FResultSelM, FmtM, FWriteIntM}, + {FRegWriteW, FResultSelW, FmtW, FWriteIntW}); + + // BEGIN WRITEBACK STAGE + + // put ReadData into NaN-blocking format + // - if there are any unsused bits the most significant bits are filled with 1s + // - for load instruction + mux2 #(64) ReadResMux({{32{1'b1}}, ReadDataW[31:0]}, {{64-`XLEN{1'b1}}, ReadDataW}, FmtW, ReadResW); + + // select the result to be written to the FP register + mux4 #(64) FPUResultMux(ReadResW, FMAResW, FDivResW, FResW, FResultSelW, FPUResultW); - logic [63:0] CvtFpResE, CvtFpResM, CvtFpResW; // add/FP -> FP convert result - logic [4:0] CvtFpFlgE, CvtFpFlgM, CvtFpFlgW; // add/FP -> FP convert flags - - logic [63:0] CvtResE, CvtResM; // FP <-> int convert result - logic [4:0] CvtFlgE, CvtFlgM; // FP <-> int convert flags //*** trim this - - logic [63:0] ClassResE, ClassResM; // classify result - - logic [63:0] CmpResE, CmpResM; // compare result - logic CmpNVE, CmpNVM; // compare invalid flag (Not Valid) - - logic [63:0] SgnResE, SgnResM; // sign injection result - logic SgnNVE, SgnNVM; // sign injection invalid flag (Not Valid) - - logic [63:0] FResE, FResM, FResW; // selected result that is ready in the memory stage - logic [4:0] FFlgE, FFlgM; // selected flag that is ready in the memory stage - - logic [`XLEN-1:0] FIntResE; - - logic [63:0] FPUResultW; // final FP result being written to the FP register - - // other signals - logic FDivSqrtDoneE; // is divide done - logic [63:0] DivInput1E, DivInput2E; // inputs to divide/squareroot unit - logic FDivClk; // clock for divide/squareroot unit - logic [63:0] AlignedSrcAE; // align SrcA to the floating point format - - - - - - //////////////////////////////////////////////////////////////////////////////////////// - //DECODE STAGE - //////////////////////////////////////////////////////////////////////////////////////// - - - - // calculate FP control signals - fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), 
.Funct3D(InstrD[14:12]), .FRM_REGW, - // outputs: - .IllegalFPUInstrD, .FRegWriteD, .FDivStartD, .FResultSelD, .FOpCtrlD, .FResSelD, - .FIntResSelD, .FmtD, .FrmD, .FWriteIntD); - - // FP register file - // - can read 3 registers and write 1 register every cycle - fregfile fregfile (.clk, .reset, .we4(FRegWriteW), - .a1(InstrD[19:15]), .a2(InstrD[24:20]), .a3(InstrD[31:27]), .a4(RdW), - .wd4(FPUResultW), - // outputs: - .rd1(FRD1D), .rd2(FRD2D), .rd3(FRD3D)); - - - - - - //////////////////////////////////////////////////////////////////////////////////////// - // D/E pipeline registers - //////////////////////////////////////////////////////////////////////////////////////// - - flopenrc #(64) DEReg1(clk, reset, FlushE, ~StallE, FRD1D, FRD1E); - flopenrc #(64) DEReg2(clk, reset, FlushE, ~StallE, FRD2D, FRD2E); - flopenrc #(64) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E); - flopenrc #(15) DEAdrReg(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]}, - {Adr1E, Adr2E, Adr3E}); - flopenrc #(17) DECtrlReg3(clk, reset, FlushE, ~StallE, - {FRegWriteD, FResultSelD, FResSelD, FIntResSelD, FrmD, FmtD, FOpCtrlD, FWriteIntD, FDivStartD}, - {FRegWriteE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE, FDivStartE}); - - - - - - - - //////////////////////////////////////////////////////////////////////////////////////// - //EXECUTION STAGE - //////////////////////////////////////////////////////////////////////////////////////// - - - // Hazard unit for FPU - // - determines if any forwarding or stalls are needed - fhazard fhazard(.Adr1E, .Adr2E, .Adr3E, .FRegWriteM, .FRegWriteW, .RdM, .RdW, .FResultSelM, - // outputs: - .FStallD, .FForwardXE, .FForwardYE, .FForwardZE); - - - // forwarding muxs - mux3 #(64) fxemux(FRD1E, FPUResultW, FResM, FForwardXE, FSrcXE); - mux3 #(64) fyemux(FRD2E, FPUResultW, FResM, FForwardYE, FPreSrcYE); - mux3 #(64) fzemux(FRD3E, FPUResultW, FResM, FForwardZE, FPreSrcZE); - mux3 #(64) fyaddmux(FPreSrcYE, 
{{32{1'b1}}, 2'b0, {7{1'b1}}, 23'b0}, {2'b0, {10{1'b1}}, 52'b0}, {FmtE&FOpCtrlE[2]&FOpCtrlE[1]&(FResultSelE==3'b01), ~FmtE&FOpCtrlE[2]&FOpCtrlE[1]&(FResultSelE==3'b01)}, FSrcYE); // Force Z to be 0 for multiply instructions - mux3 #(64) fzmulmux(FPreSrcZE, 64'b0, FPreSrcYE, {FOpCtrlE[2]&FOpCtrlE[1], FOpCtrlE[2]&~FOpCtrlE[1]}, FSrcZE); // Force Z to be 0 for multiply instructions - - - // unpacking unit - // - splits FP inputs into their various parts - // - does some classifications (SNaN, NaN, Denorm, Norm, Zero, Infifnity) - unpacking unpacking(.X(FSrcXE), .Y(FSrcYE), .Z(FSrcZE), .FOpCtrlE, .FmtE, - // outputs: - .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE, - .XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE, .XDenormE, .YDenormE, .ZDenormE, - .XZeroE, .YZeroE, .ZZeroE, .BiasE, .XInfE, .YInfE, .ZInfE, .XExpMaxE, .XNormE); - - // FMA - // - two stage FMA - // - execute stage - multiplication and addend shifting - // - memory stage - addition and rounding - // - handles FMA and multiply instructions - // - contains some E/M pipleine registers - // *** currently handles FLEN and 32 bits(dont know if 32 works with 128 - easy to fix) - change to handle only the supported formats - fma fma (.clk, .reset, .FlushM, .StallM, - .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE, - .XDenormE, .YDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, .BiasE, - .XSgnM, .YSgnM, .XExpM, .YExpM, .ZExpM, .XManM, .YManM, .ZManM, - .XNaNM, .YNaNM, .ZNaNM, .XZeroM, .YZeroM, .ZZeroM, - .XInfM, .YInfM, .ZInfM, .XSNaNM, .YSNaNM, .ZSNaNM, - .FOpCtrlE, - .FmtE, .FmtM, .FrmM, - // outputs: - .FMAFlgM, .FMAResM); - - // clock gater - // - creates a clock that only runs durring divide/sqrt instructions - // - using the seperate clock gives the divide/sqrt unit some to get set up - // *** the module says not to use in synthisis - clockgater fpdivclkg(.E(FDivStartE), - .SE(1'b0), - .CLK(clk), - .ECLK(FDivClk)); - - // capture the inputs for divide/sqrt - 
// - if not captured any forwarded inputs will change durring computation - // - this problem is caused by stalling the execute stage - // - the other units don't have this problem, only div/sqrt stalls the execute stage - flopenrc #(64) reg_input1 (.d({XSgnE, XExpE, XManE[51:0]}), .q(DivInput1E), - .en(1'b1), .clear(FDivSqrtDoneE), - .reset(reset), .clk(FDivBusyE)); - flopenrc #(64) reg_input2 (.d({YSgnE, YExpE, YManE[51:0]}), .q(DivInput2E), - .en(1'b1), .clear(FDivSqrtDoneE), - .reset(reset), .clk(FDivBusyE)); - - // output for store instructions - //*** change to use the unpacking unit if possible - fpdiv fdivsqrt (.op1(DivInput1E), .op2(DivInput2E), .rm(FrmE[1:0]), .op_type(FOpCtrlE[0]), - .reset, .clk(FDivClk), .start(FDivStartE), .P(~FmtE), .OvEn(1'b1), .UnEn(1'b1), - // outputs: - .FDivBusyE, .done(FDivSqrtDoneE), .AS_Result(FDivResM), .Flags(FDivFlgM)); - - // convert from signle to double and vice versa - cvtfp cvtfp (.XExpE, .XManE, .XSgnE, .XZeroE, .XDenormE, .XInfE, .XNaNE, .XSNaNE, .FrmE, .FmtE, .CvtFpResE, .CvtFpFlgE); - - // compare unit - // - computation is done in one stage - // - writes to FP file durring min/max instructions - // - other comparisons write a 1 or 0 to the integer register - fcmp fcmp (.op1({XSgnE,XExpE,XManE[`NF-1:0]}), .op2({YSgnE,YExpE,YManE[`NF-1:0]}), - .FSrcXE, .FSrcYE, .FOpCtrlE, - .FmtE, .XNaNE, .YNaNE, .XZeroE, .YZeroE, - // outputs: - .Invalid(CmpNVE), .CmpResE); - - // sign injection unit - // - computation is done in one stage - fsgn fsgn (.SgnOpCodeE(FOpCtrlE[1:0]), .XSgnE, .YSgnE, .FSrcXE, .FmtE, .XExpMaxE, - // outputs: - .SgnNVE, .SgnResE); - - // classify - // - computation is done in one stage - // - most of the work is done in the unpacking unit - // - result is written to the integer register - fclassify fclassify (.XSgnE, .XDenormE, .XZeroE, .XNaNE, .XInfE, .XNormE, - // outputs: - .XSNaNE, .ClassResE); - - fcvt fcvt (.XSgnE, .XExpE, .XManE, .XZeroE, .XNaNE, .XInfE, .XDenormE, .BiasE, .SrcAE, .FOpCtrlE, 
.FmtE, .FrmE, - // outputs: - .CvtResE, .CvtFlgE); - - // data to be stored in memory - to IEU - // - FP uses NaN-blocking format - // - if there are any unsused bits the most significant bits are filled with 1s - assign FWriteDataE = FSrcYE[`XLEN-1:0]; - - - // Align SrcA to MSB when single precicion - mux2 #(64) SrcAMux({{32{1'b1}}, SrcAE[31:0]}, {{64-`XLEN{1'b1}}, SrcAE}, FmtE, AlignedSrcAE); - - // select a result that may be written to the FP register - mux5 #(64) FResMux(AlignedSrcAE, SgnResE, CmpResE, CvtResE, CvtFpResE, FResSelE, FResE); - mux5 #(5) FFlgMux(5'b0, {4'b0, SgnNVE}, {4'b0, CmpNVE}, CvtFlgE, CvtFpFlgE, FResSelE, FFlgE); - - // select the result that may be written to the integer register - to IEU - mux4 #(`XLEN) IntResMux(CmpResE[`XLEN-1:0], FSrcXE[`XLEN-1:0], ClassResE[`XLEN-1:0], CvtResE[`XLEN-1:0], FIntResSelE, FIntResE); - - - - //***will synth remove registers of values that are always zero? - //////////////////////////////////////////////////////////////////////////////////////// - // E/M pipe registers - //////////////////////////////////////////////////////////////////////////////////////// - - // flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, FSrcXE, FSrcXM); - flopenrc #(65) EMFpReg2(clk, reset, FlushM, ~StallM, {XSgnE,XExpE,XManE}, {XSgnM,XExpM,XManM}); - flopenrc #(65) EMFpReg3(clk, reset, FlushM, ~StallM, {YSgnE,YExpE,YManE}, {YSgnM,YExpM,YManM}); - flopenrc #(64) EMFpReg4(clk, reset, FlushM, ~StallM, {ZExpE,ZManE}, {ZExpM,ZManM}); - flopenrc #(12) EMFpReg5(clk, reset, FlushM, ~StallM, - {XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE}, - {XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM}); - - flopenrc #(64) EMRegCmpRes(clk, reset, FlushM, ~StallM, FResE, FResM); - flopenrc #(5) EMRegCmpFlg(clk, reset, FlushM, ~StallM, FFlgE, FFlgM); - - flopenrc #(`XLEN) EMRegSgnRes(clk, reset, FlushM, ~StallM, FIntResE, FIntResM); - // flopenrc #(1) 
EMRegSgnFlg(clk, reset, FlushM, ~StallM, SgnNVE, SgnNVM); - - //flopenrc #(64) EMRegCvtFpRes(clk, reset, FlushM, ~StallM, CvtFpResE, CvtFpResM); - //flopenrc #(5) EMRegCvtFpFlg(clk, reset, FlushM, ~StallM, CvtFpFlgE, CvtFpFlgM); - - // flopenrc #(64) EMRegCvtRes(clk, reset, FlushM, ~StallM, CvtResE, CvtResM); - // flopenrc #(5) EMRegCvtFlg(clk, reset, FlushM, ~StallM, CvtFlgE, CvtFlgM); - - // flopenrc #(64) EMRegClass(clk, reset, FlushM, ~StallM, ClassResE, ClassResM); - - flopenrc #(11) EMCtrlReg(clk, reset, FlushM, ~StallM, - {FRegWriteE, FResultSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE}, - {FRegWriteM, FResultSelM, FrmM, FmtM, FOpCtrlM, FWriteIntM}); - - - - - - - //////////////////////////////////////////////////////////////////////////////////////// - //BEGIN MEMORY STAGE - //////////////////////////////////////////////////////////////////////////////////////// - - - // FPU flag selection - to privileged - mux4 #(5) FPUFlgMux(5'b0, FMAFlgM, FDivFlgM, FFlgM, FResultSelW, SetFflagsM); - - - - - - //////////////////////////////////////////////////////////////////////////////////////// - // M/W pipe registers - //////////////////////////////////////////////////////////////////////////////////////// - flopenrc #(64) MWRegFma(clk, reset, FlushW, ~StallW, FMAResM, FMAResW); - flopenrc #(64) MWRegDiv(clk, reset, FlushW, ~StallW, FDivResM, FDivResW); - flopenrc #(64) MWRegAdd(clk, reset, FlushW, ~StallW, CvtFpResM, CvtFpResW); - flopenrc #(64) MWRegClass(clk, reset, FlushW, ~StallW, FResM, FResW); - flopenrc #(5) MWCtrlReg(clk, reset, FlushW, ~StallW, - {FRegWriteM, FResultSelM, FmtM, FWriteIntM}, - {FRegWriteW, FResultSelW, FmtW, FWriteIntW}); - - - - - //////////////////////////////////////////////////////////////////////////////////////// - // BEGIN WRITEBACK STAGE - //////////////////////////////////////////////////////////////////////////////////////// - - // put ReadData into NaN-blocking format - // - if there are any unsused bits the most significant bits are 
filled with 1s - // - for load instruction - mux2 #(64) ReadResMux({{32{1'b1}}, ReadDataW[31:0]}, {{64-`XLEN{1'b1}}, ReadDataW}, FmtW, ReadResW); - - // select the result to be written to the FP register - mux4 #(64) FPUResultMux(ReadResW, FMAResW, FDivResW, FResW, FResultSelW, FPUResultW); - - end else begin // no F_SUPPORTED or D_SUPPORTED; tie outputs low - assign FStallD = 0; - assign FWriteIntE = 0; - assign FWriteIntM = 0; - assign FWriteIntW = 0; - assign FWriteDataE = 0; - assign FIntResM = 0; - assign FDivBusyE = 0; - assign IllegalFPUInstrD = 1; - assign SetFflagsM = 0; + assign FStallD = 0; + assign FWriteIntE = 0; + assign FWriteIntM = 0; + assign FWriteIntW = 0; + assign FWriteDataE = 0; + assign FIntResM = 0; + assign FDivBusyE = 0; + assign IllegalFPUInstrD = 1; + assign SetFflagsM = 0; end endgenerate diff --git a/wally-pipelined/src/fpu/fregfile.sv b/wally-pipelined/src/fpu/fregfile.sv index 4b001bc9..fd8e0f60 100644 --- a/wally-pipelined/src/fpu/fregfile.sv +++ b/wally-pipelined/src/fpu/fregfile.sv @@ -1,10 +1,9 @@ /////////////////////////////////////////// -// regfile.sv // // Written: David_Harris@hmc.edu 9 January 2021 -// Modified: +// Modified: James Stine // -// Purpose: 4-port register file +// Purpose: 3-port output register file // // A component of the Wally configurable RISC-V project. 
// @@ -26,22 +25,20 @@ `include "wally-config.vh" module fregfile ( - input logic clk, reset, - input logic we4, - input logic [ 4:0] a1, a2, a3, a4, - input logic [63:0] wd4, + input logic clk, reset, + input logic we4, + input logic [4:0] a1, a2, a3, a4, + input logic [63:0] wd4, output logic [63:0] rd1, rd2, rd3); - - logic [63:0] rf[31:0]; - integer i; - - // three ported register file - // read three ports combinationally (A1/RD1, A2/RD2, A3/RD3) - // write fourth port on rising edge of clock (A4/WD4/WE4) - // write occurs on falling edge of clock - - // reset is intended for simulation only, not synthesis - + + logic [63:0] rf[31:0]; + integer i; + + // three ported register file + // read three ports combinationally (A1/RD1, A2/RD2, A3/RD3) + // write fourth port (A4/WD4/WE4) on the falling edge of clock when we4 is set + // reset is asynchronous (posedge reset) and clears all 32 registers + always_ff @(negedge clk or posedge reset) if (reset) for(i=0; i<32; i++) rf[i] <= 0; else if (we4) rf[a4] <= wd4; diff --git a/wally-pipelined/src/fpu/fsm.sv b/wally-pipelined/src/fpu/fsm.sv index 00f95993..a0e874bc 100755 --- a/wally-pipelined/src/fpu/fsm.sv +++ b/wally-pipelined/src/fpu/fsm.sv @@ -1,49 +1,63 @@ -module fsm ( +/////////////////////////////////////////// +// +// Written: James Stine +// Modified: 9/28/2021 +// +// Purpose: FSM for floating point divider/square root unit (Goldschmidt) +// +// A component of the Wally configurable RISC-V project.
+// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, +// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT +// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+/////////////////////////////////////////// - input logic clk, - input logic reset, - input logic start, - input logic op_type, - output logic done, // End of cycles - output logic load_rega, // enable for regA - output logic load_regb, // enable for regB - output logic load_regc, // enable for regC - output logic load_regd, // enable for regD - output logic load_regr, // enable for rem - output logic load_regs, // enable for q,qm,qp - output logic [2:0] sel_muxa, // Select muxA - output logic [2:0] sel_muxb, // Select muxB - output logic sel_muxr, // Select rem mux - output logic divBusy // calculation is happening +module fsm ( + input logic clk, + input logic reset, + input logic start, + input logic op_type, + output logic done, // End of cycles + output logic load_rega, // enable for regA + output logic load_regb, // enable for regB + output logic load_regc, // enable for regC + output logic load_regd, // enable for regD + output logic load_regr, // enable for rem + output logic load_regs, // enable for q,qm,qp + output logic [2:0] sel_muxa, // Select muxA + output logic [2:0] sel_muxb, // Select muxB + output logic sel_muxr, // Select rem mux + output logic divBusy // calculation is happening ); - - reg [4:0] CURRENT_STATE; - reg [4:0] NEXT_STATE; - - parameter [4:0] - S0=5'd0, S1=5'd1, S2=5'd2, - S3=5'd3, S4=5'd4, S5=5'd5, - S6=5'd6, S7=5'd7, S8=5'd8, - S9=5'd9, S10=5'd10, - S13=5'd13, S14=5'd14, S15=5'd15, - S16=5'd16, S17=5'd17, S18=5'd18, - S19=5'd19, S20=5'd20, S21=5'd21, - S22=5'd22, S23=5'd23, S24=5'd24, - S25=5'd25, S26=5'd26, S27=5'd27, - S28=5'd28, S29=5'd29, S30=5'd30; + typedef enum logic [4:0] {S0, S1, S2, S3, S4, S5, S6, S7, S8, S9, + S10, S11, S12, S13, S14, S15, S16, S17, S18, S19, + S20, S21, S22, S23, S24, S25, S26, S27, S28, S29, + S30} statetype; + + statetype current_state, next_state; // registered state and its combinational successor always @(negedge clk) begin - if(reset==1'b1) - CURRENT_STATE=S0; + if (reset == 1'b1) + current_state <= S0; else - CURRENT_STATE=NEXT_STATE; + current_state <= next_state; end always @(*) begin - case(CURRENT_STATE) + case(current_state) S0: // iteration 0 begin if (start==1'b0) @@ -59,7 +73,7 @@ module fsm ( sel_muxa = 3'b000; sel_muxb = 3'b000; sel_muxr =
1'b0; - NEXT_STATE = S0; + next_state = S0; end else if (start==1'b1 && op_type==1'b0) begin @@ -74,7 +88,7 @@ module fsm ( sel_muxa = 3'b001; sel_muxb = 3'b001; sel_muxr = 1'b0; - NEXT_STATE = S1; + next_state = S1; end // if (start==1'b1 && op_type==1'b0) else if (start==1'b1 && op_type==1'b1) begin @@ -89,7 +103,7 @@ module fsm ( sel_muxa = 3'b010; sel_muxb = 3'b000; sel_muxr = 1'b0; - NEXT_STATE = S13; + next_state = S13; end else begin @@ -104,7 +118,7 @@ module fsm ( sel_muxa = 3'b000; sel_muxb = 3'b000; sel_muxr = 1'b0; - NEXT_STATE = S0; + next_state = S0; end end // case: S0 S1: @@ -120,7 +134,7 @@ module fsm ( sel_muxa = 3'b010; sel_muxb = 3'b000; sel_muxr = 1'b0; - NEXT_STATE = S2; + next_state = S2; end S2: // iteration 1 begin @@ -135,7 +149,7 @@ module fsm ( sel_muxa = 3'b011; sel_muxb = 3'b011; sel_muxr = 1'b0; - NEXT_STATE = S3; + next_state = S3; end S3: begin @@ -150,7 +164,7 @@ module fsm ( sel_muxa = 3'b000; sel_muxb = 3'b010; sel_muxr = 1'b0; - NEXT_STATE = S4; + next_state = S4; end S4: // iteration 2 begin @@ -165,7 +179,7 @@ module fsm ( sel_muxa = 3'b011; sel_muxb = 3'b011; sel_muxr = 1'b0; - NEXT_STATE = S5; + next_state = S5; end S5: begin @@ -180,7 +194,7 @@ module fsm ( sel_muxa = 3'b000; sel_muxb = 3'b010; sel_muxr = 1'b0; // add - NEXT_STATE = S6; + next_state = S6; end S6: // iteration 3 begin @@ -195,7 +209,7 @@ module fsm ( sel_muxa = 3'b011; sel_muxb = 3'b011; sel_muxr = 1'b0; - NEXT_STATE = S8; + next_state = S8; end S7: begin @@ -210,7 +224,7 @@ module fsm ( sel_muxa = 3'b000; sel_muxb = 3'b010; sel_muxr = 1'b0; - NEXT_STATE = S8; + next_state = S8; end // case: S7 S8: // q,qm,qp begin @@ -225,7 +239,7 @@ module fsm ( sel_muxa = 3'b000; sel_muxb = 3'b000; sel_muxr = 1'b0; - NEXT_STATE = S9; + next_state = S9; end S9: // rem begin @@ -240,7 +254,7 @@ module fsm ( sel_muxa = 3'b000; sel_muxb = 3'b000; sel_muxr = 1'b1; - NEXT_STATE = S10; + next_state = S10; end S10: // done begin @@ -255,7 +269,7 @@ module fsm ( sel_muxa = 3'b000; 
sel_muxb = 3'b000; sel_muxr = 1'b0; - NEXT_STATE = S0; + next_state = S0; end S13: // start of sqrt path begin @@ -270,7 +284,7 @@ module fsm ( sel_muxa = 3'b010; sel_muxb = 3'b001; sel_muxr = 1'b0; - NEXT_STATE = S14; + next_state = S14; end S14: begin @@ -285,7 +299,7 @@ module fsm ( sel_muxa = 3'b001; sel_muxb = 3'b100; sel_muxr = 1'b0; - NEXT_STATE = S15; + next_state = S15; end S15: // iteration 1 begin @@ -300,7 +314,7 @@ module fsm ( sel_muxa = 3'b011; sel_muxb = 3'b011; sel_muxr = 1'b0; - NEXT_STATE = S16; + next_state = S16; end S16: begin @@ -315,7 +329,7 @@ module fsm ( sel_muxa = 3'b000; sel_muxb = 3'b011; sel_muxr = 1'b0; - NEXT_STATE = S17; + next_state = S17; end S17: begin @@ -330,7 +344,7 @@ module fsm ( sel_muxa = 3'b100; sel_muxb = 3'b010; sel_muxr = 1'b0; - NEXT_STATE = S18; + next_state = S18; end S18: // iteration 2 begin @@ -345,7 +359,7 @@ module fsm ( sel_muxa = 3'b011; sel_muxb = 3'b011; sel_muxr = 1'b0; - NEXT_STATE = S19; + next_state = S19; end S19: begin @@ -360,7 +374,7 @@ module fsm ( sel_muxa = 3'b000; sel_muxb = 3'b011; sel_muxr = 1'b0; - NEXT_STATE = S20; + next_state = S20; end S20: begin @@ -375,7 +389,7 @@ module fsm ( sel_muxa = 3'b100; sel_muxb = 3'b010; sel_muxr = 1'b0; - NEXT_STATE = S21; + next_state = S21; end S21: // iteration 3 begin @@ -390,7 +404,7 @@ module fsm ( sel_muxa = 3'b011; sel_muxb = 3'b011; sel_muxr = 1'b0; - NEXT_STATE = S22; + next_state = S22; end S22: begin @@ -405,7 +419,7 @@ module fsm ( sel_muxa = 3'b000; sel_muxb = 3'b011; sel_muxr = 1'b0; - NEXT_STATE = S23; + next_state = S23; end S23: begin @@ -420,7 +434,7 @@ module fsm ( sel_muxa = 3'b100; sel_muxb = 3'b010; sel_muxr = 1'b0; - NEXT_STATE = S24; + next_state = S24; end S24: // q,qm,qp begin @@ -435,7 +449,7 @@ module fsm ( sel_muxa = 3'b000; sel_muxb = 3'b000; sel_muxr = 1'b0; - NEXT_STATE = S25; + next_state = S25; end S25: // rem begin @@ -450,7 +464,7 @@ module fsm ( sel_muxa = 3'b011; sel_muxb = 3'b110; sel_muxr = 1'b1; - NEXT_STATE = S26; + 
next_state = S26; end S26: // done begin @@ -465,7 +479,7 @@ module fsm ( sel_muxa = 3'b000; sel_muxb = 3'b000; sel_muxr = 1'b0; - NEXT_STATE = S0; + next_state = S0; end default: begin @@ -480,9 +494,9 @@ module fsm ( sel_muxa = 3'b000; sel_muxb = 3'b000; sel_muxr = 1'b0; - NEXT_STATE = S0; + next_state = S0; end - endcase // case(CURRENT_STATE) - end // always @ (CURRENT_STATE or X) + endcase // case(current_state) + end // always @ (current_state or X) endmodule // fsm diff --git a/wally-pipelined/src/fpu/rounder_div.sv b/wally-pipelined/src/fpu/rounder_div.sv index ff7c4830..1d2ff1cc 100755 --- a/wally-pipelined/src/fpu/rounder_div.sv +++ b/wally-pipelined/src/fpu/rounder_div.sv @@ -1,37 +1,55 @@ +/////////////////////////////////////////// // -// The rounder takes as inputs a 64-bit value to be rounded, A, the -// exponent of the value to be rounded, the sign of the final result, Sign, -// the precision of the results, P, and the two-bit rounding mode, rm. -// It produces a rounded 52-bit result, Z, the exponent of the rounded -// result, Z_exp, and a flag that indicates if the result was rounded, -// Inexact. The rounding mode has the following values. -// rm Mode -// 00 round-to-nearest-even -// 01 round-toward-zero -// 10 round-toward-plus infinity -// 11 round-toward-minus infinity +// Written: James Stine +// Modified: 8/1/2018 // +// Purpose: Floating point divider/square root rounder unit (Goldschmidt) +// +// A component of the Wally configurable RISC-V project. 
+// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, +// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT +// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+/////////////////////////////////////////// module rounder_div ( - input logic [1:0] rm, - input logic P, - input logic OvEn, - input logic UnEn, - input logic [12:0] exp_diff, - input logic [2:0] sel_inv, - input logic Invalid, - input logic SignR, - - input logic [63:0] q1, - input logic [63:0] qm1, - input logic [63:0] qp1, - input logic [63:0] q0, - input logic [63:0] qm0, - input logic [63:0] qp0, + input logic [1:0] rm, + input logic P, + input logic OvEn, + input logic UnEn, + input logic [12:0] exp_diff, + input logic [2:0] sel_inv, + input logic Invalid, + input logic SignR, + input logic [63:0] Float1, + input logic [63:0] Float2, + input logic XNaNQ, + input logic YNaNQ, + input logic XZeroQ, + input logic YZeroQ, + input logic XInfQ, + input logic YInfQ, + input logic op_type, + input logic [63:0] q1, + input logic [63:0] qm1, + input logic [63:0] qp1, + input logic [63:0] q0, + input logic [63:0] qm0, + input logic [63:0] qp0, input logic [127:0] regr_out, output logic [63:0] Result, - output logic [4:0] Flags + output logic [4:0] Flags ); logic Rsign; @@ -56,11 +74,15 @@ module rounder_div ( logic Texp_l7z; logic Texp_l7o; logic OvCon; - logic zero_rem; - logic [1:0] mux_mant; + logic zero_rem; + logic [1:0] mux_mant; logic sign_rem; - logic [63:0] q, qm, qp; - logic exp_ovf; + logic [63:0] q, qm, qp; + logic exp_ovf; + + logic [50:0] NaN_out; + logic NaN_Sign_out; + logic Sign_out; // Remainder = 0? assign zero_rem = ~(|regr_out); @@ -117,12 +139,11 @@ module rounder_div ( // the input was infinite or NaN or the output of the adder is zero. 
// 00 = Valid // 10 = NaN - assign Valid = (~sel_inv[2]&~sel_inv[1]&~sel_inv[0]); - assign NaN = ~sel_inv[1]& sel_inv[0]; + assign Valid = ~sel_inv[2]&~sel_inv[1]&~sel_inv[0]; + assign NaN = sel_inv[2]&sel_inv[1]&sel_inv[0]; assign UnderFlow = (P & UnFlow_SP | UnFlow_DP) & Valid; assign OverFlow = (P & OvFlow_SP | OvFlow_DP) & Valid; - assign Div0 = sel_inv[2]&sel_inv[1]&~sel_inv[0]; - + assign Div0 = YZeroQ&~XZeroQ&~op_type&~NaN; // The final result is Inexact if any rounding occurred ((i.e., R or S // is one), or (if the result overflows ) or (if the result underflows and the @@ -161,18 +182,26 @@ module rounder_div ( // If the result is zero or infinity, the mantissa is all zeros. // If the result is NaN, the mantissa is 10...0 // If the result the largest floating point number, the mantissa - // is all ones. Otherwise, the mantissa is not changed. - assign Rmant[51] = Largest | NaN | (Smant[51]&~Infinite&~Rzero); - assign Rmant[50:0] = {51{Largest}} | (Smant[50:0]&{51{~Infinite&Valid&~Rzero}}); + // is all ones. Otherwise, the mantissa is not changed. + assign NaN_out = ~XNaNQ&YNaNQ ? Float2[50:0] : Float1[50:0]; + assign NaN_Sign_out = ~XNaNQ&YNaNQ ? Float2[63] : Float1[63]; + assign Sign_out = (XZeroQ&YZeroQ | XInfQ&YInfQ)&~op_type | Rsign&~XNaNQ&~YNaNQ | + NaN_Sign_out&(XNaNQ|YNaNQ); + // FIXME (jes) - Imperas gives sNaN a Sign=0 where x86 gives Sign=1 + // | Float1[63]&op_type; + assign Rmant[51] = Largest | NaN | (Smant[51]&~Infinite&~Rzero); + assign Rmant[50:0] = ({51{Largest}} | (Smant[50:0]&{51{~Infinite&Valid&~Rzero}}) | + (NaN_out&{51{NaN}}))&({51{~(op_type&Float1[63]&~XZeroQ)}}); + // For single precision, the 8 least significant bits of the exponent // and 23 most significant bits of the mantissa contain bits used // for the final result. A double precision result is returned if // overflow has occurred, the overflow trap is enabled, and a conversion // is being performed. assign OvCon = OverFlow & OvEn; - assign Result = (P&~OvCon) ? 
{ {32{1'b1}}, Rsign, Rexp[7:0], Rmant[51:29]} - : {Rsign, Rexp, Rmant}; + assign Result = (P&~OvCon) ? { {32{1'b1}}, Sign_out, Rexp[7:0], Rmant[51:29]} + : {Sign_out, Rexp, Rmant}; endmodule // rounder diff --git a/wally-pipelined/src/fpu/sbtm_a0.sv b/wally-pipelined/src/fpu/sbtm_a0.sv index 83953787..61dd183b 100644 --- a/wally-pipelined/src/fpu/sbtm_a0.sv +++ b/wally-pipelined/src/fpu/sbtm_a0.sv @@ -1,5 +1,30 @@ +/////////////////////////////////////////// +// +// Written: James Stine +// Modified: 8/1/2018 +// +// Purpose: Bipartite Lookup +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, +// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT +// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+/////////////////////////////////////////// + module sbtm_a0 (input logic [6:0] a, - output logic [12:0] y); + output logic [12:0] y); + always_comb case(a) 7'b0000000: y = 13'b1111111100010; @@ -137,4 +162,4 @@ endmodule // sbtm_a0 - \ No newline at end of file + diff --git a/wally-pipelined/src/fpu/sbtm_a1.sv b/wally-pipelined/src/fpu/sbtm_a1.sv index 76e4bdec..88845283 100644 --- a/wally-pipelined/src/fpu/sbtm_a1.sv +++ b/wally-pipelined/src/fpu/sbtm_a1.sv @@ -1,5 +1,30 @@ +/////////////////////////////////////////// +// +// Written: James Stine +// Modified: 8/1/2018 +// +// Purpose: Bipartite Lookup +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, +// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT +// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+/////////////////////////////////////////// + module sbtm_a1 (input logic [6:0] a, - output logic [4:0] y); + output logic [4:0] y); + always_comb case(a) 7'b0000000: y = 5'b11100; @@ -137,4 +162,4 @@ endmodule // sbtm_a0 - \ No newline at end of file + diff --git a/wally-pipelined/src/fpu/sbtm_a2.sv b/wally-pipelined/src/fpu/sbtm_a2.sv index ae407ec8..8d32ad15 100755 --- a/wally-pipelined/src/fpu/sbtm_a2.sv +++ b/wally-pipelined/src/fpu/sbtm_a2.sv @@ -1,5 +1,30 @@ +/////////////////////////////////////////// +// +// Written: James Stine +// Modified: 8/1/2018 +// +// Purpose: Bipartite Lookup +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, +// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT +// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+/////////////////////////////////////////// + module sbtm_a2 (input logic [7:0] a, - output logic [13:0] y); + output logic [13:0] y); + always_comb case(a) 8'b01000000: y = 14'b10110100010111; @@ -201,4 +226,4 @@ endmodule // sbtm_a0 - \ No newline at end of file + diff --git a/wally-pipelined/src/fpu/sbtm_a3.sv b/wally-pipelined/src/fpu/sbtm_a3.sv index c6b36793..5958c3bf 100755 --- a/wally-pipelined/src/fpu/sbtm_a3.sv +++ b/wally-pipelined/src/fpu/sbtm_a3.sv @@ -1,5 +1,30 @@ +/////////////////////////////////////////// +// +// Written: James Stine +// Modified: 8/1/2018 +// +// Purpose: Bipartite Lookup +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, +// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT +// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+/////////////////////////////////////////// + module sbtm_a3 (input logic [7:0] a, - output logic [5:0] y); + output logic [5:0] y); + always_comb case(a) 8'b01000000: y = 6'b100110; diff --git a/wally-pipelined/src/fpu/sbtm_div.sv b/wally-pipelined/src/fpu/sbtm_div.sv index 53b56dbd..999106d8 100644 --- a/wally-pipelined/src/fpu/sbtm_div.sv +++ b/wally-pipelined/src/fpu/sbtm_div.sv @@ -1,3 +1,27 @@ +/////////////////////////////////////////// +// +// Written: James Stine +// Modified: 8/1/2018 +// +// Purpose: Bipartite Lookup for divide portion of fpdivsqrt +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, +// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT +// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+/////////////////////////////////////////// + module sbtm_div (input logic [11:0] a, output logic [10:0] ia_out); // bit partitions diff --git a/wally-pipelined/src/fpu/sbtm_sqrt.sv b/wally-pipelined/src/fpu/sbtm_sqrt.sv index 27ffbecc..fdf0bb6d 100644 --- a/wally-pipelined/src/fpu/sbtm_sqrt.sv +++ b/wally-pipelined/src/fpu/sbtm_sqrt.sv @@ -1,3 +1,27 @@ +/////////////////////////////////////////// +// +// Written: James Stine +// Modified: 8/1/2018 +// +// Purpose: Bipartite Lookup for sqrt part of fpdivsqrt +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, +// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT +// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +/////////////////////////////////////////// + module sbtm_sqrt (input logic [11:0] a, output logic [10:0] y); // bit partitions From 2afa6e7a6e2930194b6af9ea43b9b020ca0cd8e3 Mon Sep 17 00:00:00 2001 From: "James E. 
Stine" Date: Wed, 6 Oct 2021 08:56:01 -0500 Subject: [PATCH 3/3] Add test vectors (TV) for testbenches (to be added shortly); however, had to leave off fma due to size. The TV were slightly modified within TestFloat to add underscores for readability. The scripts I created to generate these TV are also included --- .../testbench/fp/create_vectors32.csh | 30 +++++++++++++++++++ .../testbench/fp/create_vectors64.csh | 30 +++++++++++++++++++ 2 files changed, 60 insertions(+) create mode 100755 wally-pipelined/testbench/fp/create_vectors32.csh create mode 100755 wally-pipelined/testbench/fp/create_vectors64.csh diff --git a/wally-pipelined/testbench/fp/create_vectors32.csh b/wally-pipelined/testbench/fp/create_vectors32.csh new file mode 100755 index 00000000..1a43eb4c --- /dev/null +++ b/wally-pipelined/testbench/fp/create_vectors32.csh @@ -0,0 +1,30 @@ +#!/bin/sh +./testfloat_gen -rnear_even f32_add > f32_add_rne.tv +./testfloat_gen -rminMag f32_add > f32_add_rz.tv +./testfloat_gen -rmax f32_add > f32_add_ru.tv +./testfloat_gen -rmin f32_add > f32_add_rd.tv + +./testfloat_gen -rnear_even f32_sub > f32_sub_rne.tv +./testfloat_gen -rminMag f32_sub > f32_sub_rz.tv +./testfloat_gen -rmax f32_sub > f32_sub_ru.tv +./testfloat_gen -rmin f32_sub > f32_sub_rd.tv + +./testfloat_gen -rnear_even f32_mul > f32_mul_rne.tv +./testfloat_gen -rminMag f32_mul > f32_mul_rz.tv +./testfloat_gen -rmax f32_mul > f32_mul_ru.tv +./testfloat_gen -rmin f32_mul > f32_mul_rd.tv + +./testfloat_gen -rnear_even f32_mulAdd > f32_fma_rne.tv +./testfloat_gen -rminMag f32_mulAdd > f32_fma_rz.tv +./testfloat_gen -rmax f32_mulAdd > f32_fma_ru.tv +./testfloat_gen -rmin f32_mulAdd > f32_fma_rd.tv + +./testfloat_gen -rnear_even f32_div > f32_div_rne.tv +./testfloat_gen -rminMag f32_div > f32_div_rz.tv +./testfloat_gen -rmax f32_div > f32_div_ru.tv +./testfloat_gen -rmin f32_div > f32_div_rd.tv + +./testfloat_gen -rnear_even f32_sqrt > f32_sqrt_rne.tv +./testfloat_gen -rminMag f32_sqrt > f32_sqrt_rz.tv +./testfloat_gen -rmax
f32_sqrt > f32_sqrt_ru.tv +./testfloat_gen -rmin f32_sqrt > f32_sqrt_rd.tv diff --git a/wally-pipelined/testbench/fp/create_vectors64.csh b/wally-pipelined/testbench/fp/create_vectors64.csh new file mode 100755 index 00000000..bb0c0fda --- /dev/null +++ b/wally-pipelined/testbench/fp/create_vectors64.csh @@ -0,0 +1,30 @@ +#!/bin/sh +./testfloat_gen -rnear_even f64_add > f64_add_rne.tv +./testfloat_gen -rminMag f64_add > f64_add_rz.tv +./testfloat_gen -rmax f64_add > f64_add_ru.tv +./testfloat_gen -rmin f64_add > f64_add_rd.tv + +./testfloat_gen -rnear_even f64_sub > f64_sub_rne.tv +./testfloat_gen -rminMag f64_sub > f64_sub_rz.tv +./testfloat_gen -rmax f64_sub > f64_sub_ru.tv +./testfloat_gen -rmin f64_sub > f64_sub_rd.tv + +./testfloat_gen -rnear_even f64_mul > f64_mul_rne.tv +./testfloat_gen -rminMag f64_mul > f64_mul_rz.tv +./testfloat_gen -rmax f64_mul > f64_mul_ru.tv +./testfloat_gen -rmin f64_mul > f64_mul_rd.tv + +./testfloat_gen -rnear_even f64_mulAdd > f64_fma_rne.tv +./testfloat_gen -rminMag f64_mulAdd > f64_fma_rz.tv +./testfloat_gen -rmax f64_mulAdd > f64_fma_ru.tv +./testfloat_gen -rmin f64_mulAdd > f64_fma_rd.tv + +./testfloat_gen -rnear_even f64_div > f64_div_rne.tv +./testfloat_gen -rminMag f64_div > f64_div_rz.tv +./testfloat_gen -rmax f64_div > f64_div_ru.tv +./testfloat_gen -rmin f64_div > f64_div_rd.tv + +./testfloat_gen -rnear_even f64_sqrt > f64_sqrt_rne.tv +./testfloat_gen -rminMag f64_sqrt > f64_sqrt_rz.tv +./testfloat_gen -rmax f64_sqrt > f64_sqrt_ru.tv +./testfloat_gen -rmin f64_sqrt > f64_sqrt_rd.tv