From a91c0c8fc714017f25d0aa6a4144e68ee4028efd Mon Sep 17 00:00:00 2001 From: "James E. Stine" Date: Wed, 6 Oct 2021 08:26:09 -0500 Subject: [PATCH] Make changes to fpdiv - still working on clock issue with fsm that was changed from posedge to negedge - also updated fpdivsqrt rounding to handle testfloat --- wally-pipelined/src/fpu/convert_inputs.sv | 29 +- wally-pipelined/src/fpu/exception_div.sv | 27 +- wally-pipelined/src/fpu/fpdiv.sv | 155 +++--- wally-pipelined/src/fpu/fpu.sv | 613 ++++++++++------------ wally-pipelined/src/fpu/fregfile.sv | 33 +- wally-pipelined/src/fpu/fsm.sv | 146 +++--- wally-pipelined/src/fpu/rounder_div.sv | 109 ++-- wally-pipelined/src/fpu/sbtm_a0.sv | 29 +- wally-pipelined/src/fpu/sbtm_a1.sv | 29 +- wally-pipelined/src/fpu/sbtm_a2.sv | 29 +- wally-pipelined/src/fpu/sbtm_a3.sv | 27 +- wally-pipelined/src/fpu/sbtm_div.sv | 24 + wally-pipelined/src/fpu/sbtm_sqrt.sv | 24 + 13 files changed, 698 insertions(+), 576 deletions(-) diff --git a/wally-pipelined/src/fpu/convert_inputs.sv b/wally-pipelined/src/fpu/convert_inputs.sv index bf56cb006..9a0584baa 100755 --- a/wally-pipelined/src/fpu/convert_inputs.sv +++ b/wally-pipelined/src/fpu/convert_inputs.sv @@ -1,9 +1,26 @@ -// This module takes as inputs two operands (op1 and op2) -// the operation type (op_type) and the result precision (P). -// Based on the operation and precision , it conditionally -// converts single precision values to double precision values -// and modifies the sign of op1. The converted operands are Float1 -// and Float2. +/////////////////////////////////////////// +// +// Written: James Stine +// Modified: 8/1/2018 +// +// Purpose: Conditionally convert single-precision operands op1/op2 to double precision (outputs Float1/Float2) +// +// A component of the Wally configurable RISC-V project. 
+// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, +// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT +// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+/////////////////////////////////////////// module convert_inputs( input [63:0] op1, // 1st input operand (A) diff --git a/wally-pipelined/src/fpu/exception_div.sv b/wally-pipelined/src/fpu/exception_div.sv index 374320683..3e701d2fb 100755 --- a/wally-pipelined/src/fpu/exception_div.sv +++ b/wally-pipelined/src/fpu/exception_div.sv @@ -23,9 +23,10 @@ module exception_div ( logic BNaN; // '1' if B is a not-a-number logic ASNaN; // '1' if A is a signalling not-a-number logic BSNaN; // '1' if B is a signalling not-a-number - logic ZQNaN; // '1' if result Z is a quiet NaN + logic ZSNaN; // '1' if result Z is a quiet NaN (Invalid, or A or B is a NaN), despite the "S" in the name logic ZInf; // '1' if result Z is an infnity - logic Zero; // '1' if result is zero + logic Zero; // '1' if result is zero + logic NegSqrt; // '1' if sqrt and operand is negative //***take this module out and add more registers or just recalculate it all // Determine if mantissas are all zeros @@ -48,32 +49,34 @@ module exception_div ( assign AZero = AzeroE & AzeroM; assign BZero = BzeroE & BzeroE; + // Result is NaN if the operation is sqrt and the operand is negative and nonzero + assign NegSqrt = (A[63] & op_type & ~AZero); + // An "Invalid Operation" exception occurs if (A or B is a signalling NaN) // or (A and B are both Infinite) assign Invalid = ASNaN | BSNaN | (((AInf & BInf) | (AZero & BZero))&~op_type) | - (A[63] & op_type); - + NegSqrt; // The result is a quiet NaN if (an "Invalid Operation" exception occurs) // or (A is a NaN) or (B is a NaN). - assign ZQNaN = Invalid | ANaN | BNaN; + assign ZSNaN = Invalid | ANaN | BNaN; // The result is zero assign Zero = (AZero | BInf)&~op_type | AZero&op_type; // The result is +Inf if ((A is Inf) or (B is 0)) and (the // result is not a quiet NaN). 
- assign ZInf = (AInf | BZero)&~ZQNaN&~op_type | AInf&op_type&~ZQNaN; + assign ZInf = (AInf | BZero)&~ZSNaN&~op_type | AInf&op_type&~ZSNaN; // Set the type of the result as follows: // Ztype Result // 000 Normal - // 001 Quiet NaN // 010 Infinity // 011 Zero - // 110 DivZero - assign Ztype[0] = ZQNaN | Zero; - assign Ztype[1] = ZInf | Zero; - assign Ztype[2] = BZero&~op_type; - + // 110 Div by 0 (not produced by the assigns below) + // 111 NaN result (ZSNaN) + assign Ztype[2] = (ZSNaN); + assign Ztype[1] = (ZSNaN) | (Zero) | (ZInf); + assign Ztype[0] = (ZSNaN) | (Zero); + endmodule // exception diff --git a/wally-pipelined/src/fpu/fpdiv.sv b/wally-pipelined/src/fpu/fpdiv.sv index a2534149f..0a937b5b0 100755 --- a/wally-pipelined/src/fpu/fpdiv.sv +++ b/wally-pipelined/src/fpu/fpdiv.sv @@ -1,92 +1,86 @@ +/////////////////////////////////////////// // -// File name : fpdiv -// Title : Floating-Point Divider/Square-Root -// project : FPU -// Library : fpdiv -// Author(s) : James E. Stine, Jr. -// Purpose : definition of main unit to floating-point div/sqrt -// notes : +// Written: James Stine +// Modified: 8/1/2018 // -// Copyright Oklahoma State University +// Purpose: Floating point divider/square root top unit (Goldschmidt) +// +// A component of the Wally configurable RISC-V project. 
+// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University // -// Basic Operations +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, +// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: // -// Step 1: Load operands, set flags, and convert SP to DP -// Step 2: Check for special inputs ( +/- Infinity, NaN) -// Step 3: Exponent Logic -// Step 4: Divide/Sqrt using Goldschmidt -// Step 5: Normalize the result.// -// Shift left until normalized. Normalized when the value to the -// left of the binrary point is 1. -// Step 6: Round the result.// -// Step 7: Put quotient/remainder onto output. +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. // +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT +// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+/////////////////////////////////////////// // `timescale 1ps/1ps module fpdiv ( - input logic clk, - input logic reset, - input logic start, - input logic [63:0] op1, // 1st input operand (A) - input logic [63:0] op2, // 2nd input operand (B) - input logic [1:0] rm, // Rounding mode - specify values - input logic op_type, // Function opcode - input logic P, // Result Precision (0 for double, 1 for single) - input logic OvEn, // Overflow trap enabled - input logic UnEn, // Underflow trap enabled - output logic done, - output logic FDivBusyE, - output logic [63:0] AS_Result, // Result of operation - output logic [4:0] Flags); // IEEE exception flags + input logic clk, + input logic reset, + input logic start, + input logic [63:0] op1, + input logic [63:0] op2, + input logic [1:0] rm, + input logic op_type, + input logic P, + input logic OvEn, + input logic UnEn, + input logic XNaNQ, + input logic YNaNQ, + input logic XZeroQ, + input logic YZeroQ, + input logic XInfQ, + input logic YInfQ, - - logic [63:0] Float1; - logic [63:0] Float2; + output logic done, + output logic FDivBusyE, + output logic [63:0] AS_Result, + output logic [4:0] Flags); - logic [12:0] exp1, exp2, expF; - logic [12:0] exp_diff, bias; - logic [13:0] exp_sqrt; - logic [12:0] exp_s; - logic [12:0] exp_c; + logic [63:0] Float1; + logic [63:0] Float2; - logic [10:0] exponent; - logic [63:0] Result; - logic [52:0] mantissaA; - logic [52:0] mantissaB; + logic [12:0] exp1, exp2, expF; + logic [12:0] exp_diff, bias; + logic [13:0] exp_sqrt; + logic [63:0] Result; + logic [52:0] mantissaA; + logic [52:0] mantissaB; - logic [2:0] sel_inv; - logic Invalid; - logic [4:0] FlagsIn; + logic [2:0] sel_inv; + logic Invalid; + logic [4:0] FlagsIn; logic signResult; logic convert; - logic sub; + logic sub; - logic [63:0] q1, qm1, qp1, q0, qm0, qp0; - logic [63:0] rega_out, regb_out, regc_out, regd_out; - logic [127:0] regr_out; - logic [2:0] sel_muxa, sel_muxb; + logic [63:0] q1, qm1, qp1, q0, qm0, qp0; + logic 
[63:0] rega_out, regb_out, regc_out, regd_out; + logic [127:0] regr_out; + logic [2:0] sel_muxa, sel_muxb; logic sel_muxr; logic load_rega, load_regb, load_regc, load_regd, load_regr; - - logic load_regs; - logic exp_cout1, exp_cout2; - logic exp_odd, open; - // div/sqrt - // fdiv = 0 - // fsqrt = 1 + logic load_regs; + logic exp_cout1, exp_cout2; + logic exp_odd, open; + + // op_type : fdiv=0, fsqrt=1 assign Float1 = op1; assign Float2 = op_type ? op1 : op2; - - // Test for exceptions and return the "Invalid Operation" and - // "Denormalized" Input Flags. The "sel_inv" is used in - // the third pipeline stage to select the result. Also, op1_Norm - // and op2_Norm are one if op1 and op2 are not zero or denormalized. - // sub is one if the effective operation is subtaction. - exception_div exc1 (.A(Float1), .B(Float2), .op_type, - // output: - .Ztype(sel_inv), .Invalid); - + + // Exception detection + exception_div exc1 (.A(Float1), .B(Float2), .op_type, .Ztype(sel_inv), .Invalid); + // Determine Sign/Mantissa assign signResult = (Float1[63]^Float2[63]); assign mantissaA = {1'b1, Float1[51:0]}; @@ -103,29 +97,30 @@ module fpdiv ( assign {exp_cout2, exp_sqrt} = {1'b0, exp1} + {4'h0, 10'h3ff} + exp_odd; // Choose correct exponent assign expF = op_type ? 
exp_sqrt[13:1] : exp_diff; - + // Main Goldschmidt/Division Routine divconv goldy (.q1, .qm1, .qp1, .q0, .qm0, .qp0, .rega_out, .regb_out, .regc_out, .regd_out, .regr_out, .d(mantissaB), .n(mantissaA), .sel_muxa, .sel_muxb, .sel_muxr, .reset, .clk, .load_rega, .load_regb, .load_regc, .load_regd, .load_regr, .load_regs, .P, .op_type, .exp_odd); - + // FSM : control divider fsm control (.clk, .reset, .start, .op_type, - // outputs: - .done, .load_rega, .load_regb, .load_regc, .load_regd, - .load_regr, .load_regs, .sel_muxa, .sel_muxb, .sel_muxr, - .divBusy(FDivBusyE)); + .done, .load_rega, .load_regb, .load_regc, .load_regd, + .load_regr, .load_regs, .sel_muxa, .sel_muxb, .sel_muxr, + .divBusy(FDivBusyE)); // Round the mantissa to a 52-bit value, with the leading one // removed. The rounding units also handles special cases and // set the exception flags. rounder_div round1 (.rm, .P, .OvEn, .UnEn, .exp_diff(expF), - .sel_inv, .Invalid, .SignR(signResult), - .q1, .qm1, .qp1, .q0, .qm0, .qp0, .regr_out, - // outputs: - .Result, .Flags(FlagsIn)); - + .sel_inv, .Invalid, .SignR(signResult), + .Float1(op1), .Float2(op2), + .XNaNQ, .YNaNQ, .XZeroQ, .YZeroQ, + .XInfQ, .YInfQ, .op_type, + .q1, .qm1, .qp1, .q0, .qm0, .qp0, .regr_out, + .Result, .Flags(FlagsIn)); + // Store the final result and the exception flags in registers. 
flopenr #(64) rega (clk, reset, done, Result, AS_Result); flopenr #(5) regc (clk, reset, done, FlagsIn, Flags); diff --git a/wally-pipelined/src/fpu/fpu.sv b/wally-pipelined/src/fpu/fpu.sv index cadfafae0..34aa3edd3 100755 --- a/wally-pipelined/src/fpu/fpu.sv +++ b/wally-pipelined/src/fpu/fpu.sv @@ -1,6 +1,6 @@ /////////////////////////////////////////// // -// Written: Katherine Parry, Bret Mathis +// Written: Katherine Parry, James Stine, Brett Mathis // Modified: 6/23/2021 // // Purpose: FPU @@ -25,24 +25,24 @@ `include "wally-config.vh" module fpu ( - input logic clk, - input logic reset, - input logic [2:0] FRM_REGW, // Rounding mode from CSR - input logic [31:0] InstrD, // instruction from IFU - input logic [`XLEN-1:0] ReadDataW,// Read data from memory - input logic [`XLEN-1:0] SrcAE, // Integer input being processed (from IEU) - input logic [`XLEN-1:0] SrcAM, // Integer input being written into fpreg (from IEU) - input logic StallE, StallM, StallW, // stall signals from HZU - input logic FlushE, FlushM, FlushW, // flush signals from HZU - input logic [4:0] RdE, RdM, RdW, // which FP register to write to (from IEU) - output logic FRegWriteM, // FP register write enable - output logic FStallD, // Stall the decode stage - output logic FWriteIntE, FWriteIntM, FWriteIntW, // integer register write enable - output logic [`XLEN-1:0] FWriteDataE, // Data to be written to memory - output logic [`XLEN-1:0] FIntResM, // data to be written to integer register - output logic FDivBusyE, // Is the divide/sqrt unit busy (stall execute stage) - output logic IllegalFPUInstrD, // Is the instruction an illegal fpu instruction - output logic [4:0] SetFflagsM // FMA flags (to privileged unit) + input logic clk, + input logic reset, + input logic [2:0] FRM_REGW, // Rounding mode from CSR + input logic [31:0] InstrD, // instruction from IFU + input logic [`XLEN-1:0] ReadDataW,// Read data from memory + input logic [`XLEN-1:0] SrcAE, // Integer input being processed (from IEU) + 
input logic [`XLEN-1:0] SrcAM, // Integer input being written into fpreg (from IEU) + input logic StallE, StallM, StallW, // stall signals from HZU + input logic FlushE, FlushM, FlushW, // flush signals from HZU + input logic [4:0] RdE, RdM, RdW, // which FP register to write to (from IEU) + output logic FRegWriteM, // FP register write enable + output logic FStallD, // Stall the decode stage + output logic FWriteIntE, FWriteIntM, FWriteIntW, // integer register write enable + output logic [`XLEN-1:0] FWriteDataE, // Data to be written to memory + output logic [`XLEN-1:0] FIntResM, // data to be written to integer register + output logic FDivBusyE, // Is the divide/sqrt unit busy (stall execute stage) + output logic IllegalFPUInstrD, // Is the instruction an illegal fpu instruction + output logic [4:0] SetFflagsM // FMA flags (to privileged unit) ); //*** make everything FLEN at some point @@ -59,338 +59,257 @@ module fpu ( generate if (`F_SUPPORTED | `D_SUPPORTED) begin : fpu - // control signals - logic FRegWriteD, FRegWriteE, FRegWriteW; // FP register write enable - logic [2:0] FrmD, FrmE, FrmM; // FP rounding mode - logic FmtD, FmtE, FmtM, FmtW; // FP precision 0-single 1-double - logic FDivStartD, FDivStartE; // Start division or squareroot - logic FWriteIntD; // Write to integer register - logic [1:0] FForwardXE, FForwardYE, FForwardZE; // forwarding mux control signals - logic [1:0] FResultSelD, FResultSelE, FResultSelM, FResultSelW; // Select the result written to FP register - logic [2:0] FOpCtrlD, FOpCtrlE, FOpCtrlM; // Select which opperation to do in each component - logic [2:0] FResSelD, FResSelE, FResSelM; // Select one of the results that finish in the memory stage - logic [1:0] FIntResSelD, FIntResSelE, FIntResSelM; // Select the result written to the integer resister - logic [4:0] Adr1E, Adr2E, Adr3E; // adresses of each input + // control signals + logic FRegWriteD, FRegWriteE, FRegWriteW; // FP register write enable + logic [2:0] FrmD, FrmE, 
FrmM; // FP rounding mode + logic FmtD, FmtE, FmtM, FmtW; // FP precision 0-single 1-double + logic FDivStartD, FDivStartE; // Start division or squareroot + logic FWriteIntD; // Write to integer register + logic [1:0] FForwardXE, FForwardYE, FForwardZE; // forwarding mux control signals + logic [1:0] FResultSelD, FResultSelE, FResultSelM, FResultSelW; // Select the result written to FP register + logic [2:0] FOpCtrlD, FOpCtrlE, FOpCtrlM; // Select which opperation to do in each component + logic [2:0] FResSelD, FResSelE, FResSelM; // Select one of the results that finish in the memory stage + logic [1:0] FIntResSelD, FIntResSelE, FIntResSelM; // Select the result written to the integer resister + logic [4:0] Adr1E, Adr2E, Adr3E; // adresses of each input + + // regfile signals + logic [63:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage + logic [63:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage + logic [63:0] FSrcXE, FSrcXM; // Input 1 to the various units (after forwarding) + logic [63:0] FPreSrcYE, FSrcYE; // Input 2 to the various units (after forwarding) + logic [63:0] FPreSrcZE, FSrcZE; // Input 3 to the various units (after forwarding) + + // unpacking signals + logic XSgnE, YSgnE, ZSgnE; // input's sign - execute stage + logic XSgnM, YSgnM; // input's sign - memory stage + logic [10:0] XExpE, YExpE, ZExpE; // input's exponent - execute stage + logic [10:0] XExpM, YExpM, ZExpM; // input's exponent - memory stage + logic [52:0] XManE, YManE, ZManE; // input's fraction - execute stage + logic [52:0] XManM, YManM, ZManM; // input's fraction - memory stage + logic [10:0] BiasE; // bias based on precision (single=7f double=3ff - max expoent/2) + logic XNaNE, YNaNE, ZNaNE; // is the input a NaN - execute stage + logic XNaNM, YNaNM, ZNaNM; // is the input a NaN - memory stage + logic XSNaNE, YSNaNE, ZSNaNE; // is the input a signaling NaN - execute stage + logic XSNaNM, YSNaNM, ZSNaNM; // is the input a signaling NaN - 
memory stage + logic XDenormE, YDenormE, ZDenormE; // is the input denormalized + logic XZeroE, YZeroE, ZZeroE; // is the input zero - execute stage + logic XZeroM, YZeroM, ZZeroM; // is the input zero - memory stage + logic XInfE, YInfE, ZInfE; // is the input infinity - execute stage + logic XInfM, YInfM, ZInfM; // is the input infinity - memory stage + logic XExpMaxE; // is the exponent all ones (max value) + logic XNormE; // is normal + + // result and flag signals + logic [63:0] FDivResM, FDivResW; // divide/squareroot result + logic [4:0] FDivFlgM, FDivFlgW; // divide/squareroot flags + logic [63:0] FMAResM, FMAResW; // FMA/multiply result + logic [4:0] FMAFlgM, FMAFlgW; // FMA/multiply result + logic [63:0] ReadResW; // read result (load instruction) + logic [63:0] CvtFpResE, CvtFpResM, CvtFpResW; // add/FP -> FP convert result + logic [4:0] CvtFpFlgE, CvtFpFlgM, CvtFpFlgW; // add/FP -> FP convert flags + logic [63:0] CvtResE, CvtResM; // FP <-> int convert result + logic [4:0] CvtFlgE, CvtFlgM; // FP <-> int convert flags //*** trim this + logic [63:0] ClassResE, ClassResM; // classify result + logic [63:0] CmpResE, CmpResM; // compare result + logic CmpNVE, CmpNVM; // compare invalid flag (Not Valid) + logic [63:0] SgnResE, SgnResM; // sign injection result + logic SgnNVE, SgnNVM; // sign injection invalid flag (Not Valid) + logic [63:0] FResE, FResM, FResW; // selected result that is ready in the memory stage + logic [4:0] FFlgE, FFlgM; // selected flag that is ready in the memory stage + logic [`XLEN-1:0] FIntResE; + logic [63:0] FPUResultW; // final FP result being written to the FP register + + // other signals + logic FDivSqrtDoneE; // is divide done + logic [63:0] DivInput1E, DivInput2E; // inputs to divide/squareroot unit + logic FDivClk; // clock for divide/squareroot unit + logic [63:0] AlignedSrcAE; // align SrcA to the floating point format + + // DECODE STAGE + // calculate FP control signals + fctrl fctrl (.Funct7D(InstrD[31:25]), 
.OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .FRM_REGW, + .IllegalFPUInstrD, .FRegWriteD, .FDivStartD, .FResultSelD, .FOpCtrlD, .FResSelD, + .FIntResSelD, .FmtD, .FrmD, .FWriteIntD); - // regfile signals - logic [63:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage - logic [63:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage - logic [63:0] FSrcXE, FSrcXM; // Input 1 to the various units (after forwarding) - logic [63:0] FPreSrcYE, FSrcYE; // Input 2 to the various units (after forwarding) - logic [63:0] FPreSrcZE, FSrcZE; // Input 3 to the various units (after forwarding) - - // unpacking signals - logic XSgnE, YSgnE, ZSgnE; // input's sign - execute stage - logic XSgnM, YSgnM; // input's sign - memory stage - logic [10:0] XExpE, YExpE, ZExpE; // input's exponent - execute stage - logic [10:0] XExpM, YExpM, ZExpM; // input's exponent - memory stage - logic [52:0] XManE, YManE, ZManE; // input's fraction - execute stage - logic [52:0] XManM, YManM, ZManM; // input's fraction - memory stage - logic [10:0] BiasE; // bias based on precision (single=7f double=3ff - max expoent/2) - logic XNaNE, YNaNE, ZNaNE; // is the input a NaN - execute stage - logic XNaNM, YNaNM, ZNaNM; // is the input a NaN - memory stage - logic XSNaNE, YSNaNE, ZSNaNE; // is the input a signaling NaN - execute stage - logic XSNaNM, YSNaNM, ZSNaNM; // is the input a signaling NaN - memory stage - logic XDenormE, YDenormE, ZDenormE; // is the input denormalized - logic XZeroE, YZeroE, ZZeroE; // is the input zero - execute stage - logic XZeroM, YZeroM, ZZeroM; // is the input zero - memory stage - logic XInfE, YInfE, ZInfE; // is the input infinity - execute stage - logic XInfM, YInfM, ZInfM; // is the input infinity - memory stage - logic XExpMaxE; // is the exponent all ones (max value) - logic XNormE; // is normal - - - // result and flag signals - logic [63:0] FDivResM, FDivResW; // divide/squareroot result - logic [4:0] FDivFlgM, 
FDivFlgW; // divide/squareroot flags + // FP register file + // - can read 3 registers and write 1 register every cycle + fregfile fregfile (.clk, .reset, .we4(FRegWriteW), + .a1(InstrD[19:15]), .a2(InstrD[24:20]), .a3(InstrD[31:27]), .a4(RdW), + .wd4(FPUResultW), + .rd1(FRD1D), .rd2(FRD2D), .rd3(FRD3D)); + + // D/E pipeline registers + flopenrc #(64) DEReg1(clk, reset, FlushE, ~StallE, FRD1D, FRD1E); + flopenrc #(64) DEReg2(clk, reset, FlushE, ~StallE, FRD2D, FRD2E); + flopenrc #(64) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E); + flopenrc #(15) DEAdrReg(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]}, + {Adr1E, Adr2E, Adr3E}); + flopenrc #(17) DECtrlReg3(clk, reset, FlushE, ~StallE, + {FRegWriteD, FResultSelD, FResSelD, FIntResSelD, FrmD, FmtD, FOpCtrlD, FWriteIntD, FDivStartD}, + {FRegWriteE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE, FDivStartE}); + + // EXECUTION STAGE + // Hazard unit for FPU + // - determines if any forwarding or stalls are needed + fhazard fhazard(.Adr1E, .Adr2E, .Adr3E, .FRegWriteM, .FRegWriteW, .RdM, .RdW, .FResultSelM, + .FStallD, .FForwardXE, .FForwardYE, .FForwardZE); + + // forwarding muxs + mux3 #(64) fxemux(FRD1E, FPUResultW, FResM, FForwardXE, FSrcXE); + mux3 #(64) fyemux(FRD2E, FPUResultW, FResM, FForwardYE, FPreSrcYE); + mux3 #(64) fzemux(FRD3E, FPUResultW, FResM, FForwardZE, FPreSrcZE); + mux3 #(64) fyaddmux(FPreSrcYE, {{32{1'b1}}, 2'b0, {7{1'b1}}, 23'b0}, + {2'b0, {10{1'b1}}, 52'b0}, + {FmtE&FOpCtrlE[2]&FOpCtrlE[1]&(FResultSelE==3'b01), ~FmtE&FOpCtrlE[2]&FOpCtrlE[1]&(FResultSelE==3'b01)}, + FSrcYE); // Force Y to a constant 1.0 (single- or double-precision encoding, chosen by FmtE) when FOpCtrlE[2:1]==2'b11 and FResultSelE==2'b01 + // Force Z to be 0 for multiply instructions + mux3 #(64) fzmulmux(FPreSrcZE, 64'b0, FPreSrcYE, {FOpCtrlE[2]&FOpCtrlE[1], FOpCtrlE[2]&~FOpCtrlE[1]}, FSrcZE); + + // unpacking unit + // - splits FP inputs into their various parts + // - does some classifications (SNaN, NaN, Denorm, Norm, Zero, Infinity) + unpacking 
unpacking(.X(FSrcXE), .Y(FSrcYE), .Z(FSrcZE), .FOpCtrlE, .FmtE, + .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE, + .XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE, .XDenormE, .YDenormE, .ZDenormE, + .XZeroE, .YZeroE, .ZZeroE, .BiasE, .XInfE, .YInfE, .ZInfE, .XExpMaxE, .XNormE); + + // FMA + // - two stage FMA + // - execute stage - multiplication and addend shifting + // - memory stage - addition and rounding + // - handles FMA and multiply instructions + fma fma (.clk, .reset, .FlushM, .StallM, + .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE, + .XDenormE, .YDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, .BiasE, + .XSgnM, .YSgnM, .XExpM, .YExpM, .ZExpM, .XManM, .YManM, .ZManM, + .XNaNM, .YNaNM, .ZNaNM, .XZeroM, .YZeroM, .ZZeroM, + .XInfM, .YInfM, .ZInfM, .XSNaNM, .YSNaNM, .ZSNaNM, + .FOpCtrlE, + .FmtE, .FmtM, .FrmM, + .FMAFlgM, .FMAResM); + + // clock gater + // - creates a clock that only runs durring divide/sqrt instructions + // - using the seperate clock gives the divide/sqrt unit some to get set up + // *** the module says not to use in synthisis + clockgater fpdivclkg(.E(FDivStartE), + .SE(1'b0), + .CLK(clk), + .ECLK(FDivClk)); + + // capture the inputs for divide/sqrt + // - if not captured any forwarded inputs will change durring computation + // - this problem is caused by stalling the execute stage + // - the other units don't have this problem, only div/sqrt stalls the execute stage + flopenrc #(64) reg_input1 (.d({XSgnE, XExpE, XManE[51:0]}), .q(DivInput1E), + .en(1'b1), .clear(FDivSqrtDoneE), + .reset(reset), .clk(FDivBusyE)); + flopenrc #(64) reg_input2 (.d({YSgnE, YExpE, YManE[51:0]}), .q(DivInput2E), + .en(1'b1), .clear(FDivSqrtDoneE), + .reset(reset), .clk(FDivBusyE)); + flopenrc #(6) reg_input3 (.d({XNaNE, YNaNE, XInfE, YInfE, XZeroE, YZeroE}), + .q({XNaNQ, YNaNQ, XInfQ, YInfQ, XZeroQ, YZeroQ}), + .en(1'b1), .clear(FDivSqrtDoneE), + .reset(reset), .clk(FDivBusyE)); + + // fpdivsqrt using 
Goldschmidt's iteration + fpdiv fdivsqrt (.op1(DivInput1E), .op2(DivInput2E), .rm(FrmE[1:0]), .op_type(FOpCtrlE[0]), + .reset, .clk(FDivClk), .start(FDivStartE), .P(~FmtE), .OvEn(1'b1), .UnEn(1'b1), + .XNaNQ, .YNaNQ, .XInfQ, .YInfQ, .XZeroQ, .YZeroQ, + .FDivBusyE, .done(FDivSqrtDoneE), .AS_Result(FDivResM), .Flags(FDivFlgM)); + + // convert from signle to double and vice versa + cvtfp cvtfp (.XExpE, .XManE, .XSgnE, .XZeroE, .XDenormE, .XInfE, .XNaNE, .XSNaNE, .FrmE, .FmtE, .CvtFpResE, .CvtFpFlgE); + + // compare unit + // - computation is done in one stage + // - writes to FP file durring min/max instructions + // - other comparisons write a 1 or 0 to the integer register + fcmp fcmp (.op1({XSgnE,XExpE,XManE[`NF-1:0]}), .op2({YSgnE,YExpE,YManE[`NF-1:0]}), + .FSrcXE, .FSrcYE, .FOpCtrlE, + .FmtE, .XNaNE, .YNaNE, .XZeroE, .YZeroE, + .Invalid(CmpNVE), .CmpResE); + + // sign injection unit + // - computation is done in one stage + fsgn fsgn (.SgnOpCodeE(FOpCtrlE[1:0]), .XSgnE, .YSgnE, .FSrcXE, .FmtE, .XExpMaxE, + .SgnNVE, .SgnResE); + + // classify + // - computation is done in one stage + // - most of the work is done in the unpacking unit + // - result is written to the integer register + fclassify fclassify (.XSgnE, .XDenormE, .XZeroE, .XNaNE, .XInfE, .XNormE, + .XSNaNE, .ClassResE); + + fcvt fcvt (.XSgnE, .XExpE, .XManE, .XZeroE, .XNaNE, .XInfE, .XDenormE, .BiasE, .SrcAE, .FOpCtrlE, .FmtE, .FrmE, + .CvtResE, .CvtFlgE); + + // data to be stored in memory - to IEU + // - FP uses NaN-blocking format + // - if there are any unsused bits the most significant bits are filled with 1s + assign FWriteDataE = FSrcYE[`XLEN-1:0]; + + // Align SrcA to MSB when single precicion + mux2 #(64) SrcAMux({{32{1'b1}}, SrcAE[31:0]}, {{64-`XLEN{1'b1}}, SrcAE}, FmtE, AlignedSrcAE); + + // select a result that may be written to the FP register + mux5 #(64) FResMux(AlignedSrcAE, SgnResE, CmpResE, CvtResE, CvtFpResE, FResSelE, FResE); + mux5 #(5) FFlgMux(5'b0, {4'b0, SgnNVE}, {4'b0, CmpNVE}, 
CvtFlgE, CvtFpFlgE, FResSelE, FFlgE); + + // select the result that may be written to the integer register - to IEU + mux4 #(`XLEN) IntResMux(CmpResE[`XLEN-1:0], FSrcXE[`XLEN-1:0], ClassResE[`XLEN-1:0], + CvtResE[`XLEN-1:0], FIntResSelE, FIntResE); + + // E/M pipe registers + + // flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, FSrcXE, FSrcXM); + flopenrc #(65) EMFpReg2(clk, reset, FlushM, ~StallM, {XSgnE,XExpE,XManE}, {XSgnM,XExpM,XManM}); + flopenrc #(65) EMFpReg3(clk, reset, FlushM, ~StallM, {YSgnE,YExpE,YManE}, {YSgnM,YExpM,YManM}); + flopenrc #(64) EMFpReg4(clk, reset, FlushM, ~StallM, {ZExpE,ZManE}, {ZExpM,ZManM}); + flopenrc #(12) EMFpReg5(clk, reset, FlushM, ~StallM, + {XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE}, + {XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM}); + flopenrc #(64) EMRegCmpRes(clk, reset, FlushM, ~StallM, FResE, FResM); + flopenrc #(5) EMRegCmpFlg(clk, reset, FlushM, ~StallM, FFlgE, FFlgM); + flopenrc #(`XLEN) EMRegSgnRes(clk, reset, FlushM, ~StallM, FIntResE, FIntResM); + flopenrc #(11) EMCtrlReg(clk, reset, FlushM, ~StallM, + {FRegWriteE, FResultSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE}, + {FRegWriteM, FResultSelM, FrmM, FmtM, FOpCtrlM, FWriteIntM}); + + // BEGIN MEMORY STAGE + // FPU flag selection - to privileged + mux4 #(5) FPUFlgMux(5'b0, FMAFlgM, FDivFlgM, FFlgM, FResultSelW, SetFflagsM); - logic [63:0] FMAResM, FMAResW; // FMA/multiply result - logic [4:0] FMAFlgM, FMAFlgW; // FMA/multiply result - - logic [63:0] ReadResW; // read result (load instruction) + // M/W pipe registers + flopenrc #(64) MWRegFma(clk, reset, FlushW, ~StallW, FMAResM, FMAResW); + flopenrc #(64) MWRegDiv(clk, reset, FlushW, ~StallW, FDivResM, FDivResW); + flopenrc #(64) MWRegAdd(clk, reset, FlushW, ~StallW, CvtFpResM, CvtFpResW); + flopenrc #(64) MWRegClass(clk, reset, FlushW, ~StallW, FResM, FResW); + flopenrc #(5) MWCtrlReg(clk, reset, FlushW, ~StallW, + {FRegWriteM, 
FResultSelM, FmtM, FWriteIntM}, + {FRegWriteW, FResultSelW, FmtW, FWriteIntW}); + + // BEGIN WRITEBACK STAGE + + // put ReadData into NaN-blocking format + // - if there are any unsused bits the most significant bits are filled with 1s + // - for load instruction + mux2 #(64) ReadResMux({{32{1'b1}}, ReadDataW[31:0]}, {{64-`XLEN{1'b1}}, ReadDataW}, FmtW, ReadResW); + + // select the result to be written to the FP register + mux4 #(64) FPUResultMux(ReadResW, FMAResW, FDivResW, FResW, FResultSelW, FPUResultW); - logic [63:0] CvtFpResE, CvtFpResM, CvtFpResW; // add/FP -> FP convert result - logic [4:0] CvtFpFlgE, CvtFpFlgM, CvtFpFlgW; // add/FP -> FP convert flags - - logic [63:0] CvtResE, CvtResM; // FP <-> int convert result - logic [4:0] CvtFlgE, CvtFlgM; // FP <-> int convert flags //*** trim this - - logic [63:0] ClassResE, ClassResM; // classify result - - logic [63:0] CmpResE, CmpResM; // compare result - logic CmpNVE, CmpNVM; // compare invalid flag (Not Valid) - - logic [63:0] SgnResE, SgnResM; // sign injection result - logic SgnNVE, SgnNVM; // sign injection invalid flag (Not Valid) - - logic [63:0] FResE, FResM, FResW; // selected result that is ready in the memory stage - logic [4:0] FFlgE, FFlgM; // selected flag that is ready in the memory stage - - logic [`XLEN-1:0] FIntResE; - - logic [63:0] FPUResultW; // final FP result being written to the FP register - - // other signals - logic FDivSqrtDoneE; // is divide done - logic [63:0] DivInput1E, DivInput2E; // inputs to divide/squareroot unit - logic FDivClk; // clock for divide/squareroot unit - logic [63:0] AlignedSrcAE; // align SrcA to the floating point format - - - - - - //////////////////////////////////////////////////////////////////////////////////////// - //DECODE STAGE - //////////////////////////////////////////////////////////////////////////////////////// - - - - // calculate FP control signals - fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), 
.Funct3D(InstrD[14:12]), .FRM_REGW, - // outputs: - .IllegalFPUInstrD, .FRegWriteD, .FDivStartD, .FResultSelD, .FOpCtrlD, .FResSelD, - .FIntResSelD, .FmtD, .FrmD, .FWriteIntD); - - // FP register file - // - can read 3 registers and write 1 register every cycle - fregfile fregfile (.clk, .reset, .we4(FRegWriteW), - .a1(InstrD[19:15]), .a2(InstrD[24:20]), .a3(InstrD[31:27]), .a4(RdW), - .wd4(FPUResultW), - // outputs: - .rd1(FRD1D), .rd2(FRD2D), .rd3(FRD3D)); - - - - - - //////////////////////////////////////////////////////////////////////////////////////// - // D/E pipeline registers - //////////////////////////////////////////////////////////////////////////////////////// - - flopenrc #(64) DEReg1(clk, reset, FlushE, ~StallE, FRD1D, FRD1E); - flopenrc #(64) DEReg2(clk, reset, FlushE, ~StallE, FRD2D, FRD2E); - flopenrc #(64) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E); - flopenrc #(15) DEAdrReg(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]}, - {Adr1E, Adr2E, Adr3E}); - flopenrc #(17) DECtrlReg3(clk, reset, FlushE, ~StallE, - {FRegWriteD, FResultSelD, FResSelD, FIntResSelD, FrmD, FmtD, FOpCtrlD, FWriteIntD, FDivStartD}, - {FRegWriteE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE, FDivStartE}); - - - - - - - - //////////////////////////////////////////////////////////////////////////////////////// - //EXECUTION STAGE - //////////////////////////////////////////////////////////////////////////////////////// - - - // Hazard unit for FPU - // - determines if any forwarding or stalls are needed - fhazard fhazard(.Adr1E, .Adr2E, .Adr3E, .FRegWriteM, .FRegWriteW, .RdM, .RdW, .FResultSelM, - // outputs: - .FStallD, .FForwardXE, .FForwardYE, .FForwardZE); - - - // forwarding muxs - mux3 #(64) fxemux(FRD1E, FPUResultW, FResM, FForwardXE, FSrcXE); - mux3 #(64) fyemux(FRD2E, FPUResultW, FResM, FForwardYE, FPreSrcYE); - mux3 #(64) fzemux(FRD3E, FPUResultW, FResM, FForwardZE, FPreSrcZE); - mux3 #(64) fyaddmux(FPreSrcYE, 
{{32{1'b1}}, 2'b0, {7{1'b1}}, 23'b0}, {2'b0, {10{1'b1}}, 52'b0}, {FmtE&FOpCtrlE[2]&FOpCtrlE[1]&(FResultSelE==3'b01), ~FmtE&FOpCtrlE[2]&FOpCtrlE[1]&(FResultSelE==3'b01)}, FSrcYE); // Force Z to be 0 for multiply instructions - mux3 #(64) fzmulmux(FPreSrcZE, 64'b0, FPreSrcYE, {FOpCtrlE[2]&FOpCtrlE[1], FOpCtrlE[2]&~FOpCtrlE[1]}, FSrcZE); // Force Z to be 0 for multiply instructions - - - // unpacking unit - // - splits FP inputs into their various parts - // - does some classifications (SNaN, NaN, Denorm, Norm, Zero, Infifnity) - unpacking unpacking(.X(FSrcXE), .Y(FSrcYE), .Z(FSrcZE), .FOpCtrlE, .FmtE, - // outputs: - .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE, - .XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE, .XDenormE, .YDenormE, .ZDenormE, - .XZeroE, .YZeroE, .ZZeroE, .BiasE, .XInfE, .YInfE, .ZInfE, .XExpMaxE, .XNormE); - - // FMA - // - two stage FMA - // - execute stage - multiplication and addend shifting - // - memory stage - addition and rounding - // - handles FMA and multiply instructions - // - contains some E/M pipleine registers - // *** currently handles FLEN and 32 bits(dont know if 32 works with 128 - easy to fix) - change to handle only the supported formats - fma fma (.clk, .reset, .FlushM, .StallM, - .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE, - .XDenormE, .YDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, .BiasE, - .XSgnM, .YSgnM, .XExpM, .YExpM, .ZExpM, .XManM, .YManM, .ZManM, - .XNaNM, .YNaNM, .ZNaNM, .XZeroM, .YZeroM, .ZZeroM, - .XInfM, .YInfM, .ZInfM, .XSNaNM, .YSNaNM, .ZSNaNM, - .FOpCtrlE, - .FmtE, .FmtM, .FrmM, - // outputs: - .FMAFlgM, .FMAResM); - - // clock gater - // - creates a clock that only runs durring divide/sqrt instructions - // - using the seperate clock gives the divide/sqrt unit some to get set up - // *** the module says not to use in synthisis - clockgater fpdivclkg(.E(FDivStartE), - .SE(1'b0), - .CLK(clk), - .ECLK(FDivClk)); - - // capture the inputs for divide/sqrt - 
// - if not captured any forwarded inputs will change durring computation - // - this problem is caused by stalling the execute stage - // - the other units don't have this problem, only div/sqrt stalls the execute stage - flopenrc #(64) reg_input1 (.d({XSgnE, XExpE, XManE[51:0]}), .q(DivInput1E), - .en(1'b1), .clear(FDivSqrtDoneE), - .reset(reset), .clk(FDivBusyE)); - flopenrc #(64) reg_input2 (.d({YSgnE, YExpE, YManE[51:0]}), .q(DivInput2E), - .en(1'b1), .clear(FDivSqrtDoneE), - .reset(reset), .clk(FDivBusyE)); - - // output for store instructions - //*** change to use the unpacking unit if possible - fpdiv fdivsqrt (.op1(DivInput1E), .op2(DivInput2E), .rm(FrmE[1:0]), .op_type(FOpCtrlE[0]), - .reset, .clk(FDivClk), .start(FDivStartE), .P(~FmtE), .OvEn(1'b1), .UnEn(1'b1), - // outputs: - .FDivBusyE, .done(FDivSqrtDoneE), .AS_Result(FDivResM), .Flags(FDivFlgM)); - - // convert from signle to double and vice versa - cvtfp cvtfp (.XExpE, .XManE, .XSgnE, .XZeroE, .XDenormE, .XInfE, .XNaNE, .XSNaNE, .FrmE, .FmtE, .CvtFpResE, .CvtFpFlgE); - - // compare unit - // - computation is done in one stage - // - writes to FP file durring min/max instructions - // - other comparisons write a 1 or 0 to the integer register - fcmp fcmp (.op1({XSgnE,XExpE,XManE[`NF-1:0]}), .op2({YSgnE,YExpE,YManE[`NF-1:0]}), - .FSrcXE, .FSrcYE, .FOpCtrlE, - .FmtE, .XNaNE, .YNaNE, .XZeroE, .YZeroE, - // outputs: - .Invalid(CmpNVE), .CmpResE); - - // sign injection unit - // - computation is done in one stage - fsgn fsgn (.SgnOpCodeE(FOpCtrlE[1:0]), .XSgnE, .YSgnE, .FSrcXE, .FmtE, .XExpMaxE, - // outputs: - .SgnNVE, .SgnResE); - - // classify - // - computation is done in one stage - // - most of the work is done in the unpacking unit - // - result is written to the integer register - fclassify fclassify (.XSgnE, .XDenormE, .XZeroE, .XNaNE, .XInfE, .XNormE, - // outputs: - .XSNaNE, .ClassResE); - - fcvt fcvt (.XSgnE, .XExpE, .XManE, .XZeroE, .XNaNE, .XInfE, .XDenormE, .BiasE, .SrcAE, .FOpCtrlE, 
.FmtE, .FrmE, - // outputs: - .CvtResE, .CvtFlgE); - - // data to be stored in memory - to IEU - // - FP uses NaN-blocking format - // - if there are any unsused bits the most significant bits are filled with 1s - assign FWriteDataE = FSrcYE[`XLEN-1:0]; - - - // Align SrcA to MSB when single precicion - mux2 #(64) SrcAMux({{32{1'b1}}, SrcAE[31:0]}, {{64-`XLEN{1'b1}}, SrcAE}, FmtE, AlignedSrcAE); - - // select a result that may be written to the FP register - mux5 #(64) FResMux(AlignedSrcAE, SgnResE, CmpResE, CvtResE, CvtFpResE, FResSelE, FResE); - mux5 #(5) FFlgMux(5'b0, {4'b0, SgnNVE}, {4'b0, CmpNVE}, CvtFlgE, CvtFpFlgE, FResSelE, FFlgE); - - // select the result that may be written to the integer register - to IEU - mux4 #(`XLEN) IntResMux(CmpResE[`XLEN-1:0], FSrcXE[`XLEN-1:0], ClassResE[`XLEN-1:0], CvtResE[`XLEN-1:0], FIntResSelE, FIntResE); - - - - //***will synth remove registers of values that are always zero? - //////////////////////////////////////////////////////////////////////////////////////// - // E/M pipe registers - //////////////////////////////////////////////////////////////////////////////////////// - - // flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, FSrcXE, FSrcXM); - flopenrc #(65) EMFpReg2(clk, reset, FlushM, ~StallM, {XSgnE,XExpE,XManE}, {XSgnM,XExpM,XManM}); - flopenrc #(65) EMFpReg3(clk, reset, FlushM, ~StallM, {YSgnE,YExpE,YManE}, {YSgnM,YExpM,YManM}); - flopenrc #(64) EMFpReg4(clk, reset, FlushM, ~StallM, {ZExpE,ZManE}, {ZExpM,ZManM}); - flopenrc #(12) EMFpReg5(clk, reset, FlushM, ~StallM, - {XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE}, - {XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM}); - - flopenrc #(64) EMRegCmpRes(clk, reset, FlushM, ~StallM, FResE, FResM); - flopenrc #(5) EMRegCmpFlg(clk, reset, FlushM, ~StallM, FFlgE, FFlgM); - - flopenrc #(`XLEN) EMRegSgnRes(clk, reset, FlushM, ~StallM, FIntResE, FIntResM); - // flopenrc #(1) 
EMRegSgnFlg(clk, reset, FlushM, ~StallM, SgnNVE, SgnNVM); - - //flopenrc #(64) EMRegCvtFpRes(clk, reset, FlushM, ~StallM, CvtFpResE, CvtFpResM); - //flopenrc #(5) EMRegCvtFpFlg(clk, reset, FlushM, ~StallM, CvtFpFlgE, CvtFpFlgM); - - // flopenrc #(64) EMRegCvtRes(clk, reset, FlushM, ~StallM, CvtResE, CvtResM); - // flopenrc #(5) EMRegCvtFlg(clk, reset, FlushM, ~StallM, CvtFlgE, CvtFlgM); - - // flopenrc #(64) EMRegClass(clk, reset, FlushM, ~StallM, ClassResE, ClassResM); - - flopenrc #(11) EMCtrlReg(clk, reset, FlushM, ~StallM, - {FRegWriteE, FResultSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE}, - {FRegWriteM, FResultSelM, FrmM, FmtM, FOpCtrlM, FWriteIntM}); - - - - - - - //////////////////////////////////////////////////////////////////////////////////////// - //BEGIN MEMORY STAGE - //////////////////////////////////////////////////////////////////////////////////////// - - - // FPU flag selection - to privileged - mux4 #(5) FPUFlgMux(5'b0, FMAFlgM, FDivFlgM, FFlgM, FResultSelW, SetFflagsM); - - - - - - //////////////////////////////////////////////////////////////////////////////////////// - // M/W pipe registers - //////////////////////////////////////////////////////////////////////////////////////// - flopenrc #(64) MWRegFma(clk, reset, FlushW, ~StallW, FMAResM, FMAResW); - flopenrc #(64) MWRegDiv(clk, reset, FlushW, ~StallW, FDivResM, FDivResW); - flopenrc #(64) MWRegAdd(clk, reset, FlushW, ~StallW, CvtFpResM, CvtFpResW); - flopenrc #(64) MWRegClass(clk, reset, FlushW, ~StallW, FResM, FResW); - flopenrc #(5) MWCtrlReg(clk, reset, FlushW, ~StallW, - {FRegWriteM, FResultSelM, FmtM, FWriteIntM}, - {FRegWriteW, FResultSelW, FmtW, FWriteIntW}); - - - - - //////////////////////////////////////////////////////////////////////////////////////// - // BEGIN WRITEBACK STAGE - //////////////////////////////////////////////////////////////////////////////////////// - - // put ReadData into NaN-blocking format - // - if there are any unsused bits the most significant bits are 
filled with 1s - // - for load instruction - mux2 #(64) ReadResMux({{32{1'b1}}, ReadDataW[31:0]}, {{64-`XLEN{1'b1}}, ReadDataW}, FmtW, ReadResW); - - // select the result to be written to the FP register - mux4 #(64) FPUResultMux(ReadResW, FMAResW, FDivResW, FResW, FResultSelW, FPUResultW); - - end else begin // no F_SUPPORTED or D_SUPPORTED; tie outputs low - assign FStallD = 0; - assign FWriteIntE = 0; - assign FWriteIntM = 0; - assign FWriteIntW = 0; - assign FWriteDataE = 0; - assign FIntResM = 0; - assign FDivBusyE = 0; - assign IllegalFPUInstrD = 1; - assign SetFflagsM = 0; + assign FStallD = 0; + assign FWriteIntE = 0; + assign FWriteIntM = 0; + assign FWriteIntW = 0; + assign FWriteDataE = 0; + assign FIntResM = 0; + assign FDivBusyE = 0; + assign IllegalFPUInstrD = 1; + assign SetFflagsM = 0; end endgenerate diff --git a/wally-pipelined/src/fpu/fregfile.sv b/wally-pipelined/src/fpu/fregfile.sv index 4b001bc93..fd8e0f608 100644 --- a/wally-pipelined/src/fpu/fregfile.sv +++ b/wally-pipelined/src/fpu/fregfile.sv @@ -1,10 +1,9 @@ /////////////////////////////////////////// -// regfile.sv // // Written: David_Harris@hmc.edu 9 January 2021 -// Modified: +// Modified: James Stine // -// Purpose: 4-port register file +// Purpose: 3-port output register file // // A component of the Wally configurable RISC-V project. 
// @@ -26,22 +25,20 @@ `include "wally-config.vh" module fregfile ( - input logic clk, reset, - input logic we4, - input logic [ 4:0] a1, a2, a3, a4, - input logic [63:0] wd4, + input logic clk, reset, + input logic we4, + input logic [4:0] a1, a2, a3, a4, + input logic [63:0] wd4, output logic [63:0] rd1, rd2, rd3); - - logic [63:0] rf[31:0]; - integer i; - - // three ported register file - // read three ports combinationally (A1/RD1, A2/RD2, A3/RD3) - // write fourth port on rising edge of clock (A4/WD4/WE4) - // write occurs on falling edge of clock - - // reset is intended for simulation only, not synthesis - + + logic [63:0] rf[31:0]; + integer i; + + // three ported register file + // read three ports combinationally (A1/RD1, A2/RD2, A3/RD3) + // write fourth port on rising edge of clock (A4/WD4/WE4) + // write occurs on falling edge of clock + always_ff @(negedge clk or posedge reset) if (reset) for(i=0; i<32; i++) rf[i] <= 0; else if (we4) rf[a4] <= wd4; diff --git a/wally-pipelined/src/fpu/fsm.sv b/wally-pipelined/src/fpu/fsm.sv index 00f959930..a0e874bc7 100755 --- a/wally-pipelined/src/fpu/fsm.sv +++ b/wally-pipelined/src/fpu/fsm.sv @@ -1,49 +1,63 @@ -module fsm ( +/////////////////////////////////////////// +// +// Written: James Stine +// Modified: 9/28/2021 +// +// Purpose: FSM for floating point divider/square root unit (Goldschmidt) +// +// A component of the Wally configurable RISC-V project. 
+// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, +// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT +// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+/////////////////////////////////////////// - input logic clk, - input logic reset, - input logic start, - input logic op_type, - output logic done, // End of cycles - output logic load_rega, // enable for regA - output logic load_regb, // enable for regB - output logic load_regc, // enable for regC - output logic load_regd, // enable for regD - output logic load_regr, // enable for rem - output logic load_regs, // enable for q,qm,qp - output logic [2:0] sel_muxa, // Select muxA - output logic [2:0] sel_muxb, // Select muxB - output logic sel_muxr, // Select rem mux - output logic divBusy // calculation is happening +module fsm ( + input logic clk, + input logic reset, + input logic start, + input logic op_type, + output logic done, + output logic load_rega, + output logic load_regb, + output logic load_regc, + output logic load_regd, + output logic load_regr, + output logic load_regs, + output logic [2:0] sel_muxa, + output logic [2:0] sel_muxb, + output logic sel_muxr, + output logic divBusy ); - - reg [4:0] CURRENT_STATE; - reg [4:0] NEXT_STATE; - - parameter [4:0] - S0=5'd0, S1=5'd1, S2=5'd2, - S3=5'd3, S4=5'd4, S5=5'd5, - S6=5'd6, S7=5'd7, S8=5'd8, - S9=5'd9, S10=5'd10, - S13=5'd13, S14=5'd14, S15=5'd15, - S16=5'd16, S17=5'd17, S18=5'd18, - S19=5'd19, S20=5'd20, S21=5'd21, - S22=5'd22, S23=5'd23, S24=5'd24, - S25=5'd25, S26=5'd26, S27=5'd27, - S28=5'd28, S29=5'd29, S30=5'd30; + typedef enum logic [4:0] {S0, S1, S2, S3, S4, S5, S6, S7, S8, S9, + S10, S11, S12, S13, S14, S15, S16, S17, S18, S19, + S20, S21, S22, S23, S24, S25, S26, S27, S28, S29, + S30} statetype; + + statetype current_state, next_state; always @(negedge clk) begin - if(reset==1'b1) - CURRENT_STATE=S0; + if (reset == 1'b1) + current_state = S0; else - CURRENT_STATE=NEXT_STATE; + current_state = next_state; end always @(*) begin - case(CURRENT_STATE) + case(current_state) S0: // iteration 0 begin if (start==1'b0) @@ -59,7 +73,7 @@ module fsm ( sel_muxa = 3'b000; sel_muxb = 3'b000; sel_muxr = 
1'b0; - NEXT_STATE = S0; + next_state = S0; end else if (start==1'b1 && op_type==1'b0) begin @@ -74,7 +88,7 @@ module fsm ( sel_muxa = 3'b001; sel_muxb = 3'b001; sel_muxr = 1'b0; - NEXT_STATE = S1; + next_state = S1; end // if (start==1'b1 && op_type==1'b0) else if (start==1'b1 && op_type==1'b1) begin @@ -89,7 +103,7 @@ module fsm ( sel_muxa = 3'b010; sel_muxb = 3'b000; sel_muxr = 1'b0; - NEXT_STATE = S13; + next_state = S13; end else begin @@ -104,7 +118,7 @@ module fsm ( sel_muxa = 3'b000; sel_muxb = 3'b000; sel_muxr = 1'b0; - NEXT_STATE = S0; + next_state = S0; end end // case: S0 S1: @@ -120,7 +134,7 @@ module fsm ( sel_muxa = 3'b010; sel_muxb = 3'b000; sel_muxr = 1'b0; - NEXT_STATE = S2; + next_state = S2; end S2: // iteration 1 begin @@ -135,7 +149,7 @@ module fsm ( sel_muxa = 3'b011; sel_muxb = 3'b011; sel_muxr = 1'b0; - NEXT_STATE = S3; + next_state = S3; end S3: begin @@ -150,7 +164,7 @@ module fsm ( sel_muxa = 3'b000; sel_muxb = 3'b010; sel_muxr = 1'b0; - NEXT_STATE = S4; + next_state = S4; end S4: // iteration 2 begin @@ -165,7 +179,7 @@ module fsm ( sel_muxa = 3'b011; sel_muxb = 3'b011; sel_muxr = 1'b0; - NEXT_STATE = S5; + next_state = S5; end S5: begin @@ -180,7 +194,7 @@ module fsm ( sel_muxa = 3'b000; sel_muxb = 3'b010; sel_muxr = 1'b0; // add - NEXT_STATE = S6; + next_state = S6; end S6: // iteration 3 begin @@ -195,7 +209,7 @@ module fsm ( sel_muxa = 3'b011; sel_muxb = 3'b011; sel_muxr = 1'b0; - NEXT_STATE = S8; + next_state = S8; end S7: begin @@ -210,7 +224,7 @@ module fsm ( sel_muxa = 3'b000; sel_muxb = 3'b010; sel_muxr = 1'b0; - NEXT_STATE = S8; + next_state = S8; end // case: S7 S8: // q,qm,qp begin @@ -225,7 +239,7 @@ module fsm ( sel_muxa = 3'b000; sel_muxb = 3'b000; sel_muxr = 1'b0; - NEXT_STATE = S9; + next_state = S9; end S9: // rem begin @@ -240,7 +254,7 @@ module fsm ( sel_muxa = 3'b000; sel_muxb = 3'b000; sel_muxr = 1'b1; - NEXT_STATE = S10; + next_state = S10; end S10: // done begin @@ -255,7 +269,7 @@ module fsm ( sel_muxa = 3'b000; 
sel_muxb = 3'b000; sel_muxr = 1'b0; - NEXT_STATE = S0; + next_state = S0; end S13: // start of sqrt path begin @@ -270,7 +284,7 @@ module fsm ( sel_muxa = 3'b010; sel_muxb = 3'b001; sel_muxr = 1'b0; - NEXT_STATE = S14; + next_state = S14; end S14: begin @@ -285,7 +299,7 @@ module fsm ( sel_muxa = 3'b001; sel_muxb = 3'b100; sel_muxr = 1'b0; - NEXT_STATE = S15; + next_state = S15; end S15: // iteration 1 begin @@ -300,7 +314,7 @@ module fsm ( sel_muxa = 3'b011; sel_muxb = 3'b011; sel_muxr = 1'b0; - NEXT_STATE = S16; + next_state = S16; end S16: begin @@ -315,7 +329,7 @@ module fsm ( sel_muxa = 3'b000; sel_muxb = 3'b011; sel_muxr = 1'b0; - NEXT_STATE = S17; + next_state = S17; end S17: begin @@ -330,7 +344,7 @@ module fsm ( sel_muxa = 3'b100; sel_muxb = 3'b010; sel_muxr = 1'b0; - NEXT_STATE = S18; + next_state = S18; end S18: // iteration 2 begin @@ -345,7 +359,7 @@ module fsm ( sel_muxa = 3'b011; sel_muxb = 3'b011; sel_muxr = 1'b0; - NEXT_STATE = S19; + next_state = S19; end S19: begin @@ -360,7 +374,7 @@ module fsm ( sel_muxa = 3'b000; sel_muxb = 3'b011; sel_muxr = 1'b0; - NEXT_STATE = S20; + next_state = S20; end S20: begin @@ -375,7 +389,7 @@ module fsm ( sel_muxa = 3'b100; sel_muxb = 3'b010; sel_muxr = 1'b0; - NEXT_STATE = S21; + next_state = S21; end S21: // iteration 3 begin @@ -390,7 +404,7 @@ module fsm ( sel_muxa = 3'b011; sel_muxb = 3'b011; sel_muxr = 1'b0; - NEXT_STATE = S22; + next_state = S22; end S22: begin @@ -405,7 +419,7 @@ module fsm ( sel_muxa = 3'b000; sel_muxb = 3'b011; sel_muxr = 1'b0; - NEXT_STATE = S23; + next_state = S23; end S23: begin @@ -420,7 +434,7 @@ module fsm ( sel_muxa = 3'b100; sel_muxb = 3'b010; sel_muxr = 1'b0; - NEXT_STATE = S24; + next_state = S24; end S24: // q,qm,qp begin @@ -435,7 +449,7 @@ module fsm ( sel_muxa = 3'b000; sel_muxb = 3'b000; sel_muxr = 1'b0; - NEXT_STATE = S25; + next_state = S25; end S25: // rem begin @@ -450,7 +464,7 @@ module fsm ( sel_muxa = 3'b011; sel_muxb = 3'b110; sel_muxr = 1'b1; - NEXT_STATE = S26; + 
next_state = S26; end S26: // done begin @@ -465,7 +479,7 @@ module fsm ( sel_muxa = 3'b000; sel_muxb = 3'b000; sel_muxr = 1'b0; - NEXT_STATE = S0; + next_state = S0; end default: begin @@ -480,9 +494,9 @@ module fsm ( sel_muxa = 3'b000; sel_muxb = 3'b000; sel_muxr = 1'b0; - NEXT_STATE = S0; + next_state = S0; end - endcase // case(CURRENT_STATE) - end // always @ (CURRENT_STATE or X) + endcase // case(current_state) + end // always @ (current_state or X) endmodule // fsm diff --git a/wally-pipelined/src/fpu/rounder_div.sv b/wally-pipelined/src/fpu/rounder_div.sv index ff7c4830f..1d2ff1cc3 100755 --- a/wally-pipelined/src/fpu/rounder_div.sv +++ b/wally-pipelined/src/fpu/rounder_div.sv @@ -1,37 +1,55 @@ +/////////////////////////////////////////// // -// The rounder takes as inputs a 64-bit value to be rounded, A, the -// exponent of the value to be rounded, the sign of the final result, Sign, -// the precision of the results, P, and the two-bit rounding mode, rm. -// It produces a rounded 52-bit result, Z, the exponent of the rounded -// result, Z_exp, and a flag that indicates if the result was rounded, -// Inexact. The rounding mode has the following values. -// rm Mode -// 00 round-to-nearest-even -// 01 round-toward-zero -// 10 round-toward-plus infinity -// 11 round-toward-minus infinity +// Written: James Stine +// Modified: 8/1/2018 // +// Purpose: Floating point divider/square root rounder unit (Goldschmidt) +// +// A component of the Wally configurable RISC-V project. 
+// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, +// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT +// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+/////////////////////////////////////////// module rounder_div ( - input logic [1:0] rm, - input logic P, - input logic OvEn, - input logic UnEn, - input logic [12:0] exp_diff, - input logic [2:0] sel_inv, - input logic Invalid, - input logic SignR, - - input logic [63:0] q1, - input logic [63:0] qm1, - input logic [63:0] qp1, - input logic [63:0] q0, - input logic [63:0] qm0, - input logic [63:0] qp0, + input logic [1:0] rm, + input logic P, + input logic OvEn, + input logic UnEn, + input logic [12:0] exp_diff, + input logic [2:0] sel_inv, + input logic Invalid, + input logic SignR, + input logic [63:0] Float1, + input logic [63:0] Float2, + input logic XNaNQ, + input logic YNaNQ, + input logic XZeroQ, + input logic YZeroQ, + input logic XInfQ, + input logic YInfQ, + input logic op_type, + input logic [63:0] q1, + input logic [63:0] qm1, + input logic [63:0] qp1, + input logic [63:0] q0, + input logic [63:0] qm0, + input logic [63:0] qp0, input logic [127:0] regr_out, output logic [63:0] Result, - output logic [4:0] Flags + output logic [4:0] Flags ); logic Rsign; @@ -56,11 +74,15 @@ module rounder_div ( logic Texp_l7z; logic Texp_l7o; logic OvCon; - logic zero_rem; - logic [1:0] mux_mant; + logic zero_rem; + logic [1:0] mux_mant; logic sign_rem; - logic [63:0] q, qm, qp; - logic exp_ovf; + logic [63:0] q, qm, qp; + logic exp_ovf; + + logic [50:0] NaN_out; + logic NaN_Sign_out; + logic Sign_out; // Remainder = 0? assign zero_rem = ~(|regr_out); @@ -117,12 +139,11 @@ module rounder_div ( // the input was infinite or NaN or the output of the adder is zero. 
// 00 = Valid // 10 = NaN - assign Valid = (~sel_inv[2]&~sel_inv[1]&~sel_inv[0]); - assign NaN = ~sel_inv[1]& sel_inv[0]; + assign Valid = ~sel_inv[2]&~sel_inv[1]&~sel_inv[0]; + assign NaN = sel_inv[2]&sel_inv[1]&sel_inv[0]; assign UnderFlow = (P & UnFlow_SP | UnFlow_DP) & Valid; assign OverFlow = (P & OvFlow_SP | OvFlow_DP) & Valid; - assign Div0 = sel_inv[2]&sel_inv[1]&~sel_inv[0]; - + assign Div0 = YZeroQ&~XZeroQ&~op_type&~NaN; // The final result is Inexact if any rounding occurred ((i.e., R or S // is one), or (if the result overflows ) or (if the result underflows and the @@ -161,18 +182,26 @@ module rounder_div ( // If the result is zero or infinity, the mantissa is all zeros. // If the result is NaN, the mantissa is 10...0 // If the result the largest floating point number, the mantissa - // is all ones. Otherwise, the mantissa is not changed. - assign Rmant[51] = Largest | NaN | (Smant[51]&~Infinite&~Rzero); - assign Rmant[50:0] = {51{Largest}} | (Smant[50:0]&{51{~Infinite&Valid&~Rzero}}); + // is all ones. Otherwise, the mantissa is not changed. + assign NaN_out = ~XNaNQ&YNaNQ ? Float2[50:0] : Float1[50:0]; + assign NaN_Sign_out = ~XNaNQ&YNaNQ ? Float2[63] : Float1[63]; + assign Sign_out = (XZeroQ&YZeroQ | XInfQ&YInfQ)&~op_type | Rsign&~XNaNQ&~YNaNQ | + NaN_Sign_out&(XNaNQ|YNaNQ); + // FIXME (jes) - Imperas gives sNaN a Sign=0 where x86 gives Sign=1 + // | Float1[63]&op_type; + assign Rmant[51] = Largest | NaN | (Smant[51]&~Infinite&~Rzero); + assign Rmant[50:0] = ({51{Largest}} | (Smant[50:0]&{51{~Infinite&Valid&~Rzero}}) | + (NaN_out&{51{NaN}}))&({51{~(op_type&Float1[63]&~XZeroQ)}}); + // For single precision, the 8 least significant bits of the exponent // and 23 most significant bits of the mantissa contain bits used // for the final result. A double precision result is returned if // overflow has occurred, the overflow trap is enabled, and a conversion // is being performed. assign OvCon = OverFlow & OvEn; - assign Result = (P&~OvCon) ? 
{ {32{1'b1}}, Rsign, Rexp[7:0], Rmant[51:29]} - : {Rsign, Rexp, Rmant}; + assign Result = (P&~OvCon) ? { {32{1'b1}}, Sign_out, Rexp[7:0], Rmant[51:29]} + : {Sign_out, Rexp, Rmant}; endmodule // rounder diff --git a/wally-pipelined/src/fpu/sbtm_a0.sv b/wally-pipelined/src/fpu/sbtm_a0.sv index 83953787b..61dd183bb 100644 --- a/wally-pipelined/src/fpu/sbtm_a0.sv +++ b/wally-pipelined/src/fpu/sbtm_a0.sv @@ -1,5 +1,30 @@ +/////////////////////////////////////////// +// +// Written: James Stine +// Modified: 8/1/2018 +// +// Purpose: Bipartite Lookup +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, +// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT +// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+/////////////////////////////////////////// + module sbtm_a0 (input logic [6:0] a, - output logic [12:0] y); + output logic [12:0] y); + always_comb case(a) 7'b0000000: y = 13'b1111111100010; @@ -137,4 +162,4 @@ endmodule // sbtm_a0 - \ No newline at end of file + diff --git a/wally-pipelined/src/fpu/sbtm_a1.sv b/wally-pipelined/src/fpu/sbtm_a1.sv index 76e4bdec9..88845283c 100644 --- a/wally-pipelined/src/fpu/sbtm_a1.sv +++ b/wally-pipelined/src/fpu/sbtm_a1.sv @@ -1,5 +1,30 @@ +/////////////////////////////////////////// +// +// Written: James Stine +// Modified: 8/1/2018 +// +// Purpose: Bipartite Lookup +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, +// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT +// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+/////////////////////////////////////////// + module sbtm_a1 (input logic [6:0] a, - output logic [4:0] y); + output logic [4:0] y); + always_comb case(a) 7'b0000000: y = 5'b11100; @@ -137,4 +162,4 @@ endmodule // sbtm_a0 - \ No newline at end of file + diff --git a/wally-pipelined/src/fpu/sbtm_a2.sv b/wally-pipelined/src/fpu/sbtm_a2.sv index ae407ec81..8d32ad157 100755 --- a/wally-pipelined/src/fpu/sbtm_a2.sv +++ b/wally-pipelined/src/fpu/sbtm_a2.sv @@ -1,5 +1,30 @@ +/////////////////////////////////////////// +// +// Written: James Stine +// Modified: 8/1/2018 +// +// Purpose: Bipartite Lookup +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, +// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT +// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+/////////////////////////////////////////// + module sbtm_a2 (input logic [7:0] a, - output logic [13:0] y); + output logic [13:0] y); + always_comb case(a) 8'b01000000: y = 14'b10110100010111; @@ -201,4 +226,4 @@ endmodule // sbtm_a0 - \ No newline at end of file + diff --git a/wally-pipelined/src/fpu/sbtm_a3.sv b/wally-pipelined/src/fpu/sbtm_a3.sv index c6b367933..5958c3bf6 100755 --- a/wally-pipelined/src/fpu/sbtm_a3.sv +++ b/wally-pipelined/src/fpu/sbtm_a3.sv @@ -1,5 +1,30 @@ +/////////////////////////////////////////// +// +// Written: James Stine +// Modified: 8/1/2018 +// +// Purpose: Bipartite Lookup +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, +// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT +// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+/////////////////////////////////////////// + module sbtm_a3 (input logic [7:0] a, - output logic [5:0] y); + output logic [5:0] y); + always_comb case(a) 8'b01000000: y = 6'b100110; diff --git a/wally-pipelined/src/fpu/sbtm_div.sv b/wally-pipelined/src/fpu/sbtm_div.sv index 53b56dbd7..999106d86 100644 --- a/wally-pipelined/src/fpu/sbtm_div.sv +++ b/wally-pipelined/src/fpu/sbtm_div.sv @@ -1,3 +1,27 @@ +/////////////////////////////////////////// +// +// Written: James Stine +// Modified: 8/1/2018 +// +// Purpose: Bipartite Lookup for divide portion of fpdivsqrt +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, +// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT +// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+/////////////////////////////////////////// + module sbtm_div (input logic [11:0] a, output logic [10:0] ia_out); // bit partitions diff --git a/wally-pipelined/src/fpu/sbtm_sqrt.sv b/wally-pipelined/src/fpu/sbtm_sqrt.sv index 27ffbeccf..fdf0bb6df 100644 --- a/wally-pipelined/src/fpu/sbtm_sqrt.sv +++ b/wally-pipelined/src/fpu/sbtm_sqrt.sv @@ -1,3 +1,27 @@ +/////////////////////////////////////////// +// +// Written: James Stine +// Modified: 8/1/2018 +// +// Purpose: Bipartite Lookup for sqrt part of fpdivsqrt +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, +// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT +// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +/////////////////////////////////////////// + module sbtm_sqrt (input logic [11:0] a, output logic [10:0] y); // bit partitions