///////////////////////////////////////////
// fctrl.sv
//
// Written: me@KatherineParry.com
// Modified: 7/5/2022
//
// Purpose: floating-point control unit
// 
// Documentation: RISC-V System on Chip Design Chapter 13
//
// A component of the CORE-V-WALLY configurable RISC-V project.
// 
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
//
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
//
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file 
// except in compliance with the License, or, at your option, the Apache License version 2.0. You 
// may obtain a copy of the License at
//
// https://solderpad.org/licenses/SHL-2.1/
//
// Unless required by applicable law or agreed to in writing, any work distributed under the 
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
// either express or implied. See the License for the specific language governing permissions 
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"

module fctrl (
  input  logic                clk,
  input  logic                reset,
  // input control signals
  input  logic                StallE, StallM, StallW,             // stall signals
  input  logic                FlushE, FlushM, FlushW,             // flush signals
  input  logic                IntDivE,                            // is inteteger division
  input  logic [2:0]          FRM_REGW,                           // rounding mode from CSR
  input  logic [1:0]          STATUS_FS,                          // is FPU enabled?
  input  logic                FDivBusyE,                          // is the divider busy
  // intruction                                                   
  input  logic [31:0]         InstrD,                             // the full instruction
  input  logic [6:0]          Funct7D,                            // bits 31:25 of instruction - may contain percision
  input  logic [6:0]          OpD,                                // bits 6:0 of instruction
  input  logic [4:0]          Rs2D,                               // bits 24:20 of instruction
  input  logic [2:0]          Funct3D, Funct3E,                   // bits 14:12 of instruction - may contain rounding mode
  // input mux selections                                         
  output logic                XEnD, YEnD, ZEnD,                   // enable inputs
  output logic                XEnE, YEnE, ZEnE,                   // enable inputs
  // opperation mux selections                                    
  output logic                FCvtIntE, FCvtIntW,                 // convert to integer opperation
  output logic [2:0]          FrmM,                               // FP rounding mode
  output logic [`FMTBITS-1:0] FmtE, FmtM,                         // FP format
  output logic [2:0]          OpCtrlE, OpCtrlM,                   // Select which opperation to do in each component
  output logic                FpLoadStoreM,                       // FP load or store instruction
  output logic [1:0]          PostProcSelE, PostProcSelM,         // select result in the post processing unit
  output logic [1:0]          FResSelE, FResSelM, FResSelW,       // Select one of the results that finish in the memory stage
  // register control signals
  output logic                FRegWriteE, FRegWriteM, FRegWriteW, // FP register write enable
  output logic                FWriteIntE, FWriteIntM,             // Write to integer register
  output logic [4:0]          Adr1D, Adr2D, Adr3D,                // adresses of each input
  output logic [4:0]          Adr1E, Adr2E, Adr3E,                // adresses of each input
  // other control signals
  output logic                IllegalFPUInstrD,                   // Is the instruction an illegal fpu instruction
  output logic                FDivStartE, IDivStartE              // Start division or squareroot
  );

  `define FCTRLW 12

  logic [`FCTRLW-1:0]         ControlsD;          // control signals
  logic                       FRegWriteD;         // FP register write enable
  logic                       FDivStartD;         // start division/sqrt
  logic                       FWriteIntD;         // integer register write enable
  logic [2:0]                 OpCtrlD;            // Select which opperation to do in each component
  logic [1:0]                 PostProcSelD;       // select result in the post processing unit
  logic [1:0]                 FResSelD;           // Select one of the results that finish in the memory stage
  logic [2:0]                 FrmD, FrmE;         // FP rounding mode
  logic [`FMTBITS-1:0]        FmtD;               // FP format
  logic [1:0]                 Fmt, Fmt2;          // format - before possible reduction
  logic                       SupportedFmt;       // is the format supported
  logic                       SupportedFmt2;      // is the source format supported for fp -> fp
  logic                       FCvtIntD, FCvtIntM; // convert to integer opperation

  // FPU Instruction Decoder
  assign Fmt = Funct7D[1:0];
  assign Fmt2 = Rs2D[1:0]; // source format for fcvt fp->fp

  assign SupportedFmt = (Fmt == 2'b00 | (Fmt == 2'b01 & `D_SUPPORTED) |
                         (Fmt == 2'b10 & `ZFH_SUPPORTED) | (Fmt == 2'b11 & `Q_SUPPORTED));
  assign SupportedFmt2 = (Fmt2 == 2'b00 | (Fmt2 == 2'b01 & `D_SUPPORTED) |
                         (Fmt2 == 2'b10 & `ZFH_SUPPORTED) | (Fmt2 == 2'b11 & `Q_SUPPORTED));

  // decode the instruction                       
  // FRegWrite_FWriteInt_FResSel_PostProcSel_FOpCtrl_FDivStart_IllegalFPUInstr_FCvtInt
  always_comb
    if (STATUS_FS == 2'b00) // FPU instructions are illegal when FPU is disabled
      ControlsD = `FCTRLW'b0_0_00_00_000_0_1_0;
    else if (OpD != 7'b0000111 & OpD != 7'b0100111 & ~SupportedFmt) 
      ControlsD = `FCTRLW'b0_0_00_00_000_0_1_0; // for anything other than loads and stores, check for supported format
    else begin 
      ControlsD = `FCTRLW'b0_0_00_00_000_0_1_0; // default: non-implemented instruction
      /* verilator lint_off CASEINCOMPLETE */ // default value above has priority so no other default needed
      case(OpD)
        7'b0000111: case(Funct3D)
                      3'b010:                      ControlsD = `FCTRLW'b1_0_10_00_0xx_0_0_0; // flw
                      3'b011:  if (`D_SUPPORTED)   ControlsD = `FCTRLW'b1_0_10_00_0xx_0_0_0; // fld
                      3'b100:  if (`Q_SUPPORTED)   ControlsD = `FCTRLW'b1_0_10_00_0xx_0_0_0; // flq
                      3'b001:  if (`ZFH_SUPPORTED) ControlsD = `FCTRLW'b1_0_10_00_0xx_0_0_0; // flh
                    endcase
        7'b0100111: case(Funct3D)
                      3'b010:                      ControlsD = `FCTRLW'b0_0_10_00_0xx_0_0_0; // fsw
                      3'b011:  if (`D_SUPPORTED)   ControlsD = `FCTRLW'b0_0_10_00_0xx_0_0_0; // fsd
                      3'b100:  if (`Q_SUPPORTED)   ControlsD = `FCTRLW'b0_0_10_00_0xx_0_0_0; // fsq
                      3'b001:  if (`ZFH_SUPPORTED) ControlsD = `FCTRLW'b0_0_10_00_0xx_0_0_0; // fsh
                    endcase
        7'b1000011:   ControlsD = `FCTRLW'b1_0_01_10_000_0_0_0; // fmadd
        7'b1000111:   ControlsD = `FCTRLW'b1_0_01_10_001_0_0_0; // fmsub
        7'b1001011:   ControlsD = `FCTRLW'b1_0_01_10_010_0_0_0; // fnmsub
        7'b1001111:   ControlsD = `FCTRLW'b1_0_01_10_011_0_0_0; // fnmadd
        7'b1010011: casez(Funct7D)
                      7'b00000??: ControlsD = `FCTRLW'b1_0_01_10_110_0_0_0; // fadd
                      7'b00001??: ControlsD = `FCTRLW'b1_0_01_10_111_0_0_0; // fsub
                      7'b00010??: ControlsD = `FCTRLW'b1_0_01_10_100_0_0_0; // fmul
                      7'b00011??: ControlsD = `FCTRLW'b1_0_01_01_xx0_1_0_0; // fdiv
                      7'b01011??: if (Rs2D == 5'b0000) ControlsD = `FCTRLW'b1_0_01_01_xx1_1_0_0; // fsqrt
                      7'b00100??: case(Funct3D)
                                    3'b000:  ControlsD = `FCTRLW'b1_0_00_00_000_0_0_0; // fsgnj
                                    3'b001:  ControlsD = `FCTRLW'b1_0_00_00_001_0_0_0; // fsgnjn
                                    3'b010:  ControlsD = `FCTRLW'b1_0_00_00_010_0_0_0; // fsgnjx
                                  endcase
                      7'b00101??: case(Funct3D)
                                    3'b000:  ControlsD = `FCTRLW'b1_0_00_00_110_0_0_0; // fmin
                                    3'b001:  ControlsD = `FCTRLW'b1_0_00_00_101_0_0_0; // fmax
                                  endcase
                      7'b10100??: case(Funct3D)
                                    3'b010:  ControlsD = `FCTRLW'b0_1_00_00_010_0_0_0; // feq
                                    3'b001:  ControlsD = `FCTRLW'b0_1_00_00_001_0_0_0; // flt
                                    3'b000:  ControlsD = `FCTRLW'b0_1_00_00_011_0_0_0; // fle
                                  endcase
                      7'b11100??: if (Funct3D == 3'b001 & Rs2D == 5'b00000)          
                                                ControlsD = `FCTRLW'b0_1_10_00_000_0_0_0; // fclass
                                  else if (Funct3D == 3'b000 & Rs2D == 5'b00000) 
                                                ControlsD = `FCTRLW'b0_1_11_00_000_0_0_0; // fmv.x.w / fmv.x.d to int register
                      7'b111100?: if (Funct3D == 3'b000 & Rs2D == 5'b00000) 
                                                ControlsD = `FCTRLW'b1_0_00_00_011_0_0_0; // fmv.w.x / fmv.d.x   to fp reg
                      7'b0100000: if (Rs2D[4:2] == 3'b000 & SupportedFmt2 & Rs2D[1:0] != 2'b00)
                                                ControlsD = `FCTRLW'b1_0_01_00_000_0_0_0; // fcvt.s.(d/q/h)
                      7'b0100001: if (Rs2D[4:2] == 3'b000  & SupportedFmt2 & Rs2D[1:0] != 2'b01)
                                                ControlsD = `FCTRLW'b1_0_01_00_001_0_0_0; // fcvt.d.(s/h/q)
                      // coverage off
                      // Not covered in testing because rv64gc does not support half or quad precision
                      7'b0100010: if (Rs2D[4:2] == 3'b000 & SupportedFmt2 & Rs2D[1:0] != 2'b10)
                                                ControlsD = `FCTRLW'b1_0_01_00_010_0_0_0; // fcvt.h.(s/d/q)
                      7'b0100011: if (Rs2D[4:2] == 3'b000  & SupportedFmt2 & Rs2D[1:0] != 2'b11)
                                                ControlsD = `FCTRLW'b1_0_01_00_011_0_0_0; // fcvt.q.(s/h/d)
                      // coverage on
                      7'b1101000: case(Rs2D)
                                    5'b00000:    ControlsD = `FCTRLW'b1_0_01_00_101_0_0_0; // fcvt.s.w   w->s
                                    5'b00001:    ControlsD = `FCTRLW'b1_0_01_00_100_0_0_0; // fcvt.s.wu wu->s
                                    5'b00010:    ControlsD = `FCTRLW'b1_0_01_00_111_0_0_0; // fcvt.s.l   l->s
                                    5'b00011:    ControlsD = `FCTRLW'b1_0_01_00_110_0_0_0; // fcvt.s.lu lu->s
                                  endcase
                      7'b1100000: case(Rs2D)
                                    5'b00000:    ControlsD = `FCTRLW'b0_1_01_00_001_0_0_1; // fcvt.w.s   s->w
                                    5'b00001:    ControlsD = `FCTRLW'b0_1_01_00_000_0_0_1; // fcvt.wu.s  s->wu
                                    5'b00010:    ControlsD = `FCTRLW'b0_1_01_00_011_0_0_1; // fcvt.l.s   s->l
                                    5'b00011:    ControlsD = `FCTRLW'b0_1_01_00_010_0_0_1; // fcvt.lu.s  s->lu
                                  endcase
                      7'b1101001: case(Rs2D)
                                    5'b00000:    ControlsD = `FCTRLW'b1_0_01_00_101_0_0_0; // fcvt.d.w   w->d
                                    5'b00001:    ControlsD = `FCTRLW'b1_0_01_00_100_0_0_0; // fcvt.d.wu wu->d
                                    5'b00010:    ControlsD = `FCTRLW'b1_0_01_00_111_0_0_0; // fcvt.d.l   l->d
                                    5'b00011:    ControlsD = `FCTRLW'b1_0_01_00_110_0_0_0; // fcvt.d.lu lu->d
                                  endcase
                      7'b1100001: case(Rs2D)
                                    5'b00000:    ControlsD = `FCTRLW'b0_1_01_00_001_0_0_1; // fcvt.w.d   d->w
                                    5'b00001:    ControlsD = `FCTRLW'b0_1_01_00_000_0_0_1; // fcvt.wu.d  d->wu
                                    5'b00010:    ControlsD = `FCTRLW'b0_1_01_00_011_0_0_1; // fcvt.l.d   d->l
                                    5'b00011:    ControlsD = `FCTRLW'b0_1_01_00_010_0_0_1; // fcvt.lu.d  d->lu
                                  endcase
                      // coverage off
                      // Not covered in testing because rv64gc does not support half or quad precision
                      7'b1101010: case(Rs2D)
                                    5'b00000:    ControlsD = `FCTRLW'b1_0_01_00_101_0_0_0; // fcvt.h.w   w->h
                                    5'b00001:    ControlsD = `FCTRLW'b1_0_01_00_100_0_0_0; // fcvt.h.wu wu->h
                                    5'b00010:    ControlsD = `FCTRLW'b1_0_01_00_111_0_0_0; // fcvt.h.l   l->h
                                    5'b00011:    ControlsD = `FCTRLW'b1_0_01_00_110_0_0_0; // fcvt.h.lu lu->h
                                  endcase
                      7'b1100010: case(Rs2D)
                                    5'b00000:    ControlsD = `FCTRLW'b0_1_01_00_001_0_0_1; // fcvt.w.h   h->w
                                    5'b00001:    ControlsD = `FCTRLW'b0_1_01_00_000_0_0_1; // fcvt.wu.h  h->wu
                                    5'b00010:    ControlsD = `FCTRLW'b0_1_01_00_011_0_0_1; // fcvt.l.h   h->l
                                    5'b00011:    ControlsD = `FCTRLW'b0_1_01_00_010_0_0_1; // fcvt.lu.h  h->lu
                                  endcase
                      7'b1101011: case(Rs2D)
                                    5'b00000:    ControlsD = `FCTRLW'b1_0_01_00_101_0_0_0; // fcvt.q.w   w->q
                                    5'b00001:    ControlsD = `FCTRLW'b1_0_01_00_100_0_0_0; // fcvt.q.wu wu->q
                                    5'b00010:    ControlsD = `FCTRLW'b1_0_01_00_111_0_0_0; // fcvt.q.l   l->q
                                    5'b00011:    ControlsD = `FCTRLW'b1_0_01_00_110_0_0_0; // fcvt.q.lu lu->q
                                  endcase
                      7'b1100011: case(Rs2D)
                                    5'b00000:    ControlsD = `FCTRLW'b0_1_01_00_001_0_0_1; // fcvt.w.q   q->w
                                    5'b00001:    ControlsD = `FCTRLW'b0_1_01_00_000_0_0_1; // fcvt.wu.q  q->wu
                                    5'b00010:    ControlsD = `FCTRLW'b0_1_01_00_011_0_0_1; // fcvt.l.q   q->l
                                    5'b00011:    ControlsD = `FCTRLW'b0_1_01_00_010_0_0_1; // fcvt.lu.q  q->lu
                                  endcase
                      // coverage on
                    endcase
      endcase
    end
    /* verilator lint_on CASEINCOMPLETE */

  // unswizzle control bits
  assign #1 {FRegWriteD, FWriteIntD, FResSelD, PostProcSelD, OpCtrlD, FDivStartD, IllegalFPUInstrD, FCvtIntD} = ControlsD;
  
  // rounding modes:
  //    000 - round to nearest, ties to even
  //    001 - round twords 0 - round to min magnitude
  //    010 - round down - round twords negitive infinity
  //    011 - round up - round twords positive infinity
  //    100 - round to nearest, ties to max magnitude - round to nearest, ties away from zero
  //    111 - dynamic - choose FRM_REGW as rounding mode
  assign FrmD = &Funct3D ? FRM_REGW : Funct3D;

  // Precision
  //    00 - single
  //    01 - double
  //    10 - half
  //    11 - quad
  
    if (`FPSIZES == 1)
      assign FmtD = 0;
    else if (`FPSIZES == 2)begin
      logic [1:0] FmtTmp;
      assign FmtTmp = ((Funct7D[6:3] == 4'b0100)&OpD[4]) ? Rs2D[1:0] : (~OpD[6]&(&OpD[2:0])) ? {~Funct3D[1], ~(Funct3D[1]^Funct3D[0])} : Funct7D[1:0];
      assign FmtD = (`FMT == FmtTmp);
    end
    else if (`FPSIZES == 3|`FPSIZES == 4)
      assign FmtD = ((Funct7D[6:3] == 4'b0100)&OpD[4]) ? Rs2D[1:0] : Funct7D[1:0];

  // Enables indicate that a source register is used and may need stalls. Also indicate special cases for infinity or NaN.
  // When disabled infinity and NaN on source registers are ignored by the unpacker and thus special case logic.
  
  //    X - all except int->fp, store, load, mv int->fp
  assign XEnD = ~(((FResSelD==2'b10)&~FWriteIntD)|                                                 // load/store
                  ((FResSelD==2'b00)&FRegWriteD&(OpCtrlD==3'b011))|                             // mv int to float - There was an issue here, this condition was not refering to mv int -> fp  // ((FResSelD==2'b11)&FRegWriteD)|
                  ((FResSelD==2'b01)&(PostProcSelD==2'b00)&OpCtrlD[2]));                           // cvt int to float

  //    Y - all except cvt, mv, load, class, sqrt
  assign YEnD = ~(((FResSelD==2'b10)&(FWriteIntD|FRegWriteD))|                                     // load or class 
                  ((FResSelD==2'b00)&FRegWriteD&(OpCtrlD==3'b011))|                                // mv int to float as above // previously mv both ways - Another issue here, previously (FResSelD==2'b11)| does not cover mv both way int-> fp and fp-> int
                  ((FResSelD==2'b11)&(PostProcSelD==2'b00))|                                    // mv float to int                   // mv both ways
                  ((FResSelD==2'b01)&((PostProcSelD==2'b00)|((PostProcSelD==2'b01)&OpCtrlD[0])))); // cvt both or sqrt

                  // Removed (FResSelD==2'b11)| removed to avoid redundancy 

  //    Z - fma ops only
  assign ZEnD = (PostProcSelD==2'b10)&(~OpCtrlD[2]|OpCtrlD[1]);                  // fma, add, sub   // Removed &(FResSelD==2'b01) because it' redundant, Changed all the xx PostProcSelD to 00 to avoid unnecessary contention errors.


  //  Final Res Sel:
  //        fp      int
  //  00  other     cmp
  //  01  postproc  cvt
  //  10  store     class
  //  11            mv

  //  post processing Sel:
  //  00  cvt
  //  01  div
  //  10  fma

  //  Other Sel:
  //    Ctrl signal = {OpCtrl[2], &FOpctrl[1:0]}
  //        000 - sign            00
  //        001 - negate sign     00
  //        010 - xor sign        00
  //        011 - mv to fp        01
  //        110 - min             10
  //        101 - max             10

  //  OpCtrl:
  //    Fma: {not multiply-add?, negate prod?, negate Z?}
  //        000 - fmadd
  //        001 - fmsub
  //        010 - fnmsub
  //        011 - fnmadd
  //        100 - mul
  //        110 - add
  //        111 - sub
  //    Div: 
  //        0 - div
  //        1 - sqrt
  //    Cvt Int: {Int to Fp?, 64 bit int?, signed int?}
  //    Cvt Fp: output format
  //        10 - to half
  //        00 - to single
  //        01 - to double
  //        11 - to quad
  //    Cmp: {equal?, less than?}
  //        010 - eq
  //        001 - lt
  //        011 - le
  //        110 - min
  //        101 - max
  //    Sgn:
  //        00 - sign
  //        01 - negate sign
  //        10 - xor sign

  // rename input adresses for readability
  assign Adr1D = InstrD[19:15];
  assign Adr2D = InstrD[24:20];
  assign Adr3D = InstrD[31:27];
 
  // D/E pipleine register
  flopenrc #(13+`FMTBITS) DECtrlReg3(clk, reset, FlushE, ~StallE, 
              {FRegWriteD, PostProcSelD, FResSelD, FrmD, FmtD, OpCtrlD, FWriteIntD, FCvtIntD},
              {FRegWriteE, PostProcSelE, FResSelE, FrmE, FmtE, OpCtrlE, FWriteIntE, FCvtIntE});
  flopenrc #(15) DEAdrReg(clk, reset, FlushE, ~StallE, {Adr1D, Adr2D, Adr3D}, {Adr1E, Adr2E, Adr3E});
  flopenrc #(1) DEFDivStartReg(clk, reset, FlushE, ~StallE|FDivBusyE, FDivStartD, FDivStartE);
  flopenrc #(3) DEEnReg(clk, reset, FlushE, ~StallE, {XEnD, YEnD, ZEnD}, {XEnE, YEnE, ZEnE});

  // Integer division on FPU divider
  if (`M_SUPPORTED & `IDIV_ON_FPU) assign IDivStartE = IntDivE;
  else                             assign IDivStartE = 0; 

  // E/M pipleine register
  flopenrc #(13+int'(`FMTBITS)) EMCtrlReg (clk, reset, FlushM, ~StallM,
              {FRegWriteE, FResSelE, PostProcSelE, FrmE, FmtE, OpCtrlE, FWriteIntE, FCvtIntE},
              {FRegWriteM, FResSelM, PostProcSelM, FrmM, FmtM, OpCtrlM, FWriteIntM, FCvtIntM});
  
  // renameing for readability
  assign FpLoadStoreM = FResSelM[1];

  // M/W pipleine register
  flopenrc #(4)  MWCtrlReg(clk, reset, FlushW, ~StallW,
          {FRegWriteM, FResSelM, FCvtIntM},
          {FRegWriteW, FResSelW, FCvtIntW});
 
endmodule