From c8847b27e823905c131ebc776d866adfa6fe17fa Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Mon, 16 Aug 2021 13:06:09 -0400 Subject: [PATCH] all conversions go through the execute stage result mux --- wally-pipelined/src/fpu/fctrl.sv | 119 +++++++++++++-------------- wally-pipelined/src/fpu/fhazard.sv | 8 +- wally-pipelined/src/fpu/fma.sv | 6 +- wally-pipelined/src/fpu/fpu.sv | 41 +++++---- wally-pipelined/src/fpu/unpacking.sv | 4 +- 5 files changed, 87 insertions(+), 91 deletions(-) diff --git a/wally-pipelined/src/fpu/fctrl.sv b/wally-pipelined/src/fpu/fctrl.sv index 6bdb4a81..a109ed67 100755 --- a/wally-pipelined/src/fpu/fctrl.sv +++ b/wally-pipelined/src/fpu/fctrl.sv @@ -8,9 +8,9 @@ module fctrl ( output logic IllegalFPUInstrD, // Is the instruction an illegal fpu instruction output logic FRegWriteD, // FP register write enable output logic FDivStartD, // Start division or squareroot - output logic [2:0] FResultSelD, // select result to be written to fp register + output logic [1:0] FResultSelD, // select result to be written to fp register output logic [2:0] FOpCtrlD, // chooses which opperation to do - specifics shown at bottom of module and in each unit - output logic [1:0] FResSelD, // select one of the results done in the memory stage + output logic [2:0] FResSelD, // select one of the results done in the memory stage output logic [1:0] FIntResSelD, // select the result that will be written to the integer register output logic FmtD, // precision - single-0 double-1 output logic [2:0] FrmD, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude @@ -24,82 +24,81 @@ module fctrl ( case(OpD) // FRegWrite_FWriteInt_FResultSel_FOpCtrl_FResSel_FIntResSel_FDivStart_IllegalFPUInstr 7'b0000111: case(Funct3D) - 3'b010: ControlsD = `FCTRLW'b1_0_000_000_00_00_0_0; // flw - 3'b011: ControlsD = `FCTRLW'b1_0_000_001_00_00_0_0; // fld - default: ControlsD = `FCTRLW'b0_0_000_000_00_00_0_1; // non-implemented instruction + 3'b010: ControlsD = `FCTRLW'b1_0_00_000_000_00_0_0; // flw + 3'b011: ControlsD = `FCTRLW'b1_0_00_001_000_00_0_0; // fld + default: ControlsD = `FCTRLW'b0_0_00_000_000_00_0_1; // non-implemented instruction endcase 7'b0100111: case(Funct3D) - 3'b010: ControlsD = `FCTRLW'b0_0_000_010_00_00_0_0; // fsw - 3'b011: ControlsD = `FCTRLW'b0_0_000_011_00_00_0_0; // fsd - default: ControlsD = `FCTRLW'b0_0_000_000_00_00_0_1; // non-implemented instruction + 3'b010: ControlsD = `FCTRLW'b0_0_00_010_000_00_0_0; // fsw + 3'b011: ControlsD = `FCTRLW'b0_0_00_011_000_00_0_0; // fsd + default: ControlsD = `FCTRLW'b0_0_00_000_000_00_0_1; // non-implemented instruction endcase - 7'b1000011: ControlsD = `FCTRLW'b1_0_001_000_00_00_0_0; // fmadd - 7'b1000111: ControlsD = `FCTRLW'b1_0_001_001_00_00_0_0; // fmsub - 7'b1001011: ControlsD = `FCTRLW'b1_0_001_010_00_00_0_0; // fnmsub - 7'b1001111: ControlsD = `FCTRLW'b1_0_001_011_00_00_0_0; // fnmadd + 7'b1000011: ControlsD = `FCTRLW'b1_0_01_000_000_00_0_0; // fmadd + 7'b1000111: ControlsD = `FCTRLW'b1_0_01_001_000_00_0_0; // fmsub + 7'b1001011: ControlsD = `FCTRLW'b1_0_01_010_000_00_0_0; // fnmsub + 7'b1001111: ControlsD = `FCTRLW'b1_0_01_011_000_00_0_0; // fnmadd 7'b1010011: casez(Funct7D) - 7'b00000??: ControlsD = `FCTRLW'b1_0_001_110_00_00_0_0; // fadd - 7'b00001??: ControlsD = `FCTRLW'b1_0_001_111_00_00_0_0; // fsub - 7'b00010??: ControlsD = `FCTRLW'b1_0_001_100_00_00_0_0; // fmul - 7'b00011??: ControlsD = `FCTRLW'b1_0_011_000_00_00_1_0; // fdiv - 7'b01011??: ControlsD = `FCTRLW'b1_0_011_001_00_00_1_0; // fsqrt + 7'b00000??: ControlsD = `FCTRLW'b1_0_01_110_000_00_0_0; // fadd + 7'b00001??: ControlsD = `FCTRLW'b1_0_01_111_000_00_0_0; // fsub + 7'b00010??: ControlsD = `FCTRLW'b1_0_01_100_000_00_0_0; // fmul + 7'b00011??: ControlsD = `FCTRLW'b1_0_10_000_000_00_1_0; // fdiv + 7'b01011??: ControlsD = `FCTRLW'b1_0_10_001_000_00_1_0; // fsqrt 7'b00100??: case(Funct3D) - 3'b000: ControlsD = `FCTRLW'b1_0_100_000_01_00_0_0; // fsgnj - 3'b001: ControlsD = `FCTRLW'b1_0_100_001_01_00_0_0; // fsgnjn - 3'b010: ControlsD = `FCTRLW'b1_0_100_010_01_00_0_0; // fsgnjx - default: ControlsD = `FCTRLW'b0_0_000_000_00_00_0_1; // non-implemented instruction + 3'b000: ControlsD = `FCTRLW'b1_0_11_000_001_00_0_0; // fsgnj + 3'b001: ControlsD = `FCTRLW'b1_0_11_001_001_00_0_0; // fsgnjn + 3'b010: ControlsD = `FCTRLW'b1_0_11_010_001_00_0_0; // fsgnjx + default: ControlsD = `FCTRLW'b0_0_00_000_000_00_0_1; // non-implemented instruction endcase 7'b00101??: case(Funct3D) - 3'b000: ControlsD = `FCTRLW'b1_0_100_111_10_00_0_0; // fmin - 3'b001: ControlsD = `FCTRLW'b1_0_100_101_10_00_0_0; // fmax - default: ControlsD = `FCTRLW'b0_0_000_000_00_00_0_1; // non-implemented instruction + 3'b000: ControlsD = `FCTRLW'b1_0_11_111_010_00_0_0; // fmin + 3'b001: ControlsD = `FCTRLW'b1_0_11_101_010_00_0_0; // fmax + default: ControlsD = `FCTRLW'b0_0_00_000_000_00_0_1; // non-implemented instruction endcase 7'b10100??: case(Funct3D) - 3'b010: ControlsD = `FCTRLW'b0_1_100_010_00_00_0_0; // feq - 3'b001: ControlsD = `FCTRLW'b0_1_100_001_00_00_0_0; // flt - 3'b000: ControlsD = `FCTRLW'b0_1_100_011_00_00_0_0; // fle - default: ControlsD = `FCTRLW'b0_0_000_000_00_00_0_1; // non-implemented instruction + 3'b010: ControlsD = `FCTRLW'b0_1_11_010_000_00_0_0; // feq + 3'b001: ControlsD = `FCTRLW'b0_1_11_001_000_00_0_0; // flt + 3'b000: ControlsD = `FCTRLW'b0_1_11_011_000_00_0_0; // fle + default: ControlsD = `FCTRLW'b0_0_00_000_000_00_0_1; // non-implemented instruction endcase - 7'b11100??: if (Funct3D == 3'b001) - ControlsD = `FCTRLW'b0_1_100_000_00_10_0_0; // fclass - else if (Funct3D[1:0] == 2'b00) ControlsD = `FCTRLW'b0_1_100_100_00_01_0_0; // fmv.x.w - else if (Funct3D[1:0] == 2'b01) ControlsD = `FCTRLW'b0_1_100_101_00_01_0_0; // fmv.x.d - else ControlsD = `FCTRLW'b0_0_000_000_00_00_0_1; // non-implemented instruction + 7'b11100??: if (Funct3D == 3'b001) ControlsD = `FCTRLW'b0_1_11_000_000_10_0_0; // fclass + else if (Funct3D[1:0] == 2'b00) ControlsD = `FCTRLW'b0_1_11_100_000_01_0_0; // fmv.x.w + else if (Funct3D[1:0] == 2'b01) ControlsD = `FCTRLW'b0_1_11_101_000_01_0_0; // fmv.x.d + else ControlsD = `FCTRLW'b0_0_00_000_000_00_0_1; // non-implemented instruction 7'b1101000: case(Rs2D[1:0]) - 2'b00: ControlsD = `FCTRLW'b1_0_100_000_11_00_0_0; // fcvt.s.w - 2'b01: ControlsD = `FCTRLW'b1_0_100_010_11_00_0_0; // fcvt.s.wu - 2'b10: ControlsD = `FCTRLW'b1_0_100_100_11_00_0_0; // fcvt.s.l - 2'b11: ControlsD = `FCTRLW'b1_0_100_110_11_00_0_0; // fcvt.s.lu - default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction + 2'b00: ControlsD = `FCTRLW'b1_0_11_000_011_00_0_0; // fcvt.s.w + 2'b01: ControlsD = `FCTRLW'b1_0_11_010_011_00_0_0; // fcvt.s.wu + 2'b10: ControlsD = `FCTRLW'b1_0_11_100_011_00_0_0; // fcvt.s.l + 2'b11: ControlsD = `FCTRLW'b1_0_11_110_011_00_0_0; // fcvt.s.lu + default: ControlsD = `FCTRLW'b0_0_00_0000_000_00_0_1; // non-implemented instruction endcase 7'b1100000: case(Rs2D[1:0]) - 2'b00: ControlsD = `FCTRLW'b0_1_100_001_11_11_0_0; // fcvt.w.s - 2'b01: ControlsD = `FCTRLW'b0_1_100_011_11_11_0_0; // fcvt.wu.s - 2'b10: ControlsD = `FCTRLW'b0_1_100_101_11_11_0_0; // fcvt.l.s - 2'b11: ControlsD = `FCTRLW'b0_1_100_111_11_11_0_0; // fcvt.lu.s - default: ControlsD = `FCTRLW'b0_0_000_000_00_00_0_1; // non-implemented instruction + 2'b00: ControlsD = `FCTRLW'b0_1_11_001_011_11_0_0; // fcvt.w.s + 2'b01: ControlsD = `FCTRLW'b0_1_11_011_011_11_0_0; // fcvt.wu.s + 2'b10: ControlsD = `FCTRLW'b0_1_11_101_011_11_0_0; // fcvt.l.s + 2'b11: ControlsD = `FCTRLW'b0_1_11_111_011_11_0_0; // fcvt.lu.s + default: ControlsD = `FCTRLW'b0_0_00_000_000_00_0_1; // non-implemented instruction endcase - 7'b1111000: ControlsD = `FCTRLW'b1_0_100_000_00_00_0_0; // fmv.w.x - 7'b0100000: ControlsD = `FCTRLW'b1_0_010_111_00_00_0_0; // fcvt.s.d + 7'b1111000: ControlsD = `FCTRLW'b1_0_11_000_000_00_0_0; // fmv.w.x + 7'b010000?: ControlsD = `FCTRLW'b1_0_11_000_100_00_0_0; // fcvt.s.d 7'b1101001: case(Rs2D[1:0]) - 2'b00: ControlsD = `FCTRLW'b1_0_100_000_11_00_0_0; // fcvt.d.w - 2'b01: ControlsD = `FCTRLW'b1_0_100_010_11_00_0_0; // fcvt.d.wu - 2'b10: ControlsD = `FCTRLW'b1_0_100_100_11_00_0_0; // fcvt.d.l - 2'b11: ControlsD = `FCTRLW'b1_0_100_110_11_00_0_0; // fcvt.d.lu - default: ControlsD = `FCTRLW'b0_0_000_000_00_00_0_1; // non-implemented instruction + 2'b00: ControlsD = `FCTRLW'b1_0_11_000_011_00_0_0; // fcvt.d.w + 2'b01: ControlsD = `FCTRLW'b1_0_11_010_011_00_0_0; // fcvt.d.wu + 2'b10: ControlsD = `FCTRLW'b1_0_11_100_011_00_0_0; // fcvt.d.l + 2'b11: ControlsD = `FCTRLW'b1_0_11_110_011_00_0_0; // fcvt.d.lu + default: ControlsD = `FCTRLW'b0_0_00_000_000_00_0_1; // non-implemented instruction endcase 7'b1100001: case(Rs2D[1:0]) - 2'b00: ControlsD = `FCTRLW'b0_1_100_001_11_11_0_0; // fcvt.w.d - 2'b01: ControlsD = `FCTRLW'b0_1_100_011_11_11_0_0; // fcvt.wu.d - 2'b10: ControlsD = `FCTRLW'b0_1_100_101_11_11_0_0; // fcvt.l.d - 2'b11: ControlsD = `FCTRLW'b0_1_100_111_11_11_0_0; // fcvt.lu.d - default: ControlsD = `FCTRLW'b0_0_000_000_00_00_0_1; // non-implemented instruction + 2'b00: ControlsD = `FCTRLW'b0_1_11_001_011_11_0_0; // fcvt.w.d + 2'b01: ControlsD = `FCTRLW'b0_1_11_011_011_11_0_0; // fcvt.wu.d + 2'b10: ControlsD = `FCTRLW'b0_1_11_101_011_11_0_0; // fcvt.l.d + 2'b11: ControlsD = `FCTRLW'b0_1_11_111_011_11_0_0; // fcvt.lu.d + default: ControlsD = `FCTRLW'b0_0_00_000_000_00_0_1; // non-implemented instruction endcase - 7'b1111001: ControlsD = `FCTRLW'b1_0_100_001_00_00_0_0; // fmv.d.x - 7'b0100001: ControlsD = `FCTRLW'b1_0_010_111_00_00_0_0; // fcvt.d.s - default: ControlsD = `FCTRLW'b0_0_000_000_00_00_0_1; // non-implemented instruction + 7'b1111001: ControlsD = `FCTRLW'b1_0_11_001_000_00_0_0; // fmv.d.x + //7'b0100001: ControlsD = `FCTRLW'b1_0_11_000_100_00_0_0; // fcvt.d.s + default: ControlsD = `FCTRLW'b0_0_00_000_100_00_0_1; // non-implemented instruction endcase - default: ControlsD = `FCTRLW'b0_0_000_000_00_00_0_1; // non-implemented instruction + default: ControlsD = `FCTRLW'b0_0_000_000_000_00_0_1; // non-implemented instruction endcase // unswizzle control bits @@ -117,7 +116,7 @@ module fctrl ( // Precision // 0-single // 1-double - assign FmtD = FResultSelD == 3'b000 ? Funct3D[0] : FResultSelD == 3'b010 ? Funct7D[0]^FOpCtrlD[1] : OpD[6:1] == 6'b010000 ? ~Funct7D[0] : Funct7D[0]; + assign FmtD = FResultSelD == 2'b00 ? Funct3D[0] : FResSelD == 3'b100 | OpD[6:1] == 6'b010000 ? ~Funct7D[0] : Funct7D[0]; // FResultSel: // 000 - ReadRes - load diff --git a/wally-pipelined/src/fpu/fhazard.sv b/wally-pipelined/src/fpu/fhazard.sv index 88d3b8fb..e5331737 100644 --- a/wally-pipelined/src/fpu/fhazard.sv +++ b/wally-pipelined/src/fpu/fhazard.sv @@ -29,7 +29,7 @@ module fhazard( input logic [4:0] Adr1E, Adr2E, Adr3E, // read data adresses input logic FRegWriteM, FRegWriteW, // is the fp register being written to input logic [4:0] RdM, RdW, // the adress being written to - input logic [2:0] FResultSelM, // the result being selected + input logic [1:0] FResultSelM, // the result being selected output logic FStallD, // stall the decode stage output logic [1:0] FForwardXE, FForwardYE, FForwardZE // select a forwarded value ); @@ -45,7 +45,7 @@ module fhazard( // if the needed value is in the memory stage - input 1 if ((Adr1E == RdM) & FRegWriteM) // if the result will be FResM (can be taken from the memory stage) - if(FResultSelM == 3'b100) FForwardXE = 2'b10; // choose FResM + if(FResultSelM == 3'b11) FForwardXE = 2'b10; // choose FResM else FStallD = 1; // otherwise stall // if the needed value is in the writeback stage else if ((Adr1E == RdW) & FRegWriteW) FForwardXE = 2'b01; // choose FPUResult64W @@ -54,7 +54,7 @@ module fhazard( // if the needed value is in the memory stage - input 2 if ((Adr2E == RdM) & FRegWriteM) // if the result will be FResM (can be taken from the memory stage) - if(FResultSelM == 3'b100) FForwardYE = 2'b10; // choose FResM + if(FResultSelM == 3'b11) FForwardYE = 2'b10; // choose FResM else FStallD = 1; // otherwise stall // if the needed value is in the writeback stage else if ((Adr2E == RdW) & FRegWriteW) FForwardYE = 2'b01; // choose FPUResult64W @@ -63,7 +63,7 @@ module fhazard( // if the needed value is in the memory stage - input 3 if ((Adr3E == RdM) & FRegWriteM) // if the result will be FResM (can be taken from the memory stage) - if(FResultSelM == 3'b100) FForwardZE = 2'b10; // choose FResM + if(FResultSelM == 3'b11) FForwardZE = 2'b10; // choose FResM else FStallD = 1; // otherwise stall // if the needed value is in the writeback stage else if ((Adr3E == RdW) & FRegWriteW) FForwardZE = 2'b01; // choose FPUResult64W diff --git a/wally-pipelined/src/fpu/fma.sv b/wally-pipelined/src/fpu/fma.sv index 466a6c40..51bb94af 100644 --- a/wally-pipelined/src/fpu/fma.sv +++ b/wally-pipelined/src/fpu/fma.sv @@ -36,7 +36,7 @@ module fma( input logic XSgnE, YSgnE, ZSgnE, // input signs - execute stage input logic [`NE-1:0] XExpE, YExpE, ZExpE, // input exponents - execute stage input logic [`NF:0] XManE, YManE, ZManE, // input mantissa - execute stage - input logic XSgnM, YSgnM, ZSgnM, // input signs - memory stage + input logic XSgnM, YSgnM, // input signs - memory stage input logic [`NE-1:0] XExpM, YExpM, ZExpM, // input exponents - memory stage input logic [`NF:0] XManM, YManM, ZManM, // input mantissa - memory stage input logic XDenormE, YDenormE, ZDenormE, // is denorm @@ -83,7 +83,7 @@ module fma( {AddendStickyE, KillProdE, InvZE, NormCntE, NegSumE, ZSgnEffE, PSgnE}, {AddendStickyM, KillProdM, InvZM, NormCntM, NegSumM, ZSgnEffM, PSgnM}); - fma2 fma2(.XSgnM, .YSgnM, .ZSgnM, .XExpM, .YExpM, .ZExpM, .XManM, .YManM, .ZManM, + fma2 fma2(.XSgnM, .YSgnM, .XExpM, .YExpM, .ZExpM, .XManM, .YManM, .ZManM, .FOpCtrlM, .FrmM, .FmtM, .ProdExpM, .AddendStickyM, .KillProdM, .SumM, .NegSumM, .InvZM, .NormCntM, .ZSgnEffM, .PSgnM, .XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, .ZInfM, .XNaNM, .YNaNM, .ZNaNM, .XSNaNM, .YSNaNM, .ZSNaNM, .FMAResM, .FMAFlgM); @@ -217,7 +217,7 @@ endmodule module fma2( - input logic XSgnM, YSgnM, ZSgnM, + input logic XSgnM, YSgnM, input logic [`NE-1:0] XExpM, YExpM, ZExpM, input logic [`NF:0] XManM, YManM, ZManM, input logic [2:0] FrmM, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude diff --git a/wally-pipelined/src/fpu/fpu.sv b/wally-pipelined/src/fpu/fpu.sv index 14545f34..931c5a02 100755 --- a/wally-pipelined/src/fpu/fpu.sv +++ b/wally-pipelined/src/fpu/fpu.sv @@ -66,9 +66,9 @@ module fpu ( logic FDivStartD, FDivStartE; // Start division or squareroot logic FWriteIntD; // Write to integer register logic [1:0] FForwardXE, FForwardYE, FForwardZE; // forwarding mux control signals - logic [2:0] FResultSelD, FResultSelE, FResultSelM, FResultSelW; // Select the result written to FP register + logic [1:0] FResultSelD, FResultSelE, FResultSelM, FResultSelW; // Select the result written to FP register logic [2:0] FOpCtrlD, FOpCtrlE, FOpCtrlM; // Select which opperation to do in each component - logic [1:0] FResSelD, FResSelE, FResSelM; // Select one of the results that finish in the memory stage + logic [2:0] FResSelD, FResSelE, FResSelM; // Select one of the results that finish in the memory stage logic [1:0] FIntResSelD, FIntResSelE, FIntResSelM; // Select the result written to the integer resister logic [4:0] Adr1E, Adr2E, Adr3E; // adresses of each input @@ -81,7 +81,7 @@ module fpu ( // unpacking signals logic XSgnE, YSgnE, ZSgnE; // input's sign - execute stage - logic XSgnM, YSgnM, ZSgnM; // input's sign - memory stage + logic XSgnM, YSgnM; // input's sign - memory stage logic [10:0] XExpE, YExpE, ZExpE; // input's exponent - execute stage logic [10:0] XExpM, YExpM, ZExpM; // input's exponent - memory stage logic [52:0] XManE, YManE, ZManE; // input's fraction - execute stage @@ -97,8 +97,7 @@ module fpu ( logic XInfE, YInfE, ZInfE; // is the input infinity - execute stage logic XInfM, YInfM, ZInfM; // is the input infinity - memory stage logic XExpMaxE; // is the exponent all ones (max value) - logic XNormE,YNormE; // is normal - logic XNormM,YNormM; // is normal + logic XNormE; // is normal // result and flag signals @@ -200,18 +199,18 @@ module fpu ( mux3 #(64) fxemux(FRD1E, FPUResultW, FResM, FForwardXE, FSrcXE); mux3 #(64) fyemux(FRD2E, FPUResultW, FResM, FForwardYE, FPreSrcYE); mux3 #(64) fzemux(FRD3E, FPUResultW, FResM, FForwardZE, FPreSrcZE); - mux3 #(64) fyaddmux(FPreSrcYE, {{32{1'b1}}, 2'b0, {7{1'b1}}, 23'b0}, {2'b0, {10{1'b1}}, 52'b0}, {FmtE&FOpCtrlE[2]&FOpCtrlE[1]&(FResultSelE==3'b001), ~FmtE&FOpCtrlE[2]&FOpCtrlE[1]&(FResultSelE==3'b001)}, FSrcYE); // Force Z to be 0 for multiply instructions + mux3 #(64) fyaddmux(FPreSrcYE, {{32{1'b1}}, 2'b0, {7{1'b1}}, 23'b0}, {2'b0, {10{1'b1}}, 52'b0}, {FmtE&FOpCtrlE[2]&FOpCtrlE[1]&(FResultSelE==3'b01), ~FmtE&FOpCtrlE[2]&FOpCtrlE[1]&(FResultSelE==3'b01)}, FSrcYE); // Force Z to be 0 for multiply instructions mux3 #(64) fzmulmux(FPreSrcZE, 64'b0, FPreSrcYE, {FOpCtrlE[2]&FOpCtrlE[1], FOpCtrlE[2]&~FOpCtrlE[1]}, FSrcZE); // Force Z to be 0 for multiply instructions // unpacking unit // - splits FP inputs into their various parts // - does some classifications (SNaN, NaN, Denorm, Norm, Zero, Infifnity) - unpacking unpacking(.X(FSrcXE), .Y(FSrcYE), .Z(FSrcZE), .FOpCtrlE, .FResultSelE, .FmtE, + unpacking unpacking(.X(FSrcXE), .Y(FSrcYE), .Z(FSrcZE), .FOpCtrlE, .FmtE, // outputs: .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE, .XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE, .XDenormE, .YDenormE, .ZDenormE, - .XZeroE, .YZeroE, .ZZeroE, .BiasE, .XInfE, .YInfE, .ZInfE, .XExpMaxE, .XNormE, .YNormE); + .XZeroE, .YZeroE, .ZZeroE, .BiasE, .XInfE, .YInfE, .ZInfE, .XExpMaxE, .XNormE); // FMA // - two stage FMA @@ -223,7 +222,7 @@ module fpu ( fma fma (.clk, .reset, .FlushM, .StallM, .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE, .XDenormE, .YDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, .BiasE, - .XSgnM, .YSgnM, .ZSgnM, .XExpM, .YExpM, .ZExpM, .XManM, .YManM, .ZManM, + .XSgnM, .YSgnM, .XExpM, .YExpM, .ZExpM, .XManM, .YManM, .ZManM, .XNaNM, .YNaNM, .ZNaNM, .XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, .ZInfM, .XSNaNM, .YSNaNM, .ZSNaNM, .FOpCtrlE, .FOpCtrlM, @@ -266,7 +265,7 @@ module fpu ( //*** change to use the unpacking unit if possible // faddcvt faddcvt (.clk, .reset, .FlushM, .StallM, .FrmM, .FOpCtrlM, .FmtE, .FmtM, .FSrcXE, .FSrcYE, .FOpCtrlE, // .XSgnM, .YSgnM, .XManM, .YManM, .XExpM, .YExpM, -// .XSgnE, .YSgnE, .XManE, .YManE, .XExpE, .YExpE, .XDenormE, .YDenormE, .XNormE, .YNormE, .XNormM, .YNormM, .XZeroE, .YZeroE, .XInfE, .YInfE, .XNaNE, .YNaNE, .XSNaNE, .YSNaNE, +// .XSgnE, .YSgnE, .XManE, .YManE, .XExpE, .YExpE, .XDenormE, .YDenormE, .XNormE, .XNormM, .YNormM, .XZeroE, .YZeroE, .XInfE, .YInfE, .XNaNE, .YNaNE, .XSNaNE, .YSNaNE, // // outputs: // .CvtFpResM, .CvtFpFlgM); @@ -311,8 +310,8 @@ module fpu ( mux2 #(64) SrcAMux({{32{1'b1}}, SrcAE[31:0]}, {{64-`XLEN{1'b1}}, SrcAE}, FmtE, AlignedSrcAE); // select a result that may be written to the FP register - mux4 #(64) FResMux(AlignedSrcAE, SgnResE, CmpResE, CvtResE, FResSelE, FResE); - mux4 #(5) FFlgMux(5'b0, {4'b0, SgnNVE}, {4'b0, CmpNVE}, CvtFlgE, FResSelE, FFlgE); + mux5 #(64) FResMux(AlignedSrcAE, SgnResE, CmpResE, CvtResE, CvtFpResE, FResSelE, FResE); + mux5 #(5) FFlgMux(5'b0, {4'b0, SgnNVE}, {4'b0, CmpNVE}, CvtFlgE, CvtFpFlgE, FResSelE, FFlgE); // select the result that may be written to the integer register - to IEU mux4 #(`XLEN) IntResMux(CmpResE[`XLEN-1:0], FSrcXE[`XLEN-1:0], ClassResE[`XLEN-1:0], CvtResE[`XLEN-1:0], FIntResSelE, FIntResE); @@ -327,7 +326,7 @@ module fpu ( // flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, FSrcXE, FSrcXM); flopenrc #(65) EMFpReg2(clk, reset, FlushM, ~StallM, {XSgnE,XExpE,XManE}, {XSgnM,XExpM,XManM}); flopenrc #(65) EMFpReg3(clk, reset, FlushM, ~StallM, {YSgnE,YExpE,YManE}, {YSgnM,YExpM,YManM}); - flopenrc #(65) EMFpReg4(clk, reset, FlushM, ~StallM, {ZSgnE,ZExpE,ZManE}, {ZSgnM,ZExpM,ZManM}); + flopenrc #(64) EMFpReg4(clk, reset, FlushM, ~StallM, {ZExpE,ZManE}, {ZExpM,ZManM}); flopenrc #(12) EMFpReg5(clk, reset, FlushM, ~StallM, {XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE}, {XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM}); @@ -338,17 +337,17 @@ module fpu ( flopenrc #(`XLEN) EMRegSgnRes(clk, reset, FlushM, ~StallM, FIntResE, FIntResM); // flopenrc #(1) EMRegSgnFlg(clk, reset, FlushM, ~StallM, SgnNVE, SgnNVM); - flopenrc #(64) EMRegCvtFpRes(clk, reset, FlushM, ~StallM, CvtFpResE, CvtFpResM); - flopenrc #(5) EMRegCvtFpFlg(clk, reset, FlushM, ~StallM, CvtFpFlgE, CvtFpFlgM); + //flopenrc #(64) EMRegCvtFpRes(clk, reset, FlushM, ~StallM, CvtFpResE, CvtFpResM); + //flopenrc #(5) EMRegCvtFpFlg(clk, reset, FlushM, ~StallM, CvtFpFlgE, CvtFpFlgM); // flopenrc #(64) EMRegCvtRes(clk, reset, FlushM, ~StallM, CvtResE, CvtResM); // flopenrc #(5) EMRegCvtFlg(clk, reset, FlushM, ~StallM, CvtFlgE, CvtFlgM); // flopenrc #(64) EMRegClass(clk, reset, FlushM, ~StallM, ClassResE, ClassResM); - flopenrc #(14) EMCtrlReg(clk, reset, FlushM, ~StallM, - {FRegWriteE, FResultSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE, XNormE, YNormE}, - {FRegWriteM, FResultSelM, FrmM, FmtM, FOpCtrlM, FWriteIntM, XNormM, YNormM}); + flopenrc #(11) EMCtrlReg(clk, reset, FlushM, ~StallM, + {FRegWriteE, FResultSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE}, + {FRegWriteM, FResultSelM, FrmM, FmtM, FOpCtrlM, FWriteIntM}); @@ -361,7 +360,7 @@ module fpu ( // FPU flag selection - to privileged - mux5 #(5) FPUFlgMux(5'b0, FMAFlgM, CvtFpFlgM, FDivFlgM, FFlgM, FResultSelW, SetFflagsM); + mux4 #(5) FPUFlgMux(5'b0, FMAFlgM, FDivFlgM, FFlgM, FResultSelW, SetFflagsM); @@ -374,7 +373,7 @@ module fpu ( flopenrc #(64) MWRegDiv(clk, reset, FlushW, ~StallW, FDivResM, FDivResW); flopenrc #(64) MWRegAdd(clk, reset, FlushW, ~StallW, CvtFpResM, CvtFpResW); flopenrc #(64) MWRegClass(clk, reset, FlushW, ~StallW, FResM, FResW); - flopenrc #(6) MWCtrlReg(clk, reset, FlushW, ~StallW, + flopenrc #(5) MWCtrlReg(clk, reset, FlushW, ~StallW, {FRegWriteM, FResultSelM, FmtM, FWriteIntM}, {FRegWriteW, FResultSelW, FmtW, FWriteIntW}); @@ -391,7 +390,7 @@ module fpu ( mux2 #(64) ReadResMux({{32{1'b1}}, ReadDataW[31:0]}, {{64-`XLEN{1'b1}}, ReadDataW}, FmtW, ReadResW); // select the result to be written to the FP register - mux5 #(64) FPUResultMux(ReadResW, FMAResW, CvtFpResW, FDivResW, FResW, FResultSelW, FPUResultW); + mux4 #(64) FPUResultMux(ReadResW, FMAResW, FDivResW, FResW, FResultSelW, FPUResultW); end else begin // no F_SUPPORTED or D_SUPPORTED; tie outputs low diff --git a/wally-pipelined/src/fpu/unpacking.sv b/wally-pipelined/src/fpu/unpacking.sv index 3913b06b..3f80ee03 100644 --- a/wally-pipelined/src/fpu/unpacking.sv +++ b/wally-pipelined/src/fpu/unpacking.sv @@ -1,12 +1,11 @@ module unpacking ( input logic [63:0] X, Y, Z, input logic FmtE, - input logic [2:0] FResultSelE, input logic [2:0] FOpCtrlE, output logic XSgnE, YSgnE, ZSgnE, output logic [10:0] XExpE, YExpE, ZExpE, output logic [52:0] XManE, YManE, ZManE, - output logic XNormE, YNormE, + output logic XNormE, output logic XNaNE, YNaNE, ZNaNE, output logic XSNaNE, YSNaNE, ZSNaNE, output logic XDenormE, YDenormE, ZDenormE, @@ -55,7 +54,6 @@ module unpacking ( assign ZExpMaxE = FmtE ? &Z[62:52] : &Z[30:23]; assign XNormE = ~(XExpMaxE|XExpZero); - assign YNormE = ~YExpZero; // only used in addcvt - checks inf and NaN seperately assign XNaNE = XExpMaxE & ~XFracZero; assign YNaNE = YExpMaxE & ~YFracZero;