mirror of
				https://github.com/openhwgroup/cvw
				synced 2025-02-11 06:05:49 +00:00 
			
		
		
		
	fpu parameterized - except fdivsqrt
This commit is contained in:
		
							parent
							
								
									1d8bc2dc1b
								
							
						
					
					
						commit
						9a09ee3a35
					
				@ -35,11 +35,10 @@
 | 
			
		||||
`define XLEN 64
 | 
			
		||||
 | 
			
		||||
// IEEE 754 compliance
 | 
			
		||||
`define IEEE754 1
 | 
			
		||||
`define IEEE754 0
 | 
			
		||||
 | 
			
		||||
// MISA RISC-V configuration per specification
 | 
			
		||||
//16 - quad 3 - double 5 - single
 | 
			
		||||
`define MISA (32'h00000104 | 1 << 5 | 1 << 3 | 0 << 16 | 1 << 18 | 1 << 20 | 1 << 12 | 1 << 0 )
 | 
			
		||||
`define MISA (32'h00000104 | 1 << 5 | 1 << 3 | 1 << 18 | 1 << 20 | 1 << 12 | 1 << 0 )
 | 
			
		||||
`define ZICSR_SUPPORTED 1
 | 
			
		||||
`define ZIFENCEI_SUPPORTED 1
 | 
			
		||||
`define COUNTERS 32
 | 
			
		||||
@ -52,9 +51,11 @@
 | 
			
		||||
`define UARCH_SINGLECYCLE 0
 | 
			
		||||
`define DMEM `MEM_CACHE
 | 
			
		||||
`define IMEM `MEM_CACHE
 | 
			
		||||
`define DBUS 1
 | 
			
		||||
`define IBUS 1
 | 
			
		||||
`define VIRTMEM_SUPPORTED 1
 | 
			
		||||
`define VECTORED_INTERRUPTS_SUPPORTED 1 
 | 
			
		||||
`define BIGENDIAN_SUPPORTED 0
 | 
			
		||||
`define BIGENDIAN_SUPPORTED 1
 | 
			
		||||
 | 
			
		||||
// TLB configuration.  Entries should be a power of 2
 | 
			
		||||
`define ITLB_ENTRIES 32
 | 
			
		||||
@ -82,13 +83,13 @@
 | 
			
		||||
// Bus Interface width
 | 
			
		||||
`define AHBW 64
 | 
			
		||||
 | 
			
		||||
// WFI Timeout Wait
 | 
			
		||||
`define WFI_TIMEOUT_BIT 16
 | 
			
		||||
 | 
			
		||||
// Peripheral Physical Addresses
 | 
			
		||||
// Peripheral memory space extends from BASE to BASE+RANGE
 | 
			
		||||
// Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits
 | 
			
		||||
 | 
			
		||||
// WFI Timeout Wait
 | 
			
		||||
`define WFI_TIMEOUT_BIT 16
 | 
			
		||||
 | 
			
		||||
// *** each of these is `PA_BITS wide. is this parameterizable INSIDE the config file?
 | 
			
		||||
`define BOOTROM_SUPPORTED 1'b1
 | 
			
		||||
`define BOOTROM_BASE   56'h00001000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder
 | 
			
		||||
@ -130,13 +131,12 @@
 | 
			
		||||
`define PLIC_GPIO_ID 3
 | 
			
		||||
`define PLIC_UART_ID 10
 | 
			
		||||
 | 
			
		||||
`define TWO_BIT_PRELOAD "../config/rv64ic/twoBitPredictor.txt"
 | 
			
		||||
`define BTB_PRELOAD "../config/rv64ic/BTBPredictor.txt"
 | 
			
		||||
`define TWO_BIT_PRELOAD "../config/shared/twoBitPredictor.txt"
 | 
			
		||||
`define BTB_PRELOAD "../config/shared/BTBPredictor.txt"
 | 
			
		||||
`define BPRED_ENABLED 1
 | 
			
		||||
`define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE
 | 
			
		||||
`define TESTSBP 0
 | 
			
		||||
`define BPRED_SIZE 10
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
`define REPLAY 0
 | 
			
		||||
`define HPTW_WRITES_SUPPORTED 0
 | 
			
		||||
 | 
			
		||||
@ -69,14 +69,15 @@
 | 
			
		||||
`define H_BIAS 15
 | 
			
		||||
 | 
			
		||||
// Floating point length FLEN and number of exponent (NE) and fraction (NF) bits
 | 
			
		||||
`define FLEN (`Q_SUPPORTED ? `Q_LEN  : `D_SUPPORTED ? `D_LEN  : `F_SUPPORTED ? `S_LEN  : `H_LEN)
 | 
			
		||||
`define FLEN ($unsigned(`Q_SUPPORTED ? `Q_LEN  : `D_SUPPORTED ? `D_LEN  : `F_SUPPORTED ? `S_LEN  : `H_LEN))
 | 
			
		||||
`define NE   (`Q_SUPPORTED ? `Q_NE   : `D_SUPPORTED ? `D_NE   : `F_SUPPORTED ? `S_NE   : `H_NE)
 | 
			
		||||
`define NF   (`Q_SUPPORTED ? `Q_NF   : `D_SUPPORTED ? `D_NF   : `F_SUPPORTED ? `S_NF   : `H_NF)
 | 
			
		||||
`define FMT  (`Q_SUPPORTED ? 3       : `D_SUPPORTED ? 1       : `F_SUPPORTED ? 0       : 2)
 | 
			
		||||
`define BIAS (`Q_SUPPORTED ? `Q_BIAS : `D_SUPPORTED ? `D_BIAS : `F_SUPPORTED ? `S_BIAS : `H_BIAS)
 | 
			
		||||
 | 
			
		||||
// Floating point constants needed for FPU parameterization
 | 
			
		||||
`define FPSIZES (`Q_SUPPORTED+`D_SUPPORTED+`F_SUPPORTED+`ZFH_SUPPORTED)
 | 
			
		||||
`define FPSIZES ((3)'(`Q_SUPPORTED)+(3)'(`D_SUPPORTED)+(3)'(`F_SUPPORTED)+(3)'(`ZFH_SUPPORTED))
 | 
			
		||||
`define FMTBITS (((`FPSIZES==3'b011)|(`FPSIZES==3'b100)) ? 2 : 1)
 | 
			
		||||
`define LEN1  ((`D_SUPPORTED & (`FLEN != `D_LEN)) ? `D_LEN   : (`F_SUPPORTED & (`FLEN != `S_LEN)) ? `S_LEN  : `H_LEN)
 | 
			
		||||
`define NE1   ((`D_SUPPORTED & (`FLEN != `D_LEN)) ? `D_NE   : (`F_SUPPORTED & (`FLEN != `S_LEN)) ? `S_NE  : `H_NE)
 | 
			
		||||
`define NF1   ((`D_SUPPORTED & (`FLEN != `D_LEN)) ? `D_NF  : (`F_SUPPORTED & (`FLEN != `S_LEN)) ? `S_NF : `H_NF)
 | 
			
		||||
 | 
			
		||||
@ -5,7 +5,7 @@ export PATH=$PATH:/usr/local/bin/
 | 
			
		||||
verilator=`which verilator`
 | 
			
		||||
 | 
			
		||||
basepath=$(dirname $0)/..
 | 
			
		||||
for config in rv32e rv64gc rv32gc rv32ic ; do
 | 
			
		||||
for config in rv64fp rv32e rv64gc rv32gc rv32ic; do
 | 
			
		||||
    echo "$config linting..."
 | 
			
		||||
    if !($verilator --lint-only "$@" --top-module wallypipelinedsoc "-I$basepath/config/shared" "-I$basepath/config/$config" $basepath/src/*/*.sv $basepath/src/*/*/*.sv --relative-includes); then
 | 
			
		||||
        echo "Exiting after $config lint due to errors or warnings"
 | 
			
		||||
 | 
			
		||||
@ -1,23 +0,0 @@
 | 
			
		||||
# Makefile
# Builds one executable per C source file found in this directory.

CC     = gcc
CFLAGS = -O3
LFLAGS = -L. 
# Link against the riscv-isa-sim version of SoftFloat rather than 
# the regular version to get RISC-V NaN behavior
IFLAGS   = -I$(RISCV)/riscv-isa-sim/softfloat
# -lm is listed after the archive. It used to be a separate "LIBS = -lm"
# assignment that this assignment silently overwrote, so it never reached
# the link line.
LIBS   = $(RISCV)/riscv-isa-sim/build/libsoftfloat.a -lm
#IFLAGS = -I../../../addins/SoftFloat-3e/source/include/
#LIBS   = ../../../addins/SoftFloat-3e/build/Linux-x86_64-GCC/softfloat.a
SRCS   = $(wildcard *.c)

# One program per source file: foo.c -> foo
PROGS = $(patsubst %.c,%,$(SRCS))

# all and clean are commands, not files; keep them working even if a file
# with one of those names appears in the directory.
.PHONY: all clean

all:	$(PROGS)

%: %.c
	$(CC) $(CFLAGS) $(IFLAGS) $(LFLAGS) -o $@ $< $(LIBS)

clean: 
	rm -f $(PROGS)
 | 
			
		||||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							@ -1,23 +0,0 @@
 | 
			
		||||
# fma.do 
 | 
			
		||||
#
 | 
			
		||||
# run with vsim -do "do fma.do"
 | 
			
		||||
# add -c before -do for batch simulation
 | 
			
		||||
 | 
			
		||||
onbreak {resume}
 | 
			
		||||
 | 
			
		||||
# create library
 | 
			
		||||
vlib worklib
 | 
			
		||||
 | 
			
		||||
vlog -lint -sv -work worklib fma16.v testbench.v
 | 
			
		||||
vopt +acc worklib.testbench_fma16 -work worklib -o testbenchopt
 | 
			
		||||
vsim -lib worklib testbenchopt
 | 
			
		||||
 | 
			
		||||
add wave sim:/testbench_fma16/clk
 | 
			
		||||
add wave sim:/testbench_fma16/reset
 | 
			
		||||
add wave sim:/testbench_fma16/x
 | 
			
		||||
add wave sim:/testbench_fma16/y
 | 
			
		||||
add wave sim:/testbench_fma16/z
 | 
			
		||||
add wave sim:/testbench_fma16/result
 | 
			
		||||
add wave sim:/testbench_fma16/rexpected
 | 
			
		||||
 | 
			
		||||
run -all
 | 
			
		||||
@ -1,268 +0,0 @@
 | 
			
		||||
// fma16.sv
 | 
			
		||||
// David_Harris@hmc.edu 26 February 2022
 | 
			
		||||
// 16-bit floating-point multiply-accumulate
 | 
			
		||||
 | 
			
		||||
// Operation: general purpose multiply, add, fma, with optional negation
 | 
			
		||||
//   If mul=1, p = x * y.  Else p = x.
 | 
			
		||||
//   If add=1, result = p + z.  Else result = p.
 | 
			
		||||
//   If negr or negz = 1, negate result or z to handle negations and subtractions
 | 
			
		||||
//   fadd: mul = 0, add = 1, negr = negz = 0
 | 
			
		||||
//   fsub: mul = 0, add = 1, negr = 0, negz = 1
 | 
			
		||||
//   fmul: mul = 1, add = 0, negr = 0, negz = 0
 | 
			
		||||
//   fmadd:  mul = 1, add = 1, negr = 0, negz = 0
 | 
			
		||||
//   fmsub:  mul = 1, add = 1, negr = 0, negz = 1
 | 
			
		||||
//   fnmadd: mul = 1, add = 1, negr = 1, negz = 0
 | 
			
		||||
//   fnmsub: mul = 1, add = 1, negr = 1, negz = 1
 | 
			
		||||
 | 
			
		||||
`define FFLEN 16
 | 
			
		||||
`define Nf 10
 | 
			
		||||
`define Ne 5
 | 
			
		||||
`define BIAS 15
 | 
			
		||||
`define EMIN (-(2**(`Ne-1)-1))
 | 
			
		||||
`define EMAX (2**(`Ne-1)-1)
 | 
			
		||||
 | 
			
		||||
`define NaN 16'h7E00
 | 
			
		||||
`define INF 15'h7C00
 | 
			
		||||
 | 
			
		||||
// rounding modes *** update
 | 
			
		||||
`define RZ  3'b00
 | 
			
		||||
`define RNE 3'b01
 | 
			
		||||
`define RM  3'b10
 | 
			
		||||
`define RP  3'b11
 | 
			
		||||
 | 
			
		||||
module fma16(
 | 
			
		||||
  input  logic [`FFLEN-1:0] x, y, z,
 | 
			
		||||
  input  logic        mul, add, negr, negz,
 | 
			
		||||
  input  logic [1:0]  roundmode,  // 00: rz, 01: rne, 10: rp, 11: rn
 | 
			
		||||
  output logic [`FFLEN-1:0] result);
 | 
			
		||||
 
 | 
			
		||||
  logic [`Nf:0] xm, ym, zm; // U1.Nf
 | 
			
		||||
  logic [`Ne-1:0]  xe, ye, ze; // B_Ne
 | 
			
		||||
  logic        xs, ys, zs;
 | 
			
		||||
  logic        zs1; // sign before optional negation
 | 
			
		||||
  logic [2*`Nf+1:0] pm; // U2.2Nf
 | 
			
		||||
  logic [`Ne:0]  pe; // B_Ne+1
 | 
			
		||||
  logic        ps;  // sign of product
 | 
			
		||||
  logic [22:0] rm;
 | 
			
		||||
  logic [`Ne+1:0]  re;
 | 
			
		||||
  logic        rs;
 | 
			
		||||
  logic        xzero, yzero, zzero, xinf, yinf, zinf, xnan, ynan, znan;
 | 
			
		||||
  logic [`Ne+1:0]  re2;
 | 
			
		||||
 | 
			
		||||
  unpack16 unpack(x, y, z, xm, ym, zm, xe, ye, ze, xs, ys, zs1, xzero, yzero, zzero, xinf, yinf, zinf, xnan, ynan, znan);  // unpack inputs
 | 
			
		||||
  //signadj16 signadj(negr, negz, xs, ys, zs1, ps, zs);             // handle negations
 | 
			
		||||
  mult16 mult16(mul, xm, ym, xe, ye, xs, ys, pm, pe, ps);                       // p = x * y
 | 
			
		||||
  add16 add16(add, pm, zm, pe, ze, ps, zs, negz, rm, re, re2, rs);             // r = z + p
 | 
			
		||||
  postproc16 post(roundmode,  xzero, yzero, zzero, xinf, yinf, zinf, xnan, ynan, znan, rm, zm, re, ze, rs, zs, ps, re2, result);                 // normalize, round, pack
 | 
			
		||||
endmodule
 | 
			
		||||
 | 
			
		||||
// mult16: optional multiply stage of the 16-bit FMA.
//   mul = 1: p = x * y (significands multiplied, exponents added, bias removed)
//   mul = 0: p = x     (x passed through unchanged, including its sign)
// Ports:
//   xm, ym : significands, U1.Nf
//   xe, ye : biased exponents, Ne bits
//   xs, ys : signs
//   pm     : product significand, U2.2Nf
//   pe     : product exponent, Ne+1 bits
//   ps     : product sign
module mult16(
  input  logic        mul,
  input  logic [`Nf:0] xm, ym,
  input  logic [`Ne-1:0]  xe, ye,
  input  logic        xs, ys,
  output logic [2*`Nf+1:0] pm,
  output logic [`Ne:0]  pe,
  output logic        ps);

  // only multiply if mul = 1
  // Pass-through pads xm with Nf fraction zeros (was a hard-coded 10'b0) so it lands in U2.2Nf.
  assign pm = mul ? xm * ym : {1'b0, xm, {(`Nf){1'b0}}}; // multiply mantissas
  assign pe = mul ? xe + ye - `BIAS : {1'b0, xe};        // add exponents, account for bias
  // When mul = 0 the product is x itself, so y's sign must not affect ps.
  assign ps = mul ? xs ^ ys : xs;                        // negative if exactly one of X, Y is negative
endmodule
 | 
			
		||||
 | 
			
		||||
module add16(
 | 
			
		||||
  input  logic        add,
 | 
			
		||||
  input  logic [2*`Nf+1:0] pm,  // U2.2Nf
 | 
			
		||||
  input  logic [`Nf:0] zm, // U1.Nf
 | 
			
		||||
  input  logic [`Ne:0]  pe, // B_Ne+1
 | 
			
		||||
  input  logic [`Ne-1:0]  ze, // B_Ne
 | 
			
		||||
  input  logic        ps, zs, 
 | 
			
		||||
  input  logic        negz,
 | 
			
		||||
  output logic [22:0] rm,
 | 
			
		||||
  output logic [`Ne+1:0]  re, // B_Ne+2
 | 
			
		||||
  output logic [`Ne+1:0]  re2,
 | 
			
		||||
  output logic        rs);
 | 
			
		||||
 | 
			
		||||
  logic [`Nf*3+7:0] paligned, zaligned, zalignedaddsub, r, r2, rnormed, rnormed2; // U(Nf+6).(2Nf+2) aligned significands
 | 
			
		||||
  logic signed [`Ne:0] ExpDiff; // Q(Ne+2).0
 | 
			
		||||
  logic [`Ne:0] AlignCnt; // U(Ne+3) bits to right shift Z for alignment *** check size.  
 | 
			
		||||
  logic [`Nf-1:0] prezsticky;
 | 
			
		||||
  logic           zsticky;
 | 
			
		||||
  logic          effectivesub;
 | 
			
		||||
  logic           rs0;
 | 
			
		||||
  logic [`Ne:0]     leadingzeros, NormCnt; // *** should paramterize size
 | 
			
		||||
  logic [`Ne:0]   re1;
 | 
			
		||||
 | 
			
		||||
  // Alignment shift
 | 
			
		||||
  assign paligned = {{(`Nf+4){1'b0}}, pm, 2'b00}; // constant shift to prepend leading and trailing 0s.
 | 
			
		||||
  assign ExpDiff = pe - {1'b0, ze}; // Compute exponent difference as signed number
 | 
			
		||||
  always_comb // AlignCount mux; see Muller page 254
 | 
			
		||||
    if (ExpDiff <= (-2*`Nf - 1)) begin AlignCnt = 3*`Nf + 7;         re = {1'b0, pe}; end
 | 
			
		||||
    else if (ExpDiff <= 2)       begin AlignCnt = `Nf + 4 - ExpDiff; re = {1'b0, pe}; end
 | 
			
		||||
    else if (ExpDiff <= `Nf+3)   begin AlignCnt = `Nf + 4 - ExpDiff; re = {2'b0, ze}; end
 | 
			
		||||
    else                         begin AlignCnt = 0;                 re = {2'b0, ze}; end
 | 
			
		||||
  // Shift Zm right by AlignCnt.  Produce 3Nf+8 bits of Zaligned in U(Nf+6).(2Nf+2) and Nf bits becoming sticky
 | 
			
		||||
  assign {zaligned, prezsticky} = {zm, {(3*`Nf+7){1'b0}}} >> AlignCnt; //Right shift
 | 
			
		||||
  assign zsticky = |prezsticky; // Sticky bit if any of the discarded bits were 1
 | 
			
		||||
  
 | 
			
		||||
  // Effective subtraction
 | 
			
		||||
  assign effectivesub = ps ^ zs ^ negz; // subtract |z| from |p|
 | 
			
		||||
  assign zalignedaddsub = effectivesub ? ~zaligned : zaligned;  // invert zaligned for subtraction
 | 
			
		||||
 | 
			
		||||
  // Adder
 | 
			
		||||
  assign r = paligned + zalignedaddsub + {{`Nf*3+7{1'b0}}, effectivesub}; // add aligned significands
 | 
			
		||||
  assign rs0 = r[`Nf*3+7]; // sign of the initial result
 | 
			
		||||
  assign r2 = rs0 ? ~r+1 : r; // invert sum if negative; could optimize with end-around carry?
 | 
			
		||||
 | 
			
		||||
  // Sign Logic
 | 
			
		||||
  assign rs = ps ^ rs0; // flip the sign if necessary
 | 
			
		||||
 | 
			
		||||
  // Leading zero counter
 | 
			
		||||
  lzc lzc(r2, leadingzeros); // count number of leading zeros in 2Nf+5 lower digits of r2
 | 
			
		||||
  assign re1 = pe +2 - leadingzeros; // *** declare, # of bits
 | 
			
		||||
 | 
			
		||||
  // Normalization shift
 | 
			
		||||
  always_comb // NormCount mux
 | 
			
		||||
    if (ExpDiff < 3) begin 
 | 
			
		||||
      if (re1 >= `EMIN) begin  NormCnt = `Nf + 3 + leadingzeros;  re2 = {1'b0, re1}; end
 | 
			
		||||
      else              begin  NormCnt = `Nf + 5 + pe - `EMIN; re2 = `EMIN;    end
 | 
			
		||||
    end else            begin  NormCnt = AlignCnt; re = {2'b00, ze};                  end
 | 
			
		||||
  assign rnormed = r2 << NormCnt; // *** update sticky
 | 
			
		||||
  /* temporarily comment out to start synth
 | 
			
		||||
 | 
			
		||||
  // One-bit secondary normalization
 | 
			
		||||
  if (ExpDiff <= 2)          begin rnormed2 = rnormed; re2 = re; end // no secondary normalization
 | 
			
		||||
  else begin // *** handle sticky
 | 
			
		||||
    if (rnormed[***])        begin rnormed2 = rnormed >> 1; re2 = re+1; end
 | 
			
		||||
    else if (rnormed[***-1]) begin rnormed2 = rnormed; re2 = re;        end
 | 
			
		||||
    else                     begin rnormed2 = rnormed << 1; re2 = re-1; end
 | 
			
		||||
  end
 | 
			
		||||
 | 
			
		||||
  // round
 | 
			
		||||
  assign l = rnormed2[***]; // least significant bit 
 | 
			
		||||
  assign r = rnormed2[***-1]; // rounding bit
 | 
			
		||||
  assign s = ***; // sticky bit
 | 
			
		||||
  always_comb
 | 
			
		||||
    case (roundmode)
 | 
			
		||||
      RZ: roundup = 0;
 | 
			
		||||
      RP: roundup = ~rs & (r | s); 
 | 
			
		||||
      RM: roundup = rs & (r | s);
 | 
			
		||||
      RNE: roundup = r & (s | l);
 | 
			
		||||
      default: roundup = 0;
 | 
			
		||||
    endcase
 | 
			
		||||
  assign {re3, rrounded} = {re2, rnormed2[***]} + roundup; // increment if necessary
 | 
			
		||||
*/
 | 
			
		||||
 | 
			
		||||
  // *** need to handle rounding to MAXNUM vs. INFINITY
 | 
			
		||||
  
 | 
			
		||||
  // add or pass product through
 | 
			
		||||
 /* assign rm = add ? arm : {1'b0, pm};
 | 
			
		||||
  assign re = add ? are : {1'b0, pe};
 | 
			
		||||
  assign rs = add ? ars : ps; */
 | 
			
		||||
endmodule
 | 
			
		||||
 | 
			
		||||
module lzc(
 | 
			
		||||
  input  logic [`Nf*3+7:0] r2,
 | 
			
		||||
  output logic [`Ne:0]   leadingzeros
 | 
			
		||||
);
 | 
			
		||||
 | 
			
		||||
endmodule
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
module postproc16(
 | 
			
		||||
  input  logic [1:0] roundmode,
 | 
			
		||||
  input  logic        xzero, yzero, zzero, xinf, yinf, zinf, xnan, ynan, znan,
 | 
			
		||||
  input  logic [22:0] rm, 
 | 
			
		||||
  input  logic [`Nf:0] zm, // U1.Nf
 | 
			
		||||
  input  logic [6:0]  re, 
 | 
			
		||||
  input  logic [`Ne-1:0]  ze, // B_Ne
 | 
			
		||||
  input  logic        rs, zs, ps,
 | 
			
		||||
  input  logic [`Ne+1:0]  re2,
 | 
			
		||||
  output logic [15:0] result);
 | 
			
		||||
 | 
			
		||||
  logic [9:0] uf, uff;
 | 
			
		||||
  logic [6:0] ue;
 | 
			
		||||
  logic [6:0] ueb, uebiased;
 | 
			
		||||
  logic       invalid;
 | 
			
		||||
 | 
			
		||||
    // Special cases
 | 
			
		||||
  // *** not handling signaling NaN
 | 
			
		||||
  // *** also add overflow/underflow/inexact
 | 
			
		||||
  always_comb begin
 | 
			
		||||
    if (xnan | ynan | znan)                    begin result = `NaN; invalid = 0; end // propagate NANs
 | 
			
		||||
    else if ((xinf | yinf) & zinf & (ps ^ zs)) begin result = `NaN; invalid = 1; end // infinity - infinity
 | 
			
		||||
    else if (xzero & yinf | xinf & yzero)      begin result = `NaN; invalid = 1; end // zero times infinity
 | 
			
		||||
    else if (xinf | yinf)                      begin result = {ps, `INF}; invalid = 0; end // X or Y
 | 
			
		||||
    else if (zinf)                             begin result = {zs, `INF}; invalid = 0; end // infinite Z
 | 
			
		||||
    else if (xzero | yzero)                    begin result = {zs, ze, zm[`Nf-1:0]}; invalid = 0; end
 | 
			
		||||
    else if (re2 >= `EMAX)                     begin result = {rs, `INF}; invalid = 0; end
 | 
			
		||||
    else                                       begin result = {rs, re[`Ne-1:0], rm[`Nf-1:0]}; invalid = 0; end
 | 
			
		||||
  end
 | 
			
		||||
  
 | 
			
		||||
  always_comb 
 | 
			
		||||
    if (rm[21]) begin // normalization right shift by 1 and bump up exponent;
 | 
			
		||||
        ue = re + 7'b1;
 | 
			
		||||
        uf = rm[20:11];
 | 
			
		||||
    end else begin // no normalization shift needed
 | 
			
		||||
        ue = re;
 | 
			
		||||
        uf = rm[19:10];
 | 
			
		||||
    end
 | 
			
		||||
 | 
			
		||||
  // overflow
 | 
			
		||||
  always_comb begin
 | 
			
		||||
    ueb = ue-7'd15;
 | 
			
		||||
    if (ue >= 7'd46) begin // overflow
 | 
			
		||||
/*      uebiased = 7'd30;
 | 
			
		||||
      uff = 10'h3ff; */
 | 
			
		||||
    end else begin
 | 
			
		||||
      uebiased = ue-7'd15;
 | 
			
		||||
      uff = uf;
 | 
			
		||||
    end
 | 
			
		||||
  end
 | 
			
		||||
  
 | 
			
		||||
  assign result = {rs, uebiased[4:0], uff};
 | 
			
		||||
 | 
			
		||||
  // add special case handling for zeros, NaN, Infinity
 | 
			
		||||
endmodule
 | 
			
		||||
 | 
			
		||||
module signadj16(
 | 
			
		||||
  input  logic negr, negz,
 | 
			
		||||
  input  logic xs, ys, zs1,
 | 
			
		||||
  output logic ps, zs);
 | 
			
		||||
 | 
			
		||||
  assign ps = xs ^ ys; // sign of product
 | 
			
		||||
  assign zs = zs1 ^ negz; // sign of addend
 | 
			
		||||
endmodule
 | 
			
		||||
 | 
			
		||||
module unpack16(
 | 
			
		||||
  input  logic [15:0] x, y, z,
 | 
			
		||||
  output logic [10:0] xm, ym, zm,
 | 
			
		||||
  output logic [4:0]  xe, ye, ze,
 | 
			
		||||
  output logic        xs, ys, zs,
 | 
			
		||||
  output logic        xzero, yzero, zzero, xinf, yinf, zinf, xnan, ynan, znan);
 | 
			
		||||
 | 
			
		||||
  unpacknum16 upx(x, xm, xe, xs, xzero, xinf, xnan);
 | 
			
		||||
  unpacknum16 upy(y, ym, ye, ys, yzero, yinf, ynan);
 | 
			
		||||
  unpacknum16 upz(z, zm, ze, zs, zzero, zinf, znan);
 | 
			
		||||
endmodule
 | 
			
		||||
 | 
			
		||||
module unpacknum16(
 | 
			
		||||
  input logic  [15:0] num,
 | 
			
		||||
  output logic [10:0] m,
 | 
			
		||||
  output logic [4:0]  e,
 | 
			
		||||
  output logic        s, 
 | 
			
		||||
  output logic        zero, inf, nan);
 | 
			
		||||
 | 
			
		||||
  logic [9:0] f;  // fraction without leading 1
 | 
			
		||||
  logic [4:0] eb; // biased exponent
 | 
			
		||||
 | 
			
		||||
  assign {s, eb, f} = num; // pull bit fields out of floating-point number
 | 
			
		||||
  assign m = {1'b1, f}; // prepend leading 1 to fraction
 | 
			
		||||
  assign e = eb;   // leave bias in exponent ***
 | 
			
		||||
  assign zero = (e == 0 && f == 0);
 | 
			
		||||
  assign inf = (e == 31 && f == 0);
 | 
			
		||||
  assign nan = (e == 31 && f != 0);
 | 
			
		||||
endmodule
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -1,24 +0,0 @@
 | 
			
		||||
// fma16.sv
 | 
			
		||||
// David_Harris@hmc.edu 26 February 2022
 | 
			
		||||
// 16-bit floating-point multiply-accumulate
 | 
			
		||||
 | 
			
		||||
// Operation: general purpose multiply, add, fma, with optional negation
 | 
			
		||||
//   If mul=1, p = x * y.  Else p = x.
 | 
			
		||||
//   If add=1, result = p + z.  Else result = p.
 | 
			
		||||
//   If negr or negz = 1, negate result or z to handle negations and subtractions
 | 
			
		||||
//   fadd: mul = 0, add = 1, negr = negz = 0
 | 
			
		||||
//   fsub: mul = 0, add = 1, negr = 0, negz = 1
 | 
			
		||||
//   fmul: mul = 1, add = 0, negr = 0, negz = 0
 | 
			
		||||
//   fmadd:  mul = 1, add = 1, negr = 0, negz = 0
 | 
			
		||||
//   fmsub:  mul = 1, add = 1, negr = 0, negz = 1
 | 
			
		||||
//   fnmadd: mul = 1, add = 1, negr = 1, negz = 0
 | 
			
		||||
//   fnmsub: mul = 1, add = 1, negr = 1, negz = 1
 | 
			
		||||
 | 
			
		||||
module fma16(
 | 
			
		||||
  input  logic [15:0] x, y, z,
 | 
			
		||||
  input  logic        mul, add, negr, negz,
 | 
			
		||||
  input  logic [1:0]  roundmode,  // 00: rz, 01: rne, 10: rp, 11: rn
 | 
			
		||||
  output logic [15:0] result);
 | 
			
		||||
 
 | 
			
		||||
endmodule
 | 
			
		||||
 | 
			
		||||
@ -1,240 +0,0 @@
 | 
			
		||||
#include <stdio.h>
 | 
			
		||||
#include <stdint.h>
 | 
			
		||||
#include "softfloat.h"
 | 
			
		||||
#include "softfloat_types.h"
 | 
			
		||||
 | 
			
		||||
typedef union sp {
 | 
			
		||||
  float32_t v;
 | 
			
		||||
  float f;
 | 
			
		||||
} sp;
 | 
			
		||||
 | 
			
		||||
// lists of tests, terminated with 0x8000
 | 
			
		||||
uint16_t easyExponents[] = {15, 0x8000};
 | 
			
		||||
uint16_t medExponents[] = {1, 14, 15, 16, 20, 30, 0x8000};
 | 
			
		||||
uint16_t allExponents[] = {1, 15, 16, 30, 31, 0x8000};
 | 
			
		||||
uint16_t easyFracts[] = {0, 0x200, 0x8000}; // 1.0 and 1.1
 | 
			
		||||
uint16_t medFracts[] = {0, 0x200, 0x001, 0x3FF, 0x8000}; 
 | 
			
		||||
uint16_t zeros[] = {0x0000, 0x8000};
 | 
			
		||||
uint16_t infs[] = {0x7C00, 0xFC00};
 | 
			
		||||
uint16_t nans[] = {0x7D00, 0x7D01};
 | 
			
		||||
 | 
			
		||||
void softfloatInit(void) {
 | 
			
		||||
    softfloat_roundingMode = softfloat_round_minMag; 
 | 
			
		||||
    softfloat_exceptionFlags = 0;
 | 
			
		||||
    softfloat_detectTininess = softfloat_tininess_beforeRounding;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Widen a half-precision value to single precision with SoftFloat, then
// reinterpret those bits as a native C float through the sp union.
float convFloat(float16_t f16) {
    sp bits;

    bits.v = f16_to_f32(f16);
    return bits.f;
}
 | 
			
		||||
 | 
			
		||||
// Emit one test vector line to fptr for result = f16_mulAdd(x, y, z).
//
//   mul, add     - when 0, y is forced to 1.0 / z is forced to +0 so the
//                  multiply / add has no effect
//   negp, negz   - flip the sign of x (negating the product) / of z
//   roundingMode - only encoded into the op field written to the file;
//                  this function does not write softfloat_roundingMode
//   zeroAllowed, infAllowed, nanAllowed - when 0, lines whose result is
//                  zero / inf-or-0x7BFF / NaN are prefixed with a "//" skip marker
//
// Line format: x_y_z_op_result_flags // <calculation> <flag breakdown>
// Bounded snprintf is used so the fixed 80-byte scratch buffers cannot overflow.
void genCase(FILE *fptr, float16_t x, float16_t y, float16_t z, int mul, int add, int negp, int negz, int roundingMode, int zeroAllowed, int infAllowed, int nanAllowed) {
    float16_t result;
    int op, flagVals;
    char calc[80], flags[80];
    float xf, yf, zf, rf;
    float16_t smallest;

    if (!mul) y.v = 0x3C00; // force y to 1 to avoid multiply
    if (!add) z.v = 0x0000; // force z to 0 to avoid add
    if (negp) x.v ^= 0x8000; // flip sign of x to negate p
    if (negz) z.v ^= 0x8000; // flip sign of z to negate z
    op = roundingMode << 4 | mul<<3 | add<<2 | negp<<1 | negz;
//    printf("op = %02x rm %d mul %d add %d negp %d negz %d\n", op, roundingMode, mul, add, negp, negz);
    softfloat_exceptionFlags = 0; // clear exceptions
    result = f16_mulAdd(x, y, z);

    snprintf(flags, sizeof flags, "NV: %d OF: %d UF: %d NX: %d", 
        (softfloat_exceptionFlags >> 4) % 2,
        (softfloat_exceptionFlags >> 2) % 2,
        (softfloat_exceptionFlags >> 1) % 2,
        (softfloat_exceptionFlags) % 2);
    // pack these four flags into one nibble, discarding DZ flag
    flagVals = (softfloat_exceptionFlags & 0x7) | ((softfloat_exceptionFlags >> 1) & 0x8);

    // convert to floats for printing
    xf = convFloat(x);
    yf = convFloat(y);
    zf = convFloat(z);
    rf = convFloat(result);
    if (mul) {
        if (add) snprintf(calc, sizeof calc, "%f * %f + %f = %f", xf, yf, zf, rf);
        else     snprintf(calc, sizeof calc, "%f * %f = %f", xf, yf, rf);
    } else {
        snprintf(calc, sizeof calc, "%f + %f = %f", xf, zf, rf);
    }

    // omit denorms, which aren't required for this project
    smallest.v = 0x0400;
    float16_t resultmag = result;
    resultmag.v &= 0x7FFF; // take absolute value
    if (f16_lt(resultmag, smallest) && (resultmag.v != 0x0000)) fprintf (fptr, "// skip denorm: ");
    if (resultmag.v == 0x0000 && !zeroAllowed) fprintf(fptr, "// skip zero: ");
    if ((resultmag.v == 0x7C00 || resultmag.v == 0x7BFF) && !infAllowed)  fprintf(fptr, "// Skip inf: ");
    if (resultmag.v >  0x7C00 && !nanAllowed)  fprintf(fptr, "// Skip NaN: ");
    fprintf(fptr, "%04x_%04x_%04x_%02x_%04x_%01x // %s %s\n", x.v, y.v, z.v, op, result.v, flagVals, calc, flags);
}
 | 
			
		||||
 | 
			
		||||
// Write the description line to fptr and fill cases[] with every
// combination of the exponents in e[] and fractions in f[].
//
//   e, f      - lists terminated by the sentinel 0x8000
//   testName  - accepted for interface compatibility; not used here
//   desc      - text written verbatim as the first line of the file
//   cases     - output array; the caller must size it for all combinations
//   fptr      - open output stream
//   numCases  - set to the number of entries written to cases[]
//
// Each case is built as (exponent << 10) | fraction.
void prepTests(uint16_t *e, uint16_t *f, char *testName, char *desc, float16_t *cases, 
               FILE *fptr, int *numCases) {
    int i, j;

    (void)testName;
    // desc is data, not a format: a '%' in it must be written literally.
    fprintf(fptr, "%s\n", desc);
    *numCases=0;
    for (i=0; e[i] != 0x8000; i++)
        for (j=0; f[j] != 0x8000; j++) {
            cases[*numCases].v = f[j] | e[i]<<10;
            *numCases = *numCases + 1;
        }
}
 | 
			
		||||
 | 
			
		||||
void genMulTests(uint16_t *e, uint16_t *f, int sgn, char *testName, char *desc, int roundingMode, int zeroAllowed, int infAllowed, int nanAllowed) {
 | 
			
		||||
    int i, j, k, numCases;
 | 
			
		||||
    float16_t x, y, z;
 | 
			
		||||
    float16_t cases[100000];
 | 
			
		||||
    FILE *fptr;
 | 
			
		||||
    char fn[80];
 | 
			
		||||
 
 | 
			
		||||
    sprintf(fn, "work/%s.tv", testName);
 | 
			
		||||
    fptr = fopen(fn, "w");
 | 
			
		||||
    prepTests(e, f, testName, desc, cases, fptr, &numCases);
 | 
			
		||||
    z.v = 0x0000;
 | 
			
		||||
    for (i=0; i < numCases; i++) { 
 | 
			
		||||
        x.v = cases[i].v;
 | 
			
		||||
        for (j=0; j<numCases; j++) {
 | 
			
		||||
            y.v = cases[j].v;
 | 
			
		||||
            for (k=0; k<=sgn; k++) {
 | 
			
		||||
                y.v ^= (k<<15);
 | 
			
		||||
                genCase(fptr, x, y, z, 1, 0, 0, 0, roundingMode, zeroAllowed, infAllowed, nanAllowed);
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
    fclose(fptr);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void genAddTests(uint16_t *e, uint16_t *f, int sgn, char *testName, char *desc, int roundingMode, int zeroAllowed, int infAllowed, int nanAllowed) {
 | 
			
		||||
    int i, j, k, numCases;
 | 
			
		||||
    float16_t x, y, z;
 | 
			
		||||
    float16_t cases[100000];
 | 
			
		||||
    FILE *fptr;
 | 
			
		||||
    char fn[80];
 | 
			
		||||
 
 | 
			
		||||
    sprintf(fn, "work/%s.tv", testName);
 | 
			
		||||
    fptr = fopen(fn, "w");
 | 
			
		||||
    prepTests(e, f, testName, desc, cases, fptr, &numCases);
 | 
			
		||||
    y.v = 0x0000;
 | 
			
		||||
    for (i=0; i < numCases; i++) {
 | 
			
		||||
        x.v = cases[i].v;
 | 
			
		||||
        for (j=0; j<numCases; j++) {
 | 
			
		||||
            z.v = cases[j].v;
 | 
			
		||||
            for (k=0; k<=sgn; k++) {
 | 
			
		||||
                z.v ^= (k<<15);
 | 
			
		||||
                genCase(fptr, x, y, z, 0, 1, 0, 0, roundingMode, zeroAllowed, infAllowed, nanAllowed);
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
    fclose(fptr);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
void genFMATests(uint16_t *e, uint16_t *f, int sgn, char *testName, char *desc, int roundingMode, int zeroAllowed, int infAllowed, int nanAllowed) {
 | 
			
		||||
    int i, j, k, l, numCases;
 | 
			
		||||
    float16_t x, y, z;
 | 
			
		||||
    float16_t cases[100000];
 | 
			
		||||
    FILE *fptr;
 | 
			
		||||
    char fn[80];
 | 
			
		||||
 
 | 
			
		||||
    sprintf(fn, "work/%s.tv", testName);
 | 
			
		||||
    fptr = fopen(fn, "w");
 | 
			
		||||
    prepTests(e, f, testName, desc, cases, fptr, &numCases);
 | 
			
		||||
    for (i=0; i < numCases; i++) {
 | 
			
		||||
        x.v = cases[i].v;
 | 
			
		||||
        for (j=0; j<numCases; j++) {
 | 
			
		||||
            y.v = cases[j].v;
 | 
			
		||||
            for (k=0; k<numCases; k++) {
 | 
			
		||||
                z.v = cases[k].v;
 | 
			
		||||
                for (l=0; l<=sgn; l++) {
 | 
			
		||||
                    z.v ^= (l<<15);
 | 
			
		||||
                    genCase(fptr, x, y, z, 1, 1, 0, 0, roundingMode, zeroAllowed, infAllowed, nanAllowed);
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
    fclose(fptr);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void genSpecialTests(uint16_t *e, uint16_t *f, int sgn, char *testName, char *desc, int roundingMode, int zeroAllowed, int infAllowed, int nanAllowed) {
 | 
			
		||||
    int i, j, k, sx, sy, sz, numCases;
 | 
			
		||||
    float16_t x, y, z;
 | 
			
		||||
    float16_t cases[100000];
 | 
			
		||||
    FILE *fptr;
 | 
			
		||||
    char fn[80];
 | 
			
		||||
 
 | 
			
		||||
    sprintf(fn, "work/%s.tv", testName);
 | 
			
		||||
    fptr = fopen(fn, "w");
 | 
			
		||||
    prepTests(e, f, testName, desc, cases, fptr, &numCases);
 | 
			
		||||
    cases[numCases].v = 0x0000; // add +0 case
 | 
			
		||||
    cases[numCases+1].v = 0x8000; // add -0 case
 | 
			
		||||
    numCases += 2; 
 | 
			
		||||
    for (i=0; i < numCases; i++) {
 | 
			
		||||
        x.v = cases[i].v;
 | 
			
		||||
        for (j=0; j<numCases; j++) {
 | 
			
		||||
            y.v = cases[j].v;
 | 
			
		||||
            for (k=0; k<numCases; k++) {
 | 
			
		||||
                z.v = cases[k].v;
 | 
			
		||||
                for (sx=0; sx<=sgn; sx++) {
 | 
			
		||||
                    x.v ^= (sx<<15);
 | 
			
		||||
                    for (sy=0; sy<=sgn; sy++) {
 | 
			
		||||
                        y.v ^= (sy<<15);
 | 
			
		||||
                        for (sz=0; sz<=sgn; sz++) {
 | 
			
		||||
                            z.v ^= (sz<<15);
 | 
			
		||||
                            genCase(fptr, x, y, z, 1, 1, 0, 0, roundingMode, zeroAllowed, infAllowed, nanAllowed);
 | 
			
		||||
                        }
 | 
			
		||||
                    }
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
    fclose(fptr);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
int main()
 | 
			
		||||
{
 | 
			
		||||
    softfloatInit(); // configure softfloat modes
 | 
			
		||||
 
 | 
			
		||||
    // Test cases: multiplication
 | 
			
		||||
    genMulTests(easyExponents, easyFracts, 0, "fmul_0", "// Multiply with exponent of 0, significand of 1.0 and 1.1, RZ", 0, 0, 0, 0);
 | 
			
		||||
    genMulTests(medExponents, medFracts, 0, "fmul_1", "// Multiply with various exponents and unsigned fractions, RZ", 0, 0, 0, 0);
 | 
			
		||||
    genMulTests(medExponents, medFracts, 1, "fmul_2", "// Multiply with various exponents and signed fractions, RZ", 0, 0, 0, 0);
 | 
			
		||||
 | 
			
		||||
    // Test cases: addition
 | 
			
		||||
    genAddTests(easyExponents, easyFracts, 0, "fadd_0", "// Add with exponent of 0, significand of 1.0 and 1.1, RZ", 0, 0, 0, 0);
 | 
			
		||||
    genAddTests(medExponents, medFracts, 0, "fadd_1", "// Add with various exponents and unsigned fractions, RZ", 0, 0, 0, 0);
 | 
			
		||||
    genAddTests(medExponents, medFracts, 1, "fadd_2", "// Add with various exponents and signed fractions, RZ", 0, 0, 0, 0);
 | 
			
		||||
 | 
			
		||||
    // Test cases: FMA
 | 
			
		||||
    genFMATests(easyExponents, easyFracts, 0, "fma_0", "// FMA with exponent of 0, significand of 1.0 and 1.1, RZ", 0, 0, 0, 0);
 | 
			
		||||
    genFMATests(medExponents, medFracts, 0, "fma_1", "// FMA with various exponents and unsigned fractions, RZ", 0, 0, 0, 0);
 | 
			
		||||
    genFMATests(medExponents, medFracts, 1, "fma_2", "// FMA with various exponents and signed fractions, RZ", 0, 0, 0, 0);
 | 
			
		||||
 | 
			
		||||
    // Test cases: Zero, Infinity, NaN
 | 
			
		||||
    genSpecialTests(allExponents, medFracts, 1, "fma_special_rz", "// FMA with special cases, RZ", 0, 1, 1, 1);
 | 
			
		||||
 
 | 
			
		||||
    // Full test cases with other rounding modes
 | 
			
		||||
    softfloat_roundingMode = softfloat_round_near_even; 
 | 
			
		||||
    genSpecialTests(allExponents, medFracts, 1, "fma_special_rne", "// FMA with special cases, RNE", 1, 1, 1, 1);
 | 
			
		||||
    softfloat_roundingMode = softfloat_round_min; 
 | 
			
		||||
    genSpecialTests(allExponents, medFracts, 1, "fma_special_rm", "// FMA with special cases, RM", 2, 1, 1, 1);
 | 
			
		||||
    softfloat_roundingMode = softfloat_round_max; 
 | 
			
		||||
    genSpecialTests(allExponents, medFracts, 1, "fma_special_rp", "// FMA with special cases, RP", 3, 1, 1, 1);
 | 
			
		||||
  
 | 
			
		||||
    return 0;
 | 
			
		||||
}
 | 
			
		||||
@ -1,8 +0,0 @@
 | 
			
		||||
#!/bin/bash
# check for warnings in Verilog code
# The verilator lint tool is faster and better than Modelsim so it is best to run this first.
#
# Usage: run from the directory that contains fma16.v.
export PATH="$PATH:/usr/local/bin/"

# Resolve verilator once and fail with a clear message if it is not installed,
# instead of trying to execute an empty command name.
verilator=$(command -v verilator) || {
  printf '%s: verilator not found in PATH\n' "${0##*/}" >&2
  exit 1
}

"$verilator" --lint-only --top-module fma16 fma16.v
 | 
			
		||||
@ -1,2 +0,0 @@
 | 
			
		||||
# Start vsim and have it execute the fma.do script found in the current directory.
vsim -do "do fma.do"
 | 
			
		||||
 | 
			
		||||
@ -1 +0,0 @@
 | 
			
		||||
# Start vsim with -c (presumably command-line mode, no GUI - confirm) and have it execute fma.do.
vsim -c -do "do fma.do"
 | 
			
		||||
@ -1 +0,0 @@
 | 
			
		||||
# Run the "synth" target of the Makefile in ../../../synthDC with DESIGN set to fma16.
make -C ../../../synthDC synth DESIGN=fma16
 | 
			
		||||
@ -1,52 +0,0 @@
 | 
			
		||||
/* verilator lint_off STMTDLY */
// testbench_fma16: self-checking testbench for the fma16 module.
// Loads test vectors from work/fmul_0.tv, applies one vector per clock cycle,
// compares the DUT result with the expected result, and stops with a summary
// at the first unwritten (all-x) vector entry.
module testbench_fma16;
  reg        clk, reset;
  reg [15:0] x, y, z, rexpected;       // three 16-bit operands and the expected result
  wire [15:0] result;                  // DUT output
  reg [7:0]  ctrl;                     // control byte from the vector; only bits [5:0] are used below
  reg [3:0]  flagsexpected;            // loaded from the vector but not compared yet
  reg        mul, add, negp, negz;
  reg [1:0]  roundmode;
  reg [31:0] vectornum, errors;        // index of the current vector and running error count
  reg [75:0] testvectors[10000:0];     // 76 bits = 3*16 operands + 8 ctrl + 16 expected + 4 flags

  // instantiate device under test
  fma16 dut(x, y, z, mul, add, negp, negz, roundmode, result);

  // generate clock
  // period is 10 time units: high for 5, low for 5
  always 
    begin
      clk = 1; #5; clk = 0; #5;
    end

  // at start of test, load vectors and pulse reset
  // reset is held for 22 time units, so the first checks happen after it is released
  initial
    begin
      $readmemh("work/fmul_0.tv", testvectors);
      vectornum = 0; errors = 0;
      reset = 1; #22; reset = 0;
    end

  // apply test vectors on rising edge of clk
  // (1 time unit after the edge)
  always @(posedge clk)
    begin
      #1; {x, y, z, ctrl, rexpected, flagsexpected} = testvectors[vectornum];
      // ctrl[5:4] = roundmode, ctrl[3] = mul, ctrl[2] = add, ctrl[1] = negp, ctrl[0] = negz
      {roundmode, mul, add, negp, negz} = ctrl[5:0];
    end

  // check results on falling edge of clk
  always @(negedge clk)
    if (~reset) begin // skip during reset
      if (result !== rexpected) begin  // check result     // *** should also add tests on flags eventually
        $display("Error: inputs %h * %h + %h", x, y, z);
        $display("  result = %h (%h expected)", result, rexpected);
        errors = errors + 1;
      end
      vectornum = vectornum + 1;
      // an all-x entry means $readmemh never wrote it: end of the vector list
      if (testvectors[vectornum] === 'x) begin 
        $display("%d tests completed with %d errors", 
	           vectornum, errors);
        $stop;
      end
    end
endmodule
 | 
			
		||||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							@ -1,130 +0,0 @@
 | 
			
		||||
#!/usr/bin/perl -w
# torturegen.pl 
# David_Harris@hmc.edu 19 April 2022
# Convert TestFloat cases into format for fma16 project torture test
# Strip out cases involving denorms
#
# Reads work/f16_{add,mul,mulAdd}_{rz,rd,ru,rne}.tv and writes a sampled,
# reformatted subset of their lines to work/torture.tv. Each output line is
#   x_y_z_op_w_flags // comment
# Vectors with a denormal operand or result are written as comment lines only.

use strict;

# build the list of input vector files: every operation in every rounding mode
my @basenames = ("add", "mul", "mulAdd");
my @roundingmodes = ("rz", "rd", "ru", "rne");
my @names = ();
foreach my $name (@basenames) {
    foreach my $mode (@roundingmodes) {
        push(@names, "f16_${name}_$mode.tv");
    }
}

# lexical handles and three-argument open; report the OS error on failure
open(my $torture, '>', "work/torture.tv") || die("Can't write torture.tv: $!");
my $datestring = localtime();
print {$torture} "// Torture tests generated $datestring by $0\n";
foreach my $tv (@names) {
    open(my $tvfile, '<', "work/$tv") || die("Can't read $tv: $!");
    my $type = &getType($tv); # is it mul, add, mulAdd
    my $rm = &getRm($tv); # rounding mode
#   if ($rm != 0) { next; } # only do rz
    print {$torture} "\n////////// Testcases from $tv of type $type rounding mode $rm\n";
    print ("\n////////// Testcases from $tv of type $type rounding mode $rm\n");
    my $linecount = 0;
    my $babyTorture = 0;

    # keep one line in every $density; the value depends only on the file's
    # type and on $babyTorture, so it is computed once per file
    my $density = 10;
    if ($type eq "mulAdd") {$density = 500;}
    if ($babyTorture) {
        $density = 100;
        if ($type eq "mulAdd") {$density = 50000;}
    }

    while (my $line = <$tvfile>) {
        $linecount++;
        if ((($linecount + $rm) % $density) != 0) { next }; # too many tests to use
        chomp($line); # strip off newline
        my @parts = split(/_/, $line);
        my ($x, $y, $z, $op, $w, $flags);
        $x = $parts[0];
        # add files carry two operands (x, z); y is filled with 0000
        if ($type eq "add") { $y = "0000"; } else {$y = $parts[1]};
        # mul files carry two operands (x, y); z is filled with the constant 3CFF
        if ($type eq "mul") { $z = "3CFF"; } elsif ($type eq "add") {$z = $parts[1]} else { $z = $parts[2]};
        # op byte: rounding mode in bits 5:4, +8 for multiply, +4 for add
        $op = $rm << 4;
        if ($type eq "mul" || $type eq "mulAdd") { $op = $op + 8; }
        if ($type eq "add" || $type eq "mulAdd") { $op = $op + 4; }
        my $opname = sprintf("%02x", $op);
        if ($type eq "mulAdd") {$w = $parts[3];} else {$w = $parts[2]};
        if ($type eq "mulAdd") {$flags = $parts[4];} else {$flags = $parts[3]};
        $flags = substr($flags, -1); # take last character
        if (&fpval($w) eq "NaN") { $w = "7e00"; } # every NaN result is replaced by 7e00
        my $vec = "${x}_${y}_${z}_${opname}_${w}_${flags}";
        my $skip = "";
        if (&isdenorm($x) || &isdenorm($y) || &isdenorm($z) || &isdenorm($w)) {
            $skip = "Skipped denorm";
        }
        my $summary = &summary($x, $y, $z, $w, $type);
        if ($skip ne "") {
            print {$torture} "// $skip $tv line $linecount $line $summary\n"
        }
        else { print {$torture} "$vec // $tv line $linecount $line $summary\n";}
    }
    close($tvfile);
}
close($torture);
 | 
			
		||||
 | 
			
		||||
# fpval: describe a 16-bit value, given as a hex string, as human-readable text.
# The value is split into sign (bit 15), a 5-bit exponent (bits 14:10) and a
# 10-bit fraction (bits 9:0).
# Returns "NaN", or one of "INF", 0, "Denorm", "1.<fraction bits> x 2^<exp-15>",
# prefixed with "-" when the sign bit is set.
sub fpval {
    my $val = shift;
    $val = hex($val); # convert hex string to number
    my $frac = $val & 0x3FF;
    my $exp = ($val >> 10) & 0x1F;
    my $sign = $val >> 15;

    my $res;
    if ($exp == 31 && $frac != 0) { return "NaN"; }
    elsif ($exp == 31) { $res = "INF"; }
    # zero is exponent 0 AND fraction 0 regardless of sign, so 8000 prints as -0
    elsif ($exp == 0 && $frac == 0) { $res = 0; }
    elsif ($exp == 0) { $res = "Denorm"; }
    # the fraction is 10 bits wide, so print exactly 10 binary digits
    else { $res = sprintf("1.%010b x 2^%d", $frac, $exp-15); }

    if ($sign == 1) { $res = "-$res"; }
    return $res;
}
 | 
			
		||||
 | 
			
		||||
# summary: build the human-readable equation appended to each output line.
# Takes the four hex strings x, y, z, w and the operation type; each value is
# rendered with fpval. "add" shows x + z, "mul" shows x * y, anything else
# shows x * y + z.
sub summary {
    my ($x, $y, $z, $w, $type) = @_;

    my ($xv, $yv, $zv, $wv) = map { &fpval($_) } ($x, $y, $z, $w);

    return "$xv + $zv = $wv" if $type eq "add";
    return "$xv * $yv = $wv" if $type eq "mul";
    return "$xv * $yv + $zv = $wv";
}
 | 
			
		||||
 | 
			
		||||
# getType: classify a vector file name as "mulAdd", "mul" or "add".
# "mulAdd" is tested first because such names also match /mul/; any name that
# matches neither pattern is treated as "add".
sub getType {
    my ($tv) = @_;

    return("mulAdd") if $tv =~ /mulAdd/;
    return "mul" if $tv =~ /mul/;
    return "add";
}
 | 
			
		||||
 | 
			
		||||
# getRm: map a vector file name to its rounding-mode number:
# rz -> 0, rne -> 1, rd -> 2, ru -> 3. Returns the string "bad" when the name
# contains none of these tags. Patterns are tried in that order; first match wins.
sub getRm {
    my ($tv) = @_;

    my @modes = ([qr/rz/, 0], [qr/rne/, 1], [qr/rd/, 2], [qr/ru/, 3]);
    foreach my $entry (@modes) {
        my ($pattern, $code) = @$entry;
        return $code if $tv =~ $pattern;
    }
    return "bad";
}
 | 
			
		||||
 | 
			
		||||
# isdenorm: return 1 when the 16-bit value given as a hex string is a
# denormal, i.e. its 5-bit exponent field (bits 14:10) is zero and its 10-bit
# fraction (bits 9:0) is non-zero; otherwise return 0.
# The sign bit is ignored, so neither +0 (0000) nor -0 (8000) counts as a denormal.
sub isdenorm {
    my $fp = shift;
    my $val = hex($fp);
    my $expv = ($val >> 10) & 0x1F;
    my $frac = $val & 0x3FF;
    my $denorm = 0;
    if ($expv == 0 && $frac != 0) { $denorm = 1;}
    return $denorm;
}
 | 
			
		||||
@ -1,62 +0,0 @@
 | 
			
		||||
# Wave-window setup for the testbench_fma16 simulation: adds the testbench and
# DUT signals to the wave view, then restores cursors, display settings and zoom.
onerror {resume}
quietly WaveActivateNextPane {} 0
# testbench-level signals
add wave -noupdate /testbench_fma16/clk
add wave -noupdate /testbench_fma16/reset
add wave -noupdate /testbench_fma16/x
add wave -noupdate /testbench_fma16/y
add wave -noupdate /testbench_fma16/z
add wave -noupdate /testbench_fma16/result
add wave -noupdate /testbench_fma16/rexpected
# signals inside the device under test (instance "dut")
add wave -noupdate /testbench_fma16/dut/x
add wave -noupdate /testbench_fma16/dut/y
add wave -noupdate /testbench_fma16/dut/z
add wave -noupdate /testbench_fma16/dut/mul
add wave -noupdate /testbench_fma16/dut/add
add wave -noupdate /testbench_fma16/dut/negr
add wave -noupdate /testbench_fma16/dut/negz
add wave -noupdate /testbench_fma16/dut/roundmode
add wave -noupdate /testbench_fma16/dut/result
add wave -noupdate /testbench_fma16/dut/XManE
add wave -noupdate /testbench_fma16/dut/YManE
add wave -noupdate /testbench_fma16/dut/ZManE
add wave -noupdate /testbench_fma16/dut/XExpE
add wave -noupdate /testbench_fma16/dut/YExpE
add wave -noupdate /testbench_fma16/dut/ZExpE
add wave -noupdate /testbench_fma16/dut/PExpE
add wave -noupdate /testbench_fma16/dut/Ne
add wave -noupdate /testbench_fma16/dut/upOneExt
add wave -noupdate /testbench_fma16/dut/XSgnE
add wave -noupdate /testbench_fma16/dut/YSgnE
add wave -noupdate /testbench_fma16/dut/ZSgnE
add wave -noupdate /testbench_fma16/dut/PSgnE
add wave -noupdate /testbench_fma16/dut/ProdManE
add wave -noupdate /testbench_fma16/dut/NfracS
add wave -noupdate /testbench_fma16/dut/ProdManAl
add wave -noupdate /testbench_fma16/dut/ZManExt
add wave -noupdate /testbench_fma16/dut/ZManAl
add wave -noupdate /testbench_fma16/dut/Nfrac
add wave -noupdate /testbench_fma16/dut/res
# AlignCnt is the only signal displayed in decimal radix
add wave -noupdate -radix decimal /testbench_fma16/dut/AlignCnt
add wave -noupdate /testbench_fma16/dut/NSamt
add wave -noupdate /testbench_fma16/dut/ZExpGreater
add wave -noupdate /testbench_fma16/dut/ACLess
add wave -noupdate /testbench_fma16/dut/upOne
add wave -noupdate /testbench_fma16/dut/KillProd
# saved cursors, column widths, grid/timeline settings and zoom window
TreeUpdate [SetDefaultTree]
WaveRestoreCursors {{Cursor 1} {3746 ns} 1} {{Cursor 2} {4169 ns} 0}
quietly wave cursor active 2
configure wave -namecolwidth 237
configure wave -valuecolwidth 64
configure wave -justifyvalue left
configure wave -signalnamewidth 0
configure wave -snapdistance 10
configure wave -datasetprefix 0
configure wave -rowmargin 4
configure wave -childrowmargin 2
configure wave -gridoffset 0
configure wave -gridperiod 1
configure wave -griddelta 40
configure wave -timeline 0
configure wave -timelineunits ns
update
WaveRestoreZoom {4083 ns} {4235 ns}
 | 
			
		||||
@ -10,7 +10,7 @@
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
module fcmp (   
 | 
			
		||||
   input logic  [`FPSIZES/3:0]   FmtE,           // precision 1 = double 0 = single
 | 
			
		||||
   input logic  [`FMTBITS-1:0]   FmtE,           // precision 1 = double 0 = single
 | 
			
		||||
   input logic  [2:0]            FOpCtrlE,       // see above table
 | 
			
		||||
   input logic                   XSgnE, YSgnE,   // input signs
 | 
			
		||||
   input logic  [`NE-1:0]        XExpE, YExpE,   // input exponents
 | 
			
		||||
 | 
			
		||||
@ -14,7 +14,7 @@ module fctrl (
 | 
			
		||||
  output logic [2:0] FOpCtrlD,    // chooses which opperation to do - specifics shown at bottom of module and in each unit
 | 
			
		||||
  output logic [1:0] FResSelD,    // select one of the results done in the memory stage
 | 
			
		||||
  output logic [1:0] FIntResSelD, // select the result that will be written to the integer register
 | 
			
		||||
  output logic [`FPSIZES/3:0] FmtD,        // precision - single-0 double-1
 | 
			
		||||
  output logic [`FMTBITS-1:0] FmtD,        // precision - single-0 double-1
 | 
			
		||||
  output logic [2:0] FrmD,        // rounding mode 000 = rount to nearest, ties to even   001 = round twords zero  010 = round down  011 = round up  100 = round to nearest, ties to max magnitude
 | 
			
		||||
  output logic       FWriteIntD   // is the result written to the integer register
 | 
			
		||||
  );
 | 
			
		||||
@ -73,14 +73,12 @@ module fctrl (
 | 
			
		||||
                                  2'b01:    ControlsD = `FCTRLW'b1_0_11_100_11_00_0_0; // fcvt.s.wu wu->s
 | 
			
		||||
                                  2'b10:    ControlsD = `FCTRLW'b1_0_11_111_11_00_0_0; // fcvt.s.l   l->s
 | 
			
		||||
                                  2'b11:    ControlsD = `FCTRLW'b1_0_11_110_11_00_0_0; // fcvt.s.lu lu->s
 | 
			
		||||
                                  default: ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
 | 
			
		||||
                                endcase
 | 
			
		||||
                    7'b1100000: case(Rs2D[1:0])
 | 
			
		||||
                                  2'b00:    ControlsD = `FCTRLW'b0_1_11_001_11_11_0_0; // fcvt.w.s   s->w
 | 
			
		||||
                                  2'b01:    ControlsD = `FCTRLW'b0_1_11_000_11_11_0_0; // fcvt.wu.s  s->wu
 | 
			
		||||
                                  2'b10:    ControlsD = `FCTRLW'b0_1_11_011_11_11_0_0; // fcvt.l.s   s->l
 | 
			
		||||
                                  2'b11:    ControlsD = `FCTRLW'b0_1_11_010_11_11_0_0; // fcvt.lu.s  s->lu
 | 
			
		||||
                                  default: ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
 | 
			
		||||
                                endcase
 | 
			
		||||
                    7'b1111000: ControlsD = `FCTRLW'b1_0_11_000_00_00_0_0; // fmv.w.x
 | 
			
		||||
                    7'b0100000: ControlsD = `FCTRLW'b1_0_11_000_11_00_0_0; // fcvt.s.d
 | 
			
		||||
@ -89,14 +87,12 @@ module fctrl (
 | 
			
		||||
                                  2'b01:    ControlsD = `FCTRLW'b1_0_11_100_11_00_0_0; // fcvt.d.wu wu->d
 | 
			
		||||
                                  2'b10:    ControlsD = `FCTRLW'b1_0_11_111_11_00_0_0; // fcvt.d.l   l->d
 | 
			
		||||
                                  2'b11:    ControlsD = `FCTRLW'b1_0_11_110_11_00_0_0; // fcvt.d.lu lu->d
 | 
			
		||||
                                  default: ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
 | 
			
		||||
                                endcase
 | 
			
		||||
                    7'b1100001: case(Rs2D[1:0])
 | 
			
		||||
                                  2'b00:    ControlsD = `FCTRLW'b0_1_11_001_11_11_0_0; // fcvt.w.d   d->w
 | 
			
		||||
                                  2'b01:    ControlsD = `FCTRLW'b0_1_11_000_11_11_0_0; // fcvt.wu.d  d->wu
 | 
			
		||||
                                  2'b10:    ControlsD = `FCTRLW'b0_1_11_011_11_11_0_0; // fcvt.l.d   d->l
 | 
			
		||||
                                  2'b11:    ControlsD = `FCTRLW'b0_1_11_010_11_11_0_0; // fcvt.lu.d  d->lu
 | 
			
		||||
                                  default: ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
 | 
			
		||||
                                endcase
 | 
			
		||||
                    7'b1111001: ControlsD = `FCTRLW'b1_0_11_001_00_00_0_0; // fmv.d.x
 | 
			
		||||
                    7'b0100001: ControlsD = `FCTRLW'b1_0_11_001_11_00_0_0; // fcvt.d.s
 | 
			
		||||
@ -121,13 +117,8 @@ module fctrl (
 | 
			
		||||
  //    0-single
 | 
			
		||||
  //    1-double
 | 
			
		||||
  
 | 
			
		||||
    if (`FPSIZES == 1)begin
 | 
			
		||||
      logic [1:0] FmtTmp;
 | 
			
		||||
      assign FmtTmp = (FResultSelD == 2'b00) ? {~Funct3D[1], ~(Funct3D[1]^Funct3D[0])} : ((Funct7D[6:3] == 4'b0100)&OpD[4]) ? Rs2D[1:0] : Funct7D[1:0];
 | 
			
		||||
      assign FmtD = `FMT == FmtTmp;
 | 
			
		||||
end
 | 
			
		||||
      //assign FmtD = 0; *** change back after full paramerterization
 | 
			
		||||
 | 
			
		||||
    if (`FPSIZES == 1)
 | 
			
		||||
      assign FmtD = 0;
 | 
			
		||||
    else if (`FPSIZES == 2)begin
 | 
			
		||||
      logic [1:0] FmtTmp;
 | 
			
		||||
      assign FmtTmp = (FResultSelD == 2'b00) ? {~Funct3D[1], ~(Funct3D[1]^Funct3D[0])} : ((Funct7D[6:3] == 4'b0100)&OpD[4]) ? Rs2D[1:0] : Funct7D[1:0];
 | 
			
		||||
 | 
			
		||||
@ -16,7 +16,7 @@ module fcvt (
 | 
			
		||||
    input logic             XNaNE,          // is the input a NaN
 | 
			
		||||
    input logic             XSNaNE,         // is the input a signaling NaN
 | 
			
		||||
    input logic [2:0]       FrmE,           // rounding mode 000 = rount to nearest, ties to even   001 = round twords zero  010 = round down  011 = round up  100 = round to nearest, ties to max magnitude
 | 
			
		||||
    input logic [`FPSIZES/3:0] FmtE,        // the input's precision (11=quad 01=double 00=single 10=half)
 | 
			
		||||
    input logic [`FMTBITS-1:0] FmtE,        // the input's precision (11=quad 01=double 00=single 10=half)
 | 
			
		||||
    output logic [`FLEN-1:0] CvtResE,       // the fp conversion result
 | 
			
		||||
    output logic [`XLEN-1:0] CvtIntResE,    // the int conversion result
 | 
			
		||||
    output logic [4:0]      CvtFlgE         // the conversion's flags
 | 
			
		||||
@ -37,7 +37,7 @@ module fcvt (
 | 
			
		||||
    // (FI) fp  -> int coversion signals
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    logic [`FPSIZES/3:0]    OutFmt;     // format of the output
 | 
			
		||||
    logic [`FMTBITS-1:0]    OutFmt;     // format of the output
 | 
			
		||||
    logic [`XLEN-1:0]       PosInt;     // the positive integer input
 | 
			
		||||
    logic [`XLEN-1:0]       TrimInt;    // integer trimmed to the correct size
 | 
			
		||||
    logic [`LGLEN-1:0]      LzcIn;      // input to the Leading Zero Counter (priority encoder)
 | 
			
		||||
 | 
			
		||||
@ -34,7 +34,7 @@ module fma(
 | 
			
		||||
    input logic                 reset,
 | 
			
		||||
    input logic                 FlushM,     // flush the memory stage
 | 
			
		||||
    input logic                 StallM,     // stall memory stage
 | 
			
		||||
    input logic  [`FPSIZES/3:0] FmtE, FmtM, // precision 1 = double 0 = single
 | 
			
		||||
    input logic  [`FMTBITS-1:0] FmtE, FmtM, // precision 1 = double 0 = single
 | 
			
		||||
    input logic  [2:0]          FOpCtrlE,   // 000 = fmadd (X*Y)+Z,  001 = fmsub (X*Y)-Z,  010 = fnmsub -(X*Y)+Z,  011 = fnmadd -(X*Y)-Z,  100 = fmul (X*Y)
 | 
			
		||||
    input logic  [2:0]          FrmM,               // rounding mode 000 = rount to nearest, ties to even   001 = round twords zero  010 = round down  011 = round up  100 = round to nearest, ties to max magnitude
 | 
			
		||||
    input logic                 XSgnE, YSgnE, ZSgnE,    // input signs - execute stage
 | 
			
		||||
@ -102,7 +102,7 @@ module fma1(
 | 
			
		||||
    input logic  [`NF:0]        XManE, YManE, ZManE,    // fractions in U(0.NF) format
 | 
			
		||||
    input logic                 XZeroE, YZeroE, ZZeroE, // is the input zero
 | 
			
		||||
    input logic  [2:0]          FOpCtrlE,   // 000 = fmadd (X*Y)+Z,  001 = fmsub (X*Y)-Z,  010 = fnmsub -(X*Y)+Z,  011 = fnmadd -(X*Y)-Z,  100 = fmul (X*Y)
 | 
			
		||||
    input logic  [`FPSIZES/3:0] FmtE,       // precision 1 = double 0 = single
 | 
			
		||||
    input logic  [`FMTBITS-1:0] FmtE,       // precision 1 = double 0 = single
 | 
			
		||||
    output logic [`NE+1:0]      ProdExpE,       // X exponent + Y exponent - bias in B(NE+2.0) format; adds 2 bits to allow for size of number and negative sign
 | 
			
		||||
    output logic                AddendStickyE,  // sticky bit that is calculated during alignment
 | 
			
		||||
    output logic                KillProdE,      // set the product to zero before addition if the product is too small to matter
 | 
			
		||||
@ -161,7 +161,7 @@ endmodule
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
module expadd(    
 | 
			
		||||
    input  logic [`FPSIZES/3:0] FmtE,          // precision
 | 
			
		||||
    input  logic [`FMTBITS-1:0] FmtE,          // precision
 | 
			
		||||
    input  logic [`NE-1:0]      XExpE, YExpE,  // input exponents
 | 
			
		||||
    input  logic                XZeroE, YZeroE,        // are the inputs zero
 | 
			
		||||
    output logic [`NE+1:0]      ProdExpE       // product's exponent B^(1023)NE+2
 | 
			
		||||
@ -378,7 +378,7 @@ module fma2(
 | 
			
		||||
    input logic     [`NE-1:0]               ZExpM, // input exponents
 | 
			
		||||
    input logic     [`NF:0]                 XManM, YManM, ZManM, // input mantissas
 | 
			
		||||
    input logic     [2:0]                   FrmM,       // rounding mode 000 = rount to nearest, ties to even   001 = round twords zero  010 = round down  011 = round up  100 = round to nearest, ties to max magnitude
 | 
			
		||||
    input logic     [`FPSIZES/3:0]          FmtM,       // precision 1 = double 0 = single
 | 
			
		||||
    input logic     [`FMTBITS-1:0]          FmtM,       // precision 1 = double 0 = single
 | 
			
		||||
    input logic     [`NE+1:0]               ProdExpM,       // X exponent + Y exponent - bias
 | 
			
		||||
    input logic                             AddendStickyM,  // sticky bit that is calculated during alignment
 | 
			
		||||
    input logic                             KillProdM,      // set the product to zero before addition if the product is too small to matter
 | 
			
		||||
@ -517,7 +517,7 @@ module normalize(
 | 
			
		||||
    input logic  [`NE-1:0]              ZExpM,      // exponent of Z
 | 
			
		||||
    input logic  [`NE+1:0]              ProdExpM,   // X exponent + Y exponent - bias
 | 
			
		||||
    input logic  [$clog2(3*`NF+7)-1:0]  NormCntM,   // normalization shift count
 | 
			
		||||
    input logic  [`FPSIZES/3:0]         FmtM,       // precision 1 = double 0 = single
 | 
			
		||||
    input logic  [`FMTBITS-1:0]         FmtM,       // precision 1 = double 0 = single
 | 
			
		||||
    input logic                         KillProdM,  // is the product set to zero
 | 
			
		||||
    input logic 			            ZDenormM,
 | 
			
		||||
    input logic                         AddendStickyM,  // the sticky bit caclulated from the aligned addend
 | 
			
		||||
@ -681,7 +681,7 @@ module normalize(
 | 
			
		||||
endmodule
 | 
			
		||||
 | 
			
		||||
module fmaround(
 | 
			
		||||
    input logic  [`FPSIZES/3:0] FmtM,       // precision 1 = double 0 = single
 | 
			
		||||
    input logic  [`FMTBITS-1:0] FmtM,       // precision 1 = double 0 = single
 | 
			
		||||
    input logic  [2:0]          FrmM,       // rounding mode
 | 
			
		||||
    input logic                 UfSticky,   // sticky bit for underlow calculation
 | 
			
		||||
    input logic  [`NF+1:0]      NormSum,    // normalized sum
 | 
			
		||||
@ -920,7 +920,7 @@ module fmaflags(
 | 
			
		||||
    input logic  [`NE+1:0]      SumExp,                 // exponent of the normalized sum
 | 
			
		||||
    input logic                 ZSgnEffM, PSgnM,        // the product and modified Z signs
 | 
			
		||||
    input logic                 Round, Guard, UfLSBNormSum, Sticky, UfPlus1, // bits used to determine rounding
 | 
			
		||||
    input logic  [`FPSIZES/3:0] FmtM,                   // precision 1 = double 0 = single
 | 
			
		||||
    input logic  [`FMTBITS-1:0] FmtM,                   // precision 1 = double 0 = single
 | 
			
		||||
    output logic                Invalid, Overflow, Underflow, // flags used to select the result
 | 
			
		||||
    output logic [4:0]          FMAFlgM // FMA flags
 | 
			
		||||
);
 | 
			
		||||
@ -996,7 +996,7 @@ module resultselect(
 | 
			
		||||
    input logic     [`NE-1:0]       ZExpM, // input exponents
 | 
			
		||||
    input logic     [`NF:0]         XManM, YManM, ZManM, // input mantissas
 | 
			
		||||
    input logic     [2:0]           FrmM,       // rounding mode 000 = rount to nearest, ties to even   001 = round twords zero  010 = round down  011 = round up  100 = round to nearest, ties to max magnitude
 | 
			
		||||
    input logic     [`FPSIZES/3:0]  FmtM,       // precision 1 = double 0 = single
 | 
			
		||||
    input logic     [`FMTBITS-1:0]  FmtM,       // precision 1 = double 0 = single
 | 
			
		||||
    input logic                     AddendStickyM,  // sticky bit that is calculated during alignment
 | 
			
		||||
    input logic                     KillProdM,      // set the product to zero before addition if the product is too small to matter
 | 
			
		||||
    input logic                     XInfM, YInfM, ZInfM,    // inputs are infinity
 | 
			
		||||
 | 
			
		||||
@ -65,7 +65,7 @@ module fpu (
 | 
			
		||||
   // control signals
 | 
			
		||||
   logic 		  FRegWriteD, FRegWriteE, FRegWriteW; // FP register write enable
 | 
			
		||||
   logic [2:0] 	  FrmD, FrmE, FrmM;                   // FP rounding mode
 | 
			
		||||
   logic 		  FmtD, FmtE, FmtM, FmtW;             // FP precision 0-single 1-double
 | 
			
		||||
   logic [`FMTBITS-1:0] FmtD, FmtE, FmtM, FmtW;             // FP precision 0-single 1-double
 | 
			
		||||
   logic 		  FDivStartD, FDivStartE;             // Start division or squareroot
 | 
			
		||||
   logic 		  FWriteIntD;                         // Write to integer register
 | 
			
		||||
   logic [1:0] 	  FForwardXE, FForwardYE, FForwardZE; // forwarding mux control signals
 | 
			
		||||
@ -77,19 +77,19 @@ module fpu (
 | 
			
		||||
   logic [4:0] 	  Adr1E, Adr2E, Adr3E;                // adresses of each input
 | 
			
		||||
 | 
			
		||||
   // regfile signals
 | 
			
		||||
   logic [63:0] 	  FRD1D, FRD2D, FRD3D;                // Read Data from FP register - decode stage
 | 
			
		||||
   logic [63:0] 	  FRD1E, FRD2E, FRD3E;                // Read Data from FP register - execute stage
 | 
			
		||||
   logic [63:0] 	  FSrcXE;                             // Input 1 to the various units (after forwarding)
 | 
			
		||||
   logic [63:0] 	  FPreSrcYE, FSrcYE;                  // Input 2 to the various units (after forwarding)
 | 
			
		||||
   logic [63:0] 	  FPreSrcZE, FSrcZE;                  // Input 3 to the various units (after forwarding)
 | 
			
		||||
   logic [`FLEN-1:0] 	  FRD1D, FRD2D, FRD3D;                // Read Data from FP register - decode stage
 | 
			
		||||
   logic [`FLEN-1:0] 	  FRD1E, FRD2E, FRD3E;                // Read Data from FP register - execute stage
 | 
			
		||||
   logic [`FLEN-1:0] 	  FSrcXE;                             // Input 1 to the various units (after forwarding)
 | 
			
		||||
   logic [`FLEN-1:0] 	  FPreSrcYE, FSrcYE;                  // Input 2 to the various units (after forwarding)
 | 
			
		||||
   logic [`FLEN-1:0] 	  FPreSrcZE, FSrcZE;                  // Input 3 to the various units (after forwarding)
 | 
			
		||||
 | 
			
		||||
   // unpacking signals
 | 
			
		||||
   logic 		  XSgnE, YSgnE, ZSgnE;                // input's sign - execute stage
 | 
			
		||||
   logic 		  XSgnM, YSgnM;                       // input's sign - memory stage
 | 
			
		||||
   logic [10:0] 	  XExpE, YExpE, ZExpE;                // input's exponent - execute stage
 | 
			
		||||
   logic [10:0] 	  ZExpM;                              // input's exponent - memory stage
 | 
			
		||||
   logic [52:0] 	  XManE, YManE, ZManE;                // input's fraction - execute stage
 | 
			
		||||
   logic [52:0] 	  XManM, YManM, ZManM;                // input's fraction - memory stage
 | 
			
		||||
   logic [`NE-1:0] 	  XExpE, YExpE, ZExpE;                // input's exponent - execute stage
 | 
			
		||||
   logic [`NE-1:0] 	  ZExpM;                              // input's exponent - memory stage
 | 
			
		||||
   logic [`NF:0] 	  XManE, YManE, ZManE;                // input's fraction - execute stage
 | 
			
		||||
   logic [`NF:0] 	  XManM, YManM, ZManM;                // input's fraction - memory stage
 | 
			
		||||
   logic 		  XNaNE, YNaNE, ZNaNE;                // is the input a NaN - execute stage
 | 
			
		||||
   logic 		  XNaNM, YNaNM, ZNaNM;                // is the input a NaN - memory stage
 | 
			
		||||
   logic 		  XNaNQ, YNaNQ;                       // is the input a NaN - divide
 | 
			
		||||
@ -107,28 +107,29 @@ module fpu (
 | 
			
		||||
   logic 		  FOpCtrlQ;     
 | 
			
		||||
 | 
			
		||||
   // result and flag signals
 | 
			
		||||
   logic [63:0] 	  FDivResM, FDivResW;                 // divide/squareroot result
 | 
			
		||||
   logic [`FLEN-1:0] 	  FDivResM, FDivResW;                 // divide/squareroot result
 | 
			
		||||
   logic [4:0] 	  FDivFlgM;                 // divide/squareroot flags  
 | 
			
		||||
   logic [63:0] 	  FMAResM, FMAResW;                   // FMA/multiply result
 | 
			
		||||
   logic [`FLEN-1:0] 	  FMAResM, FMAResW;                   // FMA/multiply result
 | 
			
		||||
   logic [4:0] 	  FMAFlgM;                   // FMA/multiply result	
 | 
			
		||||
   logic [63:0] 	  ReadResW;                           // read result (load instruction)
 | 
			
		||||
   logic [63:0] 	  CvtResE;                   // FP <-> int convert result
 | 
			
		||||
   logic [`FLEN-1:0] 	  ReadResW;                           // read result (load instruction)
 | 
			
		||||
   logic [`FLEN-1:0] 	  CvtResE;                   // FP <-> int convert result
 | 
			
		||||
   logic [`XLEN-1:0] CvtIntResE;                   // FP <-> int convert result
 | 
			
		||||
   logic [4:0] 	  CvtFlgE;                   // FP <-> int convert flags //*** trim this	
 | 
			
		||||
   logic [`XLEN-1:0] 	  ClassResE;               // classify result
 | 
			
		||||
   logic [63:0] 	  CmpResE;                   // compare result
 | 
			
		||||
   logic [`FLEN-1:0] 	  CmpResE;                   // compare result
 | 
			
		||||
   logic 		  CmpNVE;                     // compare invalid flag (Not Valid)     
 | 
			
		||||
   logic [63:0] 	  SgnResE;                   // sign injection result
 | 
			
		||||
   logic [63:0] 	  FResE, FResM, FResW;                // selected result that is ready in the memory stage
 | 
			
		||||
   logic [`FLEN-1:0] 	  SgnResE;                   // sign injection result
 | 
			
		||||
   logic [`FLEN-1:0] 	  FResE, FResM, FResW;                // selected result that is ready in the memory stage
 | 
			
		||||
   logic [4:0] 	  FFlgE, FFlgM;                       // selected flag that is ready in the memory stage     
 | 
			
		||||
   logic [`XLEN-1:0] 	  FIntResE;     
 | 
			
		||||
   logic [63:0] 	  FPUResultW;                         // final FP result being written to the FP register     
 | 
			
		||||
   logic [`FLEN-1:0] 	  FPUResultW;                         // final FP result being written to the FP register     
 | 
			
		||||
   // other signals
 | 
			
		||||
   logic 		  FDivSqrtDoneE;                      // is divide done
 | 
			
		||||
   logic [63:0] 	  DivInput1E, DivInput2E;             // inputs to divide/squareroot unit
 | 
			
		||||
   logic [`FLEN-1:0] 	  DivInput1E, DivInput2E;             // inputs to divide/squareroot unit
 | 
			
		||||
   logic 		  load_preload;                       // enable for FF on fpdivsqrt     
 | 
			
		||||
   logic [63:0] 	  AlignedSrcAE;                       // align SrcA to the floating point format
 | 
			
		||||
   logic [63:0]     BoxedZeroE;                         // Zero value for Z for multiplication, with NaN boxing if needed
 | 
			
		||||
   logic [`FLEN-1:0] 	  AlignedSrcAE;                       // align SrcA to the floating point format
 | 
			
		||||
   logic [`FLEN-1:0]     BoxedZeroE;                         // Zero value for Z for multiplication, with NaN boxing if needed
 | 
			
		||||
   logic [`FLEN-1:0]     BoxedOneE;                         // Zero value for Z for multiplication, with NaN boxing if needed
 | 
			
		||||
   
 | 
			
		||||
   // DECODE STAGE
 | 
			
		||||
 | 
			
		||||
@ -144,12 +145,12 @@ module fpu (
 | 
			
		||||
      .rd1(FRD1D), .rd2(FRD2D), .rd3(FRD3D));	
 | 
			
		||||
 | 
			
		||||
   // D/E pipeline registers
 | 
			
		||||
   flopenrc #(64) DEReg1(clk, reset, FlushE, ~StallE, FRD1D, FRD1E);
 | 
			
		||||
   flopenrc #(64) DEReg2(clk, reset, FlushE, ~StallE, FRD2D, FRD2E);
 | 
			
		||||
   flopenrc #(64) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E);
 | 
			
		||||
   flopenrc #(`FLEN) DEReg1(clk, reset, FlushE, ~StallE, FRD1D, FRD1E);
 | 
			
		||||
   flopenrc #(`FLEN) DEReg2(clk, reset, FlushE, ~StallE, FRD2D, FRD2E);
 | 
			
		||||
   flopenrc #(`FLEN) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E);
 | 
			
		||||
   flopenrc #(15) DEAdrReg(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]}, 
 | 
			
		||||
                           {Adr1E, Adr2E, Adr3E});
 | 
			
		||||
   flopenrc #(16) DECtrlReg3(clk, reset, FlushE, ~StallE, 
 | 
			
		||||
   flopenrc #(16+int'(`FMTBITS-1)) DECtrlReg3(clk, reset, FlushE, ~StallE, 
 | 
			
		||||
               {FRegWriteD, FResultSelD, FResSelD, FIntResSelD, FrmD, FmtD, FOpCtrlD, FWriteIntD, FDivStartD},
 | 
			
		||||
               {FRegWriteE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE, FDivStartE});
 | 
			
		||||
 | 
			
		||||
@ -160,17 +161,39 @@ module fpu (
 | 
			
		||||
                  .FStallD, .FForwardXE, .FForwardYE, .FForwardZE);
 | 
			
		||||
 | 
			
		||||
   // forwarding muxs
 | 
			
		||||
   mux3  #(64)  fxemux (FRD1E, FPUResultW, FResM, FForwardXE, FSrcXE);
 | 
			
		||||
   mux3  #(64)  fyemux (FRD2E, FPUResultW, FResM, FForwardYE, FPreSrcYE);
 | 
			
		||||
   mux3  #(64)  fzemux (FRD3E, FPUResultW, FResM, FForwardZE, FPreSrcZE);
 | 
			
		||||
   mux3  #(64)  fyaddmux (FPreSrcYE, {{32{1'b1}}, 2'b0, {7{1'b1}}, 23'b0}, 
 | 
			
		||||
            {2'b0, {10{1'b1}}, 52'b0}, 
 | 
			
		||||
            {FmtE&FOpCtrlE[2]&FOpCtrlE[1]&(FResultSelE==2'b01), ~FmtE&FOpCtrlE[2]&FOpCtrlE[1]&(FResultSelE==2'b01)}, 
 | 
			
		||||
            FSrcYE); // Force Z to be 0 for multiply instructions
 | 
			
		||||
   mux3  #(`FLEN)  fxemux (FRD1E, FPUResultW, FResM, FForwardXE, FSrcXE);
 | 
			
		||||
   mux3  #(`FLEN)  fyemux (FRD2E, FPUResultW, FResM, FForwardYE, FPreSrcYE);
 | 
			
		||||
   mux3  #(`FLEN)  fzemux (FRD3E, FPUResultW, FResM, FForwardZE, FPreSrcZE);
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
   generate
 | 
			
		||||
      if(`FPSIZES == 1) assign BoxedOneE = {2'b0, {`NE-1{1'b1}}, (`NF)'(0)};
 | 
			
		||||
      else if(`FPSIZES == 2) 
 | 
			
		||||
         mux2 #(`FLEN) fonemux ({{`FLEN-`LEN1{1'b1}}, 2'b0, {`NE1-1{1'b1}}, (`NF1)'(0)}, {2'b0, {`NE-1{1'b1}}, (`NF)'(0)}, FmtE, BoxedOneE); // NaN boxing zeroes
 | 
			
		||||
      else if(`FPSIZES == 3 | `FPSIZES == 4) 
 | 
			
		||||
         mux4 #(`FLEN) fonemux ({{`FLEN-`S_LEN{1'b1}}, 2'b0, {`S_NE-1{1'b1}}, (`S_NF)'(0)}, 
 | 
			
		||||
                              {{`FLEN-`D_LEN{1'b1}}, 2'b0, {`D_NE-1{1'b1}}, (`D_NF)'(0)}, 
 | 
			
		||||
                              {{`FLEN-`H_LEN{1'b1}}, 2'b0, {`H_NE-1{1'b1}}, (`H_NF)'(0)}, 
 | 
			
		||||
                              {2'b0, {`NE-1{1'b1}}, (`NF)'(0)}, FmtE, BoxedOneE); // NaN boxing zeroes
 | 
			
		||||
   endgenerate
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
   mux2  #(`FLEN)  fyaddmux (FPreSrcYE, BoxedOneE, FOpCtrlE[2]&FOpCtrlE[1]&(FResultSelE==2'b01), FSrcYE); // Force Z to be 0 for multiply instructions
 | 
			
		||||
   
 | 
			
		||||
   // Force Z to be 0 for multiply instructions 
 | 
			
		||||
   mux2 #(64) fmulzeromux (64'hFFFFFFFF00000000, 64'b0, FmtE, BoxedZeroE); // NaN boxing for 32-bit zero
 | 
			
		||||
   mux3  #(64)  fzmulmux (FPreSrcZE, BoxedZeroE, FPreSrcYE, {FOpCtrlE[2]&FOpCtrlE[1], FOpCtrlE[2]&~FOpCtrlE[1]}, FSrcZE);
 | 
			
		||||
      
 | 
			
		||||
   generate
 | 
			
		||||
   if(`FPSIZES == 1) assign BoxedZeroE = 0;
 | 
			
		||||
   else if(`FPSIZES == 2) 
 | 
			
		||||
      mux2 #(`FLEN) fmulzeromux ({{`FLEN-`LEN1{1'b1}}, {`FLEN-`LEN1{1'b0}}}, (`FLEN)'(0), FmtE, BoxedZeroE); // NaN boxing zeroes
 | 
			
		||||
   else if(`FPSIZES == 3 | `FPSIZES == 4)
 | 
			
		||||
      mux4 #(`FLEN) fmulzeromux ({{`FLEN-`S_LEN{1'b1}}, (`FLEN-`S_LEN)'(0)}, 
 | 
			
		||||
                                 {{`FLEN-`D_LEN{1'b1}}, (`FLEN-`D_LEN)'(0)}, 
 | 
			
		||||
                                 {{`FLEN-`H_LEN{1'b1}}, (`FLEN-`H_LEN)'(0)}, 
 | 
			
		||||
                                 (`FLEN)'(0), FmtE, BoxedZeroE); // NaN boxing zeroes
 | 
			
		||||
   endgenerate
 | 
			
		||||
 | 
			
		||||
   mux3  #(`FLEN)  fzmulmux (FPreSrcZE, BoxedZeroE, FPreSrcYE, {FOpCtrlE[2]&FOpCtrlE[1], FOpCtrlE[2]&~FOpCtrlE[1]}, FSrcZE);
 | 
			
		||||
 | 
			
		||||
   // unpack unit
 | 
			
		||||
   //    - splits FP inputs into their various parts
 | 
			
		||||
   //    - does some classifications (SNaN, NaN, Denorm, Norm, Zero, Infinity)
 | 
			
		||||
@ -195,13 +218,13 @@ module fpu (
 | 
			
		||||
      .FMAFlgM, .FMAResM);
 | 
			
		||||
 | 
			
		||||
   // fpdivsqrt using Goldschmidt's iteration
 | 
			
		||||
   flopenrc #(64) reg_input1 (.d({XSgnE, XExpE, XManE[51:0]}), .q(DivInput1E),
 | 
			
		||||
   flopenrc #(`FLEN) reg_input1 (.d({XSgnE, XExpE, XManE[51:0]}), .q(DivInput1E),
 | 
			
		||||
         .clear(FDivSqrtDoneE), .en(load_preload),
 | 
			
		||||
         .reset(reset),  .clk(clk));
 | 
			
		||||
   flopenrc #(64) reg_input2 (.d({YSgnE, YExpE, YManE[51:0]}), .q(DivInput2E),
 | 
			
		||||
   flopenrc #(`FLEN) reg_input2 (.d({YSgnE, YExpE, YManE[51:0]}), .q(DivInput2E),
 | 
			
		||||
            .clear(FDivSqrtDoneE), .en(load_preload),
 | 
			
		||||
            .reset(reset),  .clk(clk));
 | 
			
		||||
   flopenrc #(8) reg_input3 (.d({XNaNE, YNaNE, XInfE, YInfE, XZeroE, YZeroE, FmtE, FOpCtrlE[0]}), 
 | 
			
		||||
   flopenrc #(8+int'(`FMTBITS-1)) reg_input3 (.d({XNaNE, YNaNE, XInfE, YInfE, XZeroE, YZeroE, FmtE, FOpCtrlE[0]}), 
 | 
			
		||||
            .q({XNaNQ, YNaNQ, XInfQ, YInfQ, XZeroQ, YZeroQ, FmtQ, FOpCtrlQ}),
 | 
			
		||||
            .clear(FDivSqrtDoneE), .en(load_preload),
 | 
			
		||||
            .reset(reset),  .clk(clk));
 | 
			
		||||
@ -223,11 +246,19 @@ module fpu (
 | 
			
		||||
   //        - if there are any unused bits the most significant bits are filled with 1s
 | 
			
		||||
   assign FWriteDataE = FSrcYE[`XLEN-1:0];     
 | 
			
		||||
 | 
			
		||||
   // Align SrcA to MSB when single precision
 | 
			
		||||
   mux2  #(64)  SrcAMux({{32{1'b1}}, ForwardedSrcAE[31:0]}, {{64-`XLEN{1'b1}}, ForwardedSrcAE}, FmtE, AlignedSrcAE);
 | 
			
		||||
 | 
			
		||||
   // NaN box SrcA
 | 
			
		||||
   generate
 | 
			
		||||
   if(`FPSIZES == 1) assign AlignedSrcAE = {{`FLEN-`XLEN{1'b1}}, ForwardedSrcAE};
 | 
			
		||||
   else if(`FPSIZES == 2) 
 | 
			
		||||
      mux2 #(`FLEN) SrcAMux ({{`FLEN-`LEN1{1'b1}}, ForwardedSrcAE[`LEN1-1:0]}, {{`FLEN-`XLEN{1'b1}}, ForwardedSrcAE}, FmtE, AlignedSrcAE);
 | 
			
		||||
   else if(`FPSIZES == 3 | `FPSIZES == 4)
 | 
			
		||||
      mux4 #(`FLEN) SrcAMux ({{`FLEN-`S_LEN{1'b1}}, ForwardedSrcAE[`S_LEN-1:0]}, 
 | 
			
		||||
                             {{`FLEN-`D_LEN{1'b1}}, ForwardedSrcAE[`D_LEN-1:0]}, 
 | 
			
		||||
                             {{`FLEN-`H_LEN{1'b1}}, ForwardedSrcAE[`H_LEN-1:0]}, 
 | 
			
		||||
                             {{`FLEN-`XLEN{1'b1}}, ForwardedSrcAE}, FmtE, AlignedSrcAE); // NaN boxing zeroes
 | 
			
		||||
   endgenerate
 | 
			
		||||
   // select a result that may be written to the FP register
 | 
			
		||||
   mux4  #(64) FResMux(AlignedSrcAE, SgnResE, CmpResE, CvtResE, FResSelE, FResE);
 | 
			
		||||
   mux4  #(`FLEN) FResMux(AlignedSrcAE, SgnResE, CmpResE, CvtResE, FResSelE, FResE);
 | 
			
		||||
   mux4  #(5)  FFlgMux(5'b0, 5'b0, {CmpNVE, 4'b0}, CvtFlgE, FResSelE, FFlgE);
 | 
			
		||||
 | 
			
		||||
   // select the result that may be written to the integer register - to IEU
 | 
			
		||||
@ -239,16 +270,16 @@ module fpu (
 | 
			
		||||
   // E/M pipe registers
 | 
			
		||||
 | 
			
		||||
   // flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, FSrcXE, FSrcXM);
 | 
			
		||||
   flopenrc #(54) EMFpReg2 (clk, reset, FlushM, ~StallM, {XSgnE,XManE}, {XSgnM,XManM});
 | 
			
		||||
   flopenrc #(54) EMFpReg3 (clk, reset, FlushM, ~StallM, {YSgnE,YManE}, {YSgnM,YManM});
 | 
			
		||||
   flopenrc #(64) EMFpReg4 (clk, reset, FlushM, ~StallM, {ZExpE,ZManE}, {ZExpM,ZManM});
 | 
			
		||||
   flopenrc #(`NF+2) EMFpReg2 (clk, reset, FlushM, ~StallM, {XSgnE,XManE}, {XSgnM,XManM});
 | 
			
		||||
   flopenrc #(`NF+2) EMFpReg3 (clk, reset, FlushM, ~StallM, {YSgnE,YManE}, {YSgnM,YManM});
 | 
			
		||||
   flopenrc #(`FLEN) EMFpReg4 (clk, reset, FlushM, ~StallM, {ZExpE,ZManE}, {ZExpM,ZManM});
 | 
			
		||||
   flopenrc #(12) EMFpReg5 (clk, reset, FlushM, ~StallM, 
 | 
			
		||||
            {XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE},
 | 
			
		||||
            {XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM});     
 | 
			
		||||
   flopenrc #(64) EMRegCmpRes (clk, reset, FlushM, ~StallM, FResE, FResM); 
 | 
			
		||||
   flopenrc #(`FLEN) EMRegCmpRes (clk, reset, FlushM, ~StallM, FResE, FResM); 
 | 
			
		||||
   flopenrc #(5)  EMRegCmpFlg (clk, reset, FlushM, ~StallM, FFlgE, FFlgM);      
 | 
			
		||||
   flopenrc #(`XLEN) EMRegSgnRes (clk, reset, FlushM, ~StallM, FIntResE, FIntResM);
 | 
			
		||||
   flopenrc #(7) EMCtrlReg (clk, reset, FlushM, ~StallM,
 | 
			
		||||
   flopenrc #(7+int'(`FMTBITS-1)) EMCtrlReg (clk, reset, FlushM, ~StallM,
 | 
			
		||||
               {FRegWriteE, FResultSelE, FrmE, FmtE},
 | 
			
		||||
               {FRegWriteM, FResultSelM, FrmM, FmtM});
 | 
			
		||||
 | 
			
		||||
@ -258,10 +289,10 @@ module fpu (
 | 
			
		||||
   mux4  #(5)  FPUFlgMux (5'b0, FMAFlgM, FDivFlgM, FFlgM, FResultSelM, SetFflagsM);
 | 
			
		||||
 | 
			
		||||
   // M/W pipe registers
 | 
			
		||||
   flopenrc #(64) MWRegFma(clk, reset, FlushW, ~StallW, FMAResM, FMAResW); 
 | 
			
		||||
   flopenrc #(64) MWRegDiv(clk, reset, FlushW, ~StallW, FDivResM, FDivResW); 
 | 
			
		||||
   flopenrc #(64) MWRegClass(clk, reset, FlushW, ~StallW, FResM, FResW);
 | 
			
		||||
   flopenrc #(4)  MWCtrlReg(clk, reset, FlushW, ~StallW,
 | 
			
		||||
   flopenrc #(`FLEN) MWRegFma(clk, reset, FlushW, ~StallW, FMAResM, FMAResW); 
 | 
			
		||||
   flopenrc #(`FLEN) MWRegDiv(clk, reset, FlushW, ~StallW, FDivResM, FDivResW); 
 | 
			
		||||
   flopenrc #(`FLEN) MWRegClass(clk, reset, FlushW, ~StallW, FResM, FResW);
 | 
			
		||||
   flopenrc #(4+int'(`FMTBITS-1))  MWCtrlReg(clk, reset, FlushW, ~StallW,
 | 
			
		||||
            {FRegWriteM, FResultSelM, FmtM},
 | 
			
		||||
            {FRegWriteW, FResultSelW, FmtW});
 | 
			
		||||
 | 
			
		||||
@ -270,8 +301,17 @@ module fpu (
 | 
			
		||||
   // put ReadData into NaN-boxing format
 | 
			
		||||
   //    - if there are any unused bits the most significant bits are filled with 1s
 | 
			
		||||
   //    - for load instruction
 | 
			
		||||
   mux2  #(64)  ReadResMux ({{32{1'b1}}, ReadDataW[31:0]}, {{64-`XLEN{1'b1}}, ReadDataW}, FmtW, ReadResW);
 | 
			
		||||
   generate
 | 
			
		||||
      if(`FPSIZES == 1) assign ReadResW = {{`FLEN-`XLEN{1'b1}}, ReadDataW};
 | 
			
		||||
      else if(`FPSIZES == 2) 
 | 
			
		||||
         mux2 #(`FLEN) SrcAMux ({{`FLEN-`LEN1{1'b1}}, ReadDataW[`LEN1-1:0]}, {{`FLEN-`XLEN{1'b1}}, ReadDataW}, FmtW, ReadResW);
 | 
			
		||||
      else if(`FPSIZES == 3 | `FPSIZES == 4)
 | 
			
		||||
         mux4 #(`FLEN) SrcAMux ({{`FLEN-`S_LEN{1'b1}}, ReadDataW[`S_LEN-1:0]}, 
 | 
			
		||||
                              {{`FLEN-`D_LEN{1'b1}}, ReadDataW[`D_LEN-1:0]}, 
 | 
			
		||||
                              {{`FLEN-`H_LEN{1'b1}}, ReadDataW[`H_LEN-1:0]}, 
 | 
			
		||||
                              {{`FLEN-`XLEN{1'b1}}, ReadDataW}, FmtW, ReadResW); // NaN boxing zeroes
 | 
			
		||||
   endgenerate
 | 
			
		||||
 | 
			
		||||
   // select the result to be written to the FP register
 | 
			
		||||
   mux4  #(64)  FPUResultMux (ReadResW, FMAResW, FDivResW, FResW, FResultSelW, FPUResultW);
 | 
			
		||||
   mux4  #(`FLEN)  FPUResultMux (ReadResW, FMAResW, FDivResW, FResW, FResultSelW, FPUResultW);
 | 
			
		||||
endmodule // fpu
 | 
			
		||||
 | 
			
		||||
@ -31,7 +31,7 @@
 | 
			
		||||
module fsgninj (  
 | 
			
		||||
	input logic        	XSgnE, YSgnE,	// X and Y sign bits
 | 
			
		||||
	input logic [`FLEN-1:0] 	FSrcXE,			// X
 | 
			
		||||
	input logic [`FPSIZES/3:0]		FmtE,			// precision 1 = double 0 = single
 | 
			
		||||
	input logic [`FMTBITS-1:0]		FmtE,			// precision 1 = double 0 = single
 | 
			
		||||
	input  logic [1:0]  SgnOpCodeE,		// operation control
 | 
			
		||||
	output logic [`FLEN-1:0] SgnResE			// result
 | 
			
		||||
);
 | 
			
		||||
 | 
			
		||||
@ -2,7 +2,7 @@
 | 
			
		||||
 | 
			
		||||
module unpack ( 
 | 
			
		||||
    input logic  [`FLEN-1:0]        X, Y, Z,    // inputs from register file
 | 
			
		||||
    input logic  [`FPSIZES/3:0]     FmtE,       // format signal 00 - single 01 - double 11 - quad 10 - half
 | 
			
		||||
    input logic  [`FMTBITS-1:0]     FmtE,       // format signal 00 - single 01 - double 11 - quad 10 - half
 | 
			
		||||
    output logic                    XSgnE, YSgnE, ZSgnE,    // sign bits of XYZ
 | 
			
		||||
    output logic [`NE-1:0]          XExpE, YExpE, ZExpE,    // exponents of XYZ (converted to largest supported precision)
 | 
			
		||||
    output logic [`NF:0]            XManE, YManE, ZManE,    // mantissas of XYZ (converted to largest supported precision)
 | 
			
		||||
 | 
			
		||||
@ -2,7 +2,7 @@
 | 
			
		||||
 | 
			
		||||
module unpackinput ( 
 | 
			
		||||
    input logic  [`FLEN-1:0]        In,    // inputs from register file
 | 
			
		||||
    input logic  [`FPSIZES/3:0]     FmtE,       // format signal 00 - single 01 - double 11 - quad 10 - half
 | 
			
		||||
    input logic  [`FMTBITS-1:0]     FmtE,       // format signal 00 - single 01 - double 11 - quad 10 - half
 | 
			
		||||
    output logic                    Sgn,    // sign bits of XYZ
 | 
			
		||||
    output logic [`NE-1:0]          Exp,    // exponents of XYZ (converted to largest supported precision)
 | 
			
		||||
    output logic [`NF:0]            Man,    // mantissas of XYZ (converted to largest supported precision)
 | 
			
		||||
 | 
			
		||||
@ -54,7 +54,7 @@ module testbenchfp;
 | 
			
		||||
  logic [4:0]           FmaRneAnsFlg, FmaRzAnsFlg, FmaRuAnsFlg, FmaRdAnsFlg, FmaRnmAnsFlg; // flags read form testfloat
 | 
			
		||||
  logic [4:0]	 	        ResFlg;                                                            // Result flags
 | 
			
		||||
  logic [4:0]           FmaRneResFlg, FmaRzResFlg, FmaRuResFlg, FmaRdResFlg, FmaRnmResFlg; // flags read form testfloat
 | 
			
		||||
  logic	[`FPSIZES/3:0]  ModFmt, FmaModFmt;  // format - 10 = half, 00 = single, 01 = double, 11 = quad
 | 
			
		||||
  logic	[`FMTBITS-1:0]  ModFmt, FmaModFmt;  // format - 10 = half, 00 = single, 01 = double, 11 = quad
 | 
			
		||||
  logic [`FLEN-1:0]     FmaRes, DivRes, CmpRes, CvtRes;  // Results from each unit
 | 
			
		||||
  logic [`XLEN-1:0]     CvtIntRes;  // Results from each unit
 | 
			
		||||
  logic [4:0]           FmaFlg, CvtFlg, DivFlg, CmpFlg;  // Outputed flags
 | 
			
		||||
@ -669,9 +669,9 @@ module testbenchfp;
 | 
			
		||||
  //    - 1 for the larger precision
 | 
			
		||||
  //    - 0 for the smaller precision
 | 
			
		||||
  always_comb begin
 | 
			
		||||
    if(`FPSIZES/3 === 1) ModFmt = FmtVal;
 | 
			
		||||
    if(`FMTBITS == 2) ModFmt = FmtVal;
 | 
			
		||||
    else ModFmt = FmtVal === `FMT;
 | 
			
		||||
    if(`FPSIZES/3 === 1) FmaModFmt = FmaFmtVal;
 | 
			
		||||
    if(`FMTBITS == 2) FmaModFmt = FmaFmtVal;
 | 
			
		||||
    else FmaModFmt = FmaFmtVal === `FMT;
 | 
			
		||||
  end
 | 
			
		||||
 | 
			
		||||
@ -1283,7 +1283,7 @@ endmodule
 | 
			
		||||
 | 
			
		||||
module readfmavectors (
 | 
			
		||||
  input logic                 clk,
 | 
			
		||||
  input logic [`FPSIZES/3:0]  FmaModFmt,              // the modified format
 | 
			
		||||
  input logic [`FMTBITS-1:0]  FmaModFmt,              // the modified format
 | 
			
		||||
  input logic [1:0]           FmaFmt,                 // the format of the FMA inputs
 | 
			
		||||
  input logic [`FLEN*4+7:0]   TestVector,             // the test vector
 | 
			
		||||
  output logic [`FLEN-1:0]    Ans,                    // the correct answer
 | 
			
		||||
@ -1358,7 +1358,7 @@ endmodule
 | 
			
		||||
module readvectors (
 | 
			
		||||
  input logic clk,
 | 
			
		||||
  input logic [`FLEN*4+7:0] TestVector,
 | 
			
		||||
  input logic [`FPSIZES/3:0] ModFmt,
 | 
			
		||||
  input logic [`FMTBITS-1:0] ModFmt,
 | 
			
		||||
  input logic [1:0] Fmt,
 | 
			
		||||
  input logic [2:0] Unit,
 | 
			
		||||
  input logic [31:0] VectorNum,
 | 
			
		||||
 | 
			
		||||
@ -30,7 +30,7 @@ eval file copy -force [glob ${hdl_src}/*/*.sv] {hdl/}
 | 
			
		||||
eval file copy -force [glob ${hdl_src}/*/flop/*.sv] {hdl/}
 | 
			
		||||
 | 
			
		||||
# Only for FMA class project; comment out when done
 | 
			
		||||
eval file copy -force [glob ${hdl_src}/fma/fma16.v] {hdl/}
 | 
			
		||||
# eval file copy -force [glob ${hdl_src}/fma/fma16.v] {hdl/}
 | 
			
		||||
 | 
			
		||||
# Enables name mapping
 | 
			
		||||
if { $saifpower == 1 } {
 | 
			
		||||
@ -332,8 +332,8 @@ redirect -append $filename { echo "\n\n\n//// Critical paths through fma2 ////\n
 | 
			
		||||
redirect -append $filename { report_timing -capacitance -transition_time -nets -through {fma/fma2/*} -nworst 1 }
 | 
			
		||||
redirect -append $filename { echo "\n\n\n//// Critical paths through fpdiv ////\n\n\n" }
 | 
			
		||||
redirect -append $filename { report_timing -capacitance -transition_time -nets -through {fdivsqrt/*} -nworst 1 }
 | 
			
		||||
redirect -append $filename { echo "\n\n\n//// Critical paths through faddcvt ////\n\n\n" }
 | 
			
		||||
redirect -append $filename { report_timing -capacitance -transition_time -nets -through {faddcvt/*} -nworst 1 }
 | 
			
		||||
redirect -append $filename { echo "\n\n\n//// Critical paths through fcvt ////\n\n\n" }
 | 
			
		||||
redirect -append $filename { report_timing -capacitance -transition_time -nets -through {fcvt/*} -nworst 1 }
 | 
			
		||||
 | 
			
		||||
set filename [format "%s%s%s%s" $outputDir  "/reports/" $my_toplevel "_mmu_timing.rep"]
 | 
			
		||||
redirect -append $filename { echo "\n\n\n//// Critical paths through immu/physicaladdress ////\n\n\n" }
 | 
			
		||||
 | 
			
		||||
		Loading…
	
		Reference in New Issue
	
	Block a user