From d917cc1379a1cf8fdf104b5bf975957c696c8ddc Mon Sep 17 00:00:00 2001
From: David Harris <david_harris@hmc.edu>
Date: Sun, 27 Feb 2022 04:36:01 +0000
Subject: [PATCH] fma passing multiply vectors

---
 pipelined/src/fma/Makefile         | 23 +++++++---
 pipelined/src/fma/fma.c            | 47 --------------------
 pipelined/src/fma/fma16.sv         | 70 +++++++++++++++++++++---------
 pipelined/src/fma/fma16_testgen.py | 31 -------------
 pipelined/src/fma/softfloat.a      |  1 -
 5 files changed, 65 insertions(+), 107 deletions(-)
 delete mode 100644 pipelined/src/fma/fma.c
 delete mode 100755 pipelined/src/fma/fma16_testgen.py
 delete mode 120000 pipelined/src/fma/softfloat.a

diff --git a/pipelined/src/fma/Makefile b/pipelined/src/fma/Makefile
index 270954f72..7ca0b2926 100644
--- a/pipelined/src/fma/Makefile
+++ b/pipelined/src/fma/Makefile
@@ -1,10 +1,19 @@
-TARGET ?= fma
+# Makefile: build one program per *.c file in this directory, linked with SoftFloat-3e softfloat.a and libm
 
-# for some reason, softfloat.a needs to be symlinked to the local directory.  -L isn't working
-$(TARGET): $(TARGET).c Makefile
-	gcc -O2 -o $(TARGET) $(TARGET).c softfloat.a  \
-		 -I../../../addins/SoftFloat-3e/source/include  \
-		 -L../../../addins/SoftFloat-3e/build/Linux-x86_64-GCC 
+CC     = gcc
+CFLAGS = -O3
+LIBS   = ../../../addins/SoftFloat-3e/build/Linux-x86_64-GCC/softfloat.a
+LFLAGS = -L. 
+IFLAGS = -I../../../addins/SoftFloat-3e/source/include/
+LIBS  += -lm
+SRCS   = $(wildcard *.c)
+
+PROGS = $(patsubst %.c,%,$(SRCS))
+
+all:	$(PROGS)
+
+%: %.c
+	$(CC) $(CFLAGS) $(IFLAGS) $(LFLAGS) -o $@ $< $(LIBS)
 
 clean: 
-	rm $(TARGET)
+	rm -f $(PROGS)
diff --git a/pipelined/src/fma/fma.c b/pipelined/src/fma/fma.c
deleted file mode 100644
index 4b7bda1fd..000000000
--- a/pipelined/src/fma/fma.c
+++ /dev/null
@@ -1,47 +0,0 @@
-#include <stdio.h>
-#include <stdint.h>
-#include "softfloat.h"
-#include "softfloat_types.h"
-
-int float_rounding_mode = 0;
-
-union sp {
-  unsigned short x[2];
-  float y;
-} X;
-
-
-int main()
-{
-    uint8_t rounding_mode;
-    uint8_t exceptions;
-
-    uint32_t multiplier, multiplicand, addend, result;
-    float32_t f_multiplier, f_multiplicand, f_addend, f_result;
-
-    multiplier = 0xbf800000;
-    multiplicand = 0xbf800000;
-    addend = 0xffaaaaaa;
-
-    f_multiplier.v = multiplier;
-    f_multiplicand.v = multiplicand;
-    f_addend.v = addend;
-
-    softfloat_roundingMode = rounding_mode;
-    softfloat_exceptionFlags = 0;
-    softfloat_detectTininess = softfloat_tininess_beforeRounding;
-
-    f_result = f32_mulAdd(f_multiplier, f_multiplicand, f_addend);
-
-    result = f_result.v;    
-    exceptions = softfloat_exceptionFlags & 0x1f;
-
-    printf("%x\n", f_result.v);
-
-    // Print out SP number
-    X.x[1] = (f_result.v & 0xffff0000) >> 16;
-    X.x[0] = (f_result.v & 0x0000ffff);
-    printf("Number = %f\n", X.y);
-
-    return 0;
-}
diff --git a/pipelined/src/fma/fma16.sv b/pipelined/src/fma/fma16.sv
index c537baf6d..9732d6c93 100644
--- a/pipelined/src/fma/fma16.sv
+++ b/pipelined/src/fma/fma16.sv
@@ -13,20 +13,25 @@
 
 module fma16(
   input  logic [15:0] x, y, z,
-  input  logic        add, mul, negp, negz,
+  input  logic        mul, add, negp, negz,
   input  logic [1:0]  roundmode,  // 00: rz, 01: rne, 10: rp, 11: rn
   output logic [15:0] result);
  
   logic [10:0] xm, ym, zm;
-  logic [4:0] xe, ye, ze;
-  logic       xs, ys, zs;
-  logic       zs1; // sign before optional negation
-  logic       ps;  // sign of product
+  logic [4:0]  xe, ye, ze;
+  logic        xs, ys, zs;
+  logic        zs1; // sign before optional negation
+  logic [21:0] pm;  // product mantissa, driven by mult
+  logic [5:0]  pe;  // product exponent, driven by mult
+  logic        ps;  // sign of product
+  logic [22:0] rm;  // result mantissa, driven by add and read by postproc
+  logic [6:0]  re;  // result exponent, driven by add and read by postproc
+  logic        rs;  // result sign, driven by add and read by postproc

   unpack unpack(x, y, z, xm, ym, zm, xe, ye, ze, xs, ys, zs1);  // unpack inputs
   signadj signadj(negp, negz, xs, ys, zs1, ps, zs);             // handle negations
-  mult mult(mul, xm, ym, xe, ye, pm, pe);                       // p = x * y
-  add add(add, pm, zm, pe, ze, ps, zs, rm, re, rs);             // r = z + p
+  mult m(mul, xm, ym, xe, ye, pm, pe);                       // p = x * y (x passed through when mul = 0)
+  add a(add, pm, zm, pe, ze, ps, zs, rm, re, rs);             // r = z + p (p passed through when add = 0)
   postproc post(roundmode, rm, re, rs, result);                 // normalize, round, pack
 endmodule
 
@@ -34,13 +39,12 @@ module mult(
   input  logic        mul,
   input  logic [10:0] xm, ym,
   input  logic [4:0]  xe, ye,
-  input  logic        xs, ys,
   output logic [21:0] pm,
   output logic [5:0]  pe);
 
   // only multiply if mul = 1
-  assign pm = mul ? xm * ym : xm;       // multiply mantiassas 
-  assign pe = mul ? xe + ye : xe;  
+  assign pm = mul ? xm * ym : {1'b0, xm, 10'b0};       // multiply mantissas; otherwise pass xm padded to 22 bits
+  assign pe = mul ? xe + ye : {1'b0, xe};  // add exponents; otherwise pass xe zero-extended to 6 bits
 endmodule
 
 module add(
@@ -58,15 +62,18 @@ module add(
   logic [6:0]  are;
   logic        ars;
 
+  /* adder datapath is commented out; NOTE(review): no driver for arm/are/ars is visible in this hunk - confirm
   alignshift as(pe, ze, zm, zmaligned);
   condneg cnp(pm, ps, pmn);
   condneg cnz(zm, zs, zmn);
+  assign 
+  */
   
   // add or pass product through
-  assign rm = add ? arm : pm;
-  assign re = add ? are : pe;
+  assign rm = add ? arm : {1'b0, pm}; // pass-through prepends one 0 bit to pm
+  assign re = add ? are : {1'b0, pe}; // pass-through prepends one 0 bit to pe
   assign rs = add ? ars : ps;
-);
+endmodule
 
 module postproc(
   input  logic [1:0] roundmode,
@@ -75,6 +82,33 @@ module postproc(
   input  logic        rs,
   output logic [15:0] result);
 
+  logic [9:0] uf, uff;       // 10-bit fraction before / after the overflow clamp
+  logic [6:0] ue;            // exponent after the optional normalization bump
+  logic [6:0] ueb, uebiased; // ue - 15 values; NOTE(review): ueb is assigned but not read in this hunk
+  
+  always_comb 
+    if (rm[21]) begin // rm[21] set: normalization right shift by 1 and bump up exponent
+        ue = re + 7'b1;
+        uf = rm[20:11];
+    end else begin // no normalization shift needed
+        ue = re;
+        uf = rm[19:10];
+    end
+
+  // overflow: clamp to exponent field 30 with an all-ones fraction; NOTE(review): roundmode is not used by the logic visible here
+  always_comb begin
+    ueb = ue-7'd15;
+    if (ue >= 7'd46) begin // overflow: ue-15 would be 31 or more
+      uebiased = 5'd30;
+      uff = 10'h3ff;
+    end else begin
+      uebiased = ue-7'd15; // subtract 15 from the exponent
+      uff = uf;
+    end
+  end
+  
+  assign result = {rs, uebiased[4:0], uff}; // pack sign, low 5 exponent bits, 10-bit fraction
+
   // add special case handling for zeros, NaN, Infinity
 endmodule
 
@@ -107,15 +141,9 @@ module unpacknum(
   logic [9:0] f;  // fraction without leading 1
   logic [4:0] eb; // biased exponent
 
-  assign {f, eb, s} = num; // pull bit fields out of floating-point number
+  assign {s, eb, f} = num; // pull bit fields out of floating-point number: sign in the MSB, then exponent, fraction in the LSBs
   assign m = {1'b1, f}; // prepend leading 1 to fraction
-  assign e = eb - 15;   // remove bias from exponent
+  assign e = eb;   // leave bias in exponent *** (15 is subtracted later, in postproc)
 endmodule
 
 
-// Tests:
-// Every permutation for x, y, z of 
-//    mantissa = {1.0, 1.0000000001, 1.1, 1.1111111110, 1.1111111111}
-//    biased exponent = {1, 2, 14, 15, 16, 21, 29, 30}
-//    sign = {0, 1}
-//    special case: [normal, 0, INF, NaN]
\ No newline at end of file
diff --git a/pipelined/src/fma/fma16_testgen.py b/pipelined/src/fma/fma16_testgen.py
deleted file mode 100755
index c3cc8d72f..000000000
--- a/pipelined/src/fma/fma16_testgen.py
+++ /dev/null
@@ -1,31 +0,0 @@
-#!/usr/bin/python3
-
-# fma16_testgen.py
-# David_Harris@hmc.edu 26 February 2022
-# Generate test cases for 16-bit FMA 
-  
-def makeVal(val):
-
-def makeCase(x, y, z, mul, add, msg):
-    xval = makeVal(x);
-    yval = makeVal(y);
-    zval = makeVal(z);
-    mode = mul*2+add; # convert to hexadecimal code
-    expected = makeExpected(x, y, z, mul, add);
-    print(xval,"_", yval, "_", zval, "_", mode, "_", expected, " //", msg);
-
-def makeMulCase(x, y, msg):
-  makeCase(x, y, "0", 1, 0, msg)
-
-################################
-## Main program
-################################
-
-# Directed cases
-makeMulCase("1", "1", "1 x 1");
-
-
-# Corner cases
-
-# Random cases
-
diff --git a/pipelined/src/fma/softfloat.a b/pipelined/src/fma/softfloat.a
deleted file mode 120000
index 508aa0da8..000000000
--- a/pipelined/src/fma/softfloat.a
+++ /dev/null
@@ -1 +0,0 @@
-../../../addins/SoftFloat-3e/build/Linux-x86_64-GCC/softfloat.a
\ No newline at end of file