diff --git a/pipelined/src/fpu/fma.sv b/pipelined/src/fpu/fma.sv
index 5f595b1f..e698cdaf 100644
--- a/pipelined/src/fpu/fma.sv
+++ b/pipelined/src/fpu/fma.sv
@@ -85,6 +85,6 @@ module fma(
         
     fmaadd add(.Am, .Pm, .Ze, .Pe, .Ps, .As, .KillProd, .ZmSticky, .AmInv, .PmKilled, .NegSum, .InvA, .Sm, .Se, .Ss);
     
-    fmalza lza(.A(AmInv+{(3*`NF+6)'(0),InvA&~((ZmSticky&~KillProd))}), .Pm({PmKilled, 1'b0, InvA&Ps&ZmSticky&KillProd}), .SCnt);
+    fmalza lza(.A(AmInv), .Pm({PmKilled, 1'b0, InvA&Ps&ZmSticky&KillProd}), .Cin(InvA & ~(ZmSticky & ~KillProd)), .SCnt);
 endmodule
 
diff --git a/pipelined/src/fpu/fmalza.sv b/pipelined/src/fpu/fmalza.sv
index a05084e2..f70b1bc9 100644
--- a/pipelined/src/fpu/fmalza.sv
+++ b/pipelined/src/fpu/fmalza.sv
@@ -30,28 +30,37 @@
 `include "wally-config.vh"
 
 module fmalza( // [Schmookler & Nowka, Leading zero anticipation and detection, IEEE Sym. Computer Arithmetic, 2001]
-    input logic  [3*`NF+6:0] A,     // addend
-    input logic  [2*`NF+3:0] Pm,     // product
-    output logic [$clog2(3*`NF+7)-1:0]       SCnt   // normalization shift count for the positive result
+    input  logic [3*`NF+6:0]           A,    // addend
+    input  logic [2*`NF+3:0]           Pm,   // product
+    input  logic                       Cin,  // carry in to bit 0: supplies G[-1] (and ~Cin supplies K[-1]) below
+    output logic [$clog2(3*`NF+7)-1:0] SCnt  // normalization shift count for the positive result
     ); 
 
     localparam WIDTH = 3*`NF+7;
     
-    logic [WIDTH-1:0] B, P, G, K, F;
-    logic [WIDTH-1:0] Pp1, Gm1, Km1;
+    logic [WIDTH-1:0] B, P, F;       // zero-extended product, propagate, leading-pattern indicator
+    logic [WIDTH-2:0] G, K;          // generate and kill; their MSB is never consumed, so it is not computed
+    logic [WIDTH-2:0] Pp1;           // Pp1[i] stands in for P[i+1]
+    logic [WIDTH-2:0] Gm1, Km1;      // Gm1[i] = G[i-1], Km1[i] = K[i-1]; index -1 comes from Cin
 
     assign B = {{(`NF+3){1'b0}}, Pm}; // Zero extend product
 
+    // TODO: replace P[WIDTH-1] with sub, then remove one bit from A
+    // Propagate keeps all WIDTH bits; generate/kill only use bits [WIDTH-2:0].
     assign P = A^B;
-    assign G = A&B;
-    assign K= ~A&~B;
+    assign G = A[WIDTH-2:0] & B[WIDTH-2:0];
+    assign K = ~A[WIDTH-2:0] & ~B[WIDTH-2:0];
 
+    assign Pp1 = {A[WIDTH-1], P[WIDTH-2:1]}; // A[WIDTH-1] equals P[WIDTH-1] because the upper bits of B are zero
+    assign Gm1 = {G[WIDTH-3:0], Cin};        // G[-1] =  Cin
+    assign Km1 = {K[WIDTH-3:0], ~Cin};       // K[-1] = ~Cin
+
     // Apply function to determine Leading pattern
     //      - note: the paper linked above uses the numbering system where 0 is the most significant bit
     //f[n] = ~P[n]&P[n-1]           note: n is the MSB
     //f[i] = (P[i+1]&(G[i]&~K[i-1] | K[i]&~G[i-1])) | (~P[i+1]&(K[i]&~K[i-1] | G[i]&~G[i-1]))
     assign F[WIDTH-1] = ~P[WIDTH-1]&P[WIDTH-2];
-    assign F[WIDTH-2:0] = (P[3*`NF+6:1]&(G[3*`NF+5:0]&{~K[3*`NF+4:0], 1'b0} | K[3*`NF+5:0]&{~G[3*`NF+4:0], 1'b1})) | (~P[3*`NF+6:1]&(K[3*`NF+5:0]&{~K[3*`NF+4:0], 1'b0} | G[3*`NF+5:0]&{~G[3*`NF+4:0], 1'b1}));
+    assign F[WIDTH-2:0] = (Pp1&(G&~Km1 | K&~Gm1)) | (~Pp1&(K&~Km1 | G&~Gm1));
 
-    lzc #(3*`NF+7) lzc (.num(F), .ZeroCnt(SCnt));
+    lzc #(WIDTH) lzc (.num(F), .ZeroCnt(SCnt));
 endmodule