zbc and carry-less multiply work properly

2023-02-15 17:37:09 -08:00 · 2023-02-15 17:37:09 -08:00 · 6ac54a180e
commit 6ac54a180e
parent cf8392cbd8
2 changed files with 29 additions and 46 deletions
--- a/src/ieu/bmu/clmul.sv
+++ b/src/ieu/bmu/clmul.sv
@@ -1,5 +1,5 @@
 ///////////////////////////////////////////
-// clmul.sv
+// clmul.sv (carry-less multiplier)
 //
 // Written: Kevin Kim <kekim@hmc.edu> and Kip Macsai-Goren <kmacsaigoren@hmc.edu>
 // Created: 1 February 2023
@@ -30,35 +30,24 @@
 `include "wally-config.vh"

 module clmul #(parameter WIDTH=32) (
-  input  logic [WIDTH-1:0] A, B,       // Operands
+  input  logic [WIDTH-1:0] A, B,             // Operands
  output logic [WIDTH-1:0] ClmulResult);     // ZBS result

-  logic [WIDTH-1:0] pp [WIDTH-1:0]; //partial AND products
-  // Note: only generates the bottom WIDTH bits of the carryless multiply.
-  //    To get the high bits or the reversed bits, the inputs can be shifted and reversed
-  //    as they are in zbc where this is instantiated
-  /*
-  genvar i;
-  for (i=0; i<WIDTH; i++) begin
-    assign pp[i] = ((A & {(WIDTH){B[i]}}) << i); // Fill partial product array
-    // ClmulResult ^= pp[i];
-  end
-  assign ClmulResult = pp.xor();
-  */
-  genvar i,j;
-  for (i=1; i<WIDTH;i++) begin:outer //loop fills partial product array
-    for (j=0;j<=i;j++) begin:inner
-      assign pp[i][j] = A[i]&B[j];
+  logic [(WIDTH*WIDTH)-1:0] s;
+  logic [WIDTH-1:0] intial;
+  
+  integer i;
+  integer j;
+
+  always_comb begin
+    for (i=0;i<WIDTH;i++) begin: outer
+      s[WIDTH*i]=A[0]&B[i];
+      for (j=1;j<=i;j++) begin: inner
+        s[WIDTH*i+j] = (A[j]&B[i-j])^s[WIDTH*i+j-1];
+      end
+      ClmulResult[i] = s[WIDTH*i+j-1];
    end
  end
- /*
-  for (i=1;i<WIDTH;i++) begin:xortree
-    assign ClmulResult[i] = ^pp[i:0][i];
-    
-  end
-*/
-
-  assign ClmulResult[0] = A[0]&B[0];

 endmodule

--- a/src/ieu/bmu/zbc.sv
+++ b/src/ieu/bmu/zbc.sv
@@ -36,43 +36,37 @@ module zbc #(parameter WIDTH=32) (

  logic [WIDTH-1:0] ClmulResult, RevClmulResult;
  logic [WIDTH-1:0] RevA, RevB;
-  logic [WIDTH-1:0] X,Y;
+  logic [WIDTH-1:0] x,y;


-  genvar i;
-  
-
-  bitreverse brA(.a(A), .b(RevA));
-  bitreverse brB(.a(B), .b(RevB));
+  bitreverse #(WIDTH) brA(.a(A), .b(RevA));
+  bitreverse #(WIDTH) brB(.a(B), .b(RevB));
   
-  //NOTE: Is it better to mux in input to a SINGLE clmul or to instantiate 3 clmul and MUX the result?
-  //current implementation CP goes MUX -> CLMUL -> MUX -> RESULT
-  //alternate could have CLMUL * 3 -> MUX -> MUX
  always_comb begin
    casez (Funct3)
      3'b001: begin //clmul
-        X = A;
-        Y = B;
+        x = A;
+        y = B;
      end
      3'b011: begin //clmulh
-        X = {RevA[WIDTH-2:0], {1'b0}};
-        Y = {{1'b0}, RevB[WIDTH-2:0]};
+        x = {RevA[WIDTH-2:0], {1'b0}};
+        y = {{1'b0}, RevB[WIDTH-2:0]};
      end
      3'b010: begin //clmulr
-        X = {A[WIDTH-2:0], {1'b0}};
-        Y = B;
+        x = RevA;
+        y = RevB;
      end
      default: begin
-        X = 0;
-        Y = 0;
+        x = 0;
+        y = 0;
      end
    endcase
    
  end
-  clmul clm(.A(X), .B(Y), .ClmulResult(ClmulResult));
-  bitreverse brClmulResult(.a(ClmulResult), .b(RevClmulResult));
+  clmul #(WIDTH) clm(.A(x), .B(y), .ClmulResult(ClmulResult));
+  bitreverse  #(WIDTH) brClmulResult(.a(ClmulResult), .b(RevClmulResult));

-  assign ZBCResult = (Funct3 == 3'b011) ? RevClmulResult : ClmulResult;
+  assign ZBCResult = (Funct3 == 3'b011  || Funct3 == 3'b010) ? RevClmulResult : ClmulResult;


 endmodule