zbc and carry-less multiply work properly

2023-02-15 17:37:09 -08:00 · 2023-02-15 17:37:09 -08:00 · 6ac54a180e
commit 6ac54a180e
parent cf8392cbd8
2 changed files with 29 additions and 46 deletions
--- a/src/ieu/bmu/clmul.sv
+++ b/src/ieu/bmu/clmul.sv
@ -1,5 +1,5 @@
 ///////////////////////////////////////////
-// clmul.sv
+// clmul.sv (carry-less multiplier)
 //
 // Written: Kevin Kim <kekim@hmc.edu> and Kip Macsai-Goren <kmacsaigoren@hmc.edu>
 // Created: 1 February 2023
@ -30,35 +30,24 @@
 `include "wally-config.vh"
 module clmul #(parameter WIDTH=32) (
-  input  logic [WIDTH-1:0] A, B,       // Operands
+  input  logic [WIDTH-1:0] A, B,             // Operands
  output logic [WIDTH-1:0] ClmulResult);     // Zbc carry-less multiply result
-  logic [WIDTH-1:0] pp [WIDTH-1:0]; //partial AND products
+  logic [(WIDTH*WIDTH)-1:0] s;
-  // Note: only generates the bottom WIDTH bits of the carryless multiply.
+  logic [WIDTH-1:0] intial;
-  //    To get the high bits or the reversed bits, the inputs can be shifted and reversed
+  
-  //    as they are in zbc where this is instantiated
+  integer i;
-  /*
+  integer j;
-  genvar i;
+
-  for (i=0; i<WIDTH; i++) begin
+  always_comb begin
-    assign pp[i] = ((A & {(WIDTH){B[i]}}) << i); // Fill partial product array
+    for (i=0;i<WIDTH;i++) begin: outer
-    // ClmulResult ^= pp[i];
+      s[WIDTH*i]=A[0]&B[i];
-  end
+      for (j=1;j<=i;j++) begin: inner
-  assign ClmulResult = pp.xor();
+        s[WIDTH*i+j] = (A[j]&B[i-j])^s[WIDTH*i+j-1];
-  */
+      end
-  genvar i,j;
+      ClmulResult[i] = s[WIDTH*i+j-1];
  for (i=1; i<WIDTH;i++) begin:outer //loop fills partial product array
    for (j=0;j<=i;j++) begin:inner
      assign pp[i][j] = A[i]&B[j];
    end
  end
 /*
  for (i=1;i<WIDTH;i++) begin:xortree
    assign ClmulResult[i] = ^pp[i:0][i];
  end
 */
  assign ClmulResult[0] = A[0]&B[0];
 endmodule
--- a/src/ieu/bmu/zbc.sv
+++ b/src/ieu/bmu/zbc.sv
@ -36,43 +36,37 @@ module zbc #(parameter WIDTH=32) (
  logic [WIDTH-1:0] ClmulResult, RevClmulResult;
  logic [WIDTH-1:0] RevA, RevB;
-  logic [WIDTH-1:0] X,Y;
+  logic [WIDTH-1:0] x,y;
-  genvar i;
+  bitreverse #(WIDTH) brA(.a(A), .b(RevA));
  bitreverse #(WIDTH) brB(.a(B), .b(RevB));
  bitreverse brA(.a(A), .b(RevA));
  bitreverse brB(.a(B), .b(RevB));
  // NOTE: Is it better to mux the inputs into a SINGLE clmul, or to instantiate 3 clmuls and mux their results?
  // Current implementation: the critical path (CP) goes MUX -> CLMUL -> MUX -> RESULT
  // Alternative: 3x CLMUL -> MUX -> MUX
  always_comb begin
    casez (Funct3)
      3'b001: begin //clmul
-        X = A;
+        x = A;
-        Y = B;
+        y = B;
      end
      3'b011: begin //clmulh
-        X = {RevA[WIDTH-2:0], {1'b0}};
+        x = {RevA[WIDTH-2:0], {1'b0}};
-        Y = {{1'b0}, RevB[WIDTH-2:0]};
+        y = {{1'b0}, RevB[WIDTH-2:0]};
      end
      3'b010: begin //clmulr
-        X = {A[WIDTH-2:0], {1'b0}};
+        x = RevA;
-        Y = B;
+        y = RevB;
      end
      default: begin
-        X = 0;
+        x = 0;
-        Y = 0;
+        y = 0;
      end
    endcase
  end
-  clmul clm(.A(X), .B(Y), .ClmulResult(ClmulResult));
+  clmul #(WIDTH) clm(.A(x), .B(y), .ClmulResult(ClmulResult));
-  bitreverse brClmulResult(.a(ClmulResult), .b(RevClmulResult));
+  bitreverse  #(WIDTH) brClmulResult(.a(ClmulResult), .b(RevClmulResult));
-  assign ZBCResult = (Funct3 == 3'b011) ? RevClmulResult : ClmulResult;
+  assign ZBCResult = (Funct3 == 3'b011  || Funct3 == 3'b010) ? RevClmulResult : ClmulResult;
 endmodule