forked from Github_Repos/cvw
zbc and carry-less multiply work properly
This commit is contained in:
parent
cf8392cbd8
commit
6ac54a180e
@ -1,5 +1,5 @@
|
||||
///////////////////////////////////////////
|
||||
// clmul.sv
|
||||
// clmul.sv (carry-less multiplier)
|
||||
//
|
||||
// Written: Kevin Kim <kekim@hmc.edu> and Kip Macsai-Goren <kmacsaigoren@hmc.edu>
|
||||
// Created: 1 February 2023
|
||||
@ -30,35 +30,24 @@
|
||||
`include "wally-config.vh"
|
||||
|
||||
module clmul #(parameter WIDTH=32) (
|
||||
input logic [WIDTH-1:0] A, B, // Operands
|
||||
input logic [WIDTH-1:0] A, B, // Operands
|
||||
output logic [WIDTH-1:0] ClmulResult); // ZBC result
|
||||
|
||||
logic [WIDTH-1:0] pp [WIDTH-1:0]; //partial AND products
|
||||
// Note: only generates the bottom WIDTH bits of the carryless multiply.
|
||||
// To get the high bits or the reversed bits, the inputs can be shifted and reversed
|
||||
// as they are in zbc where this is instantiated
|
||||
/*
|
||||
genvar i;
|
||||
for (i=0; i<WIDTH; i++) begin
|
||||
assign pp[i] = ((A & {(WIDTH){B[i]}}) << i); // Fill partial product array
|
||||
// ClmulResult ^= pp[i];
|
||||
end
|
||||
assign ClmulResult = pp.xor();
|
||||
*/
|
||||
genvar i,j;
|
||||
for (i=1; i<WIDTH;i++) begin:outer //loop fills partial product array
|
||||
for (j=0;j<=i;j++) begin:inner
|
||||
assign pp[i][j] = A[i]&B[j];
|
||||
logic [(WIDTH*WIDTH)-1:0] s;
|
||||
logic [WIDTH-1:0] intial;
|
||||
|
||||
integer i;
|
||||
integer j;
|
||||
|
||||
always_comb begin
|
||||
for (i=0;i<WIDTH;i++) begin: outer
|
||||
s[WIDTH*i]=A[0]&B[i];
|
||||
for (j=1;j<=i;j++) begin: inner
|
||||
s[WIDTH*i+j] = (A[j]&B[i-j])^s[WIDTH*i+j-1];
|
||||
end
|
||||
ClmulResult[i] = s[WIDTH*i+j-1];
|
||||
end
|
||||
end
|
||||
/*
|
||||
for (i=1;i<WIDTH;i++) begin:xortree
|
||||
assign ClmulResult[i] = ^pp[i:0][i];
|
||||
|
||||
end
|
||||
*/
|
||||
|
||||
assign ClmulResult[0] = A[0]&B[0];
|
||||
|
||||
endmodule
|
||||
|
||||
|
@ -36,43 +36,37 @@ module zbc #(parameter WIDTH=32) (
|
||||
|
||||
logic [WIDTH-1:0] ClmulResult, RevClmulResult;
|
||||
logic [WIDTH-1:0] RevA, RevB;
|
||||
logic [WIDTH-1:0] X,Y;
|
||||
logic [WIDTH-1:0] x,y;
|
||||
|
||||
|
||||
genvar i;
|
||||
|
||||
|
||||
bitreverse brA(.a(A), .b(RevA));
|
||||
bitreverse brB(.a(B), .b(RevB));
|
||||
bitreverse #(WIDTH) brA(.a(A), .b(RevA));
|
||||
bitreverse #(WIDTH) brB(.a(B), .b(RevB));
|
||||
|
||||
//NOTE: Is it better to mux the inputs into a SINGLE clmul, or to instantiate 3 clmuls and mux their results?
|
||||
//current implementation CP goes MUX -> CLMUL -> MUX -> RESULT
|
||||
//alternate could have CLMUL * 3 -> MUX -> MUX
|
||||
always_comb begin
|
||||
casez (Funct3)
|
||||
3'b001: begin //clmul
|
||||
X = A;
|
||||
Y = B;
|
||||
x = A;
|
||||
y = B;
|
||||
end
|
||||
3'b011: begin //clmulh
|
||||
X = {RevA[WIDTH-2:0], {1'b0}};
|
||||
Y = {{1'b0}, RevB[WIDTH-2:0]};
|
||||
x = {RevA[WIDTH-2:0], {1'b0}};
|
||||
y = {{1'b0}, RevB[WIDTH-2:0]};
|
||||
end
|
||||
3'b010: begin //clmulr
|
||||
X = {A[WIDTH-2:0], {1'b0}};
|
||||
Y = B;
|
||||
x = RevA;
|
||||
y = RevB;
|
||||
end
|
||||
default: begin
|
||||
X = 0;
|
||||
Y = 0;
|
||||
x = 0;
|
||||
y = 0;
|
||||
end
|
||||
endcase
|
||||
|
||||
end
|
||||
clmul clm(.A(X), .B(Y), .ClmulResult(ClmulResult));
|
||||
bitreverse brClmulResult(.a(ClmulResult), .b(RevClmulResult));
|
||||
clmul #(WIDTH) clm(.A(x), .B(y), .ClmulResult(ClmulResult));
|
||||
bitreverse #(WIDTH) brClmulResult(.a(ClmulResult), .b(RevClmulResult));
|
||||
|
||||
assign ZBCResult = (Funct3 == 3'b011) ? RevClmulResult : ClmulResult;
|
||||
assign ZBCResult = (Funct3 == 3'b011 || Funct3 == 3'b010) ? RevClmulResult : ClmulResult;
|
||||
|
||||
|
||||
endmodule
|
Loading…
Reference in New Issue
Block a user