Optimized subwordread to reduce critical path from 8 muxes to 5 muxes + 1 AND gate.

This commit is contained in:
Ross Thompson 2021-08-12 13:36:33 -05:00
parent 3b327c949f
commit 618cc18903

View File

@ -37,9 +37,70 @@ module subwordread (
logic [7:0] ByteM;
logic [15:0] HalfwordM;
logic [`XLEN-1:0] offset0, offset1, offset2, offset3;
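// HSIZED[3] is the unsigned-load bit: when set, the bits above the loaded
// field are zero-filled; when clear, the field is sign-extended.
// HSIZED[1:0] is the size of the memory access:
// 0 = byte, 1 = halfword, 2 = word, 3 = doubleword.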
generate
if (`XLEN == 64) begin
// more complex solution, but faster
// 5 mux + 1 AND gate in series.
// Fast 64-bit subword read.
// One candidate result is built for every byte offset inside the 64-bit
// read data (offset0 .. offset7); HADDRD[2:0] then selects among them.
// Each candidate places the addressed field in the low bits and fills the
// upper bits with either zeros (HSIZED[3] = 1) or the field's top bit
// (HSIZED[3] = 0). The fill is written as a single AND of the sign bit with
// SignedLoad, which is where the "1 AND gate" in the author's
// "5 mux + 1 AND gate in series" estimate comes from.
logic [`XLEN-1:0] offset4, offset5, offset6, offset7;
logic             SignedLoad;  // 1: replicate the sign bit, 0: zero-fill

assign SignedLoad = ~HSIZED[3];

// Byte offset 0: every access size is possible here.
always_comb
  case (HSIZED[1:0])
    2'd3: offset0 = HRDATA;                                         // ld
    2'd2: offset0 = {{32{SignedLoad & HRDATA[31]}}, HRDATA[31:0]};  // lw(u)
    2'd1: offset0 = {{48{SignedLoad & HRDATA[15]}}, HRDATA[15:0]};  // lh(u)
    2'd0: offset0 = {{56{SignedLoad & HRDATA[7]}},  HRDATA[7:0]};   // lb(u)
  endcase

// Byte offset 1: byte access only.
assign offset1 = {{56{SignedLoad & HRDATA[15]}}, HRDATA[15:8]};     // lb(u)

// Byte offset 2: halfword or byte, distinguished by HSIZED[0] alone.
always_comb
  case (HSIZED[0])
    1'b1: offset2 = {{48{SignedLoad & HRDATA[31]}}, HRDATA[31:16]}; // lh(u)
    1'b0: offset2 = {{56{SignedLoad & HRDATA[23]}}, HRDATA[23:16]}; // lb(u)
  endcase

// Byte offset 3: byte access only.
assign offset3 = {{56{SignedLoad & HRDATA[31]}}, HRDATA[31:24]};    // lb(u)

// Byte offset 4: word, halfword or byte. A doubleword access at this
// offset is unaligned (the original comment notes it will cause a fault);
// it is given the same value as the word access.
always_comb
  case (HSIZED[1:0])
    2'd3,
    2'd2: offset4 = {{32{SignedLoad & HRDATA[63]}}, HRDATA[63:32]}; // ld(u) unaligned / lw(u)
    2'd1: offset4 = {{48{SignedLoad & HRDATA[47]}}, HRDATA[47:32]}; // lh(u)
    2'd0: offset4 = {{56{SignedLoad & HRDATA[39]}}, HRDATA[39:32]}; // lb(u)
  endcase

// Byte offset 5: byte access only.
assign offset5 = {{56{SignedLoad & HRDATA[47]}}, HRDATA[47:40]};    // lb(u)

// Byte offset 6: halfword or byte, distinguished by HSIZED[0] alone.
always_comb
  case (HSIZED[0])
    1'b1: offset6 = {{48{SignedLoad & HRDATA[63]}}, HRDATA[63:48]}; // lh(u)
    1'b0: offset6 = {{56{SignedLoad & HRDATA[55]}}, HRDATA[55:48]}; // lb(u)
  endcase

// Byte offset 7: byte access only.
assign offset7 = {{56{SignedLoad & HRDATA[63]}}, HRDATA[63:56]};    // lb(u)

// Address mux: choose the candidate for the addressed byte offset.
always_comb
  case (HADDRD[2:0])
    3'd0: HRDATAMasked = offset0;
    3'd1: HRDATAMasked = offset1;
    3'd2: HRDATAMasked = offset2;
    3'd3: HRDATAMasked = offset3;
    3'd4: HRDATAMasked = offset4;
    3'd5: HRDATAMasked = offset5;
    3'd6: HRDATAMasked = offset6;
    3'd7: HRDATAMasked = offset7;
  endcase
// easier to understand but slower
// 8 muxes in series
/* -----\/----- EXCLUDED -----\/-----
// ByteMe mux
always_comb
case(HADDRD[2:0])
@ -82,8 +143,40 @@ module subwordread (
3'b110: HRDATAMasked = {32'b0, WordM[31:0]}; // lwu
default: HRDATAMasked = HRDATA; // Shouldn't happen
endcase
-----/\----- EXCLUDED -----/\----- */
end else begin // 32-bit
// Fast (but more complex) 32-bit subword read.
// One candidate result is built for every byte offset inside the 32-bit
// read data (offset0 .. offset3); HADDRD[1:0] then selects among them.
// Upper bits are zero-filled when HSIZED[3] = 1 and filled with the field's
// top bit when HSIZED[3] = 0, expressed as an AND of that bit with SignedLoad.
logic SignedLoad;  // 1: replicate the sign bit, 0: zero-fill

assign SignedLoad = ~HSIZED[3];

// Byte offset 0: every access size is possible here.
always_comb
  case (HSIZED[1:0])
    2'd3: offset0 = HRDATA;                                         // ld illegal
    2'd2: offset0 = HRDATA[31:0];                                   // lw
    2'd1: offset0 = {{16{SignedLoad & HRDATA[15]}}, HRDATA[15:0]};  // lh(u)
    2'd0: offset0 = {{24{SignedLoad & HRDATA[7]}},  HRDATA[7:0]};   // lb(u)
  endcase

// Byte offset 1: byte access only.
assign offset1 = {{24{SignedLoad & HRDATA[15]}}, HRDATA[15:8]};     // lb(u)

// Byte offset 2: halfword or byte, distinguished by HSIZED[0] alone.
always_comb
  case (HSIZED[0])
    1'b1: offset2 = {{16{SignedLoad & HRDATA[31]}}, HRDATA[31:16]}; // lh(u)
    1'b0: offset2 = {{24{SignedLoad & HRDATA[23]}}, HRDATA[23:16]}; // lb(u)
  endcase

// Byte offset 3: byte access only.
assign offset3 = {{24{SignedLoad & HRDATA[31]}}, HRDATA[31:24]};    // lb(u)

// Address mux: choose the candidate for the addressed byte offset.
always_comb
  case (HADDRD[1:0])
    2'd0: HRDATAMasked = offset0;
    2'd1: HRDATAMasked = offset1;
    2'd2: HRDATAMasked = offset2;
    2'd3: HRDATAMasked = offset3;
  endcase
// slow but easier to understand
/* -----\/----- EXCLUDED -----\/-----
always_comb
case(HADDRD[1:0])
2'b00: ByteM = HRDATA[7:0];
@ -109,6 +202,7 @@ module subwordread (
3'b101: HRDATAMasked = {16'b0, HalfwordM[15:0]}; // lhu
default: HRDATAMasked = HRDATA;
endcase
-----/\----- EXCLUDED -----/\----- */
end
endgenerate
endmodule