From 618cc18903c1e2208fae73a45073b60ede60f811 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Thu, 12 Aug 2021 13:36:33 -0500 Subject: [PATCH] Optimized subwordread to reduce critical path from 8 muxes to 5 muxes + 1 AND gate. --- wally-pipelined/src/lsu/subwordread.sv | 94 ++++++++++++++++++++++++++ 1 file changed, 94 insertions(+) diff --git a/wally-pipelined/src/lsu/subwordread.sv b/wally-pipelined/src/lsu/subwordread.sv index 352a33c05..892cb2c0e 100644 --- a/wally-pipelined/src/lsu/subwordread.sv +++ b/wally-pipelined/src/lsu/subwordread.sv @@ -37,9 +37,70 @@ module subwordread ( logic [7:0] ByteM; logic [15:0] HalfwordM; + + logic [`XLEN-1:0] offset0, offset1, offset2, offset3; + // HSIZED[3] is the unsigned bit: when set, the upper bits are zero-filled instead of sign-extended. + // HSIZED[1:0] is the size of the memory access (0: byte, 1: halfword, 2: word, 3: doubleword). generate if (`XLEN == 64) begin + // more complex solution, but faster: each offsetN holds the already-extended result for byte address N. + // 5 mux + 1 AND gate in series. + logic [`XLEN-1:0] offset4, offset5, offset6, offset7; + + always_comb + case(HSIZED[1:0]) + 3: offset0 = HRDATA; //ld + 2: offset0 = HSIZED[3] ? {{32'b0}, HRDATA[31:0]} : {{32{HRDATA[31]}}, HRDATA[31:0]}; //lw(u) + 1: offset0 = HSIZED[3] ? {{48'b0}, HRDATA[15:0]} : {{48{HRDATA[15]}}, HRDATA[15:0]}; //lh(u) + 0: offset0 = HSIZED[3] ? {{56'b0}, HRDATA[7:0]} : {{56{HRDATA[7]}}, HRDATA[7:0]}; //lb(u) + endcase + + assign offset1 = HSIZED[3] ? {{56'b0}, HRDATA[15:8]} : {{56{HRDATA[15]}}, HRDATA[15:8]}; //lb(u) + + always_comb + case(HSIZED[0]) + 1: offset2 = HSIZED[3] ? {{48'b0}, HRDATA[31:16]} : {{48{HRDATA[31]}}, HRDATA[31:16]};//lh(u) + 0: offset2 = HSIZED[3] ? {{56'b0}, HRDATA[23:16]} : {{56{HRDATA[23]}}, HRDATA[23:16]};//lb(u) + endcase + + assign offset3 = HSIZED[3] ? {{56'b0}, HRDATA[31:24]} : {{56{HRDATA[31]}}, HRDATA[31:24]};//lb(u) + + always_comb + case(HSIZED[1:0]) + 3: offset4 = HSIZED[3] ? {{32'b0}, HRDATA[63:32]} : {{32{HRDATA[63]}}, HRDATA[63:32]};//ld: not naturally aligned at byte offset 4; same selection as lw(u) // unaligned will cause fault. + 2: offset4 = HSIZED[3] ? 
{{32'b0}, HRDATA[63:32]} : {{32{HRDATA[63]}}, HRDATA[63:32]};//lw(u) + 1: offset4 = HSIZED[3] ? {{48'b0}, HRDATA[47:32]} : {{48{HRDATA[47]}}, HRDATA[47:32]};//lh(u) + 0: offset4 = HSIZED[3] ? {{56'b0}, HRDATA[39:32]} : {{56{HRDATA[39]}}, HRDATA[39:32]};//lb(u) + endcase + + assign offset5 = HSIZED[3] ? {{56'b0}, HRDATA[47:40]} : {{56{HRDATA[47]}}, HRDATA[47:40]};//lb(u) + + always_comb + case(HSIZED[0]) + 1: offset6 = HSIZED[3] ? {{48'b0}, HRDATA[63:48]} : {{48{HRDATA[63]}}, HRDATA[63:48]};//lh(u) + 0: offset6 = HSIZED[3] ? {{56'b0}, HRDATA[55:48]} : {{56{HRDATA[55]}}, HRDATA[55:48]};//lb(u) + endcase + + assign offset7 = HSIZED[3] ? {{56'b0}, HRDATA[63:56]} : {{56{HRDATA[63]}}, HRDATA[63:56]};//lb(u) + + // address mux: HADDRD[2:0] selects which offsetN drives HRDATAMasked + always_comb + case(HADDRD[2:0]) + 0: HRDATAMasked = offset0; + 1: HRDATAMasked = offset1; + 2: HRDATAMasked = offset2; + 3: HRDATAMasked = offset3; + 4: HRDATAMasked = offset4; + 5: HRDATAMasked = offset5; + 6: HRDATAMasked = offset6; + 7: HRDATAMasked = offset7; + endcase + + // easier to understand but slower (previous implementation, commented out below) + // 8 muxes in series +/* -----\/----- EXCLUDED -----\/----- // ByteMe mux always_comb case(HADDRD[2:0]) @@ -82,8 +143,40 @@ module subwordread ( 3'b110: HRDATAMasked = {32'b0, WordM[31:0]}; // lwu default: HRDATAMasked = HRDATA; // Shouldn't happen endcase + -----/\----- EXCLUDED -----/\----- */ end else begin // 32-bit // byte mux + + // fast but more complex (same scheme as the 64-bit branch, with four byte offsets) + always_comb + case(HSIZED[1:0]) + 3: offset0 = HRDATA; //ld illegal + 2: offset0 = HRDATA[31:0]; //lw + 1: offset0 = HSIZED[3] ? {{16'b0}, HRDATA[15:0]} : {{16{HRDATA[15]}}, HRDATA[15:0]}; //lh(u) + 0: offset0 = HSIZED[3] ? {{24'b0}, HRDATA[7:0]} : {{24{HRDATA[7]}}, HRDATA[7:0]}; //lb(u) + endcase + + assign offset1 = HSIZED[3] ? {{24'b0}, HRDATA[15:8]} : {{24{HRDATA[15]}}, HRDATA[15:8]}; //lb(u) + + always_comb + case(HSIZED[0]) + 1: offset2 = HSIZED[3] ? {{16'b0}, HRDATA[31:16]} : {{16{HRDATA[31]}}, HRDATA[31:16]};//lh(u) + 0: offset2 = HSIZED[3] ? 
{{24'b0}, HRDATA[23:16]} : {{24{HRDATA[23]}}, HRDATA[23:16]};//lb(u) + endcase + + assign offset3 = HSIZED[3] ? {{24'b0}, HRDATA[31:24]} : {{24{HRDATA[31]}}, HRDATA[31:24]};//lb(u) + + // address mux: HADDRD[1:0] selects which offsetN drives HRDATAMasked + always_comb + case(HADDRD[1:0]) + 0: HRDATAMasked = offset0; + 1: HRDATAMasked = offset1; + 2: HRDATAMasked = offset2; + 3: HRDATAMasked = offset3; + endcase + + // slow but easier to understand (previous implementation, commented out below) +/* -----\/----- EXCLUDED -----\/----- always_comb case(HADDRD[1:0]) 2'b00: ByteM = HRDATA[7:0]; @@ -109,6 +202,7 @@ module subwordread ( 3'b101: HRDATAMasked = {16'b0, HalfwordM[15:0]}; // lhu default: HRDATAMasked = HRDATA; endcase + -----/\----- EXCLUDED -----/\----- */ end endgenerate endmodule