XLS needs to reason more explicitly about the possible range of shifts

Consider the following fragment of code [pdk=asap7]

pub fn main(a: u64, b: u64) -> u64 {
    if (b > u64:4) {
        u64:0
    } else { 
      (a >> b) as u64
    }
}

The right shift is guaranteed to be in the range [0,4].

The optimized ir is

top fn __main__main(a: bits[64], b: bits[64]) -> bits[64] {
  literal.15: bits[64] = literal(value=5, id=15, pos=[(0,3,4)])
  ult.16: bits[1] = ult(b, literal.15, id=16, pos=[(0,3,4)])
  shrl.6: bits[64] = shrl(a, b, id=6, pos=[(0,6,9)])
  sign_ext.12: bits[64] = sign_ext(ult.16, new_bit_count=64, id=12, pos=[(0,3,4)])
  ret and.13: bits[64] = and(shrl.6, sign_ext.12, id=13, pos=[(0,3,4)])
}

and the generated verilog is

module main(
  input wire clk,
  input wire [63:0] a,
  input wire [63:0] b,
  output wire [63:0] out
);
  // ===== Pipe stage 0:

  // Registers for pipe stage 0:
  reg [63:0] p0_a;
  reg [63:0] p0_b;
  always @ (posedge clk) begin
    p0_a <= a;
    p0_b <= b;
  end

  // ===== Pipe stage 1:
  wire p1_ult_27_comb;
  wire [63:0] p1_shrl_28_comb;
  assign p1_ult_27_comb = p0_b < 64'h0000_0000_0000_0005;
  assign p1_shrl_28_comb = p0_b >= 64'h0000_0000_0000_0040 ? 64'h0000_0000_0000_0000 : p0_a >> p0_b;

  // Registers for pipe stage 1:
  reg p1_ult_27;
  reg [63:0] p1_shrl_28;
  always @ (posedge clk) begin
    p1_ult_27 <= p1_ult_27_comb;
    p1_shrl_28 <= p1_shrl_28_comb;
  end

  // ===== Pipe stage 2:
  wire [63:0] p2_and_34_comb;
  assign p2_and_34_comb = p1_shrl_28 & {64{p1_ult_27}};

  // Registers for pipe stage 2:
  reg [63:0] p2_and_34;
  always @ (posedge clk) begin
    p2_and_34 <= p2_and_34_comb;
  end
  assign out = p2_and_34;
endmodule

The resulting synthesis has area 130.8.

Now if we are explicit and use the code

pub fn main_truncated(a: u64, b: u64) -> u64 {
    if (b > u64:4) {
        u64:0
    } else { 
      let truncated_b = b as u3;
      (a >> truncated_b) as u64
    }
}

The optimized ir is

top fn __main_truncated__main_truncated(a: bits[64], b: bits[64]) -> bits[64] {
  literal.17: bits[64] = literal(value=5, id=17, pos=[(0,11,4)])
  truncated_b: bits[3] = bit_slice(b, start=0, width=3, id=6)
  ult.18: bits[1] = ult(b, literal.17, id=18, pos=[(0,11,4)])
  shrl.7: bits[64] = shrl(a, truncated_b, id=7, pos=[(0,15,9)])
  sign_ext.14: bits[64] = sign_ext(ult.18, new_bit_count=64, id=14, pos=[(0,11,4)])
  ret and.15: bits[64] = and(shrl.7, sign_ext.14, id=15, pos=[(0,11,4)])
}

and the verilog is

module main_truncated(
  input wire clk,
  input wire [63:0] a,
  input wire [63:0] b,
  output wire [63:0] out
);
  // ===== Pipe stage 0:

  // Registers for pipe stage 0:
  reg [63:0] p0_a;
  reg [63:0] p0_b;
  always @ (posedge clk) begin
    p0_a <= a;
    p0_b <= b;
  end

  // ===== Pipe stage 1:
  wire [2:0] p1_truncated_b_comb;
  wire p1_ult_31_comb;
  wire [63:0] p1_shrl_32_comb;
  assign p1_truncated_b_comb = p0_b[2:0];
  assign p1_ult_31_comb = p0_b < 64'h0000_0000_0000_0005;
  assign p1_shrl_32_comb = p0_a >> p1_truncated_b_comb;

  // Registers for pipe stage 1:
  reg p1_ult_31;
  reg [63:0] p1_shrl_32;
  always @ (posedge clk) begin
    p1_ult_31 <= p1_ult_31_comb;
    p1_shrl_32 <= p1_shrl_32_comb;
  end

  // ===== Pipe stage 2:
  wire [63:0] p2_and_38_comb;
  assign p2_and_38_comb = p1_shrl_32 & {64{p1_ult_31}};

  // Registers for pipe stage 2:
  reg [63:0] p2_and_38;
  always @ (posedge clk) begin
    p2_and_38 <= p2_and_38_comb;
  end
  assign out = p2_and_38;
endmodule

and the synth now has area `107.7'

So either XLS needs to be more explicit about the possible range for the shift or the synthesis tool needs to be able to reason about it. IMO this is evident at XLS level and would be good for XLS to do this as an optimization.

google / xls

XLS needs to reason more explicitly about the possible range of shifts #1059