malariagen / malariagen-data-python

Analyse MalariaGEN data from Python
https://malariagen.github.io/malariagen-data-python/latest/
MIT License
13 stars 23 forks source link

UserWarnings when using bokeh.layouts.gridplot for GWSS #521

Open jonbrenas opened 2 months ago

jonbrenas commented 2 months ago

I have solved this locally (because I thought it was messing up my plots, turns out it wasn't) and I was going to update the code.

I remembered, however, that we have a lot of very similar functions doing GWSS plotting that need to be modified in the exact same way to remove these UserWarnings and I was wondering if it wouldn't be a good idea to factor in that code to reduce the errors that my crop up with duplicated code.

alimanfoo commented 1 month ago

Thanks @jonbrenas - can you paste a snippet of code which shows how this can be resolved for a single function? Might help then to decide if/how we factor out some common code to do this for all functions.

jonbrenas commented 1 month ago

Here is my code for windowed Fsts:

def moving_window_fst(contig, window_size, cohort_queries, sample_sets=None, sizing_mode = 'stretch_width', shape_size=5, y_range = (0,1), y_ticker = [0,1], min_cohort_size = 15, title = "Windowed Fst"):
  if len(cohort_queries.keys()) > 2:
    pair_list = list(combinations(cohort_queries.keys(), 2))
    lpl = len(pair_list)
  elif len(cohort_queries.keys()) == 2:
    pair_list = [(list(cohort_queries.keys())[0], list(cohort_queries.keys())[1])]
    lpl = 5
  else:
    print('At least 2 cohort queries are needed')
    return None
  colors= bokeh.palettes.Category10[lpl]
  fst_l = []
  for p in pair_list:
    x, fst = ag3.fst_gwss(
            contig=contig,
            window_size=window_size,
            cohort1_query=cohort_queries[p[0]],
            cohort2_query=cohort_queries[p[1]],
            min_cohort_size=min_cohort_size
    )
    fst_l.append(fst)
  #print(len(fst_l))
  x_min = x[0]
  x_max = x[-1]
  x_range = bokeh.models.Range1d(x_min, x_max, bounds="auto")
  xwheel_zoom = bokeh.models.WheelZoomTool(
      dimensions="width", maintain_focus=False
  )
  xpan_tool = bokeh.models.PanTool(
      dimensions="width"
  )

  fig2, hov_tool = plot_genes(
      region=contig,
      sizing_mode=sizing_mode,
      height = 90,
      x_range=x_range,
      active_scroll=xwheel_zoom,
      active_drag=xpan_tool,
      show=False,
  )

  fig1 = bokeh.plotting.figure(
      title=title,
      tools=[
          xpan_tool,
          "xzoom_in",
          "xzoom_out",
          xwheel_zoom,
          "reset",
          "save",
          "crosshair",
      ],
      active_inspect=hov_tool,
      active_scroll=xwheel_zoom,
      active_drag=xpan_tool,
      sizing_mode=sizing_mode,
      toolbar_location="above",
      x_range=fig2.x_range,
      y_range=y_range,
  )
  for i,p in enumerate(pair_list):
      fig1.circle(
          x=x,
          y=fst_l[i],
          size=shape_size,
          line_width=1,
          line_color=colors[i],
          fill_color=None,
          legend_label= p[0] + " vs. " + p[1]
      )
      i+=1
  fig1.yaxis.axis_label = "Fst"
  fig1.yaxis.ticker = y_ticker 
  ag3._bokeh_style_genome_xaxis(fig1, contig)

  # combine plots into a single figure
  fig = bokeh.layouts.gridplot(
      [fig1, fig2],
      ncols=1,
      toolbar_location="above",
      merge_tools=True,
      sizing_mode=sizing_mode,
  )

  bokeh.plotting.show(fig)
  return fst_l

I had to redefine plot_genes() so that I could set the active_inspect. It looks like this:

def plot_genes(region,
    sizing_mode,
    start = None,
    end = None,
    width = None,
    height = None,
    show = True,
    toolbar_location = 'above',
    active_scroll=None,
    active_drag=None,
    x_range = None,
    title = None,
    output_backend = "webgl",
):

    resolved_region = malariagen_data.util.parse_single_region(ag3, region)

    contig = resolved_region.contig
    start = resolved_region.start
    end = resolved_region.end   

    if start is None:
        start = 0
    if end is None:
        end = len(ag3.genome_sequence(contig))

    if x_range is None:
        x_range = bokeh.models.Range1d(start, end, bounds="auto")

    data, tooltips = ag3._plot_genes_setup_data(region=resolved_region)

    data["bottom"] = np.where(data["strand"] == "+", 1, 0)
    data["top"] = data["bottom"] + 0.8

    data.fillna("", inplace=True)

    xwheel_zoom = bokeh.models.WheelZoomTool(
        dimensions="width", maintain_focus=False
    )
    hov_tool = bokeh.models.HoverTool(
        tooltips=tooltips
    )
    fig = bokeh.plotting.figure(
        title=title,
        sizing_mode=sizing_mode,
        width=width,
        height=height,
        tools=[
            "xpan",
            "xzoom_in",
            "xzoom_out",
            xwheel_zoom,
            "reset",
            "tap",
            hov_tool,
        ],
        toolbar_location=toolbar_location,
        active_scroll=active_scroll,
        active_inspect=hov_tool,
        active_drag=active_drag,
        tooltips=tooltips,
        x_range=x_range,
        y_range=bokeh.models.Range1d(-0.4, 2.2),
        output_backend=output_backend,
    )

    url = "https://vectorbase.org/vectorbase/app/record/gene/@ID"
    taptool = fig.select(type=bokeh.models.TapTool)
    taptool.callback = bokeh.models.OpenURL(url=url)

    fig.quad(
        bottom="bottom",
        top="top",
        left="start",
        right="end",
        source=data,
        line_width=0,
    )

    fig.ygrid.visible = False
    yticks = [0.4, 1.4]
    yticklabels = ["-", "+"]
    fig.yaxis.ticker = yticks
    fig.yaxis.major_label_overrides = {k: v for k, v in zip(yticks, yticklabels)}
    fig.yaxis.axis_label = "Genes"
    ag3._bokeh_style_genome_xaxis(fig, contig)

    if show:  # pragma: no cover
        bokeh.plotting.show(fig)
        return None, hov_tool
    else:
        return fig, hov_tool
alimanfoo commented 1 month ago

xref https://github.com/holoviz/holoviews/issues/5869

alimanfoo commented 1 month ago

Not immediately obvious what the best way forward is.