Unidata / MetPy

MetPy is a collection of tools in Python for reading, visualizing and performing calculations with weather data.
https://unidata.github.io/MetPy/
BSD 3-Clause "New" or "Revised" License
1.25k stars 416 forks source link

Missing Contour Labels #2189

Open sgdecker opened 2 years ago

sgdecker commented 2 years ago

What went wrong?

I expect the code below to label each of the contours, but the zero contour is unlabeled, as is the 2 contour in Colorado: The output:

0.19.0.post1
3.4.3

missing_labels

Operating System

Linux

Version

1.1.0

Python Version

3.9.6

Code to Reproduce

from datetime import datetime, timedelta

import xarray as xr
from xarray.backends import NetCDF4DataStore
from metpy.units import units
from metpy.plots import ContourPlot, MapPanel, PanelContainer
import metpy.calc as mpcalc
from siphon.catalog import TDSCatalog
import cartopy
import matplotlib

print(cartopy.__version__)
print(matplotlib.__version__)

def get_gfs1deg(init_time, valid_time):
    """Obtain GFS 1-degree data."""
    ymd = init_time.strftime('%Y%m%d')
    hr = init_time.strftime('%H')
    filename = f'{ymd}_{hr}00.grib2'
    cat_name = ('https://thredds.ucar.edu/thredds/catalog/grib/NCEP/GFS/'
                'Global_onedegree_noaaport/GFS_Global_onedeg_noaaport_' + filename +
                '/catalog.xml')
    ds_name = 'GFS_Global_onedeg_noaaport_' + filename
    # levelPa = 100 * level

    cat = TDSCatalog(cat_name)
    ds = cat.datasets[ds_name]
    ncss = ds.subset()
    query = ncss.query()
    query.time(valid_time).variables('all')
    nc = ncss.get_data(query)
    data = xr.open_dataset(NetCDF4DataStore(nc)).metpy.parse_cf()
    return data.metpy.quantify()

init_time = datetime(2021, 11, 3, 12)
plot_time = init_time + timedelta(hours=3)
gfs = get_gfs1deg(init_time, plot_time)

hght = gfs['Geopotential_height_isobaric'].metpy.sel(vertical=850*units('hPa'))[:,1:90,180:]
hght = mpcalc.smooth_gaussian(hght, 6)
ugeo, vgeo = mpcalc.geostrophic_wind(hght)
vor = mpcalc.vorticity(ugeo, vgeo)

cp = ContourPlot()
cp.data = vor
cp.time = plot_time
cp.contours = list(range(-12, 40, 2))
cp.scale = 1e5
cp.linecolor = 'green'
cp.linestyle = 'solid'
cp.clabels = True

panel = MapPanel()
panel.area = [-105, -90, 30, 40]
panel.projection = 'lcc'
panel.layers = ['states', 'coastline', 'borders']
panel.title = f'850-mb Geo. Rel. Vort. (x10^-5 s^-1) at {plot_time}'
panel.plots = [cp]

pc = PanelContainer()
pc.size = (15, 12)
pc.panels = [panel]
pc.save('missing_labels.png')
dopplershift commented 2 years ago

@sgdecker Can you try to reproduce without declarative? I'm pretty sure this is probably a matplotlib issue, not a MetPy one.

sgdecker commented 2 years ago

No issues with a bare-bones plot:

from datetime import datetime, timedelta

import matplotlib.pyplot as plt
import xarray as xr
from xarray.backends import NetCDF4DataStore
from metpy.units import units
import metpy.calc as mpcalc
from siphon.catalog import TDSCatalog

def get_gfs1deg(init_time, valid_time):
    """Obtain GFS 1-degree data."""
    ymd = init_time.strftime('%Y%m%d')
    hr = init_time.strftime('%H')
    filename = f'{ymd}_{hr}00.grib2'
    cat_name = ('https://thredds.ucar.edu/thredds/catalog/grib/NCEP/GFS/'
                'Global_onedegree_noaaport/GFS_Global_onedeg_noaaport_' + filename +
                '/catalog.xml')
    ds_name = 'GFS_Global_onedeg_noaaport_' + filename
    # levelPa = 100 * level

    cat = TDSCatalog(cat_name)
    ds = cat.datasets[ds_name]
    ncss = ds.subset()
    query = ncss.query()
    query.time(valid_time).variables('all')
    nc = ncss.get_data(query)
    data = xr.open_dataset(NetCDF4DataStore(nc)).metpy.parse_cf()
    return data.metpy.quantify()

init_time = datetime(2021, 11, 3, 12)
plot_time = init_time + timedelta(hours=3)
gfs = get_gfs1deg(init_time, plot_time)

hght = gfs['Geopotential_height_isobaric'].metpy.sel(vertical=850*units('hPa'))[:,1:90,180:]
hght = mpcalc.smooth_gaussian(hght, 6)
ugeo, vgeo = mpcalc.geostrophic_wind(hght)
vor = mpcalc.vorticity(ugeo, vgeo).squeeze()
vor *= 1e5
area = {'lat': range(30, 42), 'lon': range(254, 272)}
vor = vor.sel(area)

fig, ax = plt.subplots(figsize=(15,12))
c = ax.contour(vor, levels=range(-12, 40, 2), colors='green', linestyles='solid')
ax.clabel(c)
ax.set_title(f'850-mb Geo. Rel. Vort. (x10^-5 s^-1) at {plot_time}')
fig.savefig('missing_labels1.png')

missing_labels1

I'll add Cartopy to the mix when I get a chance.

kgoebber commented 2 years ago

There is a small difference between the two plots. I don't believe we have the declarative syntax actually subset the data, so there are contours being drawn beyond the view window. In your second example you are subsetting the data prior to plotting, so if you were to do a similar subset prior to plotting for declarative do you get the labels to appear as desired?

sgdecker commented 2 years ago

Indeed, that is a key difference. I'll try the workaround for declarative in a bit, but here's the reproduced bug with matplotlib:

from datetime import datetime, timedelta

import matplotlib.pyplot as plt
import xarray as xr
from xarray.backends import NetCDF4DataStore
from metpy.units import units
import metpy.calc as mpcalc
from siphon.catalog import TDSCatalog
import cartopy
import matplotlib

def get_gfs1deg(init_time, valid_time):
    """Obtain GFS 1-degree data."""
    ymd = init_time.strftime('%Y%m%d')
    hr = init_time.strftime('%H')
    filename = f'{ymd}_{hr}00.grib2'
    cat_name = ('https://thredds.ucar.edu/thredds/catalog/grib/NCEP/GFS/'
                'Global_onedegree_noaaport/GFS_Global_onedeg_noaaport_' + filename +
                '/catalog.xml')
    ds_name = 'GFS_Global_onedeg_noaaport_' + filename
    # levelPa = 100 * level

    cat = TDSCatalog(cat_name)
    ds = cat.datasets[ds_name]
    ncss = ds.subset()
    query = ncss.query()
    query.time(valid_time).variables('all')
    nc = ncss.get_data(query)
    data = xr.open_dataset(NetCDF4DataStore(nc)).metpy.parse_cf()
    return data.metpy.quantify()

init_time = datetime(2021, 11, 3, 12)
plot_time = init_time + timedelta(hours=3)
gfs = get_gfs1deg(init_time, plot_time)

hght = gfs['Geopotential_height_isobaric'].metpy.sel(vertical=850*units('hPa'))[:,1:90,180:]
hght = mpcalc.smooth_gaussian(hght, 6)
ugeo, vgeo = mpcalc.geostrophic_wind(hght)
vor = mpcalc.vorticity(ugeo, vgeo).squeeze()
vor *= 1e5

fig, ax = plt.subplots(figsize=(15,12))

ax.set_xlim(74, 92)
ax.set_ylim(59, 48)
c = ax.contour(vor, levels=range(-12, 40, 2), colors='green', linestyles='solid')
ax.clabel(c)

ax.set_title(f'850-mb Geo. Rel. Vort. (x10^-5 s^-1) at {plot_time}')
fig.savefig('missing_labels2.png')

missing_labels2

I'll have to see if this is a listed issue for matplotlib, or is this a "feature" of set_x(y)lim?

dopplershift commented 2 years ago

My first instinct here is that matplotlib is contouring all of the data and inserting the labels as makes sense there, then using the limits to set a window over the data. This feel familiar actually...

dopplershift commented 2 years ago

I think https://github.com/matplotlib/matplotlib/issues/2973 is what I was thinking of.

sgdecker commented 2 years ago

Yes, although the interesting thing is here I set the axis limits before drawing the contours (and labels), and the issue still happens.

sgdecker commented 2 years ago

As @kgoebber suggested, doing the subset first is a viable workaround:

from datetime import datetime, timedelta

import xarray as xr
from xarray.backends import NetCDF4DataStore
from metpy.units import units
from metpy.plots import ContourPlot, MapPanel, PanelContainer
import metpy.calc as mpcalc
from siphon.catalog import TDSCatalog
import cartopy
import matplotlib

def get_gfs1deg(init_time, valid_time):
    """Obtain GFS 1-degree data."""
    ymd = init_time.strftime('%Y%m%d')
    hr = init_time.strftime('%H')
    filename = f'{ymd}_{hr}00.grib2'
    cat_name = ('https://thredds.ucar.edu/thredds/catalog/grib/NCEP/GFS/'
                'Global_onedegree_noaaport/GFS_Global_onedeg_noaaport_' + filename +
                '/catalog.xml')
    ds_name = 'GFS_Global_onedeg_noaaport_' + filename
    # levelPa = 100 * level

    cat = TDSCatalog(cat_name)
    ds = cat.datasets[ds_name]
    ncss = ds.subset()
    query = ncss.query()
    query.time(valid_time).variables('all')
    nc = ncss.get_data(query)
    data = xr.open_dataset(NetCDF4DataStore(nc)).metpy.parse_cf()
    return data.metpy.quantify()

init_time = datetime(2021, 11, 3, 12)
plot_time = init_time + timedelta(hours=3)
gfs = get_gfs1deg(init_time, plot_time)

hght = gfs['Geopotential_height_isobaric'].metpy.sel(vertical=850*units('hPa'))[:,1:90,180:]
hght = mpcalc.smooth_gaussian(hght, 6)
ugeo, vgeo = mpcalc.geostrophic_wind(hght)
vor = mpcalc.vorticity(ugeo, vgeo)
area = {'lat': range(29, 42), 'lon': range(254, 273)}
vor = vor.sel(area)

cp = ContourPlot()
cp.data = vor
cp.time = plot_time
cp.contours = list(range(-12, 40, 2))
cp.scale = 1e5
cp.linecolor = 'green'
cp.linestyle = 'solid'
cp.clabels = True

panel = MapPanel()
panel.area = [-105, -90, 30, 40]
panel.projection = 'lcc'
panel.layers = ['states', 'coastline', 'borders']
panel.title = f'850-mb Geo. Rel. Vort. (x10^-5 s^-1) at {plot_time}'
panel.plots = [cp]

pc = PanelContainer()
pc.size = (15, 12)
pc.panels = [panel]
pc.save('working_labels.png')

working_labels

Since the workaround leads to somewhat redundant code (calling .sel but then also specifying panel.area), perhaps this issue is evolving in the direction of a feature request, that the code behind the scenes use the area attribute to subset the data appropriately and automatically, thereby working around the matplotlib bug.

dopplershift commented 2 years ago

Well, my preference would be to fix the matplotlib bug, rather than complicate MetPy for it, but I know the actual solution might be really tricky.

I'd consider merging a PR that works around it without adding too much complexity, but for myself I'm not finding the payoff/effort ratio particularly compelling.

sgdecker commented 2 years ago

@dopplershift In the context of the original Matplotlib bug report, there is a comment that "the work around is fairly simple", but I can't figure out what that workaround is. Do you have any ideas? In other words, what modification of the code posted there generates visible contour labels for the bottom panel (other than not messing with the limits at all)?