pydata / xarray

N-D labeled arrays and datasets in Python
https://xarray.dev
Apache License 2.0
3.65k stars 1.09k forks source link

Increase the usage of augmented assignment statements #5997

Closed elfring closed 5 months ago

elfring commented 3 years ago

:eyes: Some source code analysis tools can help to find opportunities for improving software components. :thought_balloon: I propose to increase the usage of augmented assignment statements accordingly.

Would you like to integrate anything from a transformation result which can be generated by a command like the following? (:point_right: Please check also for questionable change suggestions because of an evolving search pattern.)

lokal$ perl -p -i.orig -0777 -e 's/^(?<indentation>\s+)(?<target>\S+)\s*=\s*\k<target>[ \t]*(?<operator>[+\-%&|^@]|\*\*?|\/\/?|<<|>>)/$+{indentation}$+{target} $+{operator}=/gm' $(find ~/Projekte/xarray/lokal -name '*.py')
max-sixty commented 3 years ago

Sounds reasonable if others agree. Note that that pattern has some false positives around passing kwargs (e.g. f(x=x+1)) so you'd need to curate them.

shoyer commented 3 years ago

I'm hesitant here -- augmented assignment can have different semantics (e.g., inplace updates on Xarray and NumPy objects), and I suspect that in most cases in Xarray we are already using the appropriate operators when it makes a difference for performance.

I don't think reviewing such a change would be good use of limited reviewer time. Of course, feel free to increase usage of augmented assignment when you are editing nearby code and understand the full implications of the change.

max-sixty commented 3 years ago

Thanks @shoyer

elfring commented 3 years ago

:thought_balloon: Can you follow the rationale of the Python enhancement proposal 203 (from 2000-07-13)?

elfring commented 3 years ago

:thought_balloon: Can change suggestions like the following trigger further constructive software development considerations?

diff --git a/xarray/coding/cftime_offsets.py b/xarray/coding/cftime_offsets.py
index 729f15bb..ab0fd11f 100644
--- a/xarray/coding/cftime_offsets.py
+++ b/xarray/coding/cftime_offsets.py
@@ -204,9 +204,9 @@ def _adjust_n_months(other_day, n, reference_day):
     on the day of a given date, and the reference day provided.
     """
     if n > 0 and other_day < reference_day:
-        n = n - 1
+        n -= 1
     elif n <= 0 and other_day > reference_day:
-        n = n + 1
+        n += 1
     return n

@@ -232,7 +232,7 @@ def _shift_month(date, months, day_option="start"):

     if month == 0:
         month = 12
-        delta_year = delta_year - 1
+        delta_year -= 1
     year = date.year + delta_year

     if day_option == "start":
diff --git a/xarray/coding/strings.py b/xarray/coding/strings.py
index c217cb0c..9abc25d3 100644
--- a/xarray/coding/strings.py
+++ b/xarray/coding/strings.py
@@ -112,7 +112,7 @@ class CharacterArrayCoder(VariableCoder):
                 char_dim_name = encoding.pop("char_dim_name")
             else:
                 char_dim_name = f"string{data.shape[-1]}"
-            dims = dims + (char_dim_name,)
+            dims += (char_dim_name,)
         return Variable(dims, data, attrs, encoding)

     def decode(self, variable, name=None):
diff --git a/xarray/core/accessor_str.py b/xarray/core/accessor_str.py
index 0f0a256b..23bf8d49 100644
--- a/xarray/core/accessor_str.py
+++ b/xarray/core/accessor_str.py
@@ -510,7 +510,7 @@ class StringAccessor:
         """
         sep = self._stringify(sep)
         others = tuple(self._stringify(x) for x in others)
-        others = others + (sep,)
+        others += (sep,)

         # sep will go at the end of the input arguments.
         func = lambda *x: x[-1].join(x[:-1])
diff --git a/xarray/core/concat.py b/xarray/core/concat.py
index e26c1464..1845cabb 100644
--- a/xarray/core/concat.py
+++ b/xarray/core/concat.py
@@ -407,7 +407,7 @@ def _parse_datasets(

             if dim not in dim_coords:
                 dim_coords[dim] = ds.coords[dim].variable
-        dims = dims | set(ds.dims)
+        dims |= set(ds.dims)

     return dim_coords, dims_sizes, all_coord_names, data_vars

diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py
index 00c92c03..60f5d9c0 100644
--- a/xarray/core/duck_array_ops.py
+++ b/xarray/core/duck_array_ops.py
@@ -448,7 +448,7 @@ def datetime_to_numeric(array, offset=None, datetime_unit=None, dtype=float):
     # Compute timedelta object.
     # For np.datetime64, this can silently yield garbage due to overflow.
     # One option is to enforce 1970-01-01 as the universal offset.
-    array = array - offset
+    array -= offset

     # Scalar is converted to 0d-array
     if not hasattr(array, "dtype"):
@@ -463,7 +463,7 @@ def datetime_to_numeric(array, offset=None, datetime_unit=None, dtype=float):

         # Convert to specified timedelta units.
         if datetime_unit:
-            array = array / np.timedelta64(1, datetime_unit)
+            array /= np.timedelta64(1, datetime_unit)
         return np.where(isnull(array), np.nan, array.astype(dtype))

diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py
index 185b4ae5..61c1268b 100644
--- a/xarray/core/groupby.py
+++ b/xarray/core/groupby.py
@@ -225,7 +225,7 @@ def _apply_loffset(grouper, result):
     )

     if needs_offset:
-        result.index = result.index + grouper.loffset
+        result.index += grouper.loffset

     grouper.loffset = None

diff --git a/xarray/core/nanops.py b/xarray/core/nanops.py
index c1a4d629..14bd2f7d 100644
--- a/xarray/core/nanops.py
+++ b/xarray/core/nanops.py
@@ -123,7 +123,7 @@ def _nanmean_ddof_object(ddof, value, axis=None, dtype=None, **kwargs):
         dtype = value.dtype if value.dtype.kind in ["cf"] else float

     data = np.sum(value, axis=axis, dtype=dtype, **kwargs)
-    data = data / (valid_count - ddof)
+    data /= (valid_count - ddof)
     return where_method(data, valid_count != 0)

diff --git a/xarray/core/resample_cftime.py b/xarray/core/resample_cftime.py
index 4a413902..9906836b 100644
--- a/xarray/core/resample_cftime.py
+++ b/xarray/core/resample_cftime.py
@@ -92,9 +92,9 @@ class CFTimeGrouper:
         )
         if self.loffset is not None:
             if isinstance(self.loffset, datetime.timedelta):
-                labels = labels + self.loffset
+                labels += self.loffset
             else:
-                labels = labels + to_offset(self.loffset)
+                labels += to_offset(self.loffset)

         # check binner fits data
         if index[0] < datetime_bins[0]:
@@ -211,7 +211,7 @@ def _adjust_bin_edges(datetime_bins, offset, closed, index, labels):
     )
     if is_super_daily:
         if closed == "right":
-            datetime_bins = datetime_bins + datetime.timedelta(days=1, microseconds=-1)
+            datetime_bins += datetime.timedelta(days=1, microseconds=-1)
         if datetime_bins[-2] > index.max():
             datetime_bins = datetime_bins[:-1]
             labels = labels[:-1]
@@ -259,7 +259,7 @@ def _get_range_edges(first, last, offset, closed="left", base=0):
         last = normalize_date(last)

     first = offset.rollback(first) if closed == "left" else first - offset
-    last = last + offset
+    last += offset
     return first, last

@@ -296,7 +296,7 @@ def _adjust_dates_anchored(first, last, offset, closed="right", base=0):
         adjusted to fix resampling errors.
     """

-    base = base % offset.n
+    base %= offset.n
     start_day = normalize_date(first)
     base_td = type(offset)(n=base).as_timedelta()
     start_day += base_td
diff --git a/xarray/plot/utils.py b/xarray/plot/utils.py
index a49302f7..8526d774 100644
--- a/xarray/plot/utils.py
+++ b/xarray/plot/utils.py
@@ -637,7 +637,7 @@ def _ensure_plottable(*args):
     other_types = [datetime]
     if cftime is not None:
         cftime_datetime_types = [cftime.datetime]
-        other_types = other_types + cftime_datetime_types
+        other_types += cftime_datetime_types
     else:
         cftime_datetime_types = []
     for x in args:
diff --git a/xarray/tests/test_cftimeindex.py b/xarray/tests/test_cftimeindex.py
index 619fb0ac..1ca2b0ab 100644
--- a/xarray/tests/test_cftimeindex.py
+++ b/xarray/tests/test_cftimeindex.py
@@ -790,7 +790,7 @@ def test_cftimeindex_sub_timedelta(index):
     ]
     expected = CFTimeIndex(expected_dates)
     result = index + timedelta(days=2)
-    result = result - timedelta(days=1)
+    result -= timedelta(days=1)
     assert result.equals(expected)
     assert isinstance(result, CFTimeIndex)

@@ -811,7 +811,7 @@ def test_cftimeindex_sub_timedelta_array(index, other):
     ]
     expected = CFTimeIndex(expected_dates)
     result = index + timedelta(days=2)
-    result = result - other
+    result -= other
     assert result.equals(expected)
     assert isinstance(result, CFTimeIndex)

diff --git a/xarray/tests/test_computation.py b/xarray/tests/test_computation.py
index 22a3efce..38a8ecbd 100644
--- a/xarray/tests/test_computation.py
+++ b/xarray/tests/test_computation.py
@@ -304,7 +304,7 @@ def test_apply_input_core_dimension() -> None:
     def multiply(*args):
         val = args[0]
         for arg in args[1:]:
-            val = val * arg
+            val *= arg
         return val

     # regression test for GH:2341
diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py
index 3b962cb2..9de35a71 100644
--- a/xarray/tests/test_dask.py
+++ b/xarray/tests/test_dask.py
@@ -958,7 +958,7 @@ def build_dask_array(name):
 )
 def test_persist_Dataset(persist):
     ds = Dataset({"foo": ("x", range(5)), "bar": ("x", range(5))}).chunk()
-    ds = ds + 1
+    ds += 1
     n = len(ds.foo.data.dask)

     ds2 = persist(ds)
diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py
index cdb8382c..971f0c0d 100644
--- a/xarray/tests/test_dataset.py
+++ b/xarray/tests/test_dataset.py
@@ -3546,7 +3546,7 @@ class TestDataset:
             {"var": ("x", [1, 2, 3])},
             coords={"x": [0, 1, 2], "z1": ("x", [1, 2, 3]), "z2": ("x", [1, 2, 3])},
         )
-        ds["var"] = ds["var"] * 2
+        ds["var"] *= 2
         assert np.allclose(ds["var"], [2, 4, 6])

     def test_setitem_align_new_indexes(self):
diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py
index d48726e8..28afc0fb 100644
--- a/xarray/tests/test_groupby.py
+++ b/xarray/tests/test_groupby.py
@@ -998,7 +998,7 @@ class TestDataArrayGroupBy:

     def test_groupby_map_changes_metadata(self):
         def change_metadata(x):
-            x.coords["x"] = x.coords["x"] * 2
+            x.coords["x"] *= 2
             x.attrs["fruit"] = "lemon"
             return x

diff --git a/xarray/tests/test_units.py b/xarray/tests/test_units.py
index f36143c5..ef735a17 100644
--- a/xarray/tests/test_units.py
+++ b/xarray/tests/test_units.py
@@ -515,7 +515,7 @@ def test_align_dataarray(value, variant, unit, error, dtype):
     data_array1 = xr.DataArray(data=array1, coords=coords1, dims=("x", "y"))
     data_array2 = xr.DataArray(data=array2, coords=coords2, dims=("x", "y"))

-    fill_value = fill_value * data_unit2
+    fill_value *= data_unit2
     func = function(xr.align, join="outer", fill_value=fill_value)
     if error is not None and (value != dtypes.NA or isinstance(fill_value, Quantity)):
         with pytest.raises(error):
@@ -619,7 +619,7 @@ def test_align_dataset(value, unit, variant, error, dtype):
     ds1 = xr.Dataset(data_vars={"a": (("x", "y"), array1)}, coords=coords1)
     ds2 = xr.Dataset(data_vars={"a": (("x", "y"), array2)}, coords=coords2)

-    fill_value = fill_value * data_unit2
+    fill_value *= data_unit2
     func = function(xr.align, join="outer", fill_value=fill_value)
     if error is not None and (value != dtypes.NA or isinstance(fill_value, Quantity)):
         with pytest.raises(error):
@@ -1416,7 +1416,7 @@ def test_where_dataarray(fill_value, unit, error, dtype):

     x = xr.DataArray(data=array, dims="x")
     cond = x < 5 * unit_registry.m
-    fill_value = fill_value * unit
+    fill_value *= unit

     if error is not None and not (
         np.isnan(fill_value) and not isinstance(fill_value, Quantity)
@@ -1460,7 +1460,7 @@ def test_where_dataset(fill_value, unit, error, dtype):

     ds = xr.Dataset(data_vars={"a": ("x", array1), "b": ("x", array2)})
     cond = array1 < 2 * unit_registry.m
-    fill_value = fill_value * unit
+    fill_value *= unit

     if error is not None and not (
         np.isnan(fill_value) and not isinstance(fill_value, Quantity)
shoyer commented 3 years ago

No, thanks. Some of these are on mutable types like NumPy arrays and are converting them to modify objects in place. This could introduce bugs.

elfring commented 3 years ago

This could introduce bugs.