pyg-team / pytorch_geometric

Graph Neural Network Library for PyTorch
https://pyg.org
MIT License

CI Failures #6765

Closed by puririshi98 1 year ago

puririshi98 commented 1 year ago

🐛 Describe the bug

=========================== short test summary info ============================
FAILED test/datasets/test_elliptic.py::test_elliptic_bitcoin_dataset - TypeEr...
FAILED test/explain/metric/test_basic_metric.py::test_groundtruth_metrics - V...
FAILED test/explain/metric/test_basic_metric.py::test_perfect_groundtruth_metrics
FAILED test/explain/metric/test_basic_metric.py::test_groundtruth_true_negative
FAILED test/nn/models/test_basic_gnn.py::test_onnx - ValueError: This ORT bui...
===== 5 failed, 3701 passed, 475 skipped, 36 warnings in 262.64s (0:04:22) =====
=================================== FAILURES ===================================
________________________ test_elliptic_bitcoin_dataset _________________________

get_dataset = functools.partial(<function load_dataset at 0x7fcf7b6f9040>, '/tmp/pyg_test_datasets')

    @onlyFullTest
    def test_elliptic_bitcoin_dataset(get_dataset):
>       dataset = get_dataset(name='EllipticBitcoinDataset')

test/datasets/test_elliptic.py:6: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
test/conftest.py:41: in load_dataset
    return EllipticBitcoinDataset(path, *args, **kwargs)
/usr/local/lib/python3.8/dist-packages/torch_geometric-2.3.0-py3.8.egg/torch_geometric/datasets/elliptic.py:60: in __init__
    super().__init__(root, transform, pre_transform)
/usr/local/lib/python3.8/dist-packages/torch_geometric-2.3.0-py3.8.egg/torch_geometric/data/in_memory_dataset.py:56: in __init__
    super().__init__(root, transform, pre_transform, pre_filter, log)
/usr/local/lib/python3.8/dist-packages/torch_geometric-2.3.0-py3.8.egg/torch_geometric/data/dataset.py:94: in __init__
    self._process()
/usr/local/lib/python3.8/dist-packages/torch_geometric-2.3.0-py3.8.egg/torch_geometric/data/dataset.py:221: in _process
    self.process()
/usr/local/lib/python3.8/dist-packages/torch_geometric-2.3.0-py3.8.egg/torch_geometric/datasets/elliptic.py:90: in process
    x = torch.from_numpy(df_features.loc[:, 2:].values).to(torch.float)
/usr/local/lib/python3.8/dist-packages/pandas/core/indexing.py:873: in __getitem__
    return self._getitem_tuple(key)
/usr/local/lib/python3.8/dist-packages/pandas/core/indexing.py:1055: in _getitem_tuple
    return self._getitem_tuple_same_dim(tup)
/usr/local/lib/python3.8/dist-packages/pandas/core/indexing.py:750: in _getitem_tuple_same_dim
    retval = getattr(retval, self.name)._getitem_axis(key, axis=i)
/usr/local/lib/python3.8/dist-packages/pandas/core/indexing.py:1088: in _getitem_axis
    return self._get_slice_axis(key, axis=axis)
/usr/local/lib/python3.8/dist-packages/pandas/core/indexing.py:1122: in _get_slice_axis
    indexer = labels.slice_indexer(
/usr/local/lib/python3.8/dist-packages/pandas/core/indexes/base.py:4966: in slice_indexer
    start_slice, end_slice = self.slice_locs(start, end, step=step, kind=kind)
/usr/local/lib/python3.8/dist-packages/pandas/core/indexes/base.py:5169: in slice_locs
    start_slice = self.get_slice_bound(start, "left", kind)
/usr/local/lib/python3.8/dist-packages/pandas/core/indexes/base.py:5079: in get_slice_bound
    label = self._maybe_cast_slice_bound(label, side, kind)
/usr/local/lib/python3.8/dist-packages/pandas/core/indexes/base.py:5031: in _maybe_cast_slice_bound
    self._invalid_indexer("slice", label)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = Index([     'txId', 'time_step',           2,           3,           4,
                 5,           6,           7, ...        161,
               162,         163,         164,         165,         166],
      dtype='object', length=167)
form = 'slice', key = 2

    def _invalid_indexer(self, form: str_t, key):
        """
        Consistent invalid indexer message.
        """
>       raise TypeError(
            f"cannot do {form} indexing on {type(self).__name__} with these "
            f"indexers [{key}] of type {type(key).__name__}"
        )
E       TypeError: cannot do slice indexing on Index with these indexers [2] of type int

/usr/local/lib/python3.8/dist-packages/pandas/core/indexes/base.py:3267: TypeError
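
For context, a minimal standalone sketch of the pandas behavior that appears to trigger this failure: the Elliptic column labels are a mix of strings and integers ('txId', 'time_step', 2, 3, ...), and on some pandas releases an integer slice bound cannot be used for label-based slicing on such an object Index. The DataFrame below is illustrative only, not the actual dataset.

import pandas as pd

# Illustrative columns mimicking the mixed string/int labels in the traceback.
df = pd.DataFrame([[0, 1, 0.5, 0.7]],
                  columns=pd.Index(['txId', 'time_step', 2, 3]))

try:
    print(df.loc[:, 2:])  # same call pattern as elliptic.py:90
except TypeError as err:
    # Some pandas releases reject the integer slice bound on an object Index.
    print(err)
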
___________________________ test_groundtruth_metrics ___________________________

    def test_groundtruth_metrics():
        pred_mask = torch.rand(10)
        target_mask = torch.rand(10)

>       accuracy, recall, precision, f1_score, auroc = groundtruth_metrics(
            pred_mask, target_mask)

test/explain/metric/test_basic_metric.py:12: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
/usr/local/lib/python3.8/dist-packages/torch_geometric-2.3.0-py3.8.egg/torch_geometric/explain/metric/basic.py:52: in groundtruth_metrics
    out = fn(pred_mask, target_mask, 'binary', threshold)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

preds = tensor([0.5783, 0.4249, 0.8071, 0.0037, 0.0704, 0.9466, 0.4047, 0.7245, 0.9427,
        0.4929])
target = tensor([ True,  True, False, False,  True,  True, False, False,  True,  True])
average = 'binary', mdmc_average = 0.5, threshold = 0.5, top_k = None
subset_accuracy = False, num_classes = None, multiclass = None
ignore_index = None

    def accuracy(
        preds: Tensor,
        target: Tensor,
        average: Optional[str] = "micro",
        mdmc_average: Optional[str] = "global",
        threshold: float = 0.5,
        top_k: Optional[int] = None,
        subset_accuracy: bool = False,
        num_classes: Optional[int] = None,
        multiclass: Optional[bool] = None,
        ignore_index: Optional[int] = None,
    ) -> Tensor:
        r"""Computes `Accuracy`_

        .. math::
            \text{Accuracy} = \frac{1}{N}\sum_i^N 1(y_i = \hat{y}_i)

        Where :math:`y` is a tensor of target values, and :math:`\hat{y}` is a
        tensor of predictions.

        For multi-class and multi-dimensional multi-class data with probability or logits predictions, the
        parameter ``top_k`` generalizes this metric to a Top-K accuracy metric: for each sample the
        top-K highest probability or logits items are considered to find the correct label.

        For multi-label and multi-dimensional multi-class inputs, this metric computes the "global"
        accuracy by default, which counts all labels or sub-samples separately. This can be
        changed to subset accuracy (which requires all labels or sub-samples in the sample to
        be correctly predicted) by setting ``subset_accuracy=True``.

        Accepts all input types listed in :ref:`pages/classification:input types`.

        Args:
            preds: Predictions from model (probabilities, logits or labels)
            target: Ground truth labels
            average:
                Defines the reduction that is applied. Should be one of the following:

                - ``'micro'`` [default]: Calculate the metric globally, across all samples and classes.
                - ``'macro'``: Calculate the metric for each class separately, and average the
                  metrics across classes (with equal weights for each class).
                - ``'weighted'``: Calculate the metric for each class separately, and average the
                  metrics across classes, weighting each class by its support (``tp + fn``).
                - ``'none'`` or ``None``: Calculate the metric for each class separately, and return
                  the metric for every class.
                - ``'samples'``: Calculate the metric for each sample, and average the metrics
                  across samples (with equal weights for each sample).

                .. note:: What is considered a sample in the multi-dimensional multi-class case
                    depends on the value of ``mdmc_average``.

                .. note:: If ``'none'`` and a given class doesn't occur in the ``preds`` or ``target``,
                    the value for the class will be ``nan``.

            mdmc_average:
                Defines how averaging is done for multi-dimensional multi-class inputs (on top of the
                ``average`` parameter). Should be one of the following:

                - ``None`` [default]: Should be left unchanged if your data is not multi-dimensional multi-class.

                - ``'samplewise'``: In this case, the statistics are computed separately for each
                  sample on the ``N`` axis, and then averaged over samples.
                  The computation for each sample is done by treating the flattened extra axes ``...``
                  (see :ref:`pages/classification:input types`) as the ``N`` dimension within the sample,
                  and computing the metric for the sample based on that.

                - ``'global'``: In this case the ``N`` and ``...`` dimensions of the inputs
                  (see :ref:`pages/classification:input types`)
                  are flattened into a new ``N_X`` sample axis, i.e. the inputs are treated as if they
                  were ``(N_X, C)``. From here on the ``average`` parameter applies as usual.

            num_classes:
                Number of classes. Necessary for ``'macro'``, ``'weighted'`` and ``None`` average methods.

            threshold:
                Threshold for transforming probability or logit predictions to binary (0,1) predictions, in the case
                of binary or multi-label inputs. Default value of 0.5 corresponds to input being probabilities.
            top_k:
                Number of the highest probability or logit score predictions considered finding the correct label,
                relevant only for (multi-dimensional) multi-class inputs. The
                default value (``None``) will be interpreted as 1 for these inputs.

                Should be left at default (``None``) for all other types of inputs.
            multiclass:
                Used only in certain special cases, where you want to treat inputs as a different type
                than what they appear to be. See the parameter's
                :ref:`documentation section <pages/classification:using the multiclass parameter>`
                for a more detailed explanation and examples.
            ignore_index:
                Integer specifying a target class to ignore. If given, this class index does not contribute
                to the returned score, regardless of reduction method. If an index is ignored, and ``average=None``
                or ``'none'``, the score for the ignored class will be returned as ``nan``.
            subset_accuracy:
                Whether to compute subset accuracy for multi-label and multi-dimensional
                multi-class inputs (has no effect for other input types).

                - For multi-label inputs, if the parameter is set to ``True``, then all labels for
                  each sample must be correctly predicted for the sample to count as correct. If it
                  is set to ``False``, then all labels are counted separately - this is equivalent to
                  flattening inputs beforehand (i.e. ``preds = preds.flatten()`` and same for ``target``).

                - For multi-dimensional multi-class inputs, if the parameter is set to ``True``, then all
                  sub-sample (on the extra axis) must be correct for the sample to be counted as correct.
                  If it is set to ``False``, then all sub-samples are counter separately - this is equivalent,
                  in the case of label predictions, to flattening the inputs beforehand (i.e.
                  ``preds = preds.flatten()`` and same for ``target``). Note that the ``top_k`` parameter
                  still applies in both cases, if set.

        Raises:
            ValueError:
                If ``top_k`` parameter is set for ``multi-label`` inputs.
            ValueError:
                If ``average`` is none of ``"micro"``, ``"macro"``, ``"weighted"``, ``"samples"``, ``"none"``, ``None``.
            ValueError:
                If ``mdmc_average`` is not one of ``None``, ``"samplewise"``, ``"global"``.
            ValueError:
                If ``average`` is set but ``num_classes`` is not provided.
            ValueError:
                If ``num_classes`` is set
                and ``ignore_index`` is not in the range ``[0, num_classes)``.
            ValueError:
                If ``top_k`` is not an ``integer`` larger than ``0``.

        Example:
            >>> import torch
            >>> from torchmetrics.functional import accuracy
            >>> target = torch.tensor([0, 1, 2, 3])
            >>> preds = torch.tensor([0, 2, 1, 3])
            >>> accuracy(preds, target)
            tensor(0.5000)

            >>> target = torch.tensor([0, 1, 2])
            >>> preds = torch.tensor([[0.1, 0.9, 0], [0.3, 0.1, 0.6], [0.2, 0.5, 0.3]])
            >>> accuracy(preds, target, top_k=2)
            tensor(0.6667)
        """
        allowed_average = ["micro", "macro", "weighted", "samples", "none", None]
        if average not in allowed_average:
>           raise ValueError(f"The `average` has to be one of {allowed_average}, got {average}.")
E           ValueError: The `average` has to be one of ['micro', 'macro', 'weighted', 'samples', 'none', None], got binary.

/usr/local/lib/python3.8/dist-packages/torchmetrics/functional/classification/accuracy.py:395: ValueError
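
This failure and the two that follow share one root cause: groundtruth_metrics passes 'binary' as the third positional argument, which appears to be intended for the newer torchmetrics functional API (where that position is task), while the torchmetrics build in this image still uses the older signature where that position is average. A minimal sketch of the call the newer API expects; the ">= 0.11" version bound is my assumption based on the error message:

import torch
from torchmetrics.functional import accuracy

pred_mask = torch.rand(10)
target_mask = (torch.rand(10) > 0.5).long()

# With torchmetrics >= 0.11 (assumed), the third positional argument is `task`,
# so 'binary' selects binary classification instead of being misread as an
# `average` mode.
acc = accuracy(pred_mask, target_mask, 'binary', threshold=0.5)
print(acc)
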
_______________________ test_perfect_groundtruth_metrics _______________________

    def test_perfect_groundtruth_metrics():
        pred_mask = target_mask = torch.rand(10)

>       accuracy, recall, precision, f1_score, auroc = groundtruth_metrics(
            pred_mask, target_mask)

test/explain/metric/test_basic_metric.py:25: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
/usr/local/lib/python3.8/dist-packages/torch_geometric-2.3.0-py3.8.egg/torch_geometric/explain/metric/basic.py:52: in groundtruth_metrics
    out = fn(pred_mask, target_mask, 'binary', threshold)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

preds = tensor([0.9914, 0.0506, 0.0854, 0.1119, 0.6376, 0.5309, 0.2076, 0.4813, 0.5845,
        0.2689])
target = tensor([ True, False, False, False,  True,  True, False, False,  True, False])
average = 'binary', mdmc_average = 0.5, threshold = 0.5, top_k = None
subset_accuracy = False, num_classes = None, multiclass = None
ignore_index = None

    [... accuracy() signature and docstring identical to the first torchmetrics failure above, omitted ...]
        allowed_average = ["micro", "macro", "weighted", "samples", "none", None]
        if average not in allowed_average:
>           raise ValueError(f"The `average` has to be one of {allowed_average}, got {average}.")
E           ValueError: The `average` has to be one of ['micro', 'macro', 'weighted', 'samples', 'none', None], got binary.

/usr/local/lib/python3.8/dist-packages/torchmetrics/functional/classification/accuracy.py:395: ValueError
________________________ test_groundtruth_true_negative ________________________

    def test_groundtruth_true_negative():
        warnings.filterwarnings('ignore', '.*No positive samples in targets.*')
        pred_mask = target_mask = torch.zeros(10)

>       accuracy, recall, precision, f1_score, auroc = groundtruth_metrics(
            pred_mask, target_mask)

test/explain/metric/test_basic_metric.py:39: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
/usr/local/lib/python3.8/dist-packages/torch_geometric-2.3.0-py3.8.egg/torch_geometric/explain/metric/basic.py:52: in groundtruth_metrics
    out = fn(pred_mask, target_mask, 'binary', threshold)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

preds = tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
target = tensor([False, False, False, False, False, False, False, False, False, False])
average = 'binary', mdmc_average = 0.5, threshold = 0.5, top_k = None
subset_accuracy = False, num_classes = None, multiclass = None
ignore_index = None

    [... accuracy() signature and docstring identical to the first torchmetrics failure above, omitted ...]
        allowed_average = ["micro", "macro", "weighted", "samples", "none", None]
        if average not in allowed_average:
>           raise ValueError(f"The `average` has to be one of {allowed_average}, got {average}.")
E           ValueError: The `average` has to be one of ['micro', 'macro', 'weighted', 'samples', 'none', None], got binary.

/usr/local/lib/python3.8/dist-packages/torchmetrics/functional/classification/accuracy.py:395: ValueError
__________________________________ test_onnx ___________________________________

    @withPackage('onnx', 'onnxruntime')
    def test_onnx():
        import onnx
        import onnxruntime as ort

        class MyModel(torch.nn.Module):
            def __init__(self):
                super().__init__()
                self.conv1 = SAGEConv(8, 16)
                self.conv2 = SAGEConv(16, 16)

            def forward(self, x, edge_index):
                x = self.conv1(x, edge_index).relu()
                x = self.conv2(x, edge_index)
                return x

        model = MyModel()
        x = torch.randn(3, 8)
        edge_index = torch.tensor([[0, 1, 2], [1, 0, 2]])
        expected = model(x, edge_index)
        assert expected.size() == (3, 16)

        torch.onnx.export(model, (x, edge_index), 'model.onnx',
                          input_names=('x', 'edge_index'), opset_version=16)

        model = onnx.load('model.onnx')
        onnx.checker.check_model(model)

>       ort_session = ort.InferenceSession('model.onnx')

test/nn/models/test_basic_gnn.py:197: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
/usr/local/lib/python3.8/dist-packages/onnxruntime/capi/onnxruntime_inference_collection.py:360: in __init__
    self._create_inference_session(providers, provider_options, disabled_optimizers)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = <onnxruntime.capi.onnxruntime_inference_collection.InferenceSession object at 0x7fd0d8a18940>
providers = [], provider_options = [], disabled_optimizers = None

    def _create_inference_session(self, providers, provider_options, disabled_optimizers=None):
        available_providers = C.get_available_providers()

        # Tensorrt can fall back to CUDA. All others fall back to CPU.
        if "TensorrtExecutionProvider" in available_providers:
            self._fallback_providers = ["CUDAExecutionProvider", "CPUExecutionProvider"]
        elif "MIGraphXExecutionProvider" in available_providers:
            self._fallback_providers = ["ROCMExecutionProvider", "CPUExecutionProvider"]
        else:
            self._fallback_providers = ["CPUExecutionProvider"]

        # validate providers and provider_options before other initialization
        providers, provider_options = check_and_normalize_provider_args(
            providers, provider_options, available_providers
        )
        if providers == [] and len(available_providers) > 1:
            self.disable_fallback()
>           raise ValueError(
                "This ORT build has {} enabled. ".format(available_providers)
                + "Since ORT 1.9, you are required to explicitly set "
                + "the providers parameter when instantiating InferenceSession. For example, "
                "onnxruntime.InferenceSession(..., providers={}, ...)".format(available_providers)
            )
E           ValueError: This ORT build has ['TensorrtExecutionProvider', 'CUDAExecutionProvider', 'CPUExecutionProvider'] enabled. Since ORT 1.9, you are required to explicitly set the providers parameter when instantiating InferenceSession. For example, onnxruntime.InferenceSession(..., providers=['TensorrtExecutionProvider', 'CUDAExecutionProvider', 'CPUExecutionProvider'], ...)

/usr/local/lib/python3.8/dist-packages/onnxruntime/capi/onnxruntime_inference_collection.py:388: ValueError
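
The last failure is independent of PyG: since onnxruntime 1.9, InferenceSession requires an explicit providers argument, exactly as the error message states. A minimal sketch of the adjusted call; the provider list here is an example, not necessarily what the test ends up using:

import onnxruntime as ort

# Since ORT 1.9 the providers argument must be given explicitly.
# 'CPUExecutionProvider' is always available and is used here as an example.
ort_session = ort.InferenceSession(
    'model.onnx',
    providers=['CPUExecutionProvider'],
)
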

Environment

rusty1s commented 1 year ago

I think I fixed most of them in #6770, but I cannot really reproduce them locally, so it would be good to confirm.

Which pandas version are you using? I'm not sure what's happening on your end in the test_elliptic test.