synthesized-io / insight

🧿 Metrics & Monitoring of Datasets
BSD 3-Clause "New" or "Revised" License
12 stars 0 forks source link

Configure metrics with YAML #62

Closed simonhkswan closed 2 years ago

simonhkswan commented 2 years ago

The idea has two steps:

  1. Add Metric.to_dict() and Metric.from_dict(d: Dict[str, Any]).
  2. Convert Python dictionaries to/from YAML.
class Metric1(ABC):
    """Example metric whose constructor arguments are exposed as read-only properties.

    ``to_dict`` / ``from_dict`` are the serialisation hooks proposed in this
    issue; they are intentionally left unimplemented in this sketch.
    """

    def __init__(self, a, b):
        # Store into the private backing fields. ``a`` and ``b`` are read-only
        # properties, so assigning ``self.a`` / ``self.b`` would raise
        # AttributeError.
        self._a = a
        self._b = b

    @property
    def a(self) -> float:
        """Value passed as ``a`` to the constructor."""
        return self._a

    @property
    def b(self):
        """Value passed as ``b`` to the constructor."""
        return self._b

    def to_dict(self) -> Dict[str, Any]:
        """Serialise this metric to a plain dictionary (not implemented yet)."""
        raise NotImplementedError

    @classmethod
    def from_dict(cls, d: Dict[str, Any]) -> "Metric":
        """Rebuild a metric from a dictionary (not implemented yet).

        The return annotation is quoted because ``Metric`` is not defined in
        this snippet.
        """
        raise NotImplementedError


class Metric2(Metric1):
    """Example subclass that takes only ``a``; the base class receives ``b=None``."""

    def __init__(self, a):
        super().__init__(a, None)

    @property
    def a(self) -> float:
        """Value passed as ``a`` to the constructor."""
        return self._a

    @property
    def b(self) -> float:
        """Mirror of ``a`` (returns ``self._a``).

        NOTE(review): this returns ``_a`` rather than ``_b`` exactly as in the
        original sketch - confirm whether that is intended.
        """
        return self._a

    def to_dict(self) -> Dict[str, Any]:
        """Serialise this metric to a plain dictionary (not implemented yet)."""
        raise NotImplementedError

    @classmethod
    def from_dict(cls, d: Dict[str, Any]) -> "Metric":
        """Rebuild a metric from a dictionary (not implemented yet)."""
        raise NotImplementedError

A good test of the functionality:


# Round-trip check for any metric: serialising to a dict and rebuilding from
# that dict must yield a metric that evaluates identically.
my_metric = Metric(**config)

d = my_metric.to_dict()
my_metric2 = Metric.from_dict(d)

# `assert` is a lowercase statement; `Assert ...` is a SyntaxError.
assert my_metric(df) == my_metric2(df)

If I am given a dictionary, which function do I call to recreate the right metric?


# Illustration of the dispatch problem: with only a dictionary in hand, the
# caller has to pick one of several candidate `from_dict` entry points.
d = load_dict(...)

EarthMoversDistance.from_dict(d)
# or do I call
# NOTE(review): `check: Check` is signature-style notation, not a valid call
# expression - presumably it indicates an extra required argument.
KolmogorovSmirnovDistance.from_dict(d, check: Check)
# or...
TwoColumnMetric.from_dict(d)
{
  "metric_name": "EarthMoversDistance",
  "other_parameter": 0.3 
}
class Metric(ABC):
    """Base-class sketch that carries a class-level registry of metric classes."""

    # Keys are strings and values are Metric subclasses (presumably keyed by
    # metric name - confirm). The forward reference is quoted because `Metric`
    # is not bound yet while its own class body is being executed.
    _registry: Dict[str, Type["Metric"]] = {}

    @classmethod
    def _register_class(cls):
        """Placeholder for the registration hook; the body is not written yet."""
        ...
simonhkswan commented 2 years ago

class MetricFactory:
    """Factory sketch: builds a metric from a configuration dictionary."""

    def metric_from_dict(self, d: Dict[str, Any]) -> Metric:
        """Return the metric described by ``d`` (body not written yet)."""
        ...
Hebruwu commented 2 years ago

"""
Option 1: MetricFactory in base.py
    Step 1: Create a registry of all the metrics.
        Substep: recurse through _Metric children in a depth-first search and add each one to _registry with 'name' as the key
            and class reference as the value.
    Step 2: given a dictionary which contains the name of the metric and parameters, call the metric from _registry.
"""
class MetricFactory:
    """Build metrics from dictionaries via a lazily populated name -> class registry."""

    # Maps each metric's `name` to the class that implements it.
    _registry: Dict[str, Any] = {}

    @classmethod
    def _update_registry_recuresively(cls, next_metric):
        """Register ``next_metric`` and, depth first, every subclass below it.

        Classes whose ``name`` is missing or ``None`` are skipped, but their
        subclasses are still visited.
        """
        metric_name = getattr(next_metric, "name", None)
        if metric_name is not None:
            # `_registry` is a dict, so entries are stored by key
            # (dicts have no `append`).
            cls._registry[metric_name] = next_metric
        for subclass in next_metric.__subclasses__():
            cls._update_registry_recuresively(subclass)

    @classmethod
    def metric_from_dict(cls, bluprnt: Dict[str, Any]):
        """Instantiate the metric described by ``bluprnt``.

        Args:
            bluprnt: Must contain ``'name'`` (the registry key). Every other
                entry is forwarded to the metric constructor as a keyword
                argument. The dictionary itself is not modified.

        Returns:
            A new instance of the registered metric class.

        Raises:
            KeyError: If ``'name'`` is absent or not present in the registry.
        """
        if not cls._registry:
            # Populate on first use, starting from the private metric base class.
            cls._update_registry_recuresively(_Metric)

        metric_cls = cls._registry[bluprnt['name']]
        # Argument-list unpacking: everything except the lookup key becomes a
        # constructor keyword argument.
        params = {key: value for key, value in bluprnt.items() if key != 'name'}
        return metric_cls(**params)

# Benefit: simple and does not interfere with the metrics.
Hebruwu commented 2 years ago

"""
Option 2: MetricFactory in metric.py or in a separate module.
    Step 1: Create a registry of all the metrics.
        Substep: recurse through OneColumnMetric, TwoColumnMetric, DataFrameMetric, and TwoDataFrameMetric children in 
        a depth-first search and add each one to _registry with 'name' as the key and the class reference as the value.
    Step 2: given a dictionary which contains the name of the metric and parameters, call the metric from _registry.
"""

class MetricFactory:
    """Build metrics from dictionaries via a lazily populated name -> class registry.

    The registry is seeded from the four public metric base classes instead of
    the private ``_Metric`` root.
    """

    # Maps each metric's `name` to the class that implements it.
    _registry: Dict[str, Any] = {}

    @classmethod
    def _update_registry_recuresively(cls, next_metric):
        """Register ``next_metric`` and, depth first, every subclass below it.

        Classes whose ``name`` is missing or ``None`` are skipped, but their
        subclasses are still visited.
        """
        metric_name = getattr(next_metric, "name", None)
        if metric_name is not None:
            # `_registry` is a dict, so entries are stored by key
            # (dicts have no `append`).
            cls._registry[metric_name] = next_metric
        for subclass in next_metric.__subclasses__():
            cls._update_registry_recuresively(subclass)

    @classmethod
    def metric_from_dict(cls, bluprnt: Dict[str, Any]):
        """Instantiate the metric described by ``bluprnt``.

        Args:
            bluprnt: Must contain ``'name'`` (the registry key). Every other
                entry is forwarded to the metric constructor as a keyword
                argument. The dictionary itself is not modified.

        Returns:
            A new instance of the registered metric class.

        Raises:
            KeyError: If ``'name'`` is absent or not present in the registry.
        """
        if not cls._registry:
            # Populate on first use from each public metric family.
            cls._update_registry_recuresively(OneColumnMetric)
            cls._update_registry_recuresively(TwoColumnMetric)
            cls._update_registry_recuresively(DataFrameMetric)
            cls._update_registry_recuresively(TwoDataFrameMetric)

        metric_cls = cls._registry[bluprnt['name']]
        # Argument-list unpacking: everything except the lookup key becomes a
        # constructor keyword argument.
        params = {key: value for key, value in bluprnt.items() if key != 'name'}
        return metric_cls(**params)

# Benefit: does not interfere with the metrics. Does not interfere with a private class.
Hebruwu commented 2 years ago
"""
Option 3: As a class method of _Metric.
    Step 1: Create a registry of all the metrics.
        Substep: recurse through OneColumnMetric, TwoColumnMetric, DataFrameMetric, and TwoDataFrameMetric children in 
        a depth-first search and add each one to _registry with 'name' as the key and the class reference as the value.
    Step 2: given a dictionary which contains the name of the metric and parameters, call the metric from _registry.
"""

class _Metric:
    _registry: Dict[str, Any] = {}

    @classmethod
    def _update_registry_recuresively(cls, next_metric):
        if next_metric.name is not None:
            cls._registry.append({next_metric.name: next_metric})
        for subclass in next_metric.__subclasses__():
            cls._update_registry_recuresively(subclass)

    @classmethod
    def metric_from_dict(cls, bluprnt: Dict[str, any]):
        if len(cls._registry) == 0:
            cls._update_registry_recuresively(cls)

        # STEP: Create a list of named parameters based on dictionary entries to pass into the Metric constructor.

        new_metric = cls._registry[bluprnt['name']](...)
        return new_metric

# Benefit: Can call from any metric with the same result.
Hebruwu commented 2 years ago

Note: what is labelled as a STEP should be possible to do through argument list unpacking.

Hebruwu commented 2 years ago

Image