ocean-data-factory-sweden / kso

Notebooks to upload/download marine footage, connect to a citizen science project, train machine learning models and publish marine biological observations.
GNU General Public License v3.0
4 stars 12 forks source link

Tutorial 9 issue with detection (likely also in tutorial 6, but have not tested) #344

Closed Bergylta closed 5 months ago

Bergylta commented 5 months ago

šŸ› Bug

The run finishes, but an error occurs at the end.

To Reproduce (REQUIRED)

Input: Model: GU_crabs_gobies_wrasses_1

mlp.detect_yolo(
    source=pp.movies_paths,
    save_dir=save_dir.selected,
    conf_thres=conf_thres.value,
    artifact_dir=artifact_dir,
    save_output=True,
    project=mlp.project_name,
    name=exp_name.value,
    model=model.value,
    latest=False,
)

Output:

TypeError                                 Traceback (most recent call last)
Cell In[20], line 1
----> 1 mlp.detect_yolo(
      2     source=pp.movies_paths,
      3     save_dir=save_dir.selected,
      4     conf_thres=conf_thres.value,
      5     artifact_dir=artifact_dir,
      6     save_output=True,
      7     project=mlp.project_name,
      8     name=exp_name.value,
      9     model=model.value,
     10     latest=False,
     11 )

File /usr/src/app/kso-dev/kso_utils/project.py:1751, in MLProjectProcessor.detect_yolo(self, project, name, source, save_dir, conf_thres, artifact_dir, model, img_size, save_output, test, latest)
   1740 else:
   1741     self.modules["detect"].run(
   1742         weights=best_model,
   1743         source=source,
   (...)
   1749         nosave=not save_output,
   1750     )
-> 1751 self.save_detections(conf_thres, model.ckpt_path, self.eval_dir)

File /usr/src/app/kso-dev/kso_utils/project.py:1820, in MLProjectProcessor.save_detections(self, conf_thres, model, eval_dir)
   1812         species_mapping = {}
   1814     self.modules["yolo_utils"].set_config(
   1815         conf=conf_thres,
   1816         model_name=model,
   1817         evaluation_directory=eval_dir,
   1818         species_map=species_mapping,
   1819     )
-> 1820     self.csv_report = self.modules["yolo_utils"].generate_csv_report(
   1821         eval_dir, self.run, log=True, registry=self.registry
   1822     )
   1823     self.modules["yolo_utils"].add_data(
   1824         eval_dir, "detection_output", self.registry, self.run
   1825     )
   1826 elif self.registry == "mlflow":

File /usr/src/app/kso-dev/kso_utils/yolo_utils.py:1048, in generate_csv_report(evaluation_path, run, log, registry)
   1046 if log:
   1047     if registry == "wandb":
-> 1048         wandb.log({"predictions": wandb.Table(dataframe=detect_df)})
   1049     elif registry == "mlflow":
   1050         pass

File /usr/local/lib/python3.8/dist-packages/wandb/sdk/wandb_run.py:419, in _run_decorator._noop.<locals>.wrapper(self, *args, **kwargs)
    416         wandb.termwarn(message, repeat=False)
    417         return cls.Dummy()
--> 419 return func(self, *args, **kwargs)

File /usr/local/lib/python3.8/dist-packages/wandb/sdk/wandb_run.py:370, in _run_decorator._noop_on_finish.<locals>.decorator_fn.<locals>.wrapper_fn(self, *args, **kwargs)
    367 @functools.wraps(func)
    368 def wrapper_fn(self: Type["Run"], *args: Any, **kwargs: Any) -> Any:
    369     if not getattr(self, "_is_finished", False):
--> 370         return func(self, *args, **kwargs)
    372     default_message = (
    373         f"Run ({self.id}) is finished. The call to `{func.__name__}` will be ignored. "
    374         f"Please make sure that you are using an active run."
    375     )
    376     resolved_message = message or default_message

File /usr/local/lib/python3.8/dist-packages/wandb/sdk/wandb_run.py:360, in _run_decorator._attach.<locals>.wrapper(self, *args, **kwargs)
    358         raise e
    359     cls._is_attaching = ""
--> 360 return func(self, *args, **kwargs)

File /usr/local/lib/python3.8/dist-packages/wandb/sdk/wandb_run.py:1792, in Run.log(self, data, step, commit, sync)
   1785 if sync is not None:
   1786     deprecate.deprecate(
   1787         field_name=deprecate.Deprecated.run__log_sync,
   1788         warning_message=(
   1789             "`sync` argument is deprecated and does not affect the behaviour of `wandb.log`"
   1790         ),
   1791     )
-> 1792 self._log(data=data, step=step, commit=commit)

File /usr/local/lib/python3.8/dist-packages/wandb/sdk/wandb_run.py:1567, in Run._log(self, data, step, commit)
   1564 if any(not isinstance(key, str) for key in data.keys()):
   1565     raise ValueError("Key values passed to `wandb.log` must be strings.")
-> 1567 self._partial_history_callback(data, step, commit)
   1569 if step is not None:
   1570     if os.getpid() != self._init_pid or self._is_attached:

File /usr/local/lib/python3.8/dist-packages/wandb/sdk/wandb_run.py:1439, in Run._partial_history_callback(self, row, step, commit)
   1436 if self._backend and self._backend.interface:
   1437     not_using_tensorboard = len(wandb.patched["tensorboard"]) == 0
-> 1439     self._backend.interface.publish_partial_history(
   1440         row,
   1441         user_step=self._step,
   1442         step=step,
   1443         flush=commit,
   1444         publish_step=not_using_tensorboard,
   1445     )

File /usr/local/lib/python3.8/dist-packages/wandb/sdk/interface/interface.py:528, in InterfaceBase.publish_partial_history(self, data, user_step, step, flush, publish_step, run)
    517 def publish_partial_history(
    518     self,
    519     data: dict,
   (...)
    524     run: Optional["Run"] = None,
    525 ) -> None:
    526     run = run or self._run
--> 528     data = history_dict_to_json(run, data, step=user_step, ignore_copy_err=True)
    529     data.pop("_step", None)
    531     # add timestamp to the history request, if not already present
    532     # the timestamp might come from the tensorboard log logic

File /usr/local/lib/python3.8/dist-packages/wandb/sdk/data_types/utils.py:52, in history_dict_to_json(run, payload, step, ignore_copy_err)
     48         payload[key] = history_dict_to_json(
     49             run, val, step=step, ignore_copy_err=ignore_copy_err
     50         )
     51     else:
---> 52         payload[key] = val_to_json(
     53             run, key, val, namespace=step, ignore_copy_err=ignore_copy_err
     54         )
     56 return payload

File /usr/local/lib/python3.8/dist-packages/wandb/sdk/data_types/utils.py:156, in val_to_json(run, key, val, namespace, ignore_copy_err)
    154     sanitized_key = re.sub(r"[^a-zA-Z0-9_]+", "", key)
    155     art = wandb.Artifact(f"run-{run.id}-{sanitized_key}", "run_table")
--> 156     art.add(val, key)
    157     run.log_artifact(art)
    159 # Partitioned tables and joined tables do not support being bound to runs.

File /usr/local/lib/python3.8/dist-packages/wandb/sdk/artifacts/artifact.py:1449, in Artifact.add(self, obj, name)
   1447     with self.new_file(name) as f:
   1448         file_path = f.name
-> 1449         do_write(f)
   1451 # Note, we add the file from our temp directory.
   1452 # It will be added again later on finalize, but succeed since
   1453 # the checksum should match
   1454 entry = self.add_file(file_path, name, is_tmp_name)

File /usr/local/lib/python3.8/dist-packages/wandb/sdk/artifacts/artifact.py:1437, in Artifact.add.<locals>.do_write(f)
   1434 import json
   1436 # TODO: Do we need to open with utf-8 codec?
-> 1437 f.write(json.dumps(val, sort_keys=True))

File /usr/lib/python3.8/json/__init__.py:234, in dumps(obj, skipkeys, ensure_ascii, check_circular, allow_nan, cls, indent, separators, default, sort_keys, **kw)
    232 if cls is None:
    233     cls = JSONEncoder
--> 234 return cls(
    235     skipkeys=skipkeys, ensure_ascii=ensure_ascii,
    236     check_circular=check_circular, allow_nan=allow_nan, indent=indent,
    237     separators=separators, default=default, sort_keys=sort_keys,
    238     **kw).encode(obj)

File /usr/lib/python3.8/json/encoder.py:199, in JSONEncoder.encode(self, o)
    195         return encode_basestring(o)
    196 # This doesn't pass the iterator directly to ''.join() because the
    197 # exceptions aren't as detailed.  The list call should be roughly
    198 # equivalent to the PySequence_Fast that ''.join() would do.
--> 199 chunks = self.iterencode(o, _one_shot=True)
    200 if not isinstance(chunks, (list, tuple)):
    201     chunks = list(chunks)

File /usr/lib/python3.8/json/encoder.py:257, in JSONEncoder.iterencode(self, o, _one_shot)
    252 else:
    253     _iterencode = _make_iterencode(
    254         markers, self.default, _encoder, self.indent, floatstr,
    255         self.key_separator, self.item_separator, self.sort_keys,
    256         self.skipkeys, _one_shot)
--> 257 return _iterencode(o, 0)

File /usr/lib/python3.8/json/encoder.py:179, in JSONEncoder.default(self, o)
    160 def default(self, o):
    161     """Implement this method in a subclass such that it returns
    162     a serializable object for ``o``, or calls the base implementation
    163     (to raise a ``TypeError``).
   (...)
    177 
    178     """
--> 179     raise TypeError(f'Object of type {o.__class__.__name__} '
    180                     f'is not JSON serializable')

TypeError: Object of type PosixPath is not JSON serializable

Expected behavior

A clear and concise description of what you expected to happen.

Environment

If applicable, add screenshots to help explain your problem.

Additional context

Add any other context about the problem here.