portugueslab / stytra

A modular package to control stimulation and track behaviour
http://www.portugueslab.com/stytra/
GNU General Public License v3.0
41 stars 26 forks source link

Hangs on offline "Track Video" #30

Closed tbenst closed 4 months ago

tbenst commented 4 years ago

Had immediate success with seeing live plots with offline tail tracking, thanks for great interface! However, I'm seeing a program hang when I click "Track video," regardless of output format:

Here's the shell output before I ^C it:

> python -m stytra.offline.track_video
QSettings::value: Empty key passed
QSettings::value: Empty key passed
/nix/store/4ffsiqql2lf9jlysdcljzlhz8igph3gs-python3-3.7.5-env/lib/python3.7/site-packages/numpy/lib/function_base.py:1519: RuntimeWarning: invalid value encountered in remainder
  ddmod = mod(dd + pi, 2*pi) - pi
/nix/store/4ffsiqql2lf9jlysdcljzlhz8igph3gs-python3-3.7.5-env/lib/python3.7/site-packages/numpy/lib/function_base.py:1520: RuntimeWarning: invalid value encountered in greater
  _nx.copyto(ddmod, pi, where=(ddmod == -pi) & (dd > 0))
/nix/store/4ffsiqql2lf9jlysdcljzlhz8igph3gs-python3-3.7.5-env/lib/python3.7/site-packages/numpy/lib/function_base.py:1522: RuntimeWarning: invalid value encountered in less
  _nx.copyto(ph_correct, 0, where=abs(dd) < discont)
/nix/store/4ffsiqql2lf9jlysdcljzlhz8igph3gs-python3-3.7.5-env/lib/python3.7/site-packages/numpy/lib/function_base.py:1519: RuntimeWarning: invalid value encountered in remainder
  ddmod = mod(dd + pi, 2*pi) - pi
/nix/store/4ffsiqql2lf9jlysdcljzlhz8igph3gs-python3-3.7.5-env/lib/python3.7/site-packages/numpy/lib/function_base.py:1520: RuntimeWarning: invalid value encountered in greater
  _nx.copyto(ddmod, pi, where=(ddmod == -pi) & (dd > 0))
/nix/store/4ffsiqql2lf9jlysdcljzlhz8igph3gs-python3-3.7.5-env/lib/python3.7/site-packages/numpy/lib/function_base.py:1522: RuntimeWarning: invalid value encountered in less
  _nx.copyto(ph_correct, 0, where=abs(dd) < discont)
^C  File "/nix/store/4ffsiqql2lf9jlysdcljzlhz8igph3gs-python3-3.7.5-env/lib/python3.7/site-packages/stytra/gui/container_windows.py", line 194, in closeEvent
    self.experiment.wrap_up()
  File "/nix/store/4ffsiqql2lf9jlysdcljzlhz8igph3gs-python3-3.7.5-env/lib/python3.7/site-packages/stytra/experiments/tracking_experiments.py", line 424, in wrap_up
    super().wrap_up(*args, **kwargs)
  File "/nix/store/4ffsiqql2lf9jlysdcljzlhz8igph3gs-python3-3.7.5-env/lib/python3.7/site-packages/stytra/experiments/tracking_experiments.py", line 137, in wrap_up
    self.camera.join()
  File "/nix/store/5dbidajmgrvskv7kj6bwrkza8szxkgar-python3-3.7.5/lib/python3.7/multiprocessing/process.py", line 140, in join
    res = self._popen.wait(timeout)
  File "/nix/store/5dbidajmgrvskv7kj6bwrkza8szxkgar-python3-3.7.5/lib/python3.7/multiprocessing/popen_fork.py", line 48, in wait
    return self.poll(os.WNOHANG if timeout == 0.0 else 0)
  File "/nix/store/5dbidajmgrvskv7kj6bwrkza8szxkgar-python3-3.7.5/lib/python3.7/multiprocessing/popen_fork.py", line 28, in poll
    pid, sts = os.waitpid(self.pid, flag)
<class 'KeyboardInterrupt'>: 
^CError in sys.excepthook:
Process camera:
Traceback (most recent call last):
  File "/nix/store/5dbidajmgrvskv7kj6bwrkza8szxkgar-python3-3.7.5/lib/python3.7/multiprocessing/popen_fork.py", line 28, in poll
    pid, sts = os.waitpid(self.pid, flag)
KeyboardInterrupt

Original exception was:
Traceback (most recent call last):
  File "/nix/store/4ffsiqql2lf9jlysdcljzlhz8igph3gs-python3-3.7.5-env/lib/python3.7/site-packages/stytra/gui/container_windows.py", line 194, in closeEvent
    self.experiment.wrap_up()
  File "/nix/store/4ffsiqql2lf9jlysdcljzlhz8igph3gs-python3-3.7.5-env/lib/python3.7/site-packages/stytra/experiments/tracking_experiments.py", line 424, in wrap_up
    super().wrap_up(*args, **kwargs)
  File "/nix/store/4ffsiqql2lf9jlysdcljzlhz8igph3gs-python3-3.7.5-env/lib/python3.7/site-packages/stytra/experiments/tracking_experiments.py", line 137, in wrap_up
    self.camera.join()
  File "/nix/store/5dbidajmgrvskv7kj6bwrkza8szxkgar-python3-3.7.5/lib/python3.7/multiprocessing/process.py", line 140, in join
    res = self._popen.wait(timeout)
  File "/nix/store/5dbidajmgrvskv7kj6bwrkza8szxkgar-python3-3.7.5/lib/python3.7/multiprocessing/popen_fork.py", line 48, in wait
    return self.poll(os.WNOHANG if timeout == 0.0 else 0)
  File "/nix/store/5dbidajmgrvskv7kj6bwrkza8szxkgar-python3-3.7.5/lib/python3.7/multiprocessing/popen_fork.py", line 28, in poll
    pid, sts = os.waitpid(self.pid, flag)
KeyboardInterrupt
Traceback (most recent call last):
  File "/nix/store/5dbidajmgrvskv7kj6bwrkza8szxkgar-python3-3.7.5/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/nix/store/4ffsiqql2lf9jlysdcljzlhz8igph3gs-python3-3.7.5-env/lib/python3.7/site-packages/stytra/hardware/video/__init__.py", line 351, in run
    time.sleep(extrat)
KeyboardInterrupt

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/nix/store/5dbidajmgrvskv7kj6bwrkza8szxkgar-python3-3.7.5/lib/python3.7/multiprocessing/process.py", line 300, in _bootstrap
    util._exit_function()
  File "/nix/store/5dbidajmgrvskv7kj6bwrkza8szxkgar-python3-3.7.5/lib/python3.7/multiprocessing/util.py", line 337, in _exit_function
    _run_finalizers()
  File "/nix/store/5dbidajmgrvskv7kj6bwrkza8szxkgar-python3-3.7.5/lib/python3.7/multiprocessing/util.py", line 277, in _run_finalizers
    finalizer()
  File "/nix/store/5dbidajmgrvskv7kj6bwrkza8szxkgar-python3-3.7.5/lib/python3.7/multiprocessing/util.py", line 201, in __call__
    res = self._callback(*self._args, **self._kwargs)
  File "/nix/store/5dbidajmgrvskv7kj6bwrkza8szxkgar-python3-3.7.5/lib/python3.7/multiprocessing/queues.py", line 192, in _finalize_join
    thread.join()
  File "/nix/store/5dbidajmgrvskv7kj6bwrkza8szxkgar-python3-3.7.5/lib/python3.7/threading.py", line 1044, in join
    self._wait_for_tstate_lock()
  File "/nix/store/5dbidajmgrvskv7kj6bwrkza8szxkgar-python3-3.7.5/lib/python3.7/threading.py", line 1060, in _wait_for_tstate_lock
    elif lock.acquire(block, timeout):
KeyboardInterrupt
  File "/nix/store/4ffsiqql2lf9jlysdcljzlhz8igph3gs-python3-3.7.5-env/lib/python3.7/site-packages/pyqtgraph/__init__.py", line 312, in cleanup
    if isinstance(o, QtGui.QGraphicsItem) and isQObjectAlive(o) and o.scene() is None:
<class 'ReferenceError'>: weakly-referenced object no longer exists
Error in atexit._run_exitfuncs:
Traceback (most recent call last):
  File "/nix/store/4ffsiqql2lf9jlysdcljzlhz8igph3gs-python3-3.7.5-env/lib/python3.7/site-packages/pyqtgraph/__init__.py", line 312, in cleanup
    if isinstance(o, QtGui.QGraphicsItem) and isQObjectAlive(o) and o.scene() is None:
ReferenceError: weakly-referenced object no longer exists
^CError in atexit._run_exitfuncs:
Traceback (most recent call last):
  File "/nix/store/5dbidajmgrvskv7kj6bwrkza8szxkgar-python3-3.7.5/lib/python3.7/multiprocessing/util.py", line 277, in _run_finalizers
    finalizer()
  File "/nix/store/5dbidajmgrvskv7kj6bwrkza8szxkgar-python3-3.7.5/lib/python3.7/multiprocessing/util.py", line 201, in __call__
    res = self._callback(*self._args, **self._kwargs)
  File "/nix/store/5dbidajmgrvskv7kj6bwrkza8szxkgar-python3-3.7.5/lib/python3.7/multiprocessing/queues.py", line 192, in _finalize_join
    thread.join()
  File "/nix/store/5dbidajmgrvskv7kj6bwrkza8szxkgar-python3-3.7.5/lib/python3.7/threading.py", line 1044, in join
    self._wait_for_tstate_lock()
  File "/nix/store/5dbidajmgrvskv7kj6bwrkza8szxkgar-python3-3.7.5/lib/python3.7/threading.py", line 1060, in _wait_for_tstate_lock
    elif lock.acquire(block, timeout):
KeyboardInterrupt

Edit: The hang is still accurate, but I was wrong: csv and hdf5 files are created. The csv file looks good, but in the hdf5 file, I cannot open any of the data arrays, likely as the file was not closed properly

Edit 2: seems like I can read the data with tables in python but not hdfview

Edit 3: My video is 346548 frames, but the hdf5 & csv files are only 346507 frames--I wonder if the final 31 frames are being dropped?

vilim commented 4 years ago

It seems that Stytra hangs on joining the video file reading process (which is used during the preview/tracking settings phase). I must admit that we have not tested it extensively on Linux, and it your distribution is rather uncommon. As far as I can tell it is an interaction problem with ffmpeg and python multiprocessing. Stytra and ffmpeg using 200% CPU during tracking is normal and expected if you have a 4-core processor, but if it keeps like this after the tracking is done, that is a bug. We still have not figured out why Stytra hangs on quitting if it is kept open for a long time.

Regarding the missing frames, I don't know what could be the issue, as the HDF5/CSV files are only saved after the processing is complete, and iterating through frames is handled by the imageio library (see here)

Viewing the HDF5 files with something like hdfview will be problematic as there is no native support for tabular data. We use the flammkuchen library which gives you pandas dataframes directly from the files (via pytables).

Finally, the progress bar being at 100% is another bug: because imagio sometimes gives an incorrect amount of frames, I hard-coded an upper bound and the progress bar has an incorrect range. I will fix it in the next release.

tbenst commented 4 years ago

Thanks for the detailed reply!

200% CPU during tracking is normal and expected if you have a 4-core processor, but if it keeps like this after the tracking is done, that is a bug.

8-core/16-thread processor, and it does stop when tracking has finished. The hang occurs only after tracking finishes.

Regarding the missing frames

Oops the problem happened before stytra..ffmpeg or libx264 is dropping frames, stytra indeed processes every frame, sorry for the false alarm!

your distribution is rather uncommon.

Yes, indeed. I'm a big fan of Nix/NixOS for science as it provides reproducible builds, all the way down to glibc. For example, can easily run the example looming experiment on a computer with Nix installed (only tested on Ubuntu & NixOS):

# install nix
> curl https://nixos.org/nix/install | sh
# run looming experiment
> nix-shell -I nixpkgs=https://github.com/tbenst/nixpkgs/archive/40e11a4fd00b82805e7f647dcbd32aeaa1eeffb5.tar.gz -p python3Packages.stytra --run "python -m stytra.examples.looming_exp"

Behind the scenes, this automatically pulled in opencv, opengl, pyqt, python3.7, etc, with all dependencies pinned and hashed. Currently, that one liner will trigger compiling, but I'm working to get a cache server running for common data-science packages when built against, say Intel MKL. I believe NixOS is the first operating system to ship a package for stytra :).

I will fix it in the next release.

Sweet! Thanks again for the package--well documented and commented.

Btw, (offtopic question), is it possible to extract bouts using offline pipeline? The Fig 4. example bouts notebook looks like a good starting point, but requires the vigor metric. I figured might be possible to modify offline tracking notebook, which as I understand would require including VigorMotionEstimator. Since this is a Estimator, not a pipeline node, seems like I'd need to read frames into a QueueDataAccumulator first. Unless I'm missing something, seems it would be easier for me to re-implement get_velocity as a function rather than use the estimator object?

vilim commented 4 years ago

Regarding the last question: the estimators are not really needed in the offline case, they have a structure with peculiarities for closed-loop experiments.

To extract bouts you need some sort of motion measure (the vigor for embedded fish is just a 50ms rolling window standard deviation, easy to get with Pandas series.rolling() method; for freely-swimming fish you can just use the speed). After you have the vigor measure, the function here should take care of the rest.

Regarding the hanging: we are still not sure where it comes from, we investigated it on several occasions, any pointers would be welcome.

tbenst commented 4 years ago

Thanks for the tips re: vigor, straightforward enough. Does stytra currently do anything for classifying bout type, e.g. left/right turn? I saw the freely-swimming phototaxis experiment but wasn't sure if there was already code for estimating bout angle in headfixed.

Regarding the hanging: we are still not sure where it comes from, we investigated it on several occasions, any pointers would be welcome.

It appears that CameraSource has a wait while VideoFileSource does not, perhaps this makes a difference? https://github.com/portugueslab/stytra/blob/88f8a6380a1c66503d1110b881b53bc48bf1ce4f/stytra/hardware/video/__init__.py#L178

I'm also suspicious of: https://github.com/portugueslab/stytra/blob/88f8a6380a1c66503d1110b881b53bc48bf1ce4f/stytra/hardware/video/__init__.py#L231

There is no video playing when I click "Track video", so if this is set to 0 then the while loop might sleep forever

MadCatX commented 4 years ago

FWIW, I ran into the same problem and cobbled together these two patches which fix the issue for me:

From 1e0285cbf45740de4702d0586cf32de93838c3c6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Michal=20Mal=C3=BD?= <madcatxster@devoid-pointer.net>
Date: Wed, 12 Feb 2020 15:06:48 +0100
Subject: [PATCH 1/4] Fix offline video looping

---
 stytra/hardware/video/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/stytra/hardware/video/__init__.py b/stytra/hardware/video/__init__.py
index c6ca85b..027b20f 100644
--- a/stytra/hardware/video/__init__.py
+++ b/stytra/hardware/video/__init__.py
@@ -363,7 +363,7 @@ class VideoFileSource(VideoSource):
                     for m in messages:
                         self.message_queue.put(m)

-                container.seek(0, whence="frame")
+                container.seek(0)

             return

-- 
2.26.2
From d3226aef04c7fdc84f7b288f614fe80f221be620 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Michal=20Mal=C3=BD?= <madcatxster@devoid-pointer.net>
Date: Wed, 12 Feb 2020 17:03:09 +0100
Subject: [PATCH 2/4] Do not hang on exit

---
 stytra/experiments/tracking_experiments.py | 3 ++-
 stytra/hardware/video/__init__.py          | 9 ++++++---
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/stytra/experiments/tracking_experiments.py b/stytra/experiments/tracking_experiments.py
index 4fb1ccb..e405ea3 100644
--- a/stytra/experiments/tracking_experiments.py
+++ b/stytra/experiments/tracking_experiments.py
@@ -134,7 +134,8 @@ class CameraVisualExperiment(VisualExperiment):
         for q in [self.camera.frame_queue]:
             q.clear()

-        self.camera.join()
+        if self.camera.is_alive():
+            self.camera.join()

     def excepthook(self, exctype, value, tb):
         """
diff --git a/stytra/hardware/video/__init__.py b/stytra/hardware/video/__init__.py
index 027b20f..f8b6b82 100644
--- a/stytra/hardware/video/__init__.py
+++ b/stytra/hardware/video/__init__.py
@@ -335,8 +335,11 @@ class VideoFileSource(VideoSource):
             container.streams.video[0].thread_count = 1

             prt = None
-            while self.loop:
+            while not self.kill_event.is_set():
                 for framedata in container.decode(video=0):
+                    if self.kill_event.is_set():
+                        break
+
                     messages = []
                     if self.paused:
                         frame = self.old_frame
@@ -363,9 +366,9 @@ class VideoFileSource(VideoSource):
                     for m in messages:
                         self.message_queue.put(m)

-                container.seek(0)
+                    print(framedata)

-            return
+                container.seek(0)

 class VideoControlParameters(ParametrizedQt):
-- 
2.26.2