trackmate-sc / TrackMate

TrackMate is your buddy for your everyday tracking.
https://imagej.net/plugins/trackmate
GNU General Public License v3.0
169 stars 76 forks source link

"GC overhead limit exceeded" from v7.6.2 #245

Open miura opened 1 year ago

miura commented 1 year ago

Hi JY @tinevez

I encountered the following error starting from the v7.6.2-snapshot, while the v7.6.1-snapshot or earlier versions do not show this memory handling problem.

Exception in thread "main" java.lang.IllegalArgumentException: Invalid service: net.imagej.animation.DefaultAnimationService
        at org.scijava.service.ServiceHelper.createExactService(ServiceHelper.java:278)
        at org.scijava.service.ServiceHelper.loadService(ServiceHelper.java:229)
        at org.scijava.service.ServiceHelper.loadService(ServiceHelper.java:192)
        at org.scijava.service.ServiceHelper.loadServices(ServiceHelper.java:164)
        at org.scijava.Context.<init>(Context.java:285)
        at org.scijava.Context.<init>(Context.java:234)
        at org.scijava.Context.<init>(Context.java:130)
        at org.scijava.Context.<init>(Context.java:117)
        at org.scijava.Context.<init>(Context.java:106)
        at sun.reflect.GeneratedConstructorAccessor17.newInstance(Unknown Source)
        at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
        at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
        at java.lang.Class.newInstance(Class.java:442)
        at ij.IJ.runUserPlugIn(IJ.java:239)
        at ij.IJ.runPlugIn(IJ.java:204)
        at ij.IJ.runPlugIn(IJ.java:193)
        at fiji.plugin.trackmate.util.TMUtils.getContext(TMUtils.java:714)
        at fiji.plugin.trackmate.detection.DetectionUtils.findLocalMaxima(DetectionUtils.java:355)
        at fiji.plugin.trackmate.detection.DogDetector.process(DogDetector.java:127)

The above error is just an example and can be different as memory exhaustion causes this:

Caused by: java.lang.OutOfMemoryError: GC overhead limit exceeded
        at java.util.Arrays.copyOf(Arrays.java:3332)
        at java.lang.AbstractStringBuilder.ensureCapacityInternal(AbstractStringBuilder.java:124)
        at java.lang.AbstractStringBuilder.append(AbstractStringBuilder.java:448)
        at java.lang.StringBuilder.append(StringBuilder.java:141)
        at org.bushe.swing.event.ThreadSafeEventService.subscribe(ThreadSafeEventService.java:346)
        at org.scijava.event.DefaultEventService.subscribe(DefaultEventService.java:184)
        at org.scijava.event.DefaultEventService.subscribe(DefaultEventService.java:203)
        at org.scijava.event.DefaultEventService.subscribe(DefaultEventService.java:137)
        at org.scijava.service.Service.registerEventHandlers(Service.java:66)
        at org.scijava.service.ServiceHelper.createServiceRecursively(ServiceHelper.java:353)
        at org.scijava.service.ServiceHelper.createExactService(ServiceHelper.java:267)

To reproduce this error, please try the following code with two different versions. I start to see the slowing down around 500th iteration, and often stops by 750th iteration. This is not affected by the VM setting such as -xmx.

I will add a download link to the sample image (hardcoded in the main ) below the code.

import java.util.List;

import fiji.plugin.trackmate.Spot;
import fiji.plugin.trackmate.detection.DogDetector;
import ij.IJ;
import ij.ImagePlus;
import net.imglib2.img.Img;
import net.imglib2.img.display.imagej.ImageJFunctions;
import net.imglib2.type.numeric.integer.UnsignedByteType;

public class TrackMateTest20230224 {

    void testMemoryDogDetector(ImagePlus pmimp, int radius,
            int threshold, boolean doSubpixel, boolean doMedian) {

        // Set the parameters for LogDetector
        Img<UnsignedByteType> img = ImageJFunctions.wrap(pmimp);
        Img<UnsignedByteType> interval = img;
        double[] calibration = { 1, 1, 1 };
        DogDetector<UnsignedByteType> detector = new DogDetector<UnsignedByteType>(
                img, interval, calibration, radius, threshold,
                doSubpixel, doMedian);
        // detector.setNumThreads(1);
        detector.process();
        List<Spot> peaks = detector.getResult();
        if (peaks.size() > 0) {
            IJ.log("=== Detected FOCI coordinates ===");
            for (Spot peak : peaks) {
                double xm = peak.getDoublePosition(0);
                double ym = peak.getDoublePosition(1);
                IJ.log("... " + Double.toString(xm) + ", "
                        + Double.toString(ym));
            }
        }
    }

    public void spot2DTestMinimal(String pp) {

        ImagePlus ccimpG = new ImagePlus(pp);
        for (int i = 0; i < 1000; i++) {
            IJ.log("=== count:" + Integer.toString(i));
            int radius = 3;
            int threshold = 5;
            boolean doSubpixel = false;
            boolean doMedian = true;
            testMemoryDogDetector(ccimpG, radius, threshold,
                    doSubpixel, doMedian);

        }
    }
    public static void main(String[] args) throws Exception {
        TrackMateTest20230224 tmt = new TrackMateTest20230224();
        String pp = "/Users/miura/samples/test5dots.tif";
        tmt.spot2DTestMinimal( pp );
    }
}

The sample image: https://www.dropbox.com/s/deu2vc6ftkp9wle/test5dots.tif?dl=0

Just as additional information: when similar processing is done using Jython, there is no problem. Interesting...

from ij import IJ, ImagePlus
from fiji.plugin.trackmate.detection import LogDetector, DogDetector
from net.imglib2.img.display.imagej import ImageJFunctions

def spotDetectionTrackMate(pmimp, Param_Dot):
    # Set the parameters for LogDetector
    img = ImageJFunctions.wrap( pmimp )
    interval = img
    calibration = [ 1, 1, 1 ]
    radius = Param_Dot[0]           
    threshold = Param_Dot[1] 
    doSubpixel = Param_Dot[2]
    doMedian = Param_Dot[3] 

    # Spot Detection
    #detector = LogDetector(img, interval, calibration, radius, threshold, doSubpixel, doMedian)
    detector = DogDetector(img, interval, calibration, radius, threshold, doSubpixel, doMedian) 
    detector.process()
    peaks = detector.getResult()
    print peaks
    print "=== Detected FOCI coordinates ==="
    for peak in peaks:
        xm = peak.getDoublePosition(0)
        ym = peak.getDoublePosition(1)
        print "x: ", xm, "y: ", ym #, "z: ", zm

pp = "/Users/miura/samples/test5dots.tif"
imp = ImagePlus(pp)
Param_DotG = [3, 5, False, True]
for i in range(1000):
    print "=== count ", str(i)
    spotDetectionTrackMate(imp, Param_DotG)

I hope this can be somehow fixed, as this is causing crashes when many spots are analyzed. Until this is fixed, I will use v7.6.1 to avoid the problem.

... and last but not least, big thanks to your masterpiece plugin! Cheers, Kota

ps: following versions were tested.

7.1.1 OK 7.5.1 OK 7.6.1 OK 7.6.2 failed 7.7.1 failed 7.10.2 failed 7.10.3 failed

tinevez commented 1 year ago

Thanks Kota for your kind words. This is super interesting, because the memory overflow seems to be caused by the log!!! That is there is much text to show that it fills the RAM. In the UI, the logger panel as a limited size to avoid this. I am surprised to see this happening here.

Can you try to remove all the IJ.log lines, and anything that prints something?

miura commented 1 year ago

Hi @tinevez

Great suggestion! so I tried:

So what was the change between v7.6.1 and v7.6.2? I cannot see the tag v7.6.2 in the repo but is there one in your local tags?

Cheers, Kota

miura commented 1 year ago

here is the stdout

log4j:WARN No appenders could be found for logger (org.bushe.swing.event.EventService).
log4j:WARN Please initialize the log4j system properly.
log4j:WARN See http://logging.apache.org/log4j/1.2/faq.html#noconfig for more info.
Exception in thread "main" java.lang.RuntimeException: java.util.concurrent.ExecutionException: java.lang.OutOfMemoryError: GC overhead limit exceeded
        at net.imglib2.algorithm.convolution.LineConvolution.execute(LineConvolution.java:184)
        at net.imglib2.algorithm.convolution.LineConvolution.forEachIntervalElementInParallel(LineConvolution.java:168)
        at net.imglib2.algorithm.convolution.LineConvolution.process(LineConvolution.java:116)
        at net.imglib2.algorithm.convolution.AbstractMultiThreadedConvolution.process(AbstractMultiThreadedConvolution.java:88)
        at net.imglib2.algorithm.convolution.Concatenation.process(Concatenation.java:113)
        at net.imglib2.algorithm.gauss3.Gauss3.gauss(Gauss3.java:207)
        at net.imglib2.algorithm.gauss3.Gauss3.gauss(Gauss3.java:131)
        at fiji.plugin.trackmate.detection.DogDetector.lambda$process$0(DogDetector.java:111)
        at net.imglib2.parallel.Parallelization.runWithExecutor(Parallelization.java:201)
        at net.imglib2.parallel.Parallelization.runWithNumThreads(Parallelization.java:158)
        at fiji.plugin.trackmate.detection.DogDetector.process(DogDetector.java:110)
        at miura.test.trackmate.TrackMateTest20230224.testMemoryDogDetector(TrackMateTest20230224.java:39)
        at miura.test.trackmate.TrackMateTest20230224.spot2DTestMinimal(TrackMateTest20230224.java:149)
        at miura.test.trackmate.TrackMateTest20230224.main(TrackMateTest20230224.java:161)
Caused by: java.util.concurrent.ExecutionException: java.lang.OutOfMemoryError: GC overhead limit exceeded
        at java.util.concurrent.FutureTask.report(FutureTask.java:122)
        at java.util.concurrent.FutureTask.get(FutureTask.java:192)
        at net.imglib2.algorithm.convolution.LineConvolution.execute(LineConvolution.java:177)
        ... 13 more
Caused by: java.lang.OutOfMemoryError: GC overhead limit exceeded
        at net.imglib2.outofbounds.OutOfBoundsMirrorFactory.create(OutOfBoundsMirrorFactory.java:69)
        at net.imglib2.outofbounds.OutOfBoundsMirrorFactory.create(OutOfBoundsMirrorFactory.java:51)
        at net.imglib2.view.ExtendedRandomAccessibleInterval.randomAccess(ExtendedRandomAccessibleInterval.java:75)
        at net.imglib2.view.ExtendedRandomAccessibleInterval.randomAccess(ExtendedRandomAccessibleInterval.java:54)
        at net.imglib2.view.IntervalView.randomAccess(IntervalView.java:131)
        at net.imglib2.algorithm.convolution.LineConvolution.lambda$process$1(LineConvolution.java:99)
        at net.imglib2.algorithm.convolution.LineConvolution$$Lambda$16/1188753216.get(Unknown Source)
        at net.imglib2.algorithm.convolution.LineConvolution.lambda$forEachIntervalElementInParallel$2(LineConvolution.java:156)
        at net.imglib2.algorithm.convolution.LineConvolution$$Lambda$17/924477420.call(Unknown Source)
        at java.util.concurrent.FutureTask.run(FutureTask.java:266)
        at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
        at java.lang.Thread.run(Thread.java:750)
tinevez commented 1 year ago

Ok then it's definitely an issue with the memory not clearing between iterations. I suspect it is the same reason that for #185, for which I could not find a fix. I also do not understand why it happens in v7.6.2 and not in v7.6.1. The code you attach seems to be an excellent way to start investigating in a systematic manner (thanks!) but I have no time and energy in the next month. Please be patient.

In you Java code I see that we always use the same imp, so that is not a problem of closing and freeing the imp. Does the problem go away if you put the line

 Img<UnsignedByteType> img = ImageJFunctions.wrap(pmimp);

outside of the loop?

miura commented 1 year ago

OK, so I tried compacting the loop content more and using the final declaration, hoping there is a bit more stability in the memory usage but the symptom is the same.

import org.scijava.command.Command;
import org.scijava.log.Logger;
import org.scijava.plugin.Parameter;
import org.scijava.plugin.Plugin;

import fiji.plugin.trackmate.detection.DogDetector;
import ij.ImagePlus;
import net.imagej.ImageJ;
import net.imglib2.img.Img;
import net.imglib2.img.display.imagej.ImageJFunctions;
import net.imglib2.type.numeric.integer.UnsignedByteType;

@Plugin(type = Command.class, menuPath = "Tutorials>TrackMate test")
public class TrackMateTest20230228v3 implements Command {

    @Parameter
    private Logger logger;

    @Override
    public void run() {
        logger.info("test");
        String pp = "/Users/miura/samples/test5dots.tif";
        final ImagePlus ccimpG = new ImagePlus(pp);
        final int radius = 3;
        final int threshold = 5;
        final boolean doSubpixel = false;
        final boolean doMedian = true;
        final Img<UnsignedByteType> img = ImageJFunctions.wrap(ccimpG);
        final Img<UnsignedByteType> interval = img;
        final double[] calibration = { 1, 1, 1 };
        for (int i = 0; i < 1000; i++) {
            logger.info("=== count:" + Integer.toString(i));
            final DogDetector<UnsignedByteType> detector = new DogDetector<UnsignedByteType>(
                img, interval, calibration, radius, threshold,
                doSubpixel, doMedian);
            // detector.setNumThreads(1);
            detector.process();

        }
    }

    public static void main(String[] args) throws Exception {
        System.out.println(System.getProperty("java.vm.name"));
        System.out.println(System.getProperty("java.vm.version"));
        System.out.println(System.getProperty("java.vm.info"));
        final ImageJ ij = new ImageJ();
        // ij.launch(args);
        // ij.ui().showUI();
        ij.command().run(TrackMateTest20230228v3.class, true);

    }
}