nextcloud / recognize

šŸ‘ šŸ‘‚ Smart media tagging for Nextcloud: recognizes faces, objects, landscapes, music genres
https://apps.nextcloud.com/apps/recognize
GNU Affero General Public License v3.0
539 stars 45 forks source link

Facial Recognition not progressing after updating to 3.2.1 #483

Closed biglari closed 1 year ago

biglari commented 1 year ago

Describe the bug

I was using Recognize 3.1.2, which was Queuing images I had blocked with .nomedia tags, so I stopped recognition again. At this point, 10s of thousands of images had been queued. I saw this problem was fixed for 3.2.1, so I updated.

After updating, I started the facial recognition again, and left it running for 24hrs. It has made no progress (15361 have been queued since I activated facial recognition). I checked the log and found the following error, repeating every 5 minutes:


OC\DB\Exceptions\DbalException: An exception occurred while executing a query: SQLSTATE[HY000]: General error: 7 number of parameters must be between 0 and 65535

    /var/www/html/lib/private/DB/QueryBuilder/QueryBuilder.php - line 296:

    OC\DB\Exceptions\DbalException::wrap(Doctrine\DBA ... {})

    /var/www/html/custom_apps/recognize/lib/BackgroundJobs/StorageCrawlJob.php - line 164:

    OC\DB\QueryBuilder\QueryBuilder->executeQuery()

    /var/www/html/lib/public/BackgroundJob/Job.php - line 78:

    OCA\Recognize\BackgroundJobs\StorageCrawlJob->run([ 1,1,2,0,[ "faces"]])

    /var/www/html/lib/public/BackgroundJob/QueuedJob.php - line 58:

    OCP\BackgroundJob\Job->start(OC\BackgroundJob\JobList {})

    /var/www/html/lib/public/BackgroundJob/QueuedJob.php - line 48:

    OCP\BackgroundJob\QueuedJob->start(OC\BackgroundJob\JobList {})

    /var/www/html/cron.php - line 152:

    OCP\BackgroundJob\QueuedJob->execute(OC\BackgroundJob\JobList {}, OC\Log {})

Caused by Doctrine\DBAL\Exception\DriverException: An exception occurred while executing a query: SQLSTATE[HY000]: General error: 7 number of parameters must be between 0 and 65535

    /var/www/html/3rdparty/doctrine/dbal/src/Connection.php - line 1780:

    Doctrine\DBAL\Driver\API\PostgreSQL\ExceptionConverter->convert(Doctrine\DBA ... {}, Doctrine\DBAL\Query {})

    /var/www/html/3rdparty/doctrine/dbal/src/Connection.php - line 1719:

    Doctrine\DBAL\Connection->handleDriverException(Doctrine\DBA ... {}, Doctrine\DBAL\Query {})

    /var/www/html/3rdparty/doctrine/dbal/src/Connection.php - line 1067:

    Doctrine\DBAL\Connection->convertExceptionDuringQuery(Doctrine\DBA ... {}, "SELECT \"fi ... 0", [ 1,"files/% ... "], [ 1,2,2,2,1, ... "])

    /var/www/html/lib/private/DB/Connection.php - line 261:

    Doctrine\DBAL\Connection->executeQuery("SELECT \"fi ... 0", [ 1,"files/% ... "], [ 1,2,2,2,1, ... "], null)

    /var/www/html/3rdparty/doctrine/dbal/src/Query/QueryBuilder.php - line 345:

    OC\DB\Connection->executeQuery("SELECT \"fi ... 0", [ [ 7395,740 ... "], [ 101,101,10 ... "])

    /var/www/html/lib/private/DB/QueryBuilder/QueryBuilder.php - line 281:

    Doctrine\DBAL\Query\QueryBuilder->execute()

    /var/www/html/lib/private/DB/QueryBuilder/QueryBuilder.php - line 294:

    OC\DB\QueryBuilder\QueryBuilder->execute()

    /var/www/html/custom_apps/recognize/lib/BackgroundJobs/StorageCrawlJob.php - line 164:

    OC\DB\QueryBuilder\QueryBuilder->executeQuery()

    /var/www/html/lib/public/BackgroundJob/Job.php - line 78:

    OCA\Recognize\BackgroundJobs\StorageCrawlJob->run([ 1,1,2,0,[ "faces"]])

    /var/www/html/lib/public/BackgroundJob/QueuedJob.php - line 58:

    OCP\BackgroundJob\Job->start(OC\BackgroundJob\JobList {})

    /var/www/html/lib/public/BackgroundJob/QueuedJob.php - line 48:

    OCP\BackgroundJob\QueuedJob->start(OC\BackgroundJob\JobList {})

    /var/www/html/cron.php - line 152:

    OCP\BackgroundJob\QueuedJob->execute(OC\BackgroundJob\JobList {}, OC\Log {})

Caused by Doctrine\DBAL\Driver\PDO\Exception: SQLSTATE[HY000]: General error: 7 number of parameters must be between 0 and 65535

    /var/www/html/3rdparty/doctrine/dbal/src/Driver/PDO/Statement.php - line 94:

    Doctrine\DBAL\Driver\PDO\Exception::new(PDOException ... ]})

    /var/www/html/3rdparty/doctrine/dbal/src/Connection.php - line 1057:

    Doctrine\DBAL\Driver\PDO\Statement->execute()

    /var/www/html/lib/private/DB/Connection.php - line 261:

    Doctrine\DBAL\Connection->executeQuery("SELECT \"fi ... 0", [ 1,"files/% ... "], [ 1,2,2,2,1, ... "], null)

    /var/www/html/3rdparty/doctrine/dbal/src/Query/QueryBuilder.php - line 345:

    OC\DB\Connection->executeQuery("SELECT \"fi ... 0", [ [ 7395,740 ... "], [ 101,101,10 ... "])

    /var/www/html/lib/private/DB/QueryBuilder/QueryBuilder.php - line 281:

    Doctrine\DBAL\Query\QueryBuilder->execute()

    /var/www/html/lib/private/DB/QueryBuilder/QueryBuilder.php - line 294:

    OC\DB\QueryBuilder\QueryBuilder->execute()

    /var/www/html/custom_apps/recognize/lib/BackgroundJobs/StorageCrawlJob.php - line 164:

    OC\DB\QueryBuilder\QueryBuilder->executeQuery()

    /var/www/html/lib/public/BackgroundJob/Job.php - line 78:

    OCA\Recognize\BackgroundJobs\StorageCrawlJob->run([ 1,1,2,0,[ "faces"]])

    /var/www/html/lib/public/BackgroundJob/QueuedJob.php - line 58:

    OCP\BackgroundJob\Job->start(OC\BackgroundJob\JobList {})

    /var/www/html/lib/public/BackgroundJob/QueuedJob.php - line 48:

    OCP\BackgroundJob\QueuedJob->start(OC\BackgroundJob\JobList {})

    /var/www/html/cron.php - line 152:

    OCP\BackgroundJob\QueuedJob->execute(OC\BackgroundJob\JobList {}, OC\Log {})

Caused by PDOException: SQLSTATE[HY000]: General error: 7 number of parameters must be between 0 and 65535

    /var/www/html/3rdparty/doctrine/dbal/src/Driver/PDO/Statement.php - line 92:

    PDOStatement->execute(null)

    /var/www/html/3rdparty/doctrine/dbal/src/Connection.php - line 1057:

    Doctrine\DBAL\Driver\PDO\Statement->execute()

    /var/www/html/lib/private/DB/Connection.php - line 261:

    Doctrine\DBAL\Connection->executeQuery("SELECT \"fi ... 0", [ 1,"files/% ... "], [ 1,2,2,2,1, ... "], null)

    /var/www/html/3rdparty/doctrine/dbal/src/Query/QueryBuilder.php - line 345:

    OC\DB\Connection->executeQuery("SELECT \"fi ... 0", [ [ 7395,740 ... "], [ 101,101,10 ... "])

    /var/www/html/lib/private/DB/QueryBuilder/QueryBuilder.php - line 281:

    Doctrine\DBAL\Query\QueryBuilder->execute()

    /var/www/html/lib/private/DB/QueryBuilder/QueryBuilder.php - line 294:

    OC\DB\QueryBuilder\QueryBuilder->execute()

    /var/www/html/custom_apps/recognize/lib/BackgroundJobs/StorageCrawlJob.php - line 164:

    OC\DB\QueryBuilder\QueryBuilder->executeQuery()

    /var/www/html/lib/public/BackgroundJob/Job.php - line 78:

    OCA\Recognize\BackgroundJobs\StorageCrawlJob->run([ 1,1,2,0,[ "faces"]])

    /var/www/html/lib/public/BackgroundJob/QueuedJob.php - line 58:

    OCP\BackgroundJob\Job->start(OC\BackgroundJob\JobList {})

    /var/www/html/lib/public/BackgroundJob/QueuedJob.php - line 48:

    OCP\BackgroundJob\QueuedJob->start(OC\BackgroundJob\JobList {})

    /var/www/html/cron.php - line 152:

    OCP\BackgroundJob\QueuedJob->execute(OC\BackgroundJob\JobList {}, OC\Log {})

Uninstalling and reinstalling recognize, did not fix the error.

To Reproduce Steps to reproduce the behavior: I am running only one instance of Nextcloud, and have no second instance on which I could try to reproduce this issue. So I am unsure how to reproduce it.

Expected behavior Recognize should be able to process the queued images without Database Errors.

Recognize (please complete the following information):

Server (please complete the following information):

Additional context Nextcloud is installed on TrueNAS Next as a k3s deployment

marcelklehr commented 1 year ago

Arrgh, we are killing our query builder :S I'll try to come up with a fix.

biglari commented 1 year ago

@marcelklehr I am not familiar with the nextcloud and recognize source code, but I was looking at the code a bit, to try and understand what the source of the error could be. As far as I understand it, a select query is built to retrieve ignore files from the Database. These are then used to generate a list of all files within the ignore files parent folders, which are then used to generate a list of file IDs. These IDs are then entered into a second query to filter the results. I assume this list of IDs is also the reason the query fails with too many parameters.

Perhaps a workaround would be to enter the filter IDs into a temporary table, then use a WHERE ... IN (SELECT...) or an INNER JOIN to filter the results. The IDs could be entered into the temporary table in batches.

biglari commented 1 year ago

@marcelklehr Unfortunately this error was not fixed with 3.2.2. For additional information I have uploaded the raw log entry of the error. recognizeDBErrorLog.txt

biglari commented 1 year ago

It seems there is a limit on how many total parameters may be in a single query, the limit of 999 is for single expressions only. The total limit however is 65535. The queries generated in my instance have somewhere around 10x as many parameters.

marcelklehr commented 1 year ago

Unfortunately this error was not fixed with 3.2.2

I'm aware. The fix was merged after 3.2.2 was released.

biglari commented 1 year ago

Unfortunately this error was not fixed with 3.2.2

I'm aware. The fix was merged after 3.2.2 was released.

I apologize, the release lists #493 as part of the release, If that is not the case, then I shall wait to test this on the next release.

marcelklehr commented 1 year ago

Mh. I don't see it here: https://github.com/nextcloud/recognize/releases/tag/v3.2.2

biglari commented 1 year ago

Sorry, my mistake,

biglari commented 1 year ago

Hate to spam a closed issue. I setup a test environment and had trouble reproducing the issue with a clone from master, so I updated my main instance to the master version, and found the error persisted. I ultimately settled for the following solution. I am unsure if these changes are acceptable to your project, but perhaps this might be useful to you:

StorageCrawlJob.php


<?php
/*
 * Copyright (c) 2021. The Nextcloud Recognize contributors.
 *
 * This file is licensed under the Affero General Public License version 3 or later. See the COPYING file.
 */

namespace OCA\Recognize\BackgroundJobs;

use OC\Files\Cache\CacheQueryBuilder;
use OC\SystemConfig;
use OCA\Recognize\Classifiers\Audio\MusicnnClassifier;
use OCA\Recognize\Classifiers\Images\ClusteringFaceClassifier;
use OCA\Recognize\Classifiers\Images\ImagenetClassifier;
use OCA\Recognize\Classifiers\Images\LandmarksClassifier;
use OCA\Recognize\Classifiers\Video\MovinetClassifier;
use OCA\Recognize\Constants;
use OCA\Recognize\Db\QueueFile;
use OCA\Recognize\Service\Logger;
use OCA\Recognize\Service\QueueService;                                             
use OCA\Recognize\Service\TagManager;
use OCP\AppFramework\Utility\ITimeFactory;
use OCP\BackgroundJob\IJobList;
use OCP\BackgroundJob\QueuedJob;
use OCP\DB\Exception;
use OCP\DB\QueryBuilder\IQueryBuilder;
use OCP\Files\IMimeTypeLoader;
use OCP\IDBConnection;
use Psr\Log\LoggerInterface;

class StorageCrawlJob extends QueuedJob {
    private LoggerInterface $logger;
    private IMimeTypeLoader $mimeTypes;
    private QueueService $queue;
    private IJobList $jobList;
    private IDBConnection $db;
    private SystemConfig $systemConfig;
    private TagManager $tagManager;

    public function __construct(ITimeFactory $timeFactory, Logger $logger, IMimeTypeLoader $mimeTypes, QueueService $queue, IJobList $jobList, IDBConnection $db, SystemConfig $systemConfig, TagManager $tagManager) {
        parent::__construct($timeFactory);
        $this->logger = $logger;
        $this->mimeTypes = $mimeTypes;
        $this->queue = $queue;
        $this->jobList = $jobList;
        $this->db = $db;
        $this->systemConfig = $systemConfig;
        $this->tagManager = $tagManager;
    }

    private function getDir(int $fileid, array $directoryTypes, bool $recursive = false): array {
        /** @var \OCP\DB\QueryBuilder\IQueryBuilder $qb */
        $qb = new CacheQueryBuilder($this->db, $this->systemConfig, $this->logger);
        $dir = $qb->selectFileCache()
            ->andWhere($qb->expr()->in('mimetype', $qb->createNamedParameter($directoryTypes, IQueryBuilder::PARAM_INT_ARRAY)))
            ->andWhere($qb->expr()->eq('parent', $qb->createNamedParameter($fileid)))
            ->executeQuery()->fetchAll();

        if ($recursive) {
            foreach ($dir as $item) {
                $dir = array_merge($dir, $this->getDir($item['fileid'], $directoryTypes, $recursive));
            }
        }
        return $dir;
    }

    protected function run($argument): void {
        $storageId = $argument['storage_id'];
        $rootId = $argument['root_id'];
        $overrideRoot = $argument['override_root'];
        $lastFileId = $argument['last_file_id'];
        /**
         * @var list<string> $models
         */
        $models = $argument['models'] ?? [
            ClusteringFaceClassifier::MODEL_NAME,
            ImagenetClassifier::MODEL_NAME,
            LandmarksClassifier::MODEL_NAME,
            MovinetClassifier::MODEL_NAME,
            MusicnnClassifier::MODEL_NAME,
        ];

        /** @var \OCP\DB\QueryBuilder\IQueryBuilder $qb */
        $qb = new CacheQueryBuilder($this->db, $this->systemConfig, $this->logger);
        try {
            $root = $qb->selectFileCache()
                ->andWhere($qb->expr()->eq('filecache.fileid', $qb->createNamedParameter($overrideRoot, IQueryBuilder::PARAM_INT)))
                ->executeQuery()->fetch();
        } catch (Exception $e) {
            $this->logger->error('Could not fetch storage root', ['exception' => $e]);
            return;
        }

        if ($root === false) {
            $this->logger->error('Could not find storage root');
            return;
        }

        $imageTypes = array_map(fn ($mimeType) => $this->mimeTypes->getId($mimeType), Constants::IMAGE_FORMATS);
        $videoTypes = array_map(fn ($mimeType) => $this->mimeTypes->getId($mimeType), Constants::VIDEO_FORMATS);
        $audioTypes = array_map(fn ($mimeType) => $this->mimeTypes->getId($mimeType), Constants::AUDIO_FORMATS);

        $qb = new CacheQueryBuilder($this->db, $this->systemConfig, $this->logger);

        $mediaSelectors = array('.nomedia');
        if (count(array_intersect([ClusteringFaceClassifier::MODEL_NAME, ImagenetClassifier::MODEL_NAME, LandmarksClassifier::MODEL_NAME], $models)) > 0) {
            $mediaSelectors[] = '.noimage';
        }
        if (in_array(MovinetClassifier::MODEL_NAME, $models)) {
            $mediaSelectors[] = '.novideo';
        }
        if (in_array(MusicnnClassifier::MODEL_NAME, $models)) {
            $mediaSelectors[] = '.nomusic';
        }
        try {
            $path = $root['path'] === '' ? '' :  $root['path'] . '/';

            $qb->select('C.fileid', 'C.storage', 'C.path', 'C.path_hash', 'C.parent', 'C.name', 'C.mimetype', 'C.mimepart', 'C.size', 'C.mtime',
            'C.storage_mtime', 'C.encrypted', 'C.etag', 'C.permissions', 'C.checksum', 'C.unencrypted_size', 'D.metadata_etag', 'D.creation_time', 'D.upload_time')
                ->from('filecache', 'A')
                ->leftJoin('A', 'filecache', 'B', $qb->expr()->eq('A.parent', 'B.fileid'))
                ->innerJoin('B', 'filecache', 'C', $qb->expr()->andX("\"C\".\"path\" LIKE \"B\".\"path\" || '%'", $qb->expr()->notIn('A.name', $qb->createNamedParameter($mediaSelectors, IQueryBuilder::PARAM_STR_ARRAY))))
                ->leftJoin('C', 'filecache_extended', 'D', $qb->expr()->eq("C.fileid", 'D.fileid'))
                ->where($qb->expr()->eq('C.storage', $qb->createNamedParameter($storageId, IQueryBuilder::PARAM_INT)))
                ->andWhere($qb->expr()->like('C.path', $qb->createNamedParameter($path . '%')))
                ->andWhere($qb->expr()->gt('C.fileid', $qb->createNamedParameter($lastFileId)));;

            $files = $qb->orderBy('C.fileid', 'ASC')
                ->setMaxResults(100)
                ->executeQuery();
        } catch (Exception $e) {
            $this->logger->error('Could not fetch files', ['exception' => $e]);
            return;
        }

        // Remove current iteration
        $this->jobList->remove(self::class, $argument);

        $i = 0;
        /** @var array $file */
        while ($file = $files->fetch()) {
            $i++;
            $queueFile = new QueueFile();
            $queueFile->setStorageId((string) $storageId);
            $queueFile->setRootId($rootId);
            $queueFile->setFileId($file['fileid']);
            $queueFile->setUpdate(false);
            try {
                if (in_array($file['mimetype'], $imageTypes)) {
                    if (in_array(ImagenetClassifier::MODEL_NAME, $models)) {
                        $this->queue->insertIntoQueue(ImagenetClassifier::MODEL_NAME, $queueFile);
                    }
                    if (!in_array(ImagenetClassifier::MODEL_NAME, $models) && in_array(LandmarksClassifier::MODEL_NAME, $models)) {
                        $tags = $this->tagManager->getTagsForFiles([$queueFile->getFileId()]);
                        /** @var \OCP\SystemTag\ISystemTag[] $fileTags */
                        $fileTags = $tags[$queueFile->getFileId()];
                        $landmarkTags = array_filter($fileTags, function ($tag) {
                            return in_array($tag->getName(), LandmarksClassifier::PRECONDITION_TAGS);
                        });
                        if (count($landmarkTags) > 0) {
                            $this->queue->insertIntoQueue(LandmarksClassifier::MODEL_NAME, $queueFile);
                        }
                    }
                    if (in_array(ClusteringFaceClassifier::MODEL_NAME, $models)) {
                        $this->queue->insertIntoQueue(ClusteringFaceClassifier::MODEL_NAME, $queueFile);
                    }
                }
                if (in_array($file['mimetype'], $videoTypes)) {
                    $this->queue->insertIntoQueue(MovinetClassifier::MODEL_NAME, $queueFile);
                }
                if (in_array($file['mimetype'], $audioTypes)) {
                    $this->queue->insertIntoQueue(MusicnnClassifier::MODEL_NAME, $queueFile);
                }
            } catch (Exception $e) {
                $this->logger->error('Failed to add file to queue', ['exception' => $e]);
                return;
            }
        }

        if ($i > 0) {
            // Schedule next iteration
            $this->jobList->add(self::class, [
                'storage_id' => $storageId,
                'root_id' => $rootId,
                'override_root' => $overrideRoot,
                'last_file_id' => $queueFile->getFileId(),
                'models' => $models,
            ]);
        }
    }
}
biglari commented 1 year ago

@marcelklehr Would it make sense to you, if I made a pull request with the changes I've made?