NaturalHistoryMuseum / pylibdmtx

Read Data Matrix barcodes from Python 2 and 3.
MIT License
147 stars 56 forks source link

Number of decoded images limit #63

Open tyctor opened 3 years ago

tyctor commented 3 years ago

hallo

I have a simple script which reads Data Matrix codes from images. The image names are stored in a txt file, one image name per line. There are around 500000+ images.

When I run this script, it ends after 10000 lines or so and never does more. The number of processed lines is different in every run. I checked whether there is a memory leak, but it seems not: the process takes around 0.9% of memory, and that is not increasing.

Is there some limit on the number of images processed by this module? I cannot find out why not all lines are processed.

Here is the code of my script:

import argparse
import os
import cv2
from pylibdmtx.pylibdmtx import decode
import json
import time

def validate_filenames(parser, filename):
    """Check that *filename* exists on disk and hand it back.

    Intended as an argparse ``type`` callback: when the path does not exist,
    the problem is reported through ``parser.error``.

    Args:
        parser: Object exposing an ``error(message)`` method.
        filename: Path given on the command line.

    Returns:
        The unchanged *filename*.
    """
    path_is_missing = not os.path.exists(filename)
    if path_is_missing:
        message = "Invalid filename `{}`.".format(filename)
        parser.error(message)
    return filename

def read_file(filename, verbose=False):
    """Decode the Data Matrix codes in one image and store them as JSON.

    The image is loaded with OpenCV, rotated by 180 degrees, resized to
    256x128 and passed to ``decode``.  Results are written next to the image:

    * ``<root>.json`` when every payload is a UTF-8 encoded integer; the
      records are sorted by that integer.
    * ``<root>.json.invalid`` when at least one payload is not; such payloads
      are stored as text.

    An image that already has either output file is skipped, so an
    interrupted run can be restarted without redoing finished work.

    Args:
        filename: Path of the image file to read.
        verbose: When true, also report processed and skipped files.

    Returns:
        None.
    """
    root, _ = os.path.splitext(filename)
    jsonfile = f'{root}.json'
    errfile = f'{root}.json.invalid'
    if os.path.exists(jsonfile) or os.path.exists(errfile):
        if verbose:
            print(f'Parsed file exists {filename}')
        return

    image = cv2.imread(filename)
    if image is None:
        # cv2.imread reports a missing or unreadable file by returning None
        # instead of raising; passing that on to cv2.rotate would raise and
        # abort the whole batch.
        print(f'Cannot read image {filename}')
        return

    prepared = cv2.resize(cv2.rotate(image, cv2.ROTATE_180), (256, 128))
    decoded_data = decode(prepared)
    if not decoded_data:
        print(f'Nothing found in {filename}')
        return

    out = []
    invalid = False
    for data in decoded_data:
        try:
            value = data.data.decode('utf8')
        except UnicodeDecodeError:
            # Keep the payload as escaped text: raw bytes cannot be
            # serialised by json.dump.
            value = data.data.decode('utf8', errors='backslashreplace')
            invalid = True
        else:
            try:
                value = int(value)
            except ValueError:
                # Not an integer: keep the decoded text and flag the file.
                invalid = True
        out.append({'data': value, 'rect': data.rect._asdict()})

    if invalid:
        target, report = errfile, f'Wrong data in {filename}'
    else:
        # Every payload is an int here, so the records are comparable.
        out.sort(key=lambda record: record['data'])
        target, report = jsonfile, f'OK {filename}'

    # Close the output file deterministically; this runs once per image.
    with open(target, 'w') as handle:
        json.dump(out, handle)
    if verbose:
        print(report)

def read_filenames(contentfile, verbose=False):
    """Process every image listed in a text file, one path per line.

    Blank and whitespace-only lines are ignored.  In particular, the newline
    that usually ends the file no longer produces an empty file name that
    would be passed on to ``read_file``.

    Args:
        contentfile: Path of the text file holding the image paths.
        verbose: When true, print the number of entries and each entry with
            its index (counted over non-blank lines) before processing it.

    Returns:
        None.
    """
    # Read the listing up front and close it, so the file is not held open
    # while the images are processed.
    with open(contentfile, 'r') as f:
        stripped = (line.rstrip('\n') for line in f)
        filenames = [name for name in stripped if name.strip()]
    if verbose:
        print('Number of lines:', len(filenames))
    for idx, filename in enumerate(filenames):
        if verbose:
            print(idx, filename)
        read_file(filename, verbose=verbose)

def init_argparse() -> argparse.ArgumentParser:
    """Build the command-line parser for this script.

    The parser takes one or more positional paths (stored as ``files``), each
    checked with ``validate_filenames``, and an optional ``-v`` flag stored
    as ``verbose``.

    Returns:
        The configured ``argparse.ArgumentParser``.
    """
    parser = argparse.ArgumentParser(
        description='Read datamatrix codes from image files and store in json.',
        usage='%(prog)s <filenames>',
    )

    # Positional paths; each one is validated while the arguments are parsed.
    files_options = dict(
        metavar='filenames',
        nargs='+',
        help='Files to read.',
        type=lambda path: validate_filenames(parser, path),
    )
    parser.add_argument('files', **files_options)

    # Optional switch that turns on progress output.
    verbose_options = dict(
        dest='verbose',
        action='store_true',
        default=False,
        help='Print file names.',
    )
    parser.add_argument('-v', **verbose_options)

    return parser

def main() -> None:
    """Parse the command line and process each given path.

    A path whose extension is exactly ``.txt`` is treated as a list of image
    paths and handed to ``read_filenames``; any other path is handed to
    ``read_file`` as a single image.
    """
    args = init_argparse().parse_args()
    for filename in args.files:
        extension = os.path.splitext(filename)[1]
        handler = read_filenames if extension == '.txt' else read_file
        handler(filename, verbose=args.verbose)

# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()