I have a simple script which reads DataMatrix codes from images.
The image names are stored in a txt file, one image name per line; there are around 500,000+ images.
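The list file is plain text, one image path per line, something like this (hypothetical names, just to show the format):

images/batch01/frame_000001.png
images/batch01/frame_000002.png
images/batch01/frame_000003.png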
When I run this script, it ends after 10,000 lines or so and never gets further; the number of processed lines is different in every run.
I checked whether there is a memory leak, but it seems there is none: the process takes around 0.9% of memory, and that is not increasing.
Is there some limit on the number of images this module can process? I cannot find out why not all lines are processed.
Here is the code of my script:
import argparse
import json
import os

import cv2
from pylibdmtx.pylibdmtx import decode


def validate_filenames(parser, filename):
    # argparse type-callback: reject paths that do not exist
    if not os.path.exists(filename):
        parser.error("Invalid filename `{}`.".format(filename))
    return filename


def read_file(filename, verbose=False):
    root, ext = os.path.splitext(filename)
    jsonfile = f'{root}.json'
    errfile = f'{root}.json.invalid'
    if not os.path.exists(jsonfile) and not os.path.exists(errfile):
        # load the image, rotate it 180 degrees, downscale, then decode
        decoded_data = decode(cv2.resize(cv2.rotate(cv2.imread(filename), cv2.ROTATE_180), (256, 128)))
        if not decoded_data:
            print(f'Nothing found in {filename}')
            return
        out = []
        exc = False
        for data in decoded_data:
            decoded = {'data': data.data, 'rect': data.rect._asdict()}
            try:
                value = data.data.decode('utf8')
                decoded['data'] = value
                decoded['data'] = int(value)
            except (UnicodeDecodeError, ValueError):
                # payload is not UTF-8 or not an integer; the value may
                # stay as raw bytes and the file is flagged as invalid
                exc = True
            out.append(decoded)
        if not out:
            print(f'Nothing found in {filename}')
            return
        if exc:
            with open(errfile, 'w') as f:
                json.dump(out, f)
            if verbose:
                print(f'Wrong data in {filename}')
        else:
            out = sorted(out, key=lambda x: x.get('data'))
            with open(jsonfile, 'w') as f:
                json.dump(out, f)
            if verbose:
                print(f'OK {filename}')
    else:
        if verbose:
            print(f'Parsed file exists {filename}')


def read_filenames(contentfile, verbose=False):
    with open(contentfile, 'r') as f:
        filenames = f.read().split('\n')
    if verbose:
        print('Number of lines:', len(filenames))
    for idx, filename in enumerate(filenames):
        if verbose:
            print(idx, filename)
        read_file(filename, verbose=verbose)


def init_argparse() -> argparse.ArgumentParser:
    parser = argparse.ArgumentParser(
        usage='%(prog)s <filenames>',
        description='Read datamatrix codes from image files and store in json.'
    )
    parser.add_argument(
        'files', metavar='filenames',
        type=lambda f: validate_filenames(parser, f),
        nargs='+',
        help='Files to read.'
    )
    parser.add_argument(
        '-v',
        action='store_true',
        dest='verbose',
        default=False,
        help='Print file names.'
    )
    return parser


def main() -> None:
    parser = init_argparse()
    args = parser.parse_args()
    for filename in args.files:
        root, ext = os.path.splitext(filename)
        if ext == '.txt':
            # a .txt argument is treated as a list of image paths
            read_filenames(filename, verbose=args.verbose)
        else:
            read_file(filename, verbose=args.verbose)


if __name__ == "__main__":
    main()
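I run it as python read_codes.py images.txt -v (script and file names here are only examples). To see exactly where a run dies, one idea would be to wrap the per-file call and log any exception instead of letting it end the loop; this is only a sketch on top of the functions above, using the standard traceback module, not something the script currently does:

import traceback

def read_filenames_logged(contentfile, verbose=False):
    # same iteration as read_filenames above, but a failing image is
    # reported and skipped instead of ending the whole run
    with open(contentfile, 'r') as f:
        filenames = f.read().split('\n')
    for idx, filename in enumerate(filenames):
        try:
            read_file(filename, verbose=verbose)
        except Exception:
            print(f'Failed at line {idx}: {filename!r}')
            traceback.print_exc()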