apache / mxnet

Lightweight, Portable, Flexible Distributed/Mobile Deep Learning with Dynamic, Mutation-aware Dataflow Dep Scheduler; for Python, R, Julia, Scala, Go, Javascript and more
https://mxnet.apache.org
Apache License 2.0
20.76k stars 6.8k forks source link

End2End Captcha Recognition (OCR) [Segmentation fault] #5153

Closed ZHmao closed 6 years ago

ZHmao commented 7 years ago

When I run the End2End Captcha Recognition (OCR) example, I get a segmentation fault. The code comes from the original blog post; the only change I made was switching the devs from GPU to CPU, because I do not have GPU hardware. Below is the main code:

class OCRIter(mx.io.DataIter):
    """Data iterator that synthesizes captcha images on the fly.

    Each pass over the iterator yields ``count // batch_size`` batches. Every
    sample is rendered with ``ImageCaptcha``, decoded and resized with OpenCV,
    scaled by 1/255 and transposed so the channel axis comes first, matching
    the ``(batch_size, 3, height, width)`` shape advertised in
    ``provide_data``.

    Parameters
    ----------
    count : int
        Total number of samples generated per pass.
    batch_size : int
        Number of samples per yielded batch.
    num_label : int
        Number of label entries per sample (second dimension of
        ``softmax_label``).
    height, width : int
        Size every generated image is resized to.
    """

    def __init__(self, count, batch_size, num_label, height, width):
        super(OCRIter, self).__init__()
        self.captcha = ImageCaptcha(fonts=['./data/OpenSans-Regular.ttf'])
        self.batch_size = batch_size
        self.count = count
        self.height = height
        self.width = width
        self.provide_data = [('data', (batch_size, 3, height, width))]
        self.provide_label = [('softmax_label', (self.batch_size, num_label))]

    def __iter__(self):
        # Floor division: plain `/` yields a float under true division
        # (Python 3), which range() rejects with a TypeError.
        for _ in range(self.count // self.batch_size):
            data = []
            label = []
            for i in range(self.batch_size):
                # gen_rand() is defined elsewhere; presumably it returns the
                # captcha text to render -- confirm against its definition.
                num = gen_rand()
                img = self.captcha.generate(num)
                # frombuffer is the supported way to view raw bytes as an
                # array; fromstring's binary mode is deprecated in NumPy.
                img = np.frombuffer(img.getvalue(), dtype='uint8')
                img = cv2.imdecode(img, cv2.IMREAD_COLOR)
                img = cv2.resize(img, (self.width, self.height))
                # Debug dump kept from the original: overwrites ./tmp0.png ..
                # ./tmp9.png with the most recent samples.
                cv2.imwrite("./tmp" + str(i % 10) + ".png", img)
                img = np.multiply(img, 1/255.0)
                # Move the last axis to the front to match provide_data.
                img = img.transpose(2, 0, 1)
                data.append(img)
                label.append(get_label(num))

            data_all = [mx.nd.array(data)]
            label_all = [mx.nd.array(label)]
            data_names = ['data']
            label_names = ['softmax_label']

            data_batch = OCRBatch(data_names, data_all, label_names, label_all)
            yield data_batch

    def reset(self):
        # Nothing to rewind: __iter__ generates fresh samples on every pass.
        pass

def get_ocrnet():
    """Build the captcha-recognition symbol.

    Structure, in creation order:
      * three Convolution -> Pooling -> ReLU stages (32 filters each;
        kernels 5x5, 5x5, 3x3; pooling max, avg, avg with a 2x2 window
        and stride 1),
      * Flatten followed by a 512-unit FullyConnected layer,
      * four 10-unit FullyConnected heads concatenated along dim 0,
      * the ``softmax_label`` variable transposed and reshaped with
        ``target_shape=(0, )`` before being fed to SoftmaxOutput.

    Returns the SoftmaxOutput symbol named "softmax".
    """
    sym = mx.symbol
    net = sym.Variable('data')
    label = sym.Variable('softmax_label')

    # (convolution kernel, pooling type) for each feature-extraction stage.
    stages = (
        ((5, 5), "max"),
        ((5, 5), "avg"),
        ((3, 3), "avg"),
    )
    for conv_kernel, pooling in stages:
        net = sym.Convolution(data=net, kernel=conv_kernel, num_filter=32)
        net = sym.Pooling(data=net, pool_type=pooling,
                          kernel=(2, 2), stride=(1, 1))
        net = sym.Activation(data=net, act_type="relu")

    flat = sym.Flatten(data=net)
    hidden = sym.FullyConnected(data=flat, num_hidden=512)

    # One 10-way head per label position, all fed from the shared hidden layer.
    heads = []
    for _ in range(4):
        heads.append(sym.FullyConnected(data=hidden, num_hidden=10))
    stacked = sym.Concat(*heads, dim=0)

    label_t = sym.transpose(data=label)
    label_flat = sym.Reshape(data=label_t, target_shape=(0, ))
    return sym.SoftmaxOutput(data=stacked, label=label_flat, name="softmax")

def accuracy(label, pred):
    """Return the fraction of 4-row groups in ``pred`` that are fully correct.

    ``label`` is transposed and flattened to 1-D. Rows of ``pred`` are then
    taken in consecutive groups of four; a group counts as a hit only when the
    argmax of each of its rows equals the label entry at the same flat index.

    Parameters
    ----------
    label : numpy.ndarray
        2-D label array; flattened here via ``label.T.reshape((-1, ))``.
    pred : numpy.ndarray
        2-D score array with one row per flattened label entry.

    Returns
    -------
    float
        ``hits / groups``, or ``0.0`` when ``pred`` has fewer than four rows
        (previously a ZeroDivisionError).

    NOTE(review): rows are grouped as ``i * 4 + j``. If ``pred`` is laid out
    head-major (all samples of position 0, then position 1, ...), the rows of
    one sample would instead be ``j * batch + i`` -- confirm against the
    network that produces ``pred``.
    """
    label = label.T.reshape((-1, ))
    # Floor division: `/` gives a float under true division (Python 3) and
    # range() would raise a TypeError.
    num_groups = pred.shape[0] // 4
    if num_groups == 0:
        return 0.0

    hit = 0
    for i in range(num_groups):
        base = i * 4
        # all() short-circuits on the first wrong row, like the original break.
        if all(np.argmax(pred[base + j]) == int(label[base + j])
               for j in range(4)):
            hit += 1
    return 1.0 * hit / num_groups

if __name__ == '__main__':
    import logging

    # Build the network and a FeedForward trainer bound to the CPU only.
    ocr_symbol = get_ocrnet()
    contexts = [mx.cpu()]
    trainer = mx.model.FeedForward(
        ctx=contexts,
        symbol=ocr_symbol,
        num_epoch=15,
        learning_rate=0.001,
        wd=0.00001,
        initializer=mx.init.Xavier(factor_type="in", magnitude=2.34),
        momentum=0.9,
    )

    # Both iterators share batch size 50, 4 labels per sample and 30x80
    # images; only the number of generated samples differs.
    batch_size, num_label, height, width = 50, 4, 30, 80
    train_iter = OCRIter(100000, batch_size, num_label, height, width)
    test_iter = OCRIter(1000, batch_size, num_label, height, width)

    # Timestamped DEBUG-level logging so training progress is printed.
    log_format = '%(asctime)-15s %(message)s'
    logging.basicConfig(level=logging.DEBUG, format=log_format)

    # NOTE(review): the first Speedometer argument is presumably the batch
    # size, yet the iterators use 50 -- confirm whether 32 is intended.
    speed_logger = mx.callback.Speedometer(32, 50)
    trainer.fit(
        X=train_iter,
        eval_data=test_iter,
        eval_metric=accuracy,
        batch_end_callback=speed_logger,
    )

Environment info

Operating System: Linux Mint 18 (based on Ubuntu 16.04), 64-bit, running in a virtual machine. Compiler: gcc 5.4.0. Package used (Python/R/Scala/Julia): Python 2.7.12. MXNet version: built from git master (2017-02-23). Other Python packages: OpenCV 3.2.

Error Message:

run in PyCharm

Process finished with exit code 139 (interrupted by signal 11: SIGSEGV)

gdb py-bt

Traceback (most recent call first):
  File "test-g.py", line 56, in __iter__
    img = cv2.imdecode(img, cv2.IMREAD_COLOR)
  File "/home/mao/mxnet/python/mxnet/model.py", line 236, in _train_multi_device
    for data_batch in train_data:
  File "/home/mao/mxnet/python/mxnet/model.py", line 816, in fit
    sym_gen=self.sym_gen)
  File "test-g.py", line 135, in <module>
    model.fit(X = data_train, eval_data = data_test, eval_metric = Accuracy, batch_end_callback=mx.callback.Speedometer(32, 50),)

gdb bt

0 0x0000000000000000 in ?? ()

1 0x00007ffff18149ee in cv::imdecode(cv::_InputArray const&, int) ()

from /usr/lib/x86_64-linux-gnu/libopencv_highgui.so.2.4

2 0x00007fffd9728f92 in pyopencv_cv_imdecode(_object, _object, _object*) ()

from /usr/lib/python2.7/dist-packages/cv2.so

3 0x00000000004c468a in call_function (oparg=,

pp_stack=0x7fffffffd440) at ../Python/ceval.c:4350

4 PyEval_EvalFrameEx () at ../Python/ceval.c:2987

5 0x00000000004dddca in gen_send_ex.isra.0.lto_priv ()

at ../Objects/genobject.c:85

6 0x00000000004c4c6f in PyEval_EvalFrameEx () at ../Python/ceval.c:2806

7 0x00000000004c2765 in PyEval_EvalCodeEx () at ../Python/ceval.c:3582

8 0x00000000004ca099 in fast_function (nk=17, na=,

n=, pp_stack=0x7fffffffd7e0, func=<function at remote 0x7fffd4b0c488>) at ../Python/ceval.c:4445

9 call_function (oparg=, pp_stack=0x7fffffffd7e0)

at ../Python/ceval.c:4370

10 PyEval_EvalFrameEx () at ../Python/ceval.c:2987

11 0x00000000004c2765 in PyEval_EvalCodeEx () at ../Python/ceval.c:3582

12 0x00000000004ca099 in fast_function (nk=4, na=,

n=, pp_stack=0x7fffffffd9f0, func=<function at remote 0x7fffd4b0d0c8>) at ../Python/ceval.c:4445

13 call_function (oparg=, pp_stack=0x7fffffffd9f0)

---Type to continue, or q to quit--- at ../Python/ceval.c:4370

14 PyEval_EvalFrameEx () at ../Python/ceval.c:2987

15 0x00000000004c2765 in PyEval_EvalCodeEx () at ../Python/ceval.c:3582

16 0x00000000004c2509 in PyEval_EvalCode (co=,

globals=, locals=) at ../Python/ceval.c:669

17 0x00000000004f1def in run_mod.lto_priv () at ../Python/pythonrun.c:1376

18 0x00000000004ec652 in PyRun_FileExFlags () at ../Python/pythonrun.c:1362

19 0x00000000004eae31 in PyRun_SimpleFileExFlags ()

at ../Python/pythonrun.c:948

20 0x000000000049e14a in Py_Main () at ../Modules/main.c:640

21 0x00007ffff7811830 in __libc_start_main (main=0x49dab0
, argc=2,

argv=0x7fffffffde38, init=, fini=, rtld_fini=, stack_end=0x7fffffffde28) at ../csu/libc-start.c:291

22 0x000000000049d9d9 in _start ()

What have you tried to solve it?

I found that the crash happens at img = cv2.imdecode(img, cv2.IMREAD_COLOR). So I moved that part into a separate file that only generates the image with the captcha module and decodes it with OpenCV; that runs fine. But when I run the full example together with MXNet, decoding with OpenCV causes a segmentation fault.

What's wrong?

zht3344 commented 7 years ago

Did you solve this problem?

ZHmao commented 7 years ago

No. I tried my best, but the problem remains, so I switched my platform to Windows.

baiyancheng20 commented 7 years ago

I have also run into this problem. Has anyone fixed it?

szha commented 6 years ago

This issue is closed due to lack of activity in the last 90 days. Feel free to ping me to reopen if this is still an active issue. Thanks!

roccolocko commented 6 years ago

The problem happens when using the latest version of OpenCV. Try a previous one, such as 3.1.x; that solved it for me.

ZHmao commented 6 years ago

Thank you.