wanghaisheng / awesome-ocr

A curated list of promising OCR resources
http://wanghaisheng.github.io/ocr-arxiv-daily/
MIT License
1.67k stars 351 forks source link

ctpn 测试 #62

Closed wanghaisheng closed 6 years ago

wanghaisheng commented 7 years ago
docker run  --rm -it  -v `pwd`:/opt/ctpn/CTPN/demo_images -p 8888:8888  dc/ctpn 

docker run  --rm -it  -v `pwd`:/opt/ctpn/CTPN/demo_images  dc/ctpn /bin/bash
root@8a1d73be4cbc:/opt/ctpn/CTPN# python tools/demo.py --no-gpu 
wanghaisheng commented 7 years ago

https://github.com/AKSHAYUBHAT/DeepVideoAnalytics/blob/master/notebooks/OCR/readme.md

➜ OCR docker run -p 8889:8888 -it akshayubhat/dva-auto:caffe-cpu

wanghaisheng commented 7 years ago
", "")

def process_image(im):
    """Detect text boxes in ``im``, run the recognizer on each, and annotate.

    The image is first rescaled with ``resize_im`` using ``cfg.SCALE`` and
    ``cfg.MAX_SCALE``.  For every box returned by ``text_detector.detect`` a
    padded crop is taken, converted to grayscale, resized to 128x64 and fed
    through ``sess.run(y, ...)``.  When the predicted presence probability
    exceeds 0.5 the decoded label is printed and drawn above the box.  Every
    detected box is outlined in green.

    Args:
        im: image array as accepted by ``resize_im`` and OpenCV.

    Returns:
        The resized image with the annotations drawn onto it.
    """
    im, f = resize_im(im, cfg.SCALE, cfg.MAX_SCALE)
    text_lines = text_detector.detect(im)
    # The image size does not change inside the loop (only pixels are drawn).
    img_height, img_width = im.shape[0], im.shape[1]

    for box in text_lines:
        left, top, right, bottom = box[:4]

        # Widen the box by a quarter of its width on each side, then pad it
        # vertically so the crop matches the 128x64 (2:1) size it is resized
        # to below.
        padding_x = int((right - left) / 4)
        width = padding_x * 2 + (right - left)
        height = (64 * width / 128)
        padding_y = int((height - (bottom - top)) / 2)
        new_left = int(max(0, left - padding_x))
        new_right = int(min(img_width - 1, right + padding_x))
        new_top = int(max(0, top - padding_y))
        new_bottom = int(min(img_height - 1, bottom + padding_y))

        # OpenCV drawing functions require integer pixel coordinates; the
        # detector boxes may hold floats.
        top_left = (int(left), int(top))
        bottom_right = (int(right), int(bottom))

        # A degenerate box (after clamping) yields an empty slice, which
        # cv2.cvtColor / cv2.resize reject; skip recognition for it but still
        # draw its outline below.
        if new_bottom > new_top and new_right > new_left:
            plate = im[new_top: new_bottom, new_left: new_right]
            plate = cv2.cvtColor(plate, cv2.COLOR_RGB2GRAY)
            plate = cv2.resize(plate, (128, 64))
            im_gray = plate / 255.
            feed_dict = {x: numpy.stack([im_gray])}
            feed_dict.update(dict(zip(params, param_values)))
            y_val = sess.run(y, feed_dict=feed_dict)

            # Element 0 of the output is the presence logit; the remaining
            # values are reshaped to 10 rows of per-character scores.
            letter_probs = y_val[0, 0, 0, 1:].reshape(10, len(common.CHARS))
            letter_probs = common.softmax(letter_probs)

            present_prob = common.sigmoid(y_val[0, 0, 0, 0])
            predicted_label = letter_probs_to_code(letter_probs)
            if present_prob > 0.5:
                print("predict", predicted_label, present_prob)
                cv2.imshow("plate", plate)
                cv2.putText(im, predicted_label,
                            (top_left[0], max(0, int(top - 5))),
                            cv2.FONT_HERSHEY_PLAIN, 2.0, (0, 0, 255), 2)

        cv2.rectangle(im, top_left, bottom_right, (0, 255, 0))

    return im
wanghaisheng commented 7 years ago

https://github.com/aovoc/CTPN implements the training process of CTPN (original project: https://github.com/tianzhi0549/CTPN).

wanghaisheng commented 7 years ago
# MUST be imported first
import sys
import numpy as np

class Config:
    """Namespace of constants read by the CTPN demo and detector code.

    NOTE(review): the section headings below are derived from the attribute
    names only; the code that consumes these values is not visible here, so
    confirm the exact semantics against the detector implementation.
    """

    # --- input preprocessing / device selection ---
    # Three float32 values; presumably a per-channel pixel mean -- confirm.
    MEAN = np.array([102.9801, 115.9465, 122.7717], dtype=np.float32)
    TEST_GPU_ID = 0
    SCALE = 600
    MAX_SCALE = 1000

    # --- score and NMS thresholds ---
    LINE_MIN_SCORE = 0.6
    TEXT_PROPOSALS_MIN_SCORE = 0.7
    TEXT_PROPOSALS_NMS_THRESH = 0.3
    MAX_HORIZONTAL_GAP = 50
    TEXT_LINE_NMS_THRESH = 0.3

    # --- text-line filtering / proposal grouping ---
    MIN_NUM_PROPOSALS = 0
    MIN_RATIO = 0.9
    MIN_V_OVERLAPS = 0.6
    MIN_SIZE_SIM = 0.6
    TEXT_PROPOSALS_WIDTH = 16

def init():
    """Put the project's source directories at the front of ``sys.path``.

    The paths are relative, so they resolve against the current working
    directory of the process.
    """
    # Listed in final lookup order.  This is the same result as inserting
    # "./tools", "./caffe/python" and "./src" one after another at index 0.
    sys.path[:0] = ["./src", "./caffe/python", "./tools"]


# Run at import time so that later imports can find the project modules.
init()

FROM nvidia/cuda:7.0-runtime-ubuntu14.04
MAINTAINER Varun Suresh <fab.varun@gmail.com>
# Builds the CTPN text detector (Caffe + pycaffe + OpenCV 2.4.12) and starts
# a Jupyter notebook server on port 8888.
#
# Build: docker build -t dc/ctpn .
# Run:   docker run  --rm -it -v `pwd`:/opt/ctpn/CTPN/demo_images -p 8888:8888  dc/ctpn /bin/bash

# Toolchain and Caffe build dependencies.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        cmake \
        git \
        wget \
        zip \
        unzip \
        libatlas-base-dev \
        libboost-all-dev \
        libgflags-dev \
        libgoogle-glog-dev \
        libhdf5-serial-dev \
        libleveldb-dev \
        liblmdb-dev \
        libopencv-dev \
        libprotobuf-dev \
        libsnappy-dev \
        protobuf-compiler \
        python-dev \
        python-numpy \
        python-pip \
        python-setuptools \
        python-scipy && \
    rm -rf /var/lib/apt/lists/*

ENV CTPN_ROOT=/opt/ctpn
WORKDIR $CTPN_ROOT

RUN git clone https://github.com/wanghaisheng/CTPN
WORKDIR $CTPN_ROOT/CTPN/caffe

# The trained weights must be present in the build context.  The trailing
# slash makes it explicit that the destination is a directory.
ADD ctpn_trained_model.caffemodel  $CTPN_ROOT/CTPN/models/

# Missing "packaging" package
RUN pip install -i https://pypi.tuna.tsinghua.edu.cn/simple  --upgrade pip
RUN pip install -i https://pypi.tuna.tsinghua.edu.cn/simple  packaging

# Install pycaffe's Python requirements one by one, plus pydot.
RUN cd python && for req in $(cat requirements.txt) pydot; do pip install -i https://pypi.tuna.tsinghua.edu.cn/simple  $req; done && cd ..
# NOTE(review): nccl is cloned but its build step further down is commented
# out, so this checkout is currently unused.
RUN git clone https://github.com/NVIDIA/nccl.git
RUN apt-get update && apt-get install -y --no-install-recommends \
    cuda=7.0-28 && \
    rm -rf /var/lib/apt/lists/*
WORKDIR /

# Download the cuDNN archive from https://developer.nvidia.com/rdp/cudnn-archive and place it
# in the build context.  ADD unpacks a local tar archive automatically; the
# following steps expect it to produce /cuda.
ADD cudnn-7.0-linux-x64-v4.0-prod.tgz /

WORKDIR /cuda
RUN cp -P include/cudnn.h /usr/include
RUN cp -P lib64/libcudnn* /usr/lib/x86_64-linux-gnu/

WORKDIR $CTPN_ROOT/CTPN/caffe

# Use the Makefile.config shipped in the build context and echo it into the
# build log for debugging.
ADD ./caffe/Makefile.config $CTPN_ROOT/CTPN/caffe/
RUN cat Makefile.config
#RUN cp Makefile.config.example Makefile.config
RUN apt-get update && apt-get install -y --no-install-recommends \
    vim && \
    rm -rf /var/lib/apt/lists/*
#RUN cd nccl && make -j install &&
#RUN    cd .. && rm -rf nccl && \
# Build Caffe and pycaffe with CMake, with cuDNN disabled.
# (A "WITH_PYTHON_LAYER=1" step used to sit in this chain and is disabled.)
RUN   cd $CTPN_ROOT/CTPN/caffe  &&   \
   mkdir build && cd build && \
     cmake -DUSE_CUDNN=0 .. && \
     pwd && \
     make -j"$(nproc)" && make pycaffe

# Set the environment variables so that the paths are correctly configured
# (one ENV per variable: later values reference the earlier ones).
ENV PYCAFFE_ROOT=$CTPN_ROOT/CTPN/caffe/python
ENV PYTHONPATH=$PYCAFFE_ROOT:$PYTHONPATH
ENV PATH=$CTPN_ROOT/CTPN/caffe/build/tools:$PYCAFFE_ROOT:$PATH
RUN echo "$CTPN_ROOT/CTPN/caffe/build/lib" >> /etc/ld.so.conf.d/caffe.conf && ldconfig

# To make sure the python layer builds - Need to figure out a cleaner way to do this.
# NOTE(review): these copies happen after the Caffe build above, so they only
# affect later builds of the tree -- confirm that this ordering is intended.
RUN cp $CTPN_ROOT/CTPN/src/layers/* $CTPN_ROOT/CTPN/caffe/src/caffe/layers/
RUN cp $CTPN_ROOT/CTPN/src/*.py $CTPN_ROOT/CTPN/caffe/src/caffe/
RUN cp -r $CTPN_ROOT/CTPN/src/utils $CTPN_ROOT/CTPN/caffe/src/caffe/

# Install Opencv - 2.4.12 :

RUN cd ~ && \
    mkdir -p ocv-tmp && \
    cd ocv-tmp && \
    wget https://github.com/Itseez/opencv/archive/2.4.12.zip  && \
    unzip 2.4.12.zip && \
    cd opencv-2.4.12 && \
    mkdir release && \
    cd release && \
    cmake -D CMAKE_BUILD_TYPE=RELEASE \
          -D CMAKE_INSTALL_PREFIX=/usr/local \
          -D BUILD_PYTHON_SUPPORT=ON \
          .. && \
    make -j8 && \
    make install && \
    rm -rf ~/ocv-tmp

# Build steps already run as root and sudo is not installed in this image,
# so the command is invoked directly.
# NOTE(review): presumably this silences a libdc1394 warning from OpenCV.
# /dev is usually recreated when a container starts, so this may have to be
# repeated at run time -- confirm.
RUN ln /dev/null /dev/raw1394
WORKDIR $CTPN_ROOT/CTPN
RUN make

RUN mkdir /opt/ctpn/CTPN/output
# The JSON-array form needs double quotes; with single quotes the whole
# bracketed text is taken as a literal path.
VOLUME ["/opt/ctpn/CTPN/output/"]
RUN pip install --upgrade jupyter
# Let the notebook server accept connections on every interface.
RUN mkdir -p -m 700 /root/.jupyter/ && \
    echo "c.NotebookApp.ip = '*'" >> /root/.jupyter/jupyter_notebook_config.py

RUN  pip install numpy --upgrade  -i https://pypi.tuna.tsinghua.edu.cn/simple
RUN  pip install ipyparallel    -i https://pypi.tuna.tsinghua.edu.cn/simple
WORKDIR /opt/ctpn/CTPN/
EXPOSE 8888

#COPY run_jupyter.sh /

#CMD ["/run_jupyter.sh", "--allow-root"]
# Exec form: jupyter runs as PID 1 and receives stop signals directly
# instead of being wrapped in "/bin/sh -c".
CMD ["jupyter", "notebook", "--no-browser", "--ip=0.0.0.0", "--allow-root"]
import sys
sys.path.append('/opt/ctpn/CTPN/tools/')
from cfg import Config as cfg
#from other import enlarge_boxes, get_output_name, draw_boxes, resize_im, CaffeModel
from other import draw_boxes, resize_im, CaffeModel

import cv2, os, caffe, sys
from detectors import TextProposalDetector, TextDetector
import os.path as osp
from utils.timer import Timer
from IPython.core.display import Image, display
#from tesseract_ocr import tesseract_ocr 

# Folder holding the input images; the generated crops are written here too.
DEMO_IMAGE_DIR="demo_images/pic_folder"
NET_DEF_FILE="models/deploy.prototxt"
MODEL_FILE="models/ctpn_trained_model.caffemodel"
# File-name prefix of the crops written below.  Files carrying it are skipped
# on input so that a second run does not feed its own output back in.
CROP_PREFIX="box_text_proposals_"

# This script always runs on the CPU.  The original demo switched to the GPU
# unless "--no-gpu" was passed, using caffe.set_mode_gpu() followed by
# caffe.set_device(cfg.TEST_GPU_ID).
caffe.set_mode_cpu()

text_proposals_detector=TextProposalDetector(CaffeModel(NET_DEF_FILE, MODEL_FILE))
text_detector=TextDetector(text_proposals_detector)

from glob import glob
timer=Timer()
for im_name in glob(osp.join(DEMO_IMAGE_DIR, "*.jpg")):
    image_stem = osp.splitext(osp.basename(im_name))[0]
    if image_stem.startswith(CROP_PREFIX):
        # A crop produced by an earlier run, not an input image.
        continue

    # Single-argument print(...) is valid in both Python 2 and Python 3.
    print(im_name)
    im=cv2.imread(im_name)
    if im is None:
        # cv2.imread returns None instead of raising when a file is unreadable.
        print("Skipping unreadable image: %s" % im_name)
        continue

    timer.tic()
    im, f=resize_im(im, cfg.SCALE, cfg.MAX_SCALE)
    text_lines,text_proposals,scores=text_detector.detect(im)
    print("Time: %f" % timer.toc())
    print(text_lines)

    img_height, img_width = im.shape[0], im.shape[1]
    for box_index, box in enumerate(text_lines):
        left, top, right, bottom = box[:4]

        # Widen the box by a quarter of its width on each side and pad it
        # vertically towards a 2:1 width:height ratio, clamped to the image.
        padding_x = int((right - left) / 4)
        width = padding_x * 2 + (right - left)
        height = (64 * width / 128)
        padding_y = int((height - (bottom - top))/2)
        new_left = int(max(0, left - padding_x))
        new_right = int(min(img_width - 1, right + padding_x))
        new_top = int(max(0, top - padding_y))
        new_bottom = int(min(img_height - 1, bottom + padding_y))

        if new_bottom <= new_top or new_right <= new_left:
            # An empty slice would make cv2.resize raise.
            continue

        crop_img = im[new_top: new_bottom, new_left: new_right]
        crop_img = cv2.resize(crop_img, (300,300))
        # The source image name is part of the file name: the box index
        # restarts at 0 for every image, so without it the crops of one image
        # would overwrite those of the previous one.
        crop_path = osp.join(
            DEMO_IMAGE_DIR,
            "{}{}_{}.jpg".format(CROP_PREFIX, image_stem, box_index))
        cv2.imwrite(crop_path, crop_img)
wanghaisheng commented 7 years ago

https://github.com/xiaolei89tw/CTPN_XL

wanghaisheng commented 6 years ago

https://github.com/bear63/sceneReco

wanghaisheng commented 6 years ago

https://github.com/chineseocr/new-text-detection-ctpn

wanghaisheng commented 6 years ago

https://github.com/Li-Ming-Fan/OCR-CTPN-CRNN