Closed wanghaisheng closed 6 years ago
https://github.com/AKSHAYUBHAT/DeepVideoAnalytics/blob/master/notebooks/OCR/readme.md
➜ OCR docker run -p 8889:8888 -it akshayubhat/dva-auto:caffe-cpu
", "")
def process_image(im):
    """Detect text lines in *im*, run the recogniser on each, and annotate hits.

    The image is first rescaled with ``resize_im`` using ``cfg.SCALE`` and
    ``cfg.MAX_SCALE``.  For every box returned by ``text_detector.detect`` a
    padded crop with a 2:1 width/height ratio is cut out, converted to
    grayscale, resized to 128x64, scaled to [0, 1] and fed through ``sess``.
    When the predicted presence probability exceeds 0.5 the decoded label is
    printed, the crop is shown, and the label plus the box are drawn onto the
    image.

    Args:
        im: image array as accepted by ``resize_im`` (indexed as rows, cols).

    Returns:
        The resized image, with annotations drawn in place.
    """
    # NOTE(review): the source this was recovered from had lost its
    # indentation; the extent of the `if present_prob > 0.5:` body below is a
    # reconstruction -- confirm against the original file.
    im, _ = resize_im(im, cfg.SCALE, cfg.MAX_SCALE)
    text_lines = text_detector.detect(im)

    # The image size is the same for every box, so read it once.
    img_height, img_width = im.shape[0], im.shape[1]

    for box in text_lines:
        left, top, right, bottom = box[:4]

        # Pad horizontally by a quarter of the box width on each side, then
        # pad vertically so the crop is twice as wide as it is tall (the
        # aspect ratio of the 128x64 input it is resized to below).
        padding_x = int((right - left) / 4)
        width = padding_x * 2 + (right - left)
        height = (64 * width / 128)
        padding_y = int((height - (bottom - top)) / 2)

        # Clamp the padded box to the image.
        new_left = int(max(0, left - padding_x))
        new_right = int(min(img_width - 1, right + padding_x))
        new_top = int(max(0, top - padding_y))
        new_bottom = int(min(img_height - 1, bottom + padding_y))

        plate = im[new_top: new_bottom, new_left: new_right]
        if plate.size == 0:
            # A degenerate or fully clamped box yields an empty crop, which
            # cv2.cvtColor / cv2.resize would reject with an exception.
            continue

        plate = cv2.cvtColor(plate, cv2.COLOR_RGB2GRAY)
        plate = cv2.resize(plate, (128, 64))
        im_gray = plate / 255.

        feed_dict = {x: numpy.stack([im_gray])}
        feed_dict.update(dict(zip(params, param_values)))
        y_val = sess.run(y, feed_dict=feed_dict)

        # Element 0 is the presence logit; the remaining values are reshaped
        # into 10 rows of len(common.CHARS) character scores.
        letter_probs = y_val[0, 0, 0, 1:].reshape(10, len(common.CHARS))
        letter_probs = common.softmax(letter_probs)
        present_prob = common.sigmoid(y_val[0, 0, 0, 0])
        predicted_label = letter_probs_to_code(letter_probs)

        if present_prob > 0.5:
            print("predict", predicted_label, present_prob)
            cv2.imshow("plate", plate)
            # OpenCV drawing functions expect integer pixel coordinates; the
            # detector's box values may be floats, so cast explicitly.
            text_origin = (int(left), max(0, int(top - 5)))
            cv2.putText(im, predicted_label, text_origin,
                        cv2.FONT_HERSHEY_PLAIN, 2.0, (0, 0, 255), 2)
            cv2.rectangle(im,
                          (int(box[0]), int(box[1])),
                          (int(box[2]), int(box[3])),
                          (0, 255, 0))
    return im
https://github.com/aovoc/CTPN implements the training process of CTPN (original project: https://github.com/tianzhi0549/CTPN).
# MUST be imported first
import sys
import numpy as np
class Config:
    """Namespace of constants for the text-detection pipeline.

    Nothing here is computed; the values are read as class attributes.
    """

    # Three-element float32 vector (presumably per-channel pixel means
    # subtracted before inference -- confirm against the model wrapper).
    MEAN = np.float32([102.9801, 115.9465, 122.7717])

    # GPU index (presumably for caffe.set_device -- confirm against callers).
    TEST_GPU_ID = 0

    # Image rescaling targets (presumably short-side target and long-side
    # cap used by the resize helper -- confirm against callers).
    SCALE = 600
    MAX_SCALE = 1000

    # Score / NMS thresholds.  Their consumers are not part of this module;
    # the meanings below are taken from the names only.
    LINE_MIN_SCORE = 0.6
    TEXT_PROPOSALS_MIN_SCORE = 0.7
    TEXT_PROPOSALS_NMS_THRESH = 0.3
    TEXT_LINE_NMS_THRESH = 0.3

    # Text-line construction and filtering parameters (consumers not visible
    # here).
    MAX_HORIZONTAL_GAP = 50
    MIN_NUM_PROPOSALS = 0
    MIN_RATIO = 0.9
    MIN_V_OVERLAPS = 0.6
    MIN_SIZE_SIM = 0.6
    TEXT_PROPOSALS_WIDTH = 16
def init():
    """Prepend the tools, pycaffe and src directories to ``sys.path``.

    The paths are relative, so they resolve against the current working
    directory at import time.
    """
    # Every entry is inserted at index 0, so the last one listed ends up
    # first on sys.path: ./src, ./caffe/python, ./tools.
    for relative_dir in ("./tools", "./caffe/python", "./src"):
        sys.path.insert(0, relative_dir)


# Run at import so later imports can resolve modules from those directories.
init()
# Base image: NVIDIA's CUDA 7.0 runtime image on Ubuntu 14.04.
FROM nvidia/cuda:7.0-runtime-ubuntu14.04
MAINTAINER Varun Suresh <fab.varun@gmail.com>
# Build the image from the directory containing this Dockerfile:
# docker build -t dc/ctpn .
# Run it with the current directory mounted as the demo image folder and port 8888 published:
#docker run --rm -it -v `pwd`:/opt/ctpn/CTPN/demo_images -p 8888:8888 dc/ctpn /bin/bash
# Install the compiler toolchain, download/archive utilities, the development
# libraries used by the builds below, and the Python packaging tools.
# The apt package lists are removed in the same layer so they are not stored in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
build-essential \
cmake \
git \
wget \
zip \
unzip \
libatlas-base-dev \
libboost-all-dev \
libgflags-dev \
libgoogle-glog-dev \
libhdf5-serial-dev \
libleveldb-dev \
liblmdb-dev \
libopencv-dev \
libprotobuf-dev \
libsnappy-dev \
protobuf-compiler \
python-dev \
python-numpy \
python-pip \
python-setuptools \
python-scipy && \
rm -rf /var/lib/apt/lists/*
# Everything CTPN-related lives under $CTPN_ROOT.
ENV CTPN_ROOT=/opt/ctpn
WORKDIR $CTPN_ROOT
# Clone the CTPN sources into $CTPN_ROOT/CTPN.
RUN git clone https://github.com/wanghaisheng/CTPN
WORKDIR $CTPN_ROOT/CTPN/caffe
# The trained model must be present in the build context next to this Dockerfile.
ADD ctpn_trained_model.caffemodel $CTPN_ROOT/CTPN/models
# Missing "packaging" package
# All pip commands in this section use the Tsinghua PyPI mirror.
RUN pip install -i https://pypi.tuna.tsinghua.edu.cn/simple --upgrade pip
RUN pip install -i https://pypi.tuna.tsinghua.edu.cn/simple packaging
# Install every entry of caffe/python/requirements.txt, plus pydot, one package at a time.
RUN cd python && for req in $(cat requirements.txt) pydot; do pip install -i https://pypi.tuna.tsinghua.edu.cn/simple $req; done && cd ..
# NOTE(review): NCCL is cloned here, but the steps that would build and install it
# are commented out further down, so the checkout appears to be unused.
RUN git clone https://github.com/NVIDIA/nccl.git
# Install the pinned CUDA 7.0 toolkit package.
# NOTE(review): unlike the first apt-get step, the package lists are not removed here.
RUN apt-get update && apt-get install -y --no-install-recommends \
cuda=7.0-28
WORKDIR /
# Download the cuDNN archive from https://developer.nvidia.com/rdp/cudnn-archive and place it
# in the build context. ADD unpacks a local tar archive; the steps below expect it to create /cuda.
ADD cudnn-7.0-linux-x64-v4.0-prod.tgz /
WORKDIR /cuda
# Copy the cuDNN header and shared libraries into the system include/library directories.
RUN cp -P include/cudnn.h /usr/include
RUN cp -P lib64/libcudnn* /usr/lib/x86_64-linux-gnu/
WORKDIR $CTPN_ROOT/CTPN/caffe
# Use the Makefile.config from the build context and print it into the build log.
ADD ./caffe/Makefile.config $CTPN_ROOT/CTPN/caffe
RUN cat Makefile.config
#RUN cp Makefile.config.example Makefile.config
# Editor for working inside the container.
RUN apt-get update && apt-get install -y --no-install-recommends \
vim
# Disabled: build and install NCCL, then remove its checkout.
#RUN cd nccl && make -j install &&
#RUN cd .. && rm -rf nccl && \
# Configure Caffe with CMake in a fresh build/ directory, with cuDNN disabled
# (-DUSE_CUDNN=0), then compile it and the pycaffe bindings using all available cores.
# NOTE(review): cuDNN is installed earlier in this file but is switched off here -- confirm intent.
RUN cd $CTPN_ROOT/CTPN/caffe && \
mkdir build && cd build && \
cmake -DUSE_CUDNN=0 .. && \
# WITH_PYTHON_LAYER=1 && \
pwd && \
make -j"$(nproc)" && make pycaffe
# Set the environment variables so that the paths are correctly configured
ENV PYCAFFE_ROOT $CTPN_ROOT/CTPN/caffe/python
ENV PYTHONPATH $PYCAFFE_ROOT:$PYTHONPATH
ENV PATH $CTPN_ROOT/CTPN/caffe/build/tools:$PYCAFFE_ROOT:$PATH
# Register Caffe's build/lib directory with the dynamic linker.
RUN echo "$CTPN_ROOT/CTPN/caffe/build/lib" >> /etc/ld.so.conf.d/caffe.conf && ldconfig
# To make sure the python layer builds - Need to figure out a cleaner way to do this.
# NOTE(review): these copies run after the Caffe build step above, so the copied
# sources are only compiled if a later build step picks them up -- confirm.
RUN cp $CTPN_ROOT/CTPN/src/layers/* $CTPN_ROOT/CTPN/caffe/src/caffe/layers/
RUN cp $CTPN_ROOT/CTPN/src/*.py $CTPN_ROOT/CTPN/caffe/src/caffe/
RUN cp -r $CTPN_ROOT/CTPN/src/utils $CTPN_ROOT/CTPN/caffe/src/caffe/
# Install Opencv - 2.4.12 :
# Download the 2.4.12 source archive, build a release configuration with Python
# support in ~/ocv-tmp, install it under /usr/local, and delete the build tree in
# the same layer so it is not stored in the image.
# NOTE(review): the job count is fixed at 8 here, whereas the Caffe build uses $(nproc).
RUN cd ~ && \
mkdir -p ocv-tmp && \
cd ocv-tmp && \
wget https://github.com/Itseez/opencv/archive/2.4.12.zip && \
unzip 2.4.12.zip && \
cd opencv-2.4.12 && \
mkdir release && \
cd release && \
cmake -D CMAKE_BUILD_TYPE=RELEASE \
-D CMAKE_INSTALL_PREFIX=/usr/local \
-D BUILD_PYTHON_SUPPORT=ON \
.. && \
make -j8 && \
make install && \
rm -rf ~/ocv-tmp
# Make /dev/raw1394 a link to /dev/null (presumably to silence OpenCV's
# libdc1394 "failed to initialize" message -- confirm).
# No sudo: none of the visible install steps installs it, and no USER instruction
# is visible in this file, so build steps presumably run as root -- confirm
# against the base image.
RUN ln /dev/null /dev/raw1394
# Build the CTPN project itself from its top-level Makefile.
WORKDIR $CTPN_ROOT/CTPN
RUN make
# -p keeps this step from failing if the checkout already contains the directory.
RUN mkdir -p /opt/ctpn/CTPN/output
# The JSON-array form of VOLUME requires double quotes; with single quotes the
# whole bracketed text is treated as a plain string instead of a path list.
VOLUME ["/opt/ctpn/CTPN/output/"]
# Install Jupyter.
# NOTE(review): this call uses the default package index, while the other pip calls use the Tsinghua mirror.
RUN pip install --upgrade jupyter
# Create root's Jupyter config directory (mode 700) and append a setting that
# makes the notebook server listen on all interfaces.
RUN mkdir -p -m 700 /root/.jupyter/ && \
echo "c.NotebookApp.ip = '*'" >> /root/.jupyter/jupyter_notebook_config.py
RUN pip install numpy --upgrade -i https://pypi.tuna.tsinghua.edu.cn/simple
RUN pip install ipyparallel -i https://pypi.tuna.tsinghua.edu.cn/simple
# Start in the project root and document the notebook port.
WORKDIR /opt/ctpn/CTPN/
EXPOSE 8888
#COPY run_jupyter.sh /
#CMD ["/run_jupyter.sh", "--allow-root"]
#CMD ["jupyter", "notebook", "--no-browser", "--allow-root"]
# Default command: start the notebook server on all interfaces, as root, without opening a browser.
CMD jupyter notebook --no-browser --ip=0.0.0.0 --allow-root
import sys
sys.path.append('/opt/ctpn/CTPN/tools/')
from cfg import Config as cfg
#from other import enlarge_boxes, get_output_name, draw_boxes, resize_im, CaffeModel
from other import draw_boxes, resize_im, CaffeModel
import cv2, os, caffe, sys
from detectors import TextProposalDetector, TextDetector
import os.path as osp
from utils.timer import Timer
from IPython.core.display import Image, display
#from tesseract_ocr import tesseract_ocr
# Input folder and Caffe model files, relative to the working directory.
DEMO_IMAGE_DIR = "demo_images/pic_folder"
NET_DEF_FILE = "models/deploy.prototxt"
MODEL_FILE = "models/ctpn_trained_model.caffemodel"

# Inference always runs on the CPU.  To use a GPU instead, call
# caffe.set_mode_gpu() followed by caffe.set_device(cfg.TEST_GPU_ID).
caffe.set_mode_cpu()

# Detection pipeline: the Caffe model is wrapped by the proposal detector,
# which in turn is wrapped by the text-line detector used below.
text_proposals_detector = TextProposalDetector(
    CaffeModel(NET_DEF_FILE, MODEL_FILE)
)
text_detector = TextDetector(text_proposals_detector)

from glob import glob

# Stopwatch used to time each detection.
timer = Timer()
# Detect text lines in every JPEG of the demo folder and save a padded,
# 300x300 crop of each detected line next to the inputs.

# Prefix of the crop files written by this loop.  Because the crops land in
# the folder being scanned, files carrying this prefix are skipped as inputs
# so that a rerun does not process its own output.
CROP_PREFIX = "box_text_proposals_"

for im_name in glob(osp.join(DEMO_IMAGE_DIR, "*.jpg")):
    if osp.basename(im_name).startswith(CROP_PREFIX):
        continue
    print(im_name)

    im = cv2.imread(im_name)
    if im is None:
        # cv2.imread returns None for unreadable or corrupt files.
        print("Skipping unreadable image: %s" % im_name)
        continue

    timer.tic()
    im, f = resize_im(im, cfg.SCALE, cfg.MAX_SCALE)
    text_lines, text_proposals, scores = text_detector.detect(im)
    print("Time: %f" % timer.toc())
    print(text_lines)

    # Include the source image's name in each crop file name; a bare per-image
    # counter would make every image overwrite the previous image's crops.
    image_stem = osp.splitext(osp.basename(im_name))[0]
    img_height, img_width = im.shape[0], im.shape[1]

    for box_count, box in enumerate(text_lines):
        left, top, right, bottom = box[:4]

        # Pad horizontally by a quarter of the box width on each side, then
        # vertically so the padded region is twice as wide as it is tall.
        padding_x = int((right - left) / 4)
        width = padding_x * 2 + (right - left)
        height = (64 * width / 128)
        padding_y = int((height - (bottom - top)) / 2)

        # Clamp the padded box to the image.
        new_left = int(max(0, left - padding_x))
        new_right = int(min(img_width - 1, right + padding_x))
        new_top = int(max(0, top - padding_y))
        new_bottom = int(min(img_height - 1, bottom + padding_y))

        crop_img = im[new_top: new_bottom, new_left: new_right]
        if crop_img.size == 0:
            # A degenerate box yields an empty crop, which cv2.resize rejects.
            continue

        crop_img = cv2.resize(crop_img, (300, 300))
        crop_path = osp.join(
            DEMO_IMAGE_DIR,
            "{}{}_{}.jpg".format(CROP_PREFIX, image_stem, box_count),
        )
        cv2.imwrite(crop_path, crop_img)