MichalBusta / DeepTextSpotter

285 stars 101 forks source link

demo.py error! #62

Open lfdeep opened 6 years ago

lfdeep commented 6 years ago

Hello, excuse me, can your demo.py only test videos? Can it not test single images?

xxlxx1 commented 6 years ago

```python
def test_pic(nets):

  global rec_t, image_size

  font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSansMono-Bold.ttf", 16)
  font2 = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSansMono-Bold.ttf", 18)

  impath = "images/demo.jpg"
  im = cv2.imread(impath)

  image_size = [640 / 64 * 64, 480 / 64 * 64]

  scaled = cv2.resize(im, (image_size[0], image_size[1]))
  if nets[0].blobs['data'].data[...].shape[1] == 1:
    scaled = cv2.cvtColor(scaled, cv2.COLOR_BGR2GRAY )
    scaled = scaled.reshape((scaled.shape[0], scaled.shape[1], 1))

  detections_out, fps = froward_image(nets, scaled, im)

  img = Image.fromarray(im)
  draw = ImageDraw.Draw(img)

  for detection in detections_out:  
    text = detection[1][0]
    print(text)
    width, height = draw.textsize(text, font=font)
    center =  [detection[0][0][0] - width / 2, detection[0][0][1] - 10]

    sx = int(detection[0][0][0] - width / 2)
    ex = int(detection[0][0][0] + width / 2)
    sy = int(detection[0][0][1] - 10)
    ey = int(detection[0][0][1] + 10)

    im[sy:ey, sx:ex] = im[sy:ey, sx:ex] / 2 

    boxr  = ((detection[0][0][0], detection[0][0][1]), (detection[0][1][0], detection[0][1][1]), detection[0][2])
    box = cv2.boxPoints(boxr)
    color = (0, 255, 0)
    vis.draw_box_points(im, box, color, thickness = 1)

  img = Image.fromarray(im)
  draw = ImageDraw.Draw(img)

  draw.text((10, 10), 'FPS: {0:.2f}'.format(fps),(0,255,0),font=font2)        

  #if frame_no < 30:
  #    draw.text((image_size[1] / 2 - 150, image_size[0] / 2 - 100), 'Raw Detections with Dictionary',(0,0,255),font=font3)

  for detection in detections_out:
    text = detection[1][0]
    width, height = draw.textsize(text, font=font)
    center =  [detection[0][0][0] - width / 2, detection[0][0][1] - 10]
    draw.text((center[0], center[1]), text, fill = (0,255,0),font=font)

  pix = np.array(img)

  #cv2.imshow('draw', scaled)
  cv2.imwrite("images/demo_result_draw.jpg",scaled)
  #
  if pix.shape[0] > 1024:
    pix = cv2.resize(pix, (pix.shape[1] / 2, pix.shape[0] / 2))
  #cv2.imshow('pix', pix)
  cv2.imwrite("images/demo_result_pix.jpg",pix)

  #out.write(pix)

  cv2.waitKey(10)
```

I just modified it a little and it works.
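
If it helps, here is a minimal sketch of how `test_pic` could be wired into demo.py's entry point. `create_nets` is a hypothetical placeholder, not a real function in the repo; reuse whatever setup code demo.py already uses to build the `nets` tuple before its video loop.

```python
# Hedged sketch: only the final call is new. `create_nets` is a hypothetical
# placeholder -- replace it with demo.py's existing code that constructs the
# `nets` tuple (detection + recognition networks).
if __name__ == '__main__':
    nets = create_nets()   # hypothetical: reuse demo.py's own net setup here
    test_pic(nets)         # writes images/demo_result_draw.jpg and images/demo_result_pix.jpg
```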
lo-pan commented 6 years ago
(quoting xxlxx1's test_pic code above)

Hi, I have built the pycaffe target successfully, but when I run the demo I get "No module named cmp_trie". Did you run into this problem?

MichalBusta commented 6 years ago

Installation step 3?
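
For anyone else hitting this: cmp_trie is assumed to be the compiled Python extension produced by the project's CMake build, so the ImportError usually just means the built module is not on Python's path. A minimal check; the build directory below is an assumption, point it at wherever your build actually put the module.

```python
# Hedged sketch: cmp_trie is assumed to be the extension built by the project's
# CMake step; the build directory below is a guess -- adjust it to your checkout.
import os
import sys

build_dir = os.path.expanduser("~/DeepTextSpotter/build")  # hypothetical path
if build_dir not in sys.path:
    sys.path.insert(0, build_dir)

try:
    import cmp_trie  # succeeds only if the module was built for this Python version
    print("cmp_trie found at", cmp_trie.__file__)
except ImportError as err:
    print("cmp_trie still missing, re-run the build step from the README:", err)
```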

ustczhouyu commented 6 years ago

Installation step 3?

Hi MichalBusta,

I added some layers to both model_cz.prototxt and tiny.prototxt. When I train the model with python train.py, I get an error: ValueError: cannot reshape array of size 6204 into shape (22, 1, 141). It happens at line 260 of validation.py, which is ctc_f = ctc_f.reshape(ctc_f.shape[0], ctc_f.shape[1], ctc_f.shape[3]). Since 6204 = 22 * 2 * 141, can I change that line to ctc_f = ctc_f.reshape(ctc_f.shape[0], 2 * ctc_f.shape[1], ctc_f.shape[3]) so that the result is (22, 2, 141)? Can you help me? Thank you very much.
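
To make the size mismatch concrete, here is a small numpy sketch. The blob shape (22, 1, 2, 141) is only inferred from the error message and may differ for the modified network; letting numpy infer the merged axis avoids hard-coding the factor of 2. Whether downstream decoding can handle the extra axis of size 2 depends on the layers that were added.

```python
import numpy as np

# Hedged sketch: the shape below is inferred from the error message, not from
# the actual network. With an extra axis of size 2, the hard-coded reshape to
# (22, 1, 141) no longer matches the 6204 elements.
ctc_f = np.zeros((22, 1, 2, 141))            # 22 * 1 * 2 * 141 = 6204 elements

# The original line in validation.py assumes the dropped axis has size 1:
#   ctc_f.reshape(ctc_f.shape[0], ctc_f.shape[1], ctc_f.shape[3])  # (22, 1, 141) -> only 3102

# Letting numpy infer the middle dimension works for both the original and the
# modified network (equivalent to the proposed 2 * ctc_f.shape[1] here):
ctc_f = ctc_f.reshape(ctc_f.shape[0], -1, ctc_f.shape[-1])
print(ctc_f.shape)                            # (22, 2, 141)
```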

adifdwimaulana commented 4 years ago
(quoting xxlxx1's test_pic code and lo-pan's question about "No module named cmp_trie" above)

I had the same problem. Did you solve it?

adifdwimaulana commented 4 years ago

How can I run demo.py with Python 3?
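
Untested here, but two things will clearly need attention: pycaffe and cmp_trie are compiled extensions and would have to be rebuilt against Python 3, and every size computed with "/" in the snippets above becomes a float under Python 3, which cv2.resize rejects. A hedged sketch of the size fixes only:

```python
import cv2
import numpy as np

# Hedged sketch, not a full port. Under Python 3, "/" returns a float and
# cv2.resize rejects float sizes, so computed sizes need "//" or int() casts.
im = cv2.imread("images/demo.jpg")               # same demo image as above
assert im is not None, "images/demo.jpg not found"

image_size = [640 // 64 * 64, 480 // 64 * 64]    # ints under both Python 2 and 3
scaled = cv2.resize(im, (image_size[0], image_size[1]))

pix = np.asarray(scaled)
if pix.shape[0] > 1024:
    pix = cv2.resize(pix, (pix.shape[1] // 2, pix.shape[0] // 2))
```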