bourdakos1 / Object-Detection

Leverage TensorFlow's object detection with Watson Visual Recognition

Using own trained model file #1

Closed Zumbalamambo closed 7 years ago

Zumbalamambo commented 7 years ago

How do I use my own trained model file?

bourdakos1 commented 7 years ago

Right now the code looks at the path ssd_mobilenet_v1_coco_11_06_2017/frozen_inference_graph.pb

If you already have a trained model file, you can just change the info starting at line 25 (you can ignore MODEL_FILE and DOWNLOAD_BASE):

# What model to download.
MODEL_NAME = 'ssd_mobilenet_v1_coco_11_06_2017'
MODEL_FILE = MODEL_NAME + '.tar.gz'
DOWNLOAD_BASE = 'http://download.tensorflow.org/models/object_detection/'

# Path to frozen detection graph. This is the actual model that is used for the object detection.
PATH_TO_CKPT = MODEL_NAME + '/frozen_inference_graph.pb'
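For example, if your frozen graph lived in a folder called my_birds_model (a made-up name, use your own), the change might look like:

# Hypothetical example: point the script at your own frozen graph.
MODEL_NAME = 'my_birds_model'
PATH_TO_CKPT = MODEL_NAME + '/frozen_inference_graph.pb'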
bourdakos1 commented 7 years ago

You can get other pre-trained models from TensorFlow's object detection model zoo.

bourdakos1 commented 7 years ago

And if you want to use a custom classifier from Watson, you can add your classifier id on line 107:

results = visual_recognition.classify(images_file=images_file, threshold=0.7, classifier_ids=['default'])
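For example, with a made-up classifier id it would become:

# 'birds_123456789' is a hypothetical id; use the one Watson gives you.
results = visual_recognition.classify(images_file=images_file, threshold=0.7, classifier_ids=['birds_123456789'])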
Zumbalamambo commented 7 years ago

The following are my model name, checkpoint, and labels:

# Path to frozen detection graph. This is the actual model that is used for the object detection.
MODEL_NAME = 'birds'
PATH_TO_CKPT = os.path.join(CWD_PATH, 'object_detection', MODEL_NAME, 'retrained_graph.pb')

# List of the strings that is used to add correct label for each box.
PATH_TO_LABELS = os.path.join(CWD_PATH, 'object_detection', 'data', 'retrained_labels.txt')

NUM_CLASSES = 2

I want to detect only two of these birds, so I have trained with only those two.

bourdakos1 commented 7 years ago

What issues are you having?

Zumbalamambo commented 7 years ago

I'm getting the following error:

Traceback (most recent call last):
  File "/Users/zumbala/Downloads/Object-Detector-App-master/object_detection/utils/label_map_util.py", line 107, in load_labelmap
    text_format.Merge(label_map_string, label_map)
  File "/Users/zumbala/anaconda/lib/python3.5/site-packages/google/protobuf/text_format.py", line 481, in Merge
    descriptor_pool=descriptor_pool)
  File "/Users/zumbala/anaconda/lib/python3.5/site-packages/google/protobuf/text_format.py", line 535, in MergeLines
    return parser.MergeLines(lines, message)
  File "/Users/zumbala/anaconda/lib/python3.5/site-packages/google/protobuf/text_format.py", line 568, in MergeLines
    self._ParseOrMerge(lines, message)
  File "/Users/zumbala/anaconda/lib/python3.5/site-packages/google/protobuf/text_format.py", line 583, in _ParseOrMerge
    self._MergeField(tokenizer, message)
  File "/Users/zumbala/anaconda/lib/python3.5/site-packages/google/protobuf/text_format.py", line 652, in _MergeField
    (message_descriptor.full_name, name))
google.protobuf.text_format.ParseError: 1:1 : Message type "object_detection.protos.StringIntLabelMap" has no field named "hummingbird".
Zumbalamambo commented 7 years ago

I have posted the entire code for your reference. Please help me debug it.

import os
import cv2
import time
import argparse
import multiprocessing
import numpy as np
import tensorflow as tf

from utils import FPS, WebcamVideoStream
from multiprocessing import Queue, Pool
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as vis_util

CWD_PATH = os.getcwd()

# Path to frozen detection graph. This is the actual model that is used for the object detection.
MODEL_NAME = 'birds'
PATH_TO_CKPT = os.path.join(CWD_PATH, 'object_detection', MODEL_NAME, 'retrained_graph.pb')

# List of the strings that is used to add correct label for each box.
PATH_TO_LABELS = os.path.join(CWD_PATH, 'object_detection', 'data', 'retrained_labels.txt')

NUM_CLASSES = 2

# Loading label map
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES,
                                                            use_display_name=True)
category_index = label_map_util.create_category_index(categories)

def detect_objects(image_np, sess, detection_graph):
    # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
    image_np_expanded = np.expand_dims(image_np, axis=0)
    image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')

    # Each box represents a part of the image where a particular object was detected.
    boxes = detection_graph.get_tensor_by_name('detection_boxes:0')

    # Each score represents the level of confidence for each of the objects.
    # The score is shown on the result image, together with the class label.
    scores = detection_graph.get_tensor_by_name('detection_scores:0')
    classes = detection_graph.get_tensor_by_name('detection_classes:0')
    num_detections = detection_graph.get_tensor_by_name('num_detections:0')

    # Actual detection.
    (boxes, scores, classes, num_detections) = sess.run(
        [boxes, scores, classes, num_detections],
        feed_dict={image_tensor: image_np_expanded})

    # Visualization of the results of a detection.
    vis_util.visualize_boxes_and_labels_on_image_array(
        image_np,
        np.squeeze(boxes),
        np.squeeze(classes).astype(np.int32),
        np.squeeze(scores),
        category_index,
        use_normalized_coordinates=True,
        line_thickness=8)
    return image_np

def worker(input_q, output_q):
    # Load a (frozen) Tensorflow model into memory.
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')

        sess = tf.Session(graph=detection_graph)

    fps = FPS().start()
    while True:
        fps.update()
        frame = input_q.get()
        output_q.put(detect_objects(frame, sess, detection_graph))

    fps.stop()
    sess.close()

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('-src', '--source', dest='video_source', type=int,
                        default=0, help='Device index of the camera.')
    parser.add_argument('-wd', '--width', dest='width', type=int,
                        default=480, help='Width of the frames in the video stream.')
    parser.add_argument('-ht', '--height', dest='height', type=int,
                        default=360, help='Height of the frames in the video stream.')
    parser.add_argument('-num-w', '--num-workers', dest='num_workers', type=int,
                        default=2, help='Number of workers.')
    parser.add_argument('-q-size', '--queue-size', dest='queue_size', type=int,
                        default=5, help='Size of the queue.')
    args = parser.parse_args()

    logger = multiprocessing.log_to_stderr()
    logger.setLevel(multiprocessing.SUBDEBUG)

    input_q = Queue(maxsize=args.queue_size)
    output_q = Queue(maxsize=args.queue_size)
    pool = Pool(args.num_workers, worker, (input_q, output_q))

    video_capture = WebcamVideoStream(src=args.video_source,
                                      width=args.width,
                                      height=args.height).start()
    fps = FPS().start()

    while True:  # fps._numFrames < 120
        frame = video_capture.read()
        input_q.put(frame)

        t = time.time()

        cv2.imshow('Video', output_q.get())
        fps.update()

        print('[INFO] elapsed time: {:.2f}'.format(time.time() - t))

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    fps.stop()
    print('[INFO] elapsed time (total): {:.2f}'.format(fps.elapsed()))
    print('[INFO] approx. FPS: {:.2f}'.format(fps.fps()))

    pool.terminate()
    video_capture.stop()
    cv2.destroyAllWindows()
bourdakos1 commented 7 years ago

What does your labels file look like?

Zumbalamambo commented 7 years ago

This is what it looks like:

HummingBird
Cuckoo
bourdakos1 commented 7 years ago

Normally the labels file would look something like this:

item {
  id: 1
  name: 'HummingBird'
}

item {
  id: 2
  name: 'Cuckoo'
}
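If your retrained_labels.txt really is just one label per line, a quick (untested) sketch like this could generate a label map in the expected format:

# Untested sketch: convert a plain text labels file (one label per line)
# into the protobuf text format the object detection API expects.
# 'label_map.pbtxt' is a hypothetical output filename.
with open('retrained_labels.txt') as f, open('label_map.pbtxt', 'w') as out:
    for i, line in enumerate(f, start=1):
        name = line.strip()
        if name:
            out.write("item {\n  id: %d\n  name: '%s'\n}\n\n" % (i, name))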
bourdakos1 commented 7 years ago

How did you train the model? Or did you get it from somewhere?

bourdakos1 commented 7 years ago

It might help to just output the raw data from the model instead of trying to use the labels, and then add the labels back in once you sort it out.

Zumbalamambo commented 7 years ago

I created two folders, named HummingBird and Cuckoo, and placed the pictures of each bird in its respective folder.

Then I ran this code. It generated these two files.

bourdakos1 commented 7 years ago

Try running this; it might not work:

import os
import cv2
import time
import argparse
import multiprocessing
import numpy as np
import tensorflow as tf

from utils import FPS, WebcamVideoStream
from multiprocessing import Queue, Pool
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as vis_util

CWD_PATH = os.getcwd()

# Path to frozen detection graph. This is the actual model that is used for the object detection.
MODEL_NAME = 'birds'
PATH_TO_CKPT = os.path.join(CWD_PATH, 'object_detection', MODEL_NAME, 'retrained_graph.pb')

def detect_objects(image_np, sess, detection_graph):
    # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
    image_np_expanded = np.expand_dims(image_np, axis=0)
    image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')

    # Each box represents a part of the image where a particular object was detected.
    boxes = detection_graph.get_tensor_by_name('detection_boxes:0')

    # Each score represents the level of confidence for each of the objects.
    # The score is shown on the result image, together with the class label.
    scores = detection_graph.get_tensor_by_name('detection_scores:0')
    classes = detection_graph.get_tensor_by_name('detection_classes:0')
    num_detections = detection_graph.get_tensor_by_name('num_detections:0')

    # Actual detection.
    (boxes, scores, classes, num_detections) = sess.run(
        [boxes, scores, classes, num_detections],
        feed_dict={image_tensor: image_np_expanded})

    # Visualization of the results of a detection.
    print(classes)
    return image_np

def worker(input_q, output_q):
    # Load a (frozen) Tensorflow model into memory.
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')

        sess = tf.Session(graph=detection_graph)

    fps = FPS().start()
    while True:
        fps.update()
        frame = input_q.get()
        output_q.put(detect_objects(frame, sess, detection_graph))

    fps.stop()
    sess.close()

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('-src', '--source', dest='video_source', type=int,
                        default=0, help='Device index of the camera.')
    parser.add_argument('-wd', '--width', dest='width', type=int,
                        default=480, help='Width of the frames in the video stream.')
    parser.add_argument('-ht', '--height', dest='height', type=int,
                        default=360, help='Height of the frames in the video stream.')
    parser.add_argument('-num-w', '--num-workers', dest='num_workers', type=int,
                        default=2, help='Number of workers.')
    parser.add_argument('-q-size', '--queue-size', dest='queue_size', type=int,
                        default=5, help='Size of the queue.')
    args = parser.parse_args()

    logger = multiprocessing.log_to_stderr()
    logger.setLevel(multiprocessing.SUBDEBUG)

    input_q = Queue(maxsize=args.queue_size)
    output_q = Queue(maxsize=args.queue_size)
    pool = Pool(args.num_workers, worker, (input_q, output_q))

    video_capture = WebcamVideoStream(src=args.video_source,
                                      width=args.width,
                                      height=args.height).start()
    fps = FPS().start()

    while True:  # fps._numFrames < 120
        frame = video_capture.read()
        input_q.put(frame)

        t = time.time()

        cv2.imshow('Video', output_q.get())
        fps.update()

        print('[INFO] elapsed time: {:.2f}'.format(time.time() - t))

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    fps.stop()
    print('[INFO] elapsed time (total): {:.2f}'.format(fps.elapsed()))
    print('[INFO] approx. FPS: {:.2f}'.format(fps.fps()))

    pool.terminate()
    video_capture.stop()
    cv2.destroyAllWindows()
Zumbalamambo commented 7 years ago

Thank you so much, it didn't throw that error this time. Thanks a lot, lot, lot... The above code loaded the webcam, but I'm not able to see the frame. It also threw the following error:

  File "/Users/zumbala/anaconda/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/Users/zumbala/anaconda/lib/python3.5/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/zumbala/anaconda/lib/python3.5/multiprocessing/pool.py", line 103, in worker
    initializer(*initargs)
  File "/Users/zumbala/Downloads/Object-Detector-App-master/modified.py", line 60, in worker
    output_q.put(detect_objects(frame, sess, detection_graph))
  File "/Users/zumbala/Downloads/Object-Detector-App-master/modified.py", line 23, in detect_objects
    image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
  File "/Users/zumbala/anaconda/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 2733, in get_tensor_by_name
    return self.as_graph_element(name, allow_tensor=True, allow_operation=False)
  File "/Users/zumbala/anaconda/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 2584, in as_graph_element
    return self._as_graph_element_locked(obj, allow_tensor, allow_operation)
  File "/Users/zumbala/anaconda/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 2626, in _as_graph_element_locked
    "graph." % (repr(name), repr(op_name)))
bourdakos1 commented 7 years ago

You should get output like:

[[ 1.  1.  2.  1.  2.  1.  1.  1.  ... ]]

If you also print(scores), you will get something like:

[[ 0.96264094  0.83880234  0.68226969  0.60378635  0.14331663  0.12331686 ...]]
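Something like this (a rough sketch, dropped into detect_objects after the sess.run call) would pair each class id with its score:

# Rough sketch: pair each class id with its confidence score.
for cls, score in zip(np.squeeze(classes), np.squeeze(scores)):
    if score > 0.5:  # only reasonably confident detections
        print('class id {} with score {:.2f}'.format(int(cls), score))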
bourdakos1 commented 7 years ago

That will give you an idea of what the id is for each bird; then you can change your label file to something like this:

item {
  id: the_id_I_think_it_is
  name: 'HummingBird'
}

item {
  id: the_id_I_think_it_is
  name: 'Cuckoo'
}
bourdakos1 commented 7 years ago

Hmmm, I'm not sure what's going wrong there... It might be something wrong with your model? Have you tried running this code with one of the provided models?

bourdakos1 commented 7 years ago

Maybe try with tf.Session(graph=detection_graph) as sess: (I'm not great with Python, not sure if this will help at all haha)
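Something like this, maybe (an untested sketch of worker() with the session as a context manager, so it gets closed automatically):

def worker(input_q, output_q):
    # Load a (frozen) Tensorflow model into memory.
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
            od_graph_def.ParseFromString(fid.read())
            tf.import_graph_def(od_graph_def, name='')

    # The session is closed automatically when the with block exits.
    with tf.Session(graph=detection_graph) as sess:
        while True:
            frame = input_q.get()
            output_q.put(detect_objects(frame, sess, detection_graph))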

Zumbalamambo commented 7 years ago

Yes, it works with those models. How can I create my own custom models for recognition?

Zumbalamambo commented 7 years ago

Hehe okay... same here. I'm totally into Java.

Zumbalamambo commented 7 years ago

I have made this work on an Android device, but it is not working on desktop. :(

bourdakos1 commented 7 years ago

To clarify, your retrained model works on Android?

Zumbalamambo commented 7 years ago

Yes, it works perfectly with Android.

bourdakos1 commented 7 years ago

Oh wait, okay, it looks like your retrained model isn't for object detection. Are you trying to find the objects in the image, or just label the image in general?

Zumbalamambo commented 7 years ago

I'm trying to use it for object detection; I don't intend to use it for image captioning.

Zumbalamambo commented 7 years ago

However, it works if I put cv2.imshow('Video', frame).

bourdakos1 commented 7 years ago

So you want to achieve this: https://medium.com/unsupervised-coding/dont-miss-your-target-object-detection-with-tensorflow-and-watson-488e24226ef3

Zumbalamambo commented 7 years ago

Wow, great. I want to achieve this in real-time video, not in a still picture.

bourdakos1 commented 7 years ago

If so, you can follow along with this tutorial to retrain TensorFlow's object detection model: https://github.com/tensorflow/models/blob/master/object_detection/g3doc/running_pets.md

However, it takes an extremely long time to retrain (days). It might be better to use the general object detection model, which might locate the birds but not give the type of bird, and then use another model on top to label each bird with its type.

It might not work well in real time since you have to pipe everything through two models, though. The idea is sketched below.
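Roughly (an untested sketch; classify_crop is a hypothetical function standing in for the second, bird-labelling model):

# Untested sketch of the two-model pipeline: the generic detector finds
# boxes, then each crop is handed to a second classifier (classify_crop
# is a hypothetical stand-in for the retrained bird model).
def detect_and_label(image_np, sess, detection_graph, classify_crop):
    h, w, _ = image_np.shape
    image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
    boxes_t = detection_graph.get_tensor_by_name('detection_boxes:0')
    scores_t = detection_graph.get_tensor_by_name('detection_scores:0')
    boxes, scores = sess.run(
        [boxes_t, scores_t],
        feed_dict={image_tensor: np.expand_dims(image_np, axis=0)})
    labels = []
    for box, score in zip(np.squeeze(boxes), np.squeeze(scores)):
        if score < 0.5:
            continue
        ymin, xmin, ymax, xmax = box  # normalized coordinates
        crop = image_np[int(ymin * h):int(ymax * h), int(xmin * w):int(xmax * w)]
        labels.append(classify_crop(crop))  # second model labels the crop
    return labels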

Zumbalamambo commented 7 years ago

So the one that I have may not work?

Zumbalamambo commented 7 years ago

I have to detect the gender of humans as well. I thought of doing it with this.

bourdakos1 commented 7 years ago

The model that you linked to is just for image labelling, not localization, I believe.

Zumbalamambo commented 7 years ago

Oh my... that's bad... Is there any way to use OpenCV to detect the object, crop the detected object, and then recognize it?

bourdakos1 commented 7 years ago

I'm not sure. I'd try getting your code to work with localizing objects with the default model, then try adding your bird model on top and see how fast it is. If it's too slow, bite the bullet and try to retrain the model. Object detection isn't an easy task haha...

Zumbalamambo commented 7 years ago

Hahaha... it is quite hard... How about gender detection? Do you have any ideas about that?

Zumbalamambo commented 7 years ago

The one that I'm trying won't localize the object?

bourdakos1 commented 7 years ago

You can do gender/age/face localization really easily with Watson, but it's an API call, so it's definitely not good for real-time video haha

bourdakos1 commented 7 years ago

The one you are using will just say there is a hummingbird in the image, but it won't be able to tell you where in the image it is.

Zumbalamambo commented 7 years ago

Oh my... this is crazy... Any other way to do gender recognition?

bourdakos1 commented 7 years ago

I think OpenCV can do faces, but I'm not sure about gender.
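Something like this might work for faces (untested; the cascade path depends on how OpenCV was installed):

# Untested sketch: face detection with OpenCV's bundled Haar cascade.
# cv2.data.haarcascades exists in the pip opencv-python package; with
# other installs you may need the full path to the xml file.
import cv2

face_cascade = cv2.CascadeClassifier(
    cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')

frame = cv2.imread('test.jpg')  # or a frame from the webcam stream
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5)
for (x, y, fw, fh) in faces:
    cv2.rectangle(frame, (x, y), (x + fw, y + fh), (0, 255, 0), 2)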

bourdakos1 commented 7 years ago

I'm going to close this issue; we can email if you have any questions about how to approach this.

Zumbalamambo commented 7 years ago

Yeah, sure. I will email you.

skthakor commented 5 years ago

Sir, please help me because I have been trying to resolve this error for the last 2 weeks.

(tensorflow_gpu) C:\Users\thako\TensorFlow\models\research\object_detection>python train.py --logtostderr --train_dir=training/ --pipeline_config_path=training/faster_rcnn_inception_v2_pets.config
WARNING:tensorflow:From C:\Users\thako\Anaconda3\envs\tensorflow_gpu\lib\site-packages\tensorflow\python\platform\app.py:125: main (from __main__) is deprecated and will be removed in a future version.
Instructions for updating:
Use object_detection/model_main.py.
WARNING:tensorflow:From C:\Users\thako\TensorFlow\models\research\object_detection\legacy\trainer.py:266: create_global_step (from tensorflow.contrib.framework.python.ops.variables) is deprecated and will be removed in a future version.
Instructions for updating:
Please switch to tf.train.create_global_step
Traceback (most recent call last):
  File "C:\Users\thako\TensorFlow\models\research\object_detection\utils\label_map_util.py", line 141, in load_labelmap
    text_format.Merge(label_map_string, label_map)
  File "C:\Users\thako\Anaconda3\envs\tensorflow_gpu\lib\site-packages\google\protobuf\text_format.py", line 690, in Merge
    allow_unknown_field=allow_unknown_field)
  File "C:\Users\thako\Anaconda3\envs\tensorflow_gpu\lib\site-packages\google\protobuf\text_format.py", line 757, in MergeLines
    return parser.MergeLines(lines, message)
  File "C:\Users\thako\Anaconda3\envs\tensorflow_gpu\lib\site-packages\google\protobuf\text_format.py", line 782, in MergeLines
    self._ParseOrMerge(lines, message)
  File "C:\Users\thako\Anaconda3\envs\tensorflow_gpu\lib\site-packages\google\protobuf\text_format.py", line 804, in _ParseOrMerge
    self._MergeField(tokenizer, message)
  File "C:\Users\thako\Anaconda3\envs\tensorflow_gpu\lib\site-packages\google\protobuf\text_format.py", line 896, in _MergeField
    (message_descriptor.full_name, name))
google.protobuf.text_format.ParseError: 1:1 : Message type "object_detection.protos.StringIntLabelMap" has no field named "if".

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "train.py", line 184, in <module>
    tf.app.run()
  File "C:\Users\thako\Anaconda3\envs\tensorflow_gpu\lib\site-packages\tensorflow\python\platform\app.py", line 125, in run
    _sys.exit(main(argv))
  File "C:\Users\thako\Anaconda3\envs\tensorflow_gpu\lib\site-packages\tensorflow\python\util\deprecation.py", line 250, in new_func
    return func(*args, **kwargs)
  File "train.py", line 180, in main
    graph_hook_fn=graph_rewriter_fn)
  File "C:\Users\thako\TensorFlow\models\research\object_detection\legacy\trainer.py", line 280, in train
    train_config.prefetch_queue_capacity, data_augmentation_options)
  File "C:\Users\thako\TensorFlow\models\research\object_detection\legacy\trainer.py", line 59, in create_input_queue
    tensor_dict = create_tensor_dict_fn()
  File "train.py", line 121, in get_next
    dataset_builder.build(config)).get_next()
  File "C:\Users\thako\TensorFlow\models\research\object_detection\builders\dataset_builder.py", line 130, in build
    num_additional_channels=input_reader_config.num_additional_channels)
  File "C:\Users\thako\TensorFlow\models\research\object_detection\data_decoders\tf_example_decoder.py", line 319, in __init__
    default_value=''),
  File "C:\Users\thako\TensorFlow\models\research\object_detection\data_decoders\tf_example_decoder.py", line 64, in __init__
    label_map_proto_file, use_display_name=False)
  File "C:\Users\thako\TensorFlow\models\research\object_detection\utils\label_map_util.py", line 169, in get_label_map_dict
    label_map = load_labelmap(label_map_path)
  File "C:\Users\thako\TensorFlow\models\research\object_detection\utils\label_map_util.py", line 143, in load_labelmap
    label_map.ParseFromString(label_map_string)
TypeError: a bytes-like object is required, not 'str'