Zumbalamambo closed this issue 7 years ago.
Right now the code looks at the path ssd_mobilenet_v1_coco_11_06_2017/frozen_inference_graph.pb.
If you already have a trained model file, you can just change the info starting at line 25 (you can ignore MODEL_FILE and DOWNLOAD_BASE):
# What model to download.
MODEL_NAME = 'ssd_mobilenet_v1_coco_11_06_2017'
MODEL_FILE = MODEL_NAME + '.tar.gz'
DOWNLOAD_BASE = 'http://download.tensorflow.org/models/object_detection/'
# Path to frozen detection graph. This is the actual model that is used for the object detection.
PATH_TO_CKPT = MODEL_NAME + '/frozen_inference_graph.pb'
You can get other pre-trained models from TensorFlow's object detection model zoo.
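For example, swapping in a different detector from the zoo would just mean changing the name (the model name below is illustrative; use the exact archive name listed in the zoo):

# Hypothetical example: point the app at a different pre-trained model
MODEL_NAME = 'faster_rcnn_resnet101_coco_11_06_2017'
MODEL_FILE = MODEL_NAME + '.tar.gz'
PATH_TO_CKPT = MODEL_NAME + '/frozen_inference_graph.pb'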
And if you want to use a custom classifier from Watson, you can add your classifier id on line 107:
results = visual_recognition.classify(images_file=images_file, threshold=0.7, classifier_ids=['default'])
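With a custom classifier the call might look like this (the classifier id below is a made-up placeholder; use the id Watson returned when you trained yours):

# 'birds_123456789' is a hypothetical classifier id
results = visual_recognition.classify(images_file=images_file, threshold=0.7,
                                      classifier_ids=['birds_123456789'])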
The following are my model name, checkpoint path, and labels:
# Path to frozen detection graph. This is the actual model that is used for the object detection.
MODEL_NAME = 'birds'
PATH_TO_CKPT = os.path.join(CWD_PATH, 'object_detection', MODEL_NAME, 'retrained_graph.pb')
# List of the strings that are used to add the correct label to each box.
PATH_TO_LABELS = os.path.join(CWD_PATH, 'object_detection', 'data', 'retrained_labels.txt')
NUM_CLASSES = 2
I want to detect only two birds, so I trained the model on just those two.
What issues are you having?
I'm getting the following error:
Traceback (most recent call last):
  File "/Users/zumbala/Downloads/Object-Detector-App-master/object_detection/utils/label_map_util.py", line 107, in load_labelmap
    text_format.Merge(label_map_string, label_map)
  File "/Users/zumbala/anaconda/lib/python3.5/site-packages/google/protobuf/text_format.py", line 481, in Merge
    descriptor_pool=descriptor_pool)
  File "/Users/zumbala/anaconda/lib/python3.5/site-packages/google/protobuf/text_format.py", line 535, in MergeLines
    return parser.MergeLines(lines, message)
  File "/Users/zumbala/anaconda/lib/python3.5/site-packages/google/protobuf/text_format.py", line 568, in MergeLines
    self._ParseOrMerge(lines, message)
  File "/Users/zumbala/anaconda/lib/python3.5/site-packages/google/protobuf/text_format.py", line 583, in _ParseOrMerge
    self._MergeField(tokenizer, message)
  File "/Users/zumbala/anaconda/lib/python3.5/site-packages/google/protobuf/text_format.py", line 652, in _MergeField
    (message_descriptor.full_name, name))
google.protobuf.text_format.ParseError: 1:1 : Message type "object_detection.protos.StringIntLabelMap" has no field named "hummingbird".
I have posted the entire code below for reference. Please help me debug it.
import os
import cv2
import time
import argparse
import multiprocessing
import numpy as np
import tensorflow as tf

from utils import FPS, WebcamVideoStream
from multiprocessing import Queue, Pool
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as vis_util

CWD_PATH = os.getcwd()

# Path to frozen detection graph. This is the actual model that is used for the object detection.
MODEL_NAME = 'birds'
PATH_TO_CKPT = os.path.join(CWD_PATH, 'object_detection', MODEL_NAME, 'retrained_graph.pb')

# List of the strings that are used to add the correct label to each box.
PATH_TO_LABELS = os.path.join(CWD_PATH, 'object_detection', 'data', 'retrained_labels.txt')

NUM_CLASSES = 2

# Loading label map
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES,
                                                            use_display_name=True)
category_index = label_map_util.create_category_index(categories)


def detect_objects(image_np, sess, detection_graph):
    # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
    image_np_expanded = np.expand_dims(image_np, axis=0)
    image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')

    # Each box represents a part of the image where a particular object was detected.
    boxes = detection_graph.get_tensor_by_name('detection_boxes:0')

    # Each score represents the level of confidence for each of the objects.
    # The score is shown on the result image, together with the class label.
    scores = detection_graph.get_tensor_by_name('detection_scores:0')
    classes = detection_graph.get_tensor_by_name('detection_classes:0')
    num_detections = detection_graph.get_tensor_by_name('num_detections:0')

    # Actual detection.
    (boxes, scores, classes, num_detections) = sess.run(
        [boxes, scores, classes, num_detections],
        feed_dict={image_tensor: image_np_expanded})

    # Visualization of the results of a detection.
    vis_util.visualize_boxes_and_labels_on_image_array(
        image_np,
        np.squeeze(boxes),
        np.squeeze(classes).astype(np.int32),
        np.squeeze(scores),
        category_index,
        use_normalized_coordinates=True,
        line_thickness=8)
    return image_np


def worker(input_q, output_q):
    # Load a (frozen) Tensorflow model into memory.
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')

        sess = tf.Session(graph=detection_graph)

    fps = FPS().start()
    while True:
        fps.update()
        frame = input_q.get()
        output_q.put(detect_objects(frame, sess, detection_graph))

    fps.stop()
    sess.close()


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('-src', '--source', dest='video_source', type=int,
                        default=0, help='Device index of the camera.')
    parser.add_argument('-wd', '--width', dest='width', type=int,
                        default=480, help='Width of the frames in the video stream.')
    parser.add_argument('-ht', '--height', dest='height', type=int,
                        default=360, help='Height of the frames in the video stream.')
    parser.add_argument('-num-w', '--num-workers', dest='num_workers', type=int,
                        default=2, help='Number of workers.')
    parser.add_argument('-q-size', '--queue-size', dest='queue_size', type=int,
                        default=5, help='Size of the queue.')
    args = parser.parse_args()

    logger = multiprocessing.log_to_stderr()
    logger.setLevel(multiprocessing.SUBDEBUG)

    input_q = Queue(maxsize=args.queue_size)
    output_q = Queue(maxsize=args.queue_size)
    pool = Pool(args.num_workers, worker, (input_q, output_q))

    video_capture = WebcamVideoStream(src=args.video_source,
                                      width=args.width,
                                      height=args.height).start()
    fps = FPS().start()

    while True:  # fps._numFrames < 120
        frame = video_capture.read()
        input_q.put(frame)

        t = time.time()

        cv2.imshow('Video', output_q.get())
        fps.update()

        print('[INFO] elapsed time: {:.2f}'.format(time.time() - t))

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    fps.stop()
    print('[INFO] elapsed time (total): {:.2f}'.format(fps.elapsed()))
    print('[INFO] approx. FPS: {:.2f}'.format(fps.fps()))

    pool.terminate()
    video_capture.stop()
    cv2.destroyAllWindows()
What does your labels file look like?
This is what it looks like:
HummingBird
Cuckoo
Normally the labels file would look something like this:
item {
  id: 1
  name: 'HummingBird'
}
item {
  id: 2
  name: 'Cuckoo'
}
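A quick way to sanity-check the file once it is in that format (a sketch, assuming you save it as object_detection/data/birds_label_map.pbtxt; the path is just an example):

from object_detection.utils import label_map_util

# Hypothetical path; point this at your own .pbtxt file
label_map = label_map_util.load_labelmap('object_detection/data/birds_label_map.pbtxt')
categories = label_map_util.convert_label_map_to_categories(
    label_map, max_num_classes=2, use_display_name=True)
print(label_map_util.create_category_index(categories))
# Should print something like: {1: {'id': 1, 'name': 'HummingBird'}, 2: {'id': 2, 'name': 'Cuckoo'}}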
How did you train the model? Or did you get it from somewhere?
It might help to just output the raw data from the model instead of trying to use the labels, then add the labels back in once you sort it out.
I created two folders, named HummingBird and Cuckoo, and placed pictures of HummingBird and Cuckoo in their respective folders. Then I ran this code, and it generated these two files.
Try running this, it might not work:
import os
import cv2
import time
import argparse
import multiprocessing
import numpy as np
import tensorflow as tf

from utils import FPS, WebcamVideoStream
from multiprocessing import Queue, Pool
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as vis_util

CWD_PATH = os.getcwd()

# Path to frozen detection graph. This is the actual model that is used for the object detection.
MODEL_NAME = 'birds'
PATH_TO_CKPT = os.path.join(CWD_PATH, 'object_detection', MODEL_NAME, 'retrained_graph.pb')


def detect_objects(image_np, sess, detection_graph):
    # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
    image_np_expanded = np.expand_dims(image_np, axis=0)
    image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')

    # Each box represents a part of the image where a particular object was detected.
    boxes = detection_graph.get_tensor_by_name('detection_boxes:0')

    # Each score represents the level of confidence for each of the objects.
    # The score is shown on the result image, together with the class label.
    scores = detection_graph.get_tensor_by_name('detection_scores:0')
    classes = detection_graph.get_tensor_by_name('detection_classes:0')
    num_detections = detection_graph.get_tensor_by_name('num_detections:0')

    # Actual detection.
    (boxes, scores, classes, num_detections) = sess.run(
        [boxes, scores, classes, num_detections],
        feed_dict={image_tensor: image_np_expanded})

    # Instead of visualizing, just print the raw class ids.
    print(classes)
    return image_np


def worker(input_q, output_q):
    # Load a (frozen) Tensorflow model into memory.
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')

        sess = tf.Session(graph=detection_graph)

    fps = FPS().start()
    while True:
        fps.update()
        frame = input_q.get()
        output_q.put(detect_objects(frame, sess, detection_graph))

    fps.stop()
    sess.close()


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('-src', '--source', dest='video_source', type=int,
                        default=0, help='Device index of the camera.')
    parser.add_argument('-wd', '--width', dest='width', type=int,
                        default=480, help='Width of the frames in the video stream.')
    parser.add_argument('-ht', '--height', dest='height', type=int,
                        default=360, help='Height of the frames in the video stream.')
    parser.add_argument('-num-w', '--num-workers', dest='num_workers', type=int,
                        default=2, help='Number of workers.')
    parser.add_argument('-q-size', '--queue-size', dest='queue_size', type=int,
                        default=5, help='Size of the queue.')
    args = parser.parse_args()

    logger = multiprocessing.log_to_stderr()
    logger.setLevel(multiprocessing.SUBDEBUG)

    input_q = Queue(maxsize=args.queue_size)
    output_q = Queue(maxsize=args.queue_size)
    pool = Pool(args.num_workers, worker, (input_q, output_q))

    video_capture = WebcamVideoStream(src=args.video_source,
                                      width=args.width,
                                      height=args.height).start()
    fps = FPS().start()

    while True:  # fps._numFrames < 120
        frame = video_capture.read()
        input_q.put(frame)

        t = time.time()

        cv2.imshow('Video', output_q.get())
        fps.update()

        print('[INFO] elapsed time: {:.2f}'.format(time.time() - t))

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    fps.stop()
    print('[INFO] elapsed time (total): {:.2f}'.format(fps.elapsed()))
    print('[INFO] approx. FPS: {:.2f}'.format(fps.fps()))

    pool.terminate()
    video_capture.stop()
    cv2.destroyAllWindows()
Thank you so much, it doesn't throw that error any more. Thanks a lot! The above code opens the webcam, but I'm not able to see the frame, and it throws the following error:
File "/Users/zumbala/anaconda/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
self.run()
File "/Users/zumbala/anaconda/lib/python3.5/multiprocessing/process.py", line 93, in run
self._target(*self._args, **self._kwargs)
File "/Users/zumbala/anaconda/lib/python3.5/multiprocessing/pool.py", line 103, in worker
initializer(*initargs)
File "/Users/zumbala/Downloads/Object-Detector-App-master/modified.py", line 60, in worker
output_q.put(detect_objects(frame, sess, detection_graph))
File "/Users/zumbala/Downloads/Object-Detector-App-master/modified.py", line 23, in detect_objects
image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
File "/Users/zumbala/anaconda/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 2733, in get_tensor_by_name
return self.as_graph_element(name, allow_tensor=True, allow_operation=False)
File "/Users/zumbala/anaconda/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 2584, in as_graph_element
return self._as_graph_element_locked(obj, allow_tensor, allow_operation)
File "/Users/zumbala/anaconda/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 2626, in _as_graph_element_locked
"graph." % (repr(name), repr(op_name)))
You should get output like:
[[ 1. 1. 2. 1. 2. 1. 1. 1. ... ]]
If you also print(scores), you will get something like:
[[ 0.96264094 0.83880234 0.68226969 0.60378635 0.14331663 0.12331686 ...]]
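To see the pairing more directly, you could zip the two arrays together (a sketch; classes and scores are the arrays returned by the sess.run call in the code above):

# Print each detected class id next to its confidence score
for cls, score in zip(np.squeeze(classes).astype(np.int32), np.squeeze(scores)):
    if score > 0.5:  # arbitrary cutoff, just for readability
        print('class id {} -> score {:.2f}'.format(cls, score))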
That will give you an idea of what the id is for each bird; then you can change your label file to something like this:
item {
  id: the_id_I_think_it_is
  name: 'HummingBird'
}
item {
  id: the_id_I_think_it_is
  name: 'Cuckoo'
}
Hmmm, I'm not sure what's going wrong there... it might be something wrong with your model? Have you tried running this code on one of the provided models?
Maybe try with tf.Session(graph=detection_graph) as sess: (see the sketch below).
(I'm not great with Python, not sure if this will help at all haha)
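Something like this in worker(), in case the explicit close is the problem (a sketch, untested):

def worker(input_q, output_q):
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
            od_graph_def.ParseFromString(fid.read())
        tf.import_graph_def(od_graph_def, name='')
    # The context manager closes the session even if the loop raises
    with tf.Session(graph=detection_graph) as sess:
        while True:
            frame = input_q.get()
            output_q.put(detect_objects(frame, sess, detection_graph))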
Yes, it works with those models. How can I create my own custom model for recognition?
Hehe okay... same here, I'm totally into Java.
I have made this work on an Android device, but it is not working on desktop. :(
To clarify, your retrained model works on Android?
Yes, it works perfectly on Android.
Oh wait okay, it looks like your retrained model isn't for object detection. Are you trying to find the objects in the image, or just label the image in general?
I'm trying to use it for object detection; I don't intend to use it for image captioning.
However, it works if I put cv2.imshow('Video', frame).
Wow, great. I want to achieve this on real-time video, not on a still picture.
If so, you can follow along with this tutorial to retrain TensorFlow's object detection model: https://github.com/tensorflow/models/blob/master/object_detection/g3doc/running_pets.md
However, it takes an extremely long time to retrain (days). It might be better to use the general object detection model (it might locate the birds without giving the type of bird) and then use another model on top that labels each bird with its type.
It might not work well in real time, though, since you have to pipe each frame through two models; see the sketch below.
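Roughly, the two-model idea would look like this (a sketch only; classify_bird_crop is a hypothetical wrapper around your retrained image classifier, and the detector tensors are the same ones used in the code above):

import numpy as np

def detect_and_label(image_np, sess, detection_graph, classify_bird_crop, min_score=0.5):
    # Stage 1: the generic detector proposes boxes (normalized coordinates)
    image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
    boxes_t = detection_graph.get_tensor_by_name('detection_boxes:0')
    scores_t = detection_graph.get_tensor_by_name('detection_scores:0')
    boxes, scores = sess.run([boxes_t, scores_t],
                             feed_dict={image_tensor: np.expand_dims(image_np, axis=0)})

    h, w = image_np.shape[:2]
    results = []
    for box, score in zip(np.squeeze(boxes), np.squeeze(scores)):
        if score < min_score:
            continue
        ymin, xmin, ymax, xmax = box
        # Stage 2: crop each detection and hand it to the bird classifier
        crop = image_np[int(ymin * h):int(ymax * h), int(xmin * w):int(xmax * w)]
        results.append((box, classify_bird_crop(crop)))  # hypothetical classifier call
    return results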
So the one that I have may not work?
I have to detect the gender of humans as well; I thought of doing that with this too.
The model that you linked to is just for image labelling, not localization, I believe.
Oh my... that's bad... Is there any way to use OpenCV to detect the object, crop the detected object, and then recognize it?
I'm not sure. I'd try getting your code to work localizing objects with the default model first, then add your bird model on top and see how fast it is. If it's too slow, bite the bullet and retrain the detection model. Object detection isn't an easy task haha...
Hahaha... it is quite hard... How about gender detection? Do you have any ideas about that?
The one that I'm trying won't localize the object?
You can do gender/age/face localization really easily with Watson, but it's an API call, so it's definitely not good for real-time video haha.
The one you are using will just say there is a hummingbird in the image, but won't be able to tell you where in the image it is.
Oh my... this is crazy... Any other way to do gender recognition?
I think OpenCV can do faces, but I'm not sure about gender.
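For what it's worth, OpenCV ships Haar cascades for faces (a sketch; the cascade XML path depends on your OpenCV install, and this only finds faces, not gender):

import cv2

# Path varies by install; many builds expose it via cv2.data.haarcascades
face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')

def find_faces(frame):
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    # Returns (x, y, w, h) rectangles for detected faces
    return face_cascade.detectMultiScale(gray, scaleFactor=1.3, minNeighbors=5)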
I'm going to close this issue; we can email if you have any questions on how to approach this.
Yeah, sure. I will email you.
Sir, please help me, because I have been trying to resolve this error for the last 2 weeks.
(tensorflow_gpu) C:\Users\thako\TensorFlow\models\research\object_detection>python train.py --logtostderr --train_dir=training/ --pipeline_config_path=training/faster_rcnn_inception_v2_pets.config
WARNING:tensorflow:From C:\Users\thako\Anaconda3\envs\tensorflow_gpu\lib\site-packages\tensorflow\python\platform\app.py:125: main (from __main__) is deprecated and will be removed in a future version.
Instructions for updating:
Use object_detection/model_main.py.
WARNING:tensorflow:From C:\Users\thako\TensorFlow\models\research\object_detection\legacy\trainer.py:266: create_global_step (from tensorflow.contrib.framework.python.ops.variables) is deprecated and will be removed in a future version.
Instructions for updating:
Please switch to tf.train.create_global_step
Traceback (most recent call last):
  File "C:\Users\thako\TensorFlow\models\research\object_detection\utils\label_map_util.py", line 141, in load_labelmap
    text_format.Merge(label_map_string, label_map)
  File "C:\Users\thako\Anaconda3\envs\tensorflow_gpu\lib\site-packages\google\protobuf\text_format.py", line 690, in Merge
    allow_unknown_field=allow_unknown_field)
  File "C:\Users\thako\Anaconda3\envs\tensorflow_gpu\lib\site-packages\google\protobuf\text_format.py", line 757, in MergeLines
    return parser.MergeLines(lines, message)
  File "C:\Users\thako\Anaconda3\envs\tensorflow_gpu\lib\site-packages\google\protobuf\text_format.py", line 782, in MergeLines
    self._ParseOrMerge(lines, message)
  File "C:\Users\thako\Anaconda3\envs\tensorflow_gpu\lib\site-packages\google\protobuf\text_format.py", line 804, in _ParseOrMerge
    self._MergeField(tokenizer, message)
  File "C:\Users\thako\Anaconda3\envs\tensorflow_gpu\lib\site-packages\google\protobuf\text_format.py", line 896, in _MergeField
    (message_descriptor.full_name, name))
google.protobuf.text_format.ParseError: 1:1 : Message type "object_detection.protos.StringIntLabelMap" has no field named "if".
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "train.py", line 184, in
How do I use my own trained model file?