Open libofei2004 opened 1 month ago
Hi @libofei2004,
Could you please let us know how you are configuring the SSDMobileNet-V2 (int8) model in the sample app we provide?
Thank you!!
@kuaashish
/*
* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.mediapipe.examples.objectdetection
import androidx.lifecycle.ViewModel
/**
* This ViewModel is used to store object detector helper settings
*/
class MainViewModel : ViewModel() {
private var _delegate: Int = ObjectDetectorHelper.DELEGATE_GPU //ObjectDetectorHelper.DELEGATE_CPU
private var _threshold: Float = ObjectDetectorHelper.THRESHOLD_DEFAULT
private var _maxResults: Int = 1 // ObjectDetectorHelper.MAX_RESULTS_DEFAULT
private var _model: Int = ObjectDetectorHelper.MODEL_MOBILENET2 //ObjectDetectorHelper.MODEL_EFFICIENTDETV0
val currentDelegate: Int get() = _delegate
val currentThreshold: Float get() = _threshold
val currentMaxResults: Int get() = _maxResults
val currentModel: Int get() = _model
fun setDelegate(delegate: Int) {
_delegate = delegate
}
fun setThreshold(threshold: Float) {
_threshold = threshold
}
fun setMaxResults(maxResults: Int) {
_maxResults = maxResults
}
fun setModel(model: Int) {
_model = model
}
}
/*
* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.mediapipe.examples.objectdetection
import android.content.Context
import android.graphics.Bitmap
import android.media.MediaMetadataRetriever
import android.net.Uri
import android.os.SystemClock
import android.util.Log
import androidx.annotation.VisibleForTesting
import androidx.camera.core.ImageProxy
import com.google.mediapipe.framework.image.BitmapImageBuilder
import com.google.mediapipe.framework.image.MPImage
import com.google.mediapipe.tasks.core.BaseOptions
import com.google.mediapipe.tasks.core.Delegate
import com.google.mediapipe.tasks.vision.core.ImageProcessingOptions
import com.google.mediapipe.tasks.vision.core.RunningMode
import com.google.mediapipe.tasks.vision.objectdetector.ObjectDetector
import com.google.mediapipe.tasks.vision.objectdetector.ObjectDetectorResult
class ObjectDetectorHelper(
var threshold: Float = THRESHOLD_DEFAULT,
var maxResults: Int = MAX_RESULTS_DEFAULT,
var currentDelegate: Int = DELEGATE_GPU, // DELEGATE_CPU
var currentModel: Int = MODEL_MOBILENETV2, // MODEL_EFFICIENTDETV2, MODEL_EFFICIENTDETV0
var runningMode: RunningMode = RunningMode.IMAGE,
val context: Context,
// The listener is only used when running in RunningMode.LIVE_STREAM
var objectDetectorListener: DetectorListener? = null
) {
// For this example this needs to be a var so it can be reset on changes. If the ObjectDetector
// will not change, a lazy val would be preferable.
private var objectDetector: ObjectDetector? = null
private var imageRotation = 0
private lateinit var imageProcessingOptions: ImageProcessingOptions
init {
setupObjectDetector()
}
fun clearObjectDetector() {
objectDetector?.close()
objectDetector = null
}
// Initialize the object detector using current settings on the
// thread that is using it. CPU can be used with detectors
// that are created on the main thread and used on a background thread, but
// the GPU delegate needs to be used on the thread that initialized the detector
fun setupObjectDetector() {
// Set general detection options, including number of used threads
val baseOptionsBuilder = BaseOptions.builder()
// Use the specified hardware for running the model. Default to CPU
when (currentDelegate) {
DELEGATE_CPU -> {
baseOptionsBuilder.setDelegate(Delegate.CPU)
}
DELEGATE_GPU -> {
// Is there a check for GPU being supported?
baseOptionsBuilder.setDelegate(Delegate.GPU)
}
}
val modelName = when (currentModel) {
MODEL_EFFICIENTDETV0 -> "efficientdet-lite0.tflite"
MODEL_EFFICIENTDETV2 -> "efficientdet-lite2.tflite"
MODEL_MOBILENETV2 -> "ssd_mobilenet_v2.tflite"
MODEL_DOGS -> "dogs.tflite"
MODEL_MOBILENET2 -> "mobilenet2.tflite"
MODEL_EFFICIENTDET2 -> "efficient1.tflite"
else -> "efficientdet-lite0.tflite"
}
baseOptionsBuilder.setModelAssetPath(modelName)
// Check if runningMode is consistent with objectDetectorListener
when (runningMode) {
RunningMode.LIVE_STREAM -> {
if (objectDetectorListener == null) {
throw IllegalStateException(
"objectDetectorListener must be set when runningMode is LIVE_STREAM."
)
}
}
RunningMode.IMAGE, RunningMode.VIDEO -> {
// no-op
}
}
try {
val optionsBuilder = ObjectDetector.ObjectDetectorOptions.builder()
.setBaseOptions(baseOptionsBuilder.build())
.setScoreThreshold(threshold).setRunningMode(runningMode)
.setMaxResults(maxResults)
imageProcessingOptions = ImageProcessingOptions.builder()
.setRotationDegrees(imageRotation).build()
when (runningMode) {
RunningMode.IMAGE, RunningMode.VIDEO -> optionsBuilder.setRunningMode(
runningMode
)
RunningMode.LIVE_STREAM -> optionsBuilder.setRunningMode(
runningMode
).setResultListener(this::returnLivestreamResult)
.setErrorListener(this::returnLivestreamError)
}
val options = optionsBuilder.build()
objectDetector = ObjectDetector.createFromOptions(context, options)
} catch (e: IllegalStateException) {
objectDetectorListener?.onError(
"Object detector failed to initialize. See error logs for details"
)
Log.e(TAG, "TFLite failed to load model with error: " + e.message)
} catch (e: RuntimeException) {
objectDetectorListener?.onError(
"Object detector failed to initialize. See error logs for " + "details",
GPU_ERROR
)
Log.e(
TAG,
"Object detector failed to load model with error: " + e.message
)
}
}
// Return the running status of the detector helper
fun isClosed(): Boolean {
return objectDetector == null
}
// Accepts the URI for a video file loaded from the user's gallery and attempts to run
// object detection inference on the video. This process will evaluate every frame in
// the video and attach the results to a bundle that will be returned.
fun detectVideoFile(
videoUri: Uri, inferenceIntervalMs: Long
): ResultBundle? {
if (runningMode != RunningMode.VIDEO) {
throw IllegalArgumentException(
"Attempting to call detectVideoFile" + " while not using RunningMode.VIDEO"
)
}
if (objectDetector == null) return null
// Inference time is the difference between the system time at the start and finish of the
// process
val startTime = SystemClock.uptimeMillis()
var didErrorOccurred = false
// Load frames from the video and run the object detection model.
val retriever = MediaMetadataRetriever()
retriever.setDataSource(context, videoUri)
val videoLengthMs =
retriever.extractMetadata(MediaMetadataRetriever.METADATA_KEY_DURATION)
?.toLong()
// Note: We need to read width/height from frame instead of getting the width/height
// of the video directly because MediaRetriever returns frames that are smaller than the
// actual dimension of the video file.
val firstFrame = retriever.getFrameAtTime(0)
val width = firstFrame?.width
val height = firstFrame?.height
// If the video is invalid, return a null detection result
if ((videoLengthMs == null) || (width == null) || (height == null)) return null
// Next, we'll get one frame every frameInterval ms, then run detection on these frames.
val resultList = mutableListOf<ObjectDetectorResult>()
val numberOfFrameToRead = videoLengthMs.div(inferenceIntervalMs)
for (i in 0..numberOfFrameToRead) {
val timestampMs = i * inferenceIntervalMs // ms
retriever.getFrameAtTime(
timestampMs * 1000, // convert from ms to micro-s
MediaMetadataRetriever.OPTION_CLOSEST
)?.let { frame ->
// Convert the video frame to ARGB_8888 which is required by the MediaPipe
val argb8888Frame =
if (frame.config == Bitmap.Config.ARGB_8888) frame
else frame.copy(Bitmap.Config.ARGB_8888, false)
// Convert the input Bitmap object to an MPImage object to run inference
val mpImage = BitmapImageBuilder(argb8888Frame).build()
val startTime1 = SystemClock.uptimeMillis()
// Run object detection using MediaPipe Object Detector API
objectDetector?.detectForVideo(mpImage, timestampMs)
?.let { detectionResult ->
resultList.add(detectionResult)
} ?: run {
// Note: "run" is required here; a bare lambda after ?: is never invoked.
didErrorOccurred = true
objectDetectorListener?.onError(
"ResultBundle could not be returned in detectVideoFile"
)
}
val lastProcessingTimeMs = SystemClock.uptimeMillis() - startTime1
println("lastProcessingTimeMs=== " + lastProcessingTimeMs)
} ?: run {
didErrorOccurred = true
objectDetectorListener?.onError(
"Frame at specified time could not be" + " retrieved when detecting in video."
)
}
}
retriever.release()
val inferenceTimePerFrameMs =
(SystemClock.uptimeMillis() - startTime).div(numberOfFrameToRead)
return if (didErrorOccurred) {
null
} else {
ResultBundle(resultList, inferenceTimePerFrameMs, height, width)
}
}
// Runs object detection on live streaming cameras frame-by-frame and returns the results
// asynchronously to the caller.
fun detectLivestreamFrame(imageProxy: ImageProxy) {
if (runningMode != RunningMode.LIVE_STREAM) {
throw IllegalArgumentException(
"Attempting to call detectLivestreamFrame" + " while not using RunningMode.LIVE_STREAM"
)
}
val frameTime = SystemClock.uptimeMillis()
println("width,height========" + imageProxy.width+","+imageProxy.height)
// Copy out RGB bits from the frame to a bitmap buffer
val bitmapBuffer = Bitmap.createBitmap(
imageProxy.width, imageProxy.height, Bitmap.Config.ARGB_8888
)
imageProxy.use { bitmapBuffer.copyPixelsFromBuffer(imageProxy.planes[0].buffer) }
imageProxy.close()
// If the input image rotation has changed, recreate the detector with the new rotation
if (imageProxy.imageInfo.rotationDegrees != imageRotation) {
imageRotation = imageProxy.imageInfo.rotationDegrees
clearObjectDetector()
setupObjectDetector()
return
}
// Convert the input Bitmap object to an MPImage object to run inference
val mpImage = BitmapImageBuilder(bitmapBuffer).build()
detectAsync(mpImage, frameTime)
}
// Run object detection using MediaPipe Object Detector API
@VisibleForTesting
fun detectAsync(mpImage: MPImage, frameTime: Long) {
// As we're using running mode LIVE_STREAM, the detection result will be returned in
// returnLivestreamResult function
objectDetector?.detectAsync(mpImage, imageProcessingOptions, frameTime)
}
// Return the detection result to this ObjectDetectorHelper's caller
private fun returnLivestreamResult(
result: ObjectDetectorResult, input: MPImage
) {
val finishTimeMs = SystemClock.uptimeMillis()
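// result.timestampMs() is the frameTime passed to detectAsync(), so this measures
// end-to-end latency for the frame, not just model execution time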
val inferenceTime = finishTimeMs - result.timestampMs()
println("inferenceTimeMs2222======== " + inferenceTime)
objectDetectorListener?.onResults(
ResultBundle(
listOf(result),
inferenceTime,
input.height,
input.width,
imageRotation
)
)
}
// Return errors thrown during detection to this ObjectDetectorHelper's caller
private fun returnLivestreamError(error: RuntimeException) {
objectDetectorListener?.onError(
error.message ?: "An unknown error has occurred"
)
}
// Accepts a Bitmap and runs object detection inference on it, returning results
// to the caller
fun detectImage(image: Bitmap): ResultBundle? {
if (runningMode != RunningMode.IMAGE) {
throw IllegalArgumentException(
"Attempting to call detectImage" + " while not using RunningMode.IMAGE"
)
}
if (objectDetector == null) return null
// Inference time is the difference between the system time at the start and finish of the
// process
val startTime = SystemClock.uptimeMillis()
// Convert the input Bitmap object to an MPImage object to run inference
val mpImage = BitmapImageBuilder(image).build()
// Run object detection using MediaPipe Object Detector API
objectDetector?.detect(mpImage)?.also { detectionResult ->
val inferenceTimeMs = SystemClock.uptimeMillis() - startTime
println("inferenceTimeMs======== " + inferenceTimeMs)
return ResultBundle(
listOf(detectionResult),
inferenceTimeMs,
image.height,
image.width
)
}
// If objectDetector?.detect() returns null, this is likely an error. Returning null
// to indicate this.
return null
}
// Wraps results from inference, the time it took to perform inference, and
// the input image width and height so callers can properly scale the UI
data class ResultBundle(
val results: List<ObjectDetectorResult>,
val inferenceTime: Long,
val inputImageHeight: Int,
val inputImageWidth: Int,
val inputImageRotation: Int = 0
)
companion object {
const val DELEGATE_CPU = 0
const val DELEGATE_GPU = 1
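// Model selection values; setupObjectDetector() maps each one to a .tflite asset file above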
const val MODEL_EFFICIENTDETV0 = 0
const val MODEL_EFFICIENTDETV2 = 1
const val MODEL_MOBILENETV2 = 4
const val MODEL_DOGS = 5
const val MODEL_MOBILENET2 = 6
const val MODEL_EFFICIENTDET2 = 7
const val MAX_RESULTS_DEFAULT = 3
const val THRESHOLD_DEFAULT = 0.5F
const val OTHER_ERROR = 0
const val GPU_ERROR = 1
const val TAG = "ObjectDetectorHelper"
}
// Used to pass results or errors back to the calling class
interface DetectorListener {
fun onError(error: String, errorCode: Int = OTHER_ERROR)
fun onResults(resultBundle: ResultBundle)
}
}
@kuaashish Could you take a look at my code?
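For reference, the model-selection path in the helper above boils down to roughly the following (a condensed sketch, not a drop-in replacement; it assumes the downloaded SSDMobileNet-V2 (int8) file has been renamed to ssd_mobilenet_v2.tflite and copied into the app's assets directory, and it uses only API calls that already appear in the code above):
// Condensed sketch: selecting SSDMobileNet-V2 with the GPU delegate, as configured above
val baseOptions = BaseOptions.builder()
.setDelegate(Delegate.GPU) // DELEGATE_GPU in the helper; Delegate.CPU is the other option
.setModelAssetPath("ssd_mobilenet_v2.tflite") // MODEL_MOBILENETV2 maps to this asset
.build()
val options = ObjectDetector.ObjectDetectorOptions.builder()
.setBaseOptions(baseOptions)
.setScoreThreshold(THRESHOLD_DEFAULT) // 0.5f
.setMaxResults(1) // MainViewModel default above
.setRunningMode(RunningMode.LIVE_STREAM)
.setResultListener(this::returnLivestreamResult)
.setErrorListener(this::returnLivestreamError)
.build()
objectDetector = ObjectDetector.createFromOptions(context, options)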
Have I written custom code (as opposed to using a stock example script provided in MediaPipe)
None
OS Platform and Distribution
android 12
Mobile device if the issue happens on mobile device
android 12
Browser and version if the issue happens on browser
No response
Programming Language and version
java
MediaPipe version
No response
Bazel version
No response
Solution
mediapipe
Android Studio, NDK, SDK versions (if issue is related to building in Android environment)
Android Studio Giraffe | 2022.3.1 Patch 3
Xcode & Tulsi version (if issue is related to building for iOS)
No response
Describe the actual behavior
I downloaded the models from https://ai.google.dev/edge/mediapipe/solutions/vision/object_detector?hl=zh-cn, including EfficientDet-Lite0 (int8) and SSDMobileNet-V2 (int8), and then ran the Android object_detection sample downloaded from https://github.com/google-ai-edge/mediapipe-samples. EfficientDet-Lite0 (int8) runs successfully, but when I run SSDMobileNet-V2 (int8), an error occurs.
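To isolate the failure, the model load can be checked on its own in IMAGE mode with the CPU delegate; a minimal sketch using only the API calls from the helper above (the asset name and the bitmap variable are assumptions):
// Hypothetical minimal check: can the int8 SSDMobileNet-V2 model be loaded and run at all?
val options = ObjectDetector.ObjectDetectorOptions.builder()
.setBaseOptions(
BaseOptions.builder()
.setDelegate(Delegate.CPU) // CPU first, to rule out GPU-delegate issues
.setModelAssetPath("ssd_mobilenet_v2.tflite") // assumed asset name, as above
.build()
)
.setRunningMode(RunningMode.IMAGE)
.setScoreThreshold(0.5f)
.setMaxResults(3)
.build()
val detector = ObjectDetector.createFromOptions(context, options)
val result = detector.detect(BitmapImageBuilder(bitmap).build()) // bitmap: any ARGB_8888 Bitmap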
Describe the expected behaviour
SSDMobileNet-V2 (int8) should run properly, just like EfficientDet-Lite0 (int8).
Standalone code/steps you may have used to try to get what you need
Other info / Complete Logs
No response