Open bigg-S opened 5 months ago
It is incompatible with the latest version of vision camera. I've successfully managed to get it working with react-native-vision-camera@4.5.0
by editing some of the native code, as well as getting around the assets limitation on an Expo managed project. I can share the details if anyone would like.
@Orange9000 Can you please share the details?
@Orange9000 Can you please share the details?
I ended up rewriting the android part completely to support mediapipe instead of regular tensorflow lite. Here is the patch. Note that I got rid of rotation logic, which might be necessary in some projects.
diff --git a/node_modules/vision-camera-realtime-object-detection/android/build.gradle b/node_modules/vision-camera-realtime-object-detection/android/build.gradle
index 7c37883..87ed609 100644
--- a/node_modules/vision-camera-realtime-object-detection/android/build.gradle
+++ b/node_modules/vision-camera-realtime-object-detection/android/build.gradle
@@ -72,6 +72,7 @@ dependencies {
implementation "com.facebook.react:react-native"
api project(":react-native-vision-camera")
implementation "androidx.camera:camera-core:1.1.0-alpha06"
+ implementation 'com.google.mediapipe:tasks-vision:0.10.14'
implementation "org.jetbrains.kotlin:kotlin-stdlib:$kotlin_version"
implementation 'org.tensorflow:tensorflow-lite-task-vision:0.4.0'
}
diff --git a/node_modules/vision-camera-realtime-object-detection/android/src/main/java/com/visioncamerarealtimeobjectdetection/RealtimeObjectDetectionProcessorPluginPackage.kt b/node_modules/vision-camera-realtime-object-detection/android/src/main/java/com/visioncamerarealtimeobjectdetection/RealtimeObjectDetectionProcessorPluginPackage.kt
index 58f83d6..dc9a250 100644
--- a/node_modules/vision-camera-realtime-object-detection/android/src/main/java/com/visioncamerarealtimeobjectdetection/RealtimeObjectDetectionProcessorPluginPackage.kt
+++ b/node_modules/vision-camera-realtime-object-detection/android/src/main/java/com/visioncamerarealtimeobjectdetection/RealtimeObjectDetectionProcessorPluginPackage.kt
@@ -4,12 +4,20 @@ import com.facebook.react.ReactPackage
import com.facebook.react.bridge.NativeModule
import com.facebook.react.bridge.ReactApplicationContext
import com.facebook.react.uimanager.ViewManager
-import com.mrousavy.camera.frameprocessor.FrameProcessorPlugin
+import com.mrousavy.camera.frameprocessors.FrameProcessorPlugin
+import com.mrousavy.camera.frameprocessors.FrameProcessorPluginRegistry
import com.visioncamerarealtimeobjectdetection.realtimeobjectdetectionprocessor.RealtimeObjectDetectionProcessorPlugin
class RealtimeObjectDetectionProcessorPluginPackage : ReactPackage {
+ companion object {
+ init {
+ FrameProcessorPluginRegistry.addFrameProcessorPlugin("detectObjects") { proxy, options ->
+ RealtimeObjectDetectionProcessorPlugin(proxy, options)
+ }
+ }
+ }
+
override fun createNativeModules(reactContext: ReactApplicationContext): List<NativeModule> {
- FrameProcessorPlugin.register(RealtimeObjectDetectionProcessorPlugin(reactContext))
return emptyList()
}
diff --git a/node_modules/vision-camera-realtime-object-detection/android/src/main/java/com/visioncamerarealtimeobjectdetection/realtimeobjectdetectionprocessor/RealtimeObjectDetectionProcessorPlugin.kt b/node_modules/vision-camera-realtime-object-detection/android/src/main/java/com/visioncamerarealtimeobjectdetection/realtimeobjectdetectionprocessor/RealtimeObjectDetectionProcessorPlugin.kt
index 812784b..592d741 100644
--- a/node_modules/vision-camera-realtime-object-detection/android/src/main/java/com/visioncamerarealtimeobjectdetection/realtimeobjectdetectionprocessor/RealtimeObjectDetectionProcessorPlugin.kt
+++ b/node_modules/vision-camera-realtime-object-detection/android/src/main/java/com/visioncamerarealtimeobjectdetection/realtimeobjectdetectionprocessor/RealtimeObjectDetectionProcessorPlugin.kt
@@ -1,135 +1,87 @@
package com.visioncamerarealtimeobjectdetection.realtimeobjectdetectionprocessor
-import kotlin.math.max
-import android.graphics.Matrix
-import android.graphics.RectF
-import androidx.camera.core.ImageProxy
import com.facebook.react.bridge.ReactApplicationContext
-import com.facebook.react.bridge.ReadableMap
-import com.facebook.react.bridge.WritableNativeArray
-import com.facebook.react.bridge.WritableNativeMap
-import com.google.android.odml.image.MediaMlImageBuilder
-import com.mrousavy.camera.frameprocessor.FrameProcessorPlugin
-import org.tensorflow.lite.task.core.BaseOptions
-import org.tensorflow.lite.task.vision.detector.ObjectDetector
-
-class RealtimeObjectDetectionProcessorPlugin(reactContext: ReactApplicationContext) :
- FrameProcessorPlugin("detectObjects") {
- private val _context: ReactApplicationContext = reactContext
- private var _detector: ObjectDetector? = null
- fun rotateRect(rect: RectF, degrees: Int): RectF {
- val matrix = Matrix()
- matrix.postRotate(degrees.toFloat(), rect.centerX(), rect.centerY())
- val rotatedRect = RectF(rect)
- matrix.mapRect(rotatedRect)
- return rotatedRect
- }
+import com.google.mediapipe.tasks.core.BaseOptions
+import com.google.mediapipe.tasks.vision.core.RunningMode
+import com.google.mediapipe.framework.image.BitmapImageBuilder
+import com.google.mediapipe.tasks.components.containers.Detection
+import com.google.mediapipe.tasks.vision.objectdetector.ObjectDetector
+
+import com.mrousavy.camera.frameprocessors.Frame
+import com.mrousavy.camera.frameprocessors.VisionCameraProxy
+import com.mrousavy.camera.frameprocessors.FrameProcessorPlugin
+
+class RealtimeObjectDetectionProcessorPlugin(proxy: VisionCameraProxy, options: Map<String, Any>?): FrameProcessorPlugin() {
+ private val _context: ReactApplicationContext = proxy.context
+ private var _detector: ObjectDetector? = null
- fun getDetectorWithModelFile(config: ReadableMap): ObjectDetector {
+ fun getDetectorWithModelFile(config: Map<String, Any>): ObjectDetector {
if (_detector == null) {
- val modelFile = config.getString("modelFile")
-
- val scoreThreshold = config.getDouble("scoreThreshold").toFloat()
- val maxResults = config.getInt("maxResults")
- val numThreads = config.getInt("numThreads")
-
- val baseOptionsBuilder = BaseOptions.builder().setNumThreads(numThreads)
-
- val optionsBuilder =
- ObjectDetector.ObjectDetectorOptions.builder()
- .setBaseOptions(baseOptionsBuilder.build())
- .setScoreThreshold(scoreThreshold)
- .setMaxResults(maxResults)
-
- _detector =
- ObjectDetector.createFromFileAndOptions(
- _context,
- "custom/$modelFile",
- optionsBuilder.build()
- )
+ val modelFile = config["modelFile"].toString()
+
+ val maxResults = (config["maxResults"] as? Number)?.toInt()
+ val scoreThreshold = (config["scoreThreshold"] as? Number)?.toFloat()
+
+ val optionsBuilder = ObjectDetector.ObjectDetectorOptions.builder()
+ .setBaseOptions(BaseOptions.builder().setModelAssetPath(modelFile).build())
+ .setRunningMode(RunningMode.IMAGE)
+ .setMaxResults(maxResults)
+
+ if (scoreThreshold != null && scoreThreshold > 0) {
+ optionsBuilder.setScoreThreshold(scoreThreshold)
+ }
+
+ val options = optionsBuilder.build()
+
+ _detector = ObjectDetector.createFromOptions(_context, options);
}
return _detector!!
}
- override fun callback(frame: ImageProxy, params: Array<Any>): WritableNativeArray {
+ fun convertToConfigWithDefault(input: Map<String, Any>?): Map<String, Any> {
+ return input ?: emptyMap()
+ }
+
+ override fun callback(frame: Frame, arguments: Map<String, Any>?): Any? {
val mediaImage = frame.image
+ val results: MutableList<Any> = arrayListOf()
+
if (mediaImage == null) {
- return WritableNativeArray()
+ return results
}
- val config = params[0] as ReadableMap
-
- val mlImage = MediaMlImageBuilder(mediaImage).build()
+ val config = convertToConfigWithDefault(arguments)
- val frameWidth = mlImage.width
- val frameHeight = mlImage.height
+ val bitmap = frame.getImageProxy().toBitmap()
+ val mlImage = BitmapImageBuilder(bitmap).build()
- // val ratio = max(mlImage.width.toFloat() / frameWidth, mlImage.height.toFloat() / frameHeight)
+ val detectedObjects = getDetectorWithModelFile(config).detect(mlImage)?.detections()
- val results = WritableNativeArray()
- val detectedObjects = getDetectorWithModelFile(config).detect(mlImage)
+ detectedObjects?.forEach { detectedObject ->
+ val labels: MutableList<Any> = arrayListOf()
- for (detectedObject in detectedObjects) {
- val labels = WritableNativeArray()
-
- for (label in detectedObject.categories) {
- val labelMap = WritableNativeMap()
-
- labelMap.putInt("index", label.index)
- labelMap.putString("label", label.label)
- labelMap.putDouble("confidence", label.score.toDouble())
-
- labels.pushMap(labelMap)
+ detectedObject.categories().forEach { label ->
+ labels.add(mapOf(
+ "index" to label.index(),
+ "label" to label.categoryName(),
+ "confidence" to label.score().toDouble()
+ ))
}
- if (labels.size() > 0) {
- val objectMap = WritableNativeMap()
-
- objectMap.putArray("labels", labels)
-
- val top = when (frame.imageInfo.rotationDegrees) {
- 90 -> detectedObject.boundingBox.left / frameWidth
- 180 -> (frameHeight - detectedObject.boundingBox.bottom) / frameHeight
- 270 -> (frameWidth - detectedObject.boundingBox.right) / frameWidth
- else -> detectedObject.boundingBox.top / frameHeight
- }
-
- val height = when (frame.imageInfo.rotationDegrees) {
- 90 -> (detectedObject.boundingBox.right - detectedObject.boundingBox.left) / frameWidth
- 180 -> (detectedObject.boundingBox.bottom - detectedObject.boundingBox.top) / frameHeight
- 270 -> (detectedObject.boundingBox.right - detectedObject.boundingBox.left) / frameWidth
- else -> (detectedObject.boundingBox.bottom - detectedObject.boundingBox.top) / frameHeight
- }
-
- val left = when (frame.imageInfo.rotationDegrees) {
- 90 -> (frameHeight - detectedObject.boundingBox.bottom) / frameHeight
- 180 -> (frameWidth - detectedObject.boundingBox.right) / frameWidth
- 270 -> detectedObject.boundingBox.top / frameHeight
- else -> detectedObject.boundingBox.left / frameWidth
- }
-
- val width = when (frame.imageInfo.rotationDegrees) {
- 90 -> (detectedObject.boundingBox.bottom - detectedObject.boundingBox.top) / frameHeight
- 180 -> (detectedObject.boundingBox.right - detectedObject.boundingBox.left) / frameWidth
- 270 -> (detectedObject.boundingBox.bottom - detectedObject.boundingBox.top) / frameHeight
- else -> (detectedObject.boundingBox.right - detectedObject.boundingBox.left) / frameWidth
- }
-
- println("abcde: ${top} ${left} ${width} ${height}")
- println("xxxxx: ${mediaImage.width} ${mediaImage.height}")
- println("xxxxx: ${frame.imageInfo.rotationDegrees}")
-
- objectMap.putDouble("top", top.toDouble())
- objectMap.putDouble("left", left.toDouble())
- objectMap.putDouble("width", width.toDouble())
- objectMap.putDouble("height", height.toDouble())
-
- results.pushMap(objectMap)
+ if (labels.isNotEmpty()) {
+ results.add(mapOf(
+ "labels" to labels,
+ "top" to detectedObject.boundingBox().top.toDouble(),
+ "left" to detectedObject.boundingBox().left.toDouble(),
+ "width" to detectedObject.boundingBox().width().toDouble(),
+ "height" to detectedObject.boundingBox().height().toDouble()
+ ))
}
}
return results
}
+
}
diff --git a/node_modules/vision-camera-realtime-object-detection/src/index.tsx b/node_modules/vision-camera-realtime-object-detection/src/index.tsx
index 1911567..8e0450b 100644
--- a/node_modules/vision-camera-realtime-object-detection/src/index.tsx
+++ b/node_modules/vision-camera-realtime-object-detection/src/index.tsx
@@ -14,7 +14,6 @@ export interface ObjectLabel {
}
export interface DetectedObject {
- frameRotation: number;
labels: ObjectLabel[];
/**
It expects the model file to be at this location: android/app/src/main/assets/trained.tflite
Usage example:
import {Frame, VisionCameraProxy} from 'react-native-vision-camera';
import {FrameProcessorConfig} from 'vision-camera-realtime-object-detection';
const plugin = VisionCameraProxy.initFrameProcessorPlugin('detectObjects', {});
export function scanObjects(
frame: Frame,
config: FrameProcessorConfig,
): string {
'worklet';
if (plugin == null) {
throw new Error('Failed to load Frame Processor Plugin "detectObjects"!');
}
return plugin.call(frame, config);
}
// Options handed to the native detector on every frame.
// NOTE(review): the MediaPipe rewrite above does not read `numThreads`
// (only modelFile, maxResults, scoreThreshold are used) — confirm before
// relying on it.
const frameProcessorConfig: FrameProcessorConfig = {
numThreads: 1,
maxResults: 1, // keep only the single best detection per frame
scoreThreshold: 0.1, // low threshold: accept weak detections too
modelFile: 'trained.tflite', // looked up under android/app/src/main/assets/
};
// Skia frame processor: renders the camera preview first, then runs
// detection asynchronously so inference does not block the preview.
const frameProcessor = useSkiaFrameProcessor(frame => {
'worklet';
frame.render();
runAsync(frame, () => {
'worklet';
const detectedObjects: DetectedObject[] = scanObjects(
frame,
frameProcessorConfig,
);
// Flatten detections into simple rectangles for the overlay.
const rects: DetectionRectangleInterface[] = [];
for (const inFrameObject of detectedObjects) {
// Only the first (highest-ranked) label of each detection is used.
const label = inFrameObject?.labels[0]?.label;
// NOTE(review): if labels is empty this multiplies undefined — yields NaN;
// confirm that is acceptable for the overlay.
const confidence = inFrameObject?.labels[0]?.confidence * 100;
rects.push({
label,
confidence,
y: inFrameObject.top,
x: inFrameObject.left,
width: inFrameObject.width,
height: inFrameObject.height,
});
}
// Hand results back to the JS thread (setRectJS is defined outside this
// snippet — presumably a runOnJS-wrapped state setter; verify in caller).
setRectJS(rects);
});
}, []);
I never got to test it with a decent model though, only a poorly trained one, so I am not sure it detects everything correctly. But it is a 100% working example, tested on react-native-vision-camera@4.5.0.
I also made a Kaggle notebook, in case you need to train your own model. Let me know if you want me to share it.
After following the guide on how to set up this package, the build keeps failing with the error, " Task :vision-camera-realtime-object-detection:compileDebugKotlin FAILED ". The following is my package.json: "dependencies": { "@babel/plugin-proposal-export-namespace-from": "^7.18.9", "@react-native-async-storage/async-storage": "^1.18.2", "@react-navigation/bottom-tabs": "^6.5.8", "@react-navigation/drawer": "^6.6.3", "@react-navigation/native": "^6.1.17", "@react-navigation/native-stack": "^6.9.26", "@react-navigation/stack": "^6.3.17", "@reduxjs/toolkit": "^1.9.5", "axios": "^1.4.0", "deprecated-react-native-prop-types": "^5.0.0", "eslint-plugin-simple-import-sort": "^8.0.0", "i18n-js": "3.9.2", "lodash": "^4.17.21", "lodash.memoize": "4.1.2", "lottie-react-native": "^5.1.6", "moment": "^2.29.4", "nativewind": "^2.0.11", "react": "18.2.0", "react-native": "^0.72.14", "react-native-fast-tflite": "^1.2.0", "react-native-gesture-handler": "^2.16.2", "react-native-image-picker": "^7.1.2", "react-native-localize": "^2.2.4", "react-native-permissions": "^4.1.5", "react-native-reanimated": "^3.3.0", "react-native-safe-area-context": "^4.10.1", "react-native-screens": "^3.31.1", "react-native-vector-icons": "^9.2.0", "react-native-vision-camera": "^4.0.5", "react-native-worklets-core": "^1.3.3", "react-redux": "^8.1.1", "reanimated-bottom-sheet": "^1.0.0-alpha.22", "redux-persist": "^6.0.0", "redux-thunk": "^2.4.2", "vision-camera-realtime-object-detection": "^0.5.1", "vision-camera-resize-plugin": "^3.1.0" },
What might the issue be?