android / codelab-mlkit-android

Other
179 stars 101 forks source link

Text Recognition Text Bounding Box Off #65

Open delasign opened 1 year ago

delasign commented 1 year ago

Android Studio Version: Android Studio Flamingo | 2022.2.1 Patch 1

Problem: The Text Recognition bounding box is off.

This is best illustrated by this Stack Overflow post, whose solution doesn't solve the problem for me: https://stackoverflow.com/questions/66624836/android-locating-words-on-the-screen-google-ml-kit-bounding-boxes-are-off-a-bi

Image reference from the Stack Overflow post (please note that my code doesn't produce boxes with this styling): https://i.stack.imgur.com/idVzM.jpg

I tried to follow the ML Kit codelab (https://codelabs.developers.google.com/codelabs/mlkit-android#4), but it doesn't run.

--

What I'm doing:

I am passing a bitmap selected from the photo gallery into the text recognizer, and the bounding boxes it returns do not line up precisely with the text.

Code found below

--

build.gradle.kts

// Gradle plugins applied to this module: the Android application plugin,
// the Kotlin Android plugin, and the Kotlin serialization compiler plugin.
plugins {
    id("com.android.application")
    id("org.jetbrains.kotlin.android")
    kotlin("plugin.serialization")
}

// Version components. The android.defaultConfig block combines them into
// versionCode (weighted sum) and versionName ("major.minor.patch").
val versionMajor = 1
val versionMinor = 0
val versionPatch = 1
val versionBuild = 1 // Bump for dogfood builds, public betas, etc.

// Android Gradle plugin configuration for the application module.
android {
    namespace = "--"
    compileSdk = 33

    defaultConfig {
        applicationId = "--"
        minSdk = 30
        targetSdk = 33
        // Weighted sum of the version components declared at the top of the script.
        // NOTE(review): with weights 10000/1000/100/1 the fields overlap once
        // versionMinor or versionPatch exceeds 9, or versionBuild exceeds 99
        // (e.g. 1.0.10 yields the same code as 1.1.0) — confirm this is acceptable.
        versionCode = versionMajor * 10000 + versionMinor * 1000 + versionPatch * 100 + versionBuild
        // Human-readable version; the build number is intentionally not part of it.
        versionName = "${versionMajor}.${versionMinor}.${versionPatch}"

        testInstrumentationRunner = "androidx.test.runner.AndroidJUnitRunner"
        vectorDrawables {
            useSupportLibrary = true
        }
    }

    buildTypes {
        release {
            // Enables code shrinking, obfuscation, and optimization for only
            // your project's release build type.
            isMinifyEnabled = true

            // Enables resource shrinking, which is performed by the
            // Android Gradle plugin.
            isShrinkResources = true

            // Includes the default ProGuard rules files that are packaged with
            // the Android Gradle plugin. To learn more, go to the section about
            // R8 configuration files.
            proguardFiles(
                getDefaultProguardFile("proguard-android-optimize.txt"),
                "proguard-rules.pro"
            )
        }
    }
    // Java source/bytecode level; kept in step with the Kotlin jvmTarget below.
    compileOptions {
        sourceCompatibility = JavaVersion.VERSION_1_8
        targetCompatibility = JavaVersion.VERSION_1_8
    }
    kotlinOptions {
        jvmTarget = "1.8"
    }
    // Jetpack Compose is enabled for this module.
    buildFeatures {
        compose = true
    }
    composeOptions {
        // NOTE(review): the Compose compiler extension version must be compatible
        // with the Kotlin plugin version, which is not visible in this script.
        kotlinCompilerExtensionVersion = "1.4.5"
    }
    packaging {
        resources {
            // Exclude the AL2.0 / LGPL2.1 license files under META-INF from packaging.
            excludes += "/META-INF/{AL2.0,LGPL2.1}"
        }
    }
}

// CameraX version shared by the camera-camera2 and camera-lifecycle artifacts below.
// NOTE(review): camera-view is pinned separately to 1.3.0-alpha06, so the CameraX
// artifacts are on different release lines — confirm this mix is intentional.
val cameraxVersion = "1.0.1"

// Module dependencies, grouped by purpose.
dependencies {

    // AndroidX core, lifecycle, activity and fragment support
    implementation ("androidx.core:core-ktx:1.10.1")
    implementation ("androidx.lifecycle:lifecycle-runtime-ktx:2.6.1")
    implementation ("androidx.activity:activity-compose:1.7.1")
    implementation("androidx.fragment:fragment-ktx:1.5.7")
    // Jetpack Compose; the artifacts without an explicit version are governed by the BOM
    implementation (platform("androidx.compose:compose-bom:2022.10.00"))
    implementation ("androidx.compose.ui:ui")
    implementation ("androidx.compose.ui:ui-graphics")
    implementation ("androidx.compose.ui:ui-tooling-preview")
    implementation ("androidx.compose.material3:material3")
    // Unit and instrumentation testing
    testImplementation ("junit:junit:4.13.2")
    androidTestImplementation ("androidx.test.ext:junit:1.1.5")
    androidTestImplementation ("androidx.test.espresso:espresso-core:3.5.1")
    androidTestImplementation (platform("androidx.compose:compose-bom:2022.10.00"))
    androidTestImplementation ("androidx.compose.ui:ui-test-junit4")
    // Compose tooling, debug builds only
    debugImplementation ("androidx.compose.ui:ui-tooling")
    debugImplementation ("androidx.compose.ui:ui-test-manifest")
    // JSON handling (both kotlinx.serialization and Gson are declared)
    implementation("org.jetbrains.kotlinx:kotlinx-serialization-json:1.5.0")
    implementation("com.google.code.gson:gson:2.8.9")
    // ConstraintLayout for Compose
    implementation("androidx.constraintlayout:constraintlayout-compose:1.0.1")
    // CAMERA
    implementation("androidx.camera:camera-camera2:$cameraxVersion")
    implementation("androidx.camera:camera-lifecycle:$cameraxVersion")
    implementation("androidx.camera:camera-view:1.3.0-alpha06")
    // Datastore
    // NOTE(review): both the RxJava2 and RxJava3 bindings are declared — verify both are needed.
    implementation("androidx.datastore:datastore-preferences:1.0.0")
    implementation("androidx.datastore:datastore-preferences-rxjava2:1.0.0")
    implementation("androidx.datastore:datastore-preferences-rxjava3:1.0.0")
    // MLKit
    implementation("com.google.android.gms:play-services-mlkit-text-recognition:19.0.0")
}

--

Algorithm:

/**
 * Runs text recognition on [image] and reports the recognized text blocks whose
 * text satisfies `isPrice` through `onProcessMedia`.
 *
 * The callback is delivered exactly once per call, from the completion listener:
 * with the matching blocks on success, or with an empty array when recognition
 * fails. Nothing is delivered when the context has no display (early return).
 *
 * @param image bitmap to analyze; it is handed to the recognizer with a rotation
 *   of 0 degrees, i.e. it is expected to be upright already.
 */
@SuppressLint("UnsafeOptInUsageError")
fun TextRecognitionCoordinator.analyzeMedia(image: Bitmap) {
    // Guard only: bail out when no display is available. The rotation value itself
    // is not used, because the bitmap is passed to the recognizer with rotation 0.
    context?.display?.rotation ?: return

    CoroutineScope(Dispatchers.IO).launch {
        // Named differently from the `image` parameter to avoid shadowing it.
        val inputImage = InputImage.fromBitmap(image, 0)

        // Written by the success listener, read by the completion listener.
        var elementArray: Array<Text.TextBlock> = arrayOf()

        // Pass image to an ML Kit Vision API
        recognizer.process(inputImage)
            .addOnSuccessListener { visionText ->
                // Task completed successfully
                if (TextRecognitionCoordinator.debug) {
                    Log.i(
                        "${TextRecognitionCoordinator.identifier}",
                        "${DebuggingIdentifiers.actionOrEventSucceded} processImage | processed recognition"
                    )
                }

                // Keep only the price-like blocks; build the array in one pass
                // instead of re-copying it for every appended element.
                elementArray = visionText.textBlocks
                    .filter { block -> isPrice(block.text) }
                    .onEach { block ->
                        if (TextRecognitionCoordinator.debug) {
                            Log.i(
                                "${TextRecognitionCoordinator.identifier}",
                                "${DebuggingIdentifiers.actionOrEventSucceded} analyzeMedia | text: ${block.text}"
                            )
                        }
                    }
                    .toTypedArray()

                if (TextRecognitionCoordinator.debug) {
                    Log.i(
                        "${TextRecognitionCoordinator.identifier}",
                        "${DebuggingIdentifiers.actionOrEventSucceded} analyzeMedia | closing with count : ${elementArray.count()}"
                    )
                }
            }
            .addOnFailureListener { e ->
                // Task failed with an exception
                Log.e(
                    "${TextRecognitionCoordinator.identifier}",
                    "${DebuggingIdentifiers.actionOrEventFailed} analyzeMedia | Failed to recognize text. Error: $e"
                )
                // The result is deliberately NOT delivered here: the completion
                // listener below also runs after a failure, so invoking the
                // callback in both places would deliver it twice.
            }
            .addOnCompleteListener {
                // Return the (possibly empty) array once, on success and on failure alike.
                onProcessMedia?.invoke(elementArray)
            }
    }
}

--

Composable that's drawn on a full screen card

@Composable
            /**
             * Draws a red box over the area covered by [element]'s bounding box, scaled
             * from image coordinates to screen coordinates. Emits nothing when the
             * element has no bounding box.
             *
             * NOTE(review): the scaled values are used directly as dp. That is only
             * correct if screenWidth/screenHeight are expressed in dp; if they are in
             * pixels, the result must be converted with the display density — confirm.
             * NOTE(review): independent horizontal and vertical ratios assume the image
             * is stretched to fill the whole screen; an aspect-preserving content scale
             * would need a single ratio plus an offset — confirm against the image view.
             */
            fun createBoundingBox(element: Text.TextBlock) {
                val boundingBox = element.boundingBox ?: return

                Log.i(
                    "${identifier}",
                    "${DebuggingIdentifiers.actionOrEventSucceded} bounding box ${boundingBox} | top ${boundingBox.top} | left ${boundingBox.left}."
                )

                // Note that screen width and screen height are the device's width and height.
                // Divide as floats: with integer operands the ratio would be truncated
                // (e.g. 1080 / 4000 == 0), which misplaces or collapses the box.
                val wRatio = screenWidth.toFloat() / imageWidth.toFloat()
                val hRatio = screenHeight.toFloat() / imageHeight.toFloat()

                // Bounding box mapped from image space to screen space.
                val elementTop = boundingBox.top * hRatio
                val elementLeft = boundingBox.left * wRatio
                val elementWidth = boundingBox.width() * wRatio
                val elementHeight = boundingBox.height() * hRatio
                Box(
                    modifier = Modifier
                        .constrainAs(createRef()) {
                            // Offset from the parent's top-left corner, then fix the size.
                            top.linkTo(
                                parent.top,
                                elementTop.dp
                            )
                            absoluteLeft.linkTo(
                                parent.absoluteLeft,
                                elementLeft.dp
                            )
                            width = Dimension.value(elementWidth.dp)
                            height = Dimension.value(elementHeight.dp)
                        }
                        .background(Color.Red),
                    content = {}
                )
            }