Kotlin / multik

Multidimensional array library for Kotlin
https://kotlin.github.io/multik/
Apache License 2.0
646 stars 39 forks source link

Application got SIGABRT after sum function #146

Closed NguyenTrongThinh closed 2 years ago

NguyenTrongThinh commented 2 years ago

My usage:

`package com.thinhnguyen.xface

// Reproduction snippet: a ViewModel that computes an L2-normalized Euclidean
// distance between two float vectors using multik ndarrays.
class FindDistanceActivityViewModel(): ViewModel() {

// Returns `source` divided by the square root of the sum of its squared elements.
// The `sum()` call on this line is the one that appears in the reported stack trace.
private fun l2Normalize(source: NDArray<Float, D1>): NDArray<Float, D1> {
    return source / sqrt((source*source).sum())
}
// Converts both lists to ndarrays, normalizes each with l2Normalize, and returns
// the square root of the sum of the squared element-wise differences.
private fun euclideanDistanceL2(a: List<Float>, b: List<Float>): Float {
    val aNp = mk.ndarray(a)
    val bNp = mk.ndarray(b)
    val aNpNorm = l2Normalize(aNp)
    val bNpNorm = l2Normalize(bNp)
    // The difference (aNpNorm - bNpNorm) is evaluated twice in this expression.
    return sqrt(((aNpNorm - bNpNorm)*(aNpNorm - bNpNorm)).sum())
}
// Launches a coroutine in viewModelScope that computes the distance between two
// hard-coded sample vectors. `image` is not used in the visible part of the snippet;
// the `....` lines mark code the reporter elided.
fun findDistance(image: Bitmap){
    viewModelScope.launch {
                ....
            val sourceRepresentVector = mutableListOf(1.0f,2.0f,6.0f,8.0f)
            val targetRepresentVector = mutableListOf(2.3f,5.6f,8.1f,1.2f)
                    val distance = euclideanDistanceL2(sourceRepresentVector, targetRepresentVector)
                    ....
            // Do others background task
    }
}

}`

Logcat while running: ..... runtime.cc:675] "arch_disk_io_3" prio=5 tid=53 Waiting runtime.cc:675] | group="" sCount=1 ucsCount=0 flags=1 obj=0x12e5d5b0 self=0xb400007910b18400 runtime.cc:675] | sysTid=28927 nice=0 cgrp=default sched=0/0 handle=0x788dfe1cb0 runtime.cc:675] | state=S schedstat=( 5396720 283124 4 ) utm=0 stm=0 core=0 HZ=100 runtime.cc:675] | stack=0x788dede000-0x788dee0000 stackSize=1039KB runtime.cc:675] | held mutexes= runtime.cc:675] native: #00 pc 0000000000075f30 /apex/com.android.runtime/lib64/bionic/libc.so (syscall+32) (BuildId: bbbdeb7c87c74f1491f92c6e605095b0) runtime.cc:675] native: #01 pc 0000000000446c08 /apex/com.android.art/lib64/libart.so (art::Thread::Park(bool, long)+404) (BuildId: 56e704c544e6c624201be2ab4933e853) runtime.cc:675] native: #02 pc 0000000000446530 /apex/com.android.art/lib64/libart.so (art::Unsafe_park(_JNIEnv, _jobject, unsigned char, long) (.uniq.319429422067363160645159987129209045680)+148) (BuildId: 56e704c544e6c624201be2ab4933e853) runtime.cc:675] at jdk.internal.misc.Unsafe.park(Native method) runtime.cc:675] - waiting on an unknown object runtime.cc:675] at java.util.concurrent.locks.LockSupport.park(LockSupport.java:194) runtime.cc:675] at java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.await(AbstractQueuedSynchronizer.java:2081) runtime.cc:675] at java.util.concurrent.LinkedBlockingQueue.take(LinkedBlockingQueue.java:433) runtime.cc:675] at java.util.concurrent.ThreadPoolExecutor.getTask(ThreadPoolExecutor.java:1063) runtime.cc:675] at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1123) runtime.cc:675] at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:637) runtime.cc:675] at java.lang.Thread.run(Thread.java:1012) runtime.cc:675] runtime.cc:675] Aborting thread: runtime.cc:675] "main" prio=10 tid=1 Native runtime.cc:675] | group="" sCount=0 ucsCount=0 flags=0 obj=0x7170d3f8 self=0xb40000799d812400 runtime.cc:675] | sysTid=28644 
nice=-10 cgrp=default sched=0/0 handle=0x7a4be0e500 runtime.cc:675] | state=R schedstat=( 3584711965 216954483 1043 ) utm=305 stm=53 core=7 HZ=100 runtime.cc:675] | stack=0x7fdaf94000-0x7fdaf96000 stackSize=8188KB runtime.cc:675] | held mutexes= "abort lock" "mutator lock"(shared held) runtime.cc:675] native: #00 pc 00000000005376cc /apex/com.android.art/lib64/libart.so (art::DumpNativeStack(std::1::basic_ostream<char, std::1::char_traits >&, int, BacktraceMap, char const, art::ArtMethod, void, bool)+128) (BuildId: 56e704c544e6c624201be2ab4933e853) runtime.cc:675] native: #01 pc 00000000006f0e24 /apex/com.android.art/lib64/libart.so (art::Thread::DumpStack(std::__1::basic_ostream<char, std::1::char_traits >&, bool, BacktraceMap, bool) const+236) (BuildId: 56e704c544e6c624201be2ab4933e853) A/inhnguyen.xfac: runtime.cc:675] native: #02 pc 00000000006d834c /apex/com.android.art/lib64/libart.so (art::AbortState::DumpThread(std::__1::basic_ostream<char, std::__1::char_traits >&, art::Thread) const+60) (BuildId: 56e704c544e6c624201be2ab4933e853) runtime.cc:675] native: #03 pc 00000000006d7f80 /apex/com.android.art/lib64/libart.so (art::AbortState::Dump(std::1::basic_ostream<char, std::__1::char_traits >&) const+400) (BuildId: 56e704c544e6c624201be2ab4933e853) runtime.cc:675] native: #04 pc 00000000006d2b24 /apex/com.android.art/lib64/libart.so (art::Runtime::Abort(char const*)+1016) (BuildId: 56e704c544e6c624201be2ab4933e853) runtime.cc:675] native: #05 pc 0000000000016ea8 /apex/com.android.art/lib64/libbase.so (android::base::SetAborter(std::1::function<void (char const)>&&)::$_3::__invoke(char const)+80) (BuildId: b77c57f68a484ed93d5a7eda59d83bf9) runtime.cc:675] native: #06 pc 0000000000016450 /apex/com.android.art/lib64/libbase.so (android::base::LogMessage::~LogMessage()+352) (BuildId: b77c57f68a484ed93d5a7eda59d83bf9) runtime.cc:675] native: #07 pc 0000000000445224 /apex/com.android.art/lib64/libart.so (art::JavaVMExt::JniAbort(char const, char const)+1612) 
(BuildId: 56e704c544e6c624201be2ab4933e853) runtime.cc:675] native: #08 pc 0000000000329258 /apex/com.android.art/lib64/libart.so (art::JavaVMExt::JniAbortV(char const, char const, std::va_list)+108) (BuildId: 56e704c544e6c624201be2ab4933e853) runtime.cc:675] native: #09 pc 000000000048d59c /apex/com.android.art/lib64/libart.so (art::(anonymous namespace)::ScopedCheck::AbortF(char const, ...) (.__uniq.99033978352804627313491551960229047428)+144) (BuildId: 56e704c544e6c624201be2ab4933e853) runtime.cc:675] native: #10 pc 0000000000453974 /apex/com.android.art/lib64/libart.so (art::(anonymous namespace)::ScopedCheck::Check(art::ScopedObjectAccess&, bool, char const, art::(anonymous namespace)::JniValueType) (.__uniq.99033978352804627313491551960229047428)+4300) (BuildId: 56e704c544e6c624201be2ab4933e853) runtime.cc:675] native: #11 pc 00000000005c8920 /apex/com.android.art/lib64/libart.so (art::(anonymous namespace)::CheckJNI::FindClass(_JNIEnv, char const*) (.uniq.99033978352804627313491551960229047428.llvm.5591279935177935698)+188) (BuildId: 56e704c544e6c624201be2ab4933e853) runtime.cc:675] native: #12 pc 0000000000024ca0 /data/data/com.thinhnguyen.xface/cache/jni_multik4678949286564482544/libmultik_jni-androidArm64.so (Java_org_jetbrains_kotlinx_multik_openblas_math_JniMath_sum+460) runtime.cc:675] at org.jetbrains.kotlinx.multik.openblas.math.JniMath.sum(Native method) runtime.cc:675] at org.jetbrains.kotlinx.multik.openblas.math.NativeMath.sum(NativeMath.kt:126) runtime.cc:675] at org.jetbrains.kotlinx.multik.default.math.DefaultMath.sum(DefaultMath.kt:94) runtime.cc:675] at org.jetbrains.kotlinx.multik.ndarray.operations.IteratingNDArrayKt.sum(IteratingNDArray.kt:1242) runtime.cc:675] at com.thinhnguyen.xface.FindDistanceActivityViewModel.l2Normalize(FindDistanceActivityViewModel.kt:50) runtime.cc:675] at com.thinhnguyen.xface.FindDistanceActivityViewModel.euclideanDistanceL2(FindDistanceActivityViewModel.kt:55) runtime.cc:675] at 
com.thinhnguyen.xface.FindDistanceActivityViewModel.access$euclideanDistanceL2(FindDistanceActivityViewModel.kt:29) runtime.cc:675] at com.thinhnguyen.xface.FindDistanceActivityViewModel$findFace$1.invokeSuspend(FindDistanceActivityViewModel.kt:72) runtime.cc:675] at kotlin.coroutines.jvm.internal.BaseContinuationImpl.resumeWith(ContinuationImpl.kt:33) runtime.cc:675] at kotlinx.coroutines.DispatchedTask.run(DispatchedTask.kt:106) runtime.cc:675] at android.os.Handler.handleCallback(Handler.java:938) runtime.cc:675] at android.os.Handler.dispatchMessage(Handler.java:99) runtime.cc:675] at android.os.Looper.loopOnce(Looper.java:233) runtime.cc:675] at android.os.Looper.loop(Looper.java:344) runtime.cc:675] at android.app.ActivityThread.main(ActivityThread.java:8212) A/inhnguyen.xfac: runtime.cc:675] at java.lang.reflect.Method.invoke(Native method) runtime.cc:675] at com.android.internal.os.RuntimeInit$MethodAndArgsCaller.run(RuntimeInit.java:584) runtime.cc:675] at com.android.internal.os.ZygoteInit.main(ZygoteInit.java:1034) runtime.cc:675] A/inhnguyen.xfac: runtime.cc:683] JNI DETECTED ERROR IN APPLICATION: thread Thread[1,tid=28644,Runnable,Thread*=0xb40000799d812400,peer=0x7170d3f8,"main"] using JNI after critical get runtime.cc:683] in call to FindClass runtime.cc:683] from java.lang.Number org.jetbrains.kotlinx.multik.openblas.math.JniMath.sum(java.lang.Object, int, int, int[], int[], int) A/libc: Fatal signal 6 (SIGABRT), code -1 (SI_QUEUE) in tid 28644 (inhnguyen.xface), pid 28644 (inhnguyen.xface) D/ExternalTextureConv: Created output texture: 18 width: 1080 height: 1080 Disconnected from the target VM, address: 'localhost:51535', transport: 'socket'

My debugging: I found that the application crashes after executing the sum function in l2Normalize.

Please help explain whether we can use multik in a coroutine. Or, if it is an issue, please help to fix it.

NguyenTrongThinh commented 2 years ago

I can easily reproduce this on my device (Oppo Reno 4, Android 12) by creating an ndarray that is larger than 128 elements.

devcrocod commented 2 years ago

Hi. Sorry for taking so long to reply. This error occurs because of an attempt to allocate memory in a JNI critical region. In the desktop version, the JVM does not block such code, but apparently an error is returned for this on Android. Unfortunately, I could not reproduce this behavior on my own (Android 12/13, JVM 11).

It confuses me that the error falls not on the allocation itself, but on the FindClass method.

I can get rid of JNI method calls inside critical regions; that should help. I will then publish the fix in a patch release.

NguyenTrongThinh commented 2 years ago

Thank you. I am waiting for the new release.