Open park671 opened 1 week ago
You cannot use NewStringUTF for this, you will have to manually decode from UTF-8.
Cribbing from an existing answer, we will do the equivalent of
Charset.forName("UTF-8").decode(bb).toString():
as follows, where each paragraph roughly implements one step, and the last sets your object field to the result:
// Step 1: wrap the raw UTF-8 bytes of cStringValue in a direct ByteBuffer.
jobject bb = env->NewDirectByteBuffer((void *) cStringValue, strlen(cStringValue));
// Step 2: obtain the charset via Charset.forName("UTF-8").
jclass cls_Charset = env->FindClass("java/nio/charset/Charset");
jmethodID mid_Charset_forName = env->GetStaticMethodID(cls_Charset, "forName", "(Ljava/lang/String;)Ljava/nio/charset/Charset;");
jobject charset = env->CallStaticObjectMethod(cls_Charset, mid_Charset_forName, env->NewStringUTF("UTF-8"));
// Step 3: charset.decode(bb) turns the bytes into a CharBuffer.
jmethodID mid_Charset_decode = env->GetMethodID(cls_Charset, "decode", "(Ljava/nio/ByteBuffer;)Ljava/nio/CharBuffer;");
jobject cb = env->CallObjectMethod(charset, mid_Charset_decode, bb);
// Step 4: cb.toString() yields the java.lang.String.
jclass cls_CharBuffer = env->FindClass("java/nio/CharBuffer");
jmethodID mid_CharBuffer_toString = env->GetMethodID(cls_CharBuffer, "toString", "()Ljava/lang/String;");
// CallObjectMethod returns a plain jobject; C++ requires an explicit downcast to jstring.
jstring str = static_cast<jstring>(env->CallObjectMethod(cb, mid_CharBuffer_toString));
// Step 5: store the decoded string in the target object's field.
env->SetObjectField(jPosRec, myJniPosRec->_myJavaStringValue, str);
The Stack Overflow answer above might be misleading. The likely cause of this bug is that the C++ JNI interface autogenerated by SWIG does not support incomplete UTF-8 strings generated by the module during type conversion.
As I mentioned above, the bug is due to the SWIG auto-generated JNI translation layer, which crashes when dealing with incomplete UTF-8 encoding (Chinese characters are 3 bytes). I have fixed this issue by using an inline hook to modify the char* returned by the DS_IntermediateDecode method, truncating the incomplete characters at the end.
proxy_DS_IntermediateDecode's code :
/*
 * Hook installed in place of DS_IntermediateDecode.
 *
 * Calls the original function and, when it yields a non-empty string, returns
 * a copy produced by get_complete_utf8_string() instead of the original.
 * In that case the original pointer is remembered in origin_string so that
 * proxy_DS_FreeString can release it later.
 *
 * aSctx   opaque argument forwarded unchanged to the original function.
 * returns the trimmed copy, or the original result unchanged when it is NULL,
 *         empty, or when no copy could be produced.
 */
void *proxy_DS_IntermediateDecode(void *aSctx) {
    LOG("proxy_DS_IntermediateDecode(): aSctx addr=%p", aSctx);
    char *result = (char *)((DS_IntermediateDecode) orig_DS_IntermediateDecode)(aSctx);

    /* The original may hand back NULL; strlen(NULL) would crash. */
    if (result == NULL) {
        origin_string = NULL;
        return NULL;
    }

    int len = (int) strlen(result);
    if (len <= 0) {
        /* Nothing to trim: pass the original through, no copy is pending. */
        origin_string = NULL;
        return result;
    }
    LOG("proxy_DS_IntermediateDecode(): strlen=%d", len);

    char *complete_utf8_string = get_complete_utf8_string(result, len);
    if (complete_utf8_string == NULL) {
        /* No copy could be produced. Fall back to the original string and
         * leave origin_string NULL so the free hook releases only `result`,
         * instead of returning NULL and dropping a valid result. */
        LOG("proxy_DS_IntermediateDecode(): could not build trimmed copy, returning original");
        origin_string = NULL;
        return result;
    }

    /* Remember the original so it can be released together with the copy. */
    origin_string = result;
    LOG("proxy_DS_IntermediateDecode(): origin=%s --> complete=%s", (char *) origin_string, complete_utf8_string);
    return complete_utf8_string;
}
proxy_DS_FreeString's code:
/*
 * Hook installed in place of DS_FreeString.
 *
 * When origin_string is NULL the pointer is simply forwarded to the original
 * DS_FreeString. When origin_string is set, the incoming pointer is treated
 * as the malloc()ed copy handed out by proxy_DS_IntermediateDecode: the copy
 * is released with free() and the remembered original is released through
 * the original DS_FreeString, so each buffer goes back to the allocator that
 * produced it.
 *
 * NOTE(review): this assumes that whenever origin_string is set, the next
 * string released is the copy belonging to it - confirm no other
 * DS_FreeString call can interleave.
 *
 * complete_utf8_string  string being released; may be NULL.
 */
void proxy_DS_FreeString(char *complete_utf8_string) {
    /* Passing NULL to a %s conversion is undefined, so log a placeholder. */
    LOG("proxy_DS_FreeString(): %s", complete_utf8_string != NULL ? complete_utf8_string : "(null)");

    if (origin_string == NULL) {
        /* No trimmed copy pending: plain pass-through. */
        ((DS_FreeString) orig_DS_FreeString)(complete_utf8_string);
        return;
    }

    /* Clear the global first so a stale pointer is never left behind. */
    char *library_string = origin_string;
    origin_string = NULL;

    free(complete_utf8_string);                            /* our malloc()ed copy */
    ((DS_FreeString) orig_DS_FreeString)(library_string);  /* library-owned original */
}
Declared field:
/* Original string returned by the real DS_IntermediateDecode while a trimmed
 * copy is handed out in its place. Set by proxy_DS_IntermediateDecode,
 * released and reset to NULL by proxy_DS_FreeString; NULL when nothing is
 * pending. */
char *origin_string = NULL;
get_complete_utf8_string's code:
/*
 * Builds a NUL-terminated copy of `input` that contains only complete UTF-8
 * sequences.
 *
 * The input is scanned one lead byte at a time:
 *   - bytes that cannot start a sequence are skipped;
 *   - a sequence that would run past `input_length` ends the scan (this is
 *     the truncated trailing character case);
 *   - a lead byte whose trailing bytes are not all continuation bytes
 *     (10xxxxxx) is skipped, and scanning resumes at the following byte;
 *   - every other sequence is copied verbatim.
 *
 * input         bytes to filter; NULL is treated as empty input.
 * input_length  number of bytes to read from `input`; negative values are
 *               treated as 0.
 * returns       a malloc()ed string (never longer than input_length bytes
 *               plus the terminator), or NULL if the allocation fails.
 */
char *get_complete_utf8_string(const char *input, int input_length) {
    /* Normalise degenerate arguments so neither the allocation size nor the
     * reads below can go out of range. */
    if (input == NULL || input_length < 0) {
        input_length = 0;
    }

    /* Widen before adding 1 so input_length == INT_MAX cannot overflow. */
    char *output = (char *) malloc((size_t) input_length + 1);
    if (output == NULL) {
        fprintf(stderr, "Memory allocation failed\n");
        return NULL;
    }

    int i = 0;
    int output_index = 0;
    while (i < input_length) {
        const unsigned char lead = (unsigned char) input[i];
        int char_size = 0;
        if (lead < 0x80) {
            LOG("1byte utf8");
            char_size = 1;
        } else if ((lead >> 5) == 0x6) {
            LOG("2byte utf8");
            char_size = 2;
        } else if ((lead >> 4) == 0xE) {
            LOG("3byte utf8");
            char_size = 3;
        } else if ((lead >> 3) == 0x1E) {
            LOG("4byte utf8");
            char_size = 4;
        } else {
            /* Stray continuation byte or invalid lead byte: drop it. */
            i++;
            continue;
        }

        /* Written as a subtraction so the comparison itself cannot overflow. */
        if (char_size > input_length - i) {
            LOG("incomplete utf8!");
            break;
        }

        /* A lead byte only announces a length; the sequence is well formed
         * only if every trailing byte has the 10xxxxxx continuation form. */
        int well_formed = 1;
        for (int k = 1; k < char_size; k++) {
            if ((((unsigned char) input[i + k]) & 0xC0) != 0x80) {
                well_formed = 0;
                break;
            }
        }
        if (!well_formed) {
            /* Drop just the lead byte; the bytes after it are re-examined so
             * valid characters that follow are not lost. */
            i++;
            continue;
        }

        memcpy(output + output_index, input + i, (size_t) char_size);
        output_index += char_size;
        i += char_size;
    }
    output[output_index] = '\0';
    return output;
}