Certain foreign characters don't work in stringtables and the ones that do work require complicated codes.

OndrikB commented 5 years ago

Title. Will require a hookmod to make all of them fully compatible.

OndrikB commented 5 years ago

Update: More codes are also required, as characters like ť aren't even recognized by a hex editor as legitimate characters.

JrMasterModelBuilder commented 5 years ago

Codes are transformed here I think: GcStringTableLoader::CleanString

// Decompiler listing of GcStringTableLoader::CleanString.
//
// Purpose (as far as the listing shows): rewrites escape codes inside `str`
// in place. An escape code starts with the byte 27 ('\x1B', ESC) and is
// followed by '(' , ')' or 'N' plus one or two selector bytes. Each recognised
// code is handed to ScFixedString::replace together with a short replacement
// string.
//
// Parameters:
//   str - the string to clean; read through str->str.length and
//         str->str.cstr, and modified through ScFixedString::replace.
//
// Returns: the current value of v23 at the moment the loop exits. Only the
// low byte of v23 is ever written in this listing (via LOBYTE), so the upper
// bytes of the return value are not set by the visible code.
// NOTE(review): the return value is presumably unused by callers - confirm.
//
// NOTE(review): ScFixedString::replace and the asc_74AE2A / asc_74AE2E /
// asc_74AE30 / asc_74AE32 literals are defined outside this listing. The
// (str, offset, count, replacement, replacement_length) reading of the
// arguments is an assumption based on the call sites - confirm.
int __stdcall GcStringTableLoader::CleanString(ScFixedString *str)
{
  int str_len; // ebx
  // v2..v19 each hold the strlen() of a replacement string for one call site.
  unsigned int v2; // ST0C_4
  unsigned int v3; // ST0C_4
  unsigned int v4; // ST0C_4
  unsigned int v5; // ST0C_4
  unsigned int v6; // ST0C_4
  unsigned int v7; // ST0C_4
  unsigned int v8; // ST0C_4
  unsigned int v9; // ST0C_4
  unsigned int v10; // ST0C_4
  unsigned int v11; // ST0C_4
  unsigned int v12; // ST0C_4
  unsigned int v13; // ST0C_4
  unsigned int v14; // ST0C_4
  unsigned int v15; // ST0C_4
  unsigned int v16; // ST0C_4
  unsigned int v17; // ST0C_4
  unsigned int v18; // ST0C_4
  unsigned int v19; // ST0C_4
  int result; // eax
  // i and j share one stack slot and are both 8-bit, so they can only index
  // offsets 0..255.
  unsigned __int8 i; // [esp+4h] [ebp-1CCh]
  unsigned __int8 j; // [esp+4h] [ebp-1CCh]
  // Low byte of v23: number of escape codes replaced so far.
  int v23; // [esp+8h] [ebp-1C8h]
  // v24: number of ESC bytes counted by the first pass.
  unsigned __int8 v24; // [esp+Ch] [ebp-1C4h]
  // Str (ebp-1BAh) and v26 (ebp-1B9h) are adjacent on the stack. Str receives
  // the single replacement byte and v26 is zeroed once below, so strlen(&Str)
  // stops after one byte.
  // NOTE(review): presumably a two-byte char buffer in the original source
  // that the decompiler split into two variables - confirm.
  char Str; // [esp+16h] [ebp-1BAh]
  char v26; // [esp+17h] [ebp-1B9h]

  v26 = 0;
  v24 = 0;

  // Pass 1: count ESC (27) bytes. The length is truncated to 8 bits in the
  // loop condition, so only offsets below (length & 0xFF) are examined.
  str_len = str->str.length;
  for ( i = 0; i < (unsigned __int8)str_len; ++i )
  {
    if ( str->str.cstr[i] == 27 )
      ++v24;
  }
  LOBYTE(v23) = 0;

  // Pass 2: walk backwards from the last byte. The for statement has no
  // condition of its own; the only exit is the counter check below, i.e. when
  // as many replacements have been made as ESC bytes were counted.
  // NOTE(review): an ESC that matches none of the cases below never bumps the
  // counter, so it looks like the exit condition could never be met for such
  // input while the 8-bit j keeps wrapping - confirm against the binary.
  for ( j = str_len - 1; ; --j )
  {
    result = v23;
    if ( (unsigned __int8)v23 >= v24 )
      break;

    if ( str->str.cstr[j] == '\x1B' )
    {
      // Dispatch on the byte following ESC. There is no default case, so any
      // other introducer byte is left untouched and uncounted.
      switch ( str->str.cstr[j + 1] )
      {
        // ESC ( 5 -> asc_74AE2A (read elsewhere in this thread as "...").
        // Other selectors after '(' are ignored and not counted.
        case '(':
          if ( str->str.cstr[j + 2] == '5' )
          {
            v2 = strlen(asc_74AE2A);
            ScFixedString::replace(str, j, 3u, asc_74AE2A, v2);
            LOBYTE(v23) = v23 + 1;
          }
          break;
        // ESC ) 1 / ESC ) 2 -> asc_74AE2E; ESC ) 6 / ESC ) 7 -> asc_74AE30.
        // Other selectors after ')' are ignored and not counted.
        case ')':
          if ( str->str.cstr[j + 2] != '1' && str->str.cstr[j + 2] != '2' )
          {
            if ( str->str.cstr[j + 2] == '6' || str->str.cstr[j + 2] == '7' )
            {
              v4 = strlen(asc_74AE30);
              ScFixedString::replace(str, j, 3u, asc_74AE30, v4);
              LOBYTE(v23) = v23 + 1;
            }
          }
          else
          {
            v3 = strlen(asc_74AE2E);
            ScFixedString::replace(str, j, 3u, asc_74AE2E, v3);
            LOBYTE(v23) = v23 + 1;
          }
          break;
        // ESC N <selector> [<letter>]: single-byte replacements. Str is a
        // signed char here, so the negative constants are bytes >= 0x80
        // (e.g. -96 is 0xA0). Four-byte codes (selector A/B/C/H + vowel) pass
        // 4u to replace; three-byte codes pass 3u.
        case 'N':
          // Selector 'A' + vowel. Any non-vowel letter yields 88 ('X').
          if ( str->str.cstr[j + 2] == 'A' )
          {
            switch ( str->str.cstr[j + 3] )
            {
              case 'A':
                Str = -96;
                break;
              case 'E':
                Str = -88;
                break;
              case 'I':
                Str = -84;
                break;
              case 'O':
                Str = -78;
                break;
              case 'U':
                Str = -72;
                break;
              case 'a':
                Str = -63;
                break;
              case 'e':
                Str = -55;
                break;
              case 'i':
                Str = -51;
                break;
              case 'o':
                Str = -45;
                break;
              case 'u':
                Str = -39;
                break;
              default:
                Str = 88;
                break;
            }
            v5 = strlen(&Str);
            ScFixedString::replace(str, j, 4u, &Str, v5);
            LOBYTE(v23) = v23 + 1;
          }
          // Selector 'B' + vowel. Any non-vowel letter yields 88 ('X').
          else if ( str->str.cstr[j + 2] == 'B' )
          {
            switch ( str->str.cstr[j + 3] )
            {
              case 'A':
                Str = -95;
                break;
              case 'E':
                Str = -87;
                break;
              case 'I':
                Str = -83;
                break;
              case 'O':
                Str = -77;
                break;
              case 'U':
                Str = -71;
                break;
              case 'a':
                Str = -62;
                break;
              case 'e':
                Str = -54;
                break;
              case 'i':
                Str = -50;
                break;
              case 'o':
                Str = -44;
                break;
              case 'u':
                Str = -38;
                break;
              default:
                Str = 88;
                break;
            }
            v6 = strlen(&Str);
            ScFixedString::replace(str, j, 4u, &Str, v6);
            LOBYTE(v23) = v23 + 1;
          }
          else
          {
            // Reached only when the selector is neither 'A' nor 'B'.
            switch ( str->str.cstr[j + 2] )
            {
              // Selector 'C' + vowel. Any non-vowel letter yields 88 ('X').
              case 'C':
                switch ( str->str.cstr[j + 3] )
                {
                  case 'A':
                    Str = -94;
                    break;
                  case 'E':
                    Str = -86;
                    break;
                  case 'I':
                    Str = -82;
                    break;
                  case 'O':
                    Str = -76;
                    break;
                  case 'U':
                    Str = -70;
                    break;
                  case 'a':
                    Str = -61;
                    break;
                  case 'e':
                    Str = -53;
                    break;
                  case 'i':
                    Str = -49;
                    break;
                  case 'o':
                    Str = -43;
                    break;
                  case 'u':
                    Str = -37;
                    break;
                  default:
                    Str = 88;
                    break;
                }
                v7 = strlen(&Str);
                ScFixedString::replace(str, j, 4u, &Str, v7);
                LOBYTE(v23) = v23 + 1;
                break;
              // Selector 'H' + vowel. Any non-vowel letter yields 88 ('X').
              case 'H':
                switch ( str->str.cstr[j + 3] )
                {
                  case 'A':
                    Str = -92;
                    break;
                  case 'E':
                    Str = -85;
                    break;
                  case 'I':
                    Str = -81;
                    break;
                  case 'O':
                    Str = -74;
                    break;
                  case 'U':
                    Str = -68;
                    break;
                  case 'a':
                    Str = -59;
                    break;
                  case 'e':
                    Str = -52;
                    break;
                  case 'i':
                    Str = -48;
                    break;
                  case 'o':
                    Str = -41;
                    break;
                  case 'u':
                    Str = -35;
                    break;
                  default:
                    Str = 88;
                    break;
                }
                v8 = strlen(&Str);
                ScFixedString::replace(str, j, 4u, &Str, v8);
                LOBYTE(v23) = v23 + 1;
                break;
              // Three-byte codes: ESC N <selector> maps directly to one byte.
              case '{':
                Str = -65;
                v9 = strlen(&Str);
                ScFixedString::replace(str, j, 3u, &Str, v9);
                LOBYTE(v23) = v23 + 1;
                break;
              case 'y':
                Str = -72;
                v10 = strlen(&Str);
                ScFixedString::replace(str, j, 3u, &Str, v10);
                LOBYTE(v23) = v23 + 1;
                break;
              case 'q':
                Str = -89;
                v11 = strlen(&Str);
                ScFixedString::replace(str, j, 3u, &Str, v11);
                LOBYTE(v23) = v23 + 1;
                break;
              case 'a':
                Str = -122;
                v12 = strlen(&Str);
                ScFixedString::replace(str, j, 3u, &Str, v12);
                LOBYTE(v23) = v23 + 1;
                break;
              case 'J':
                Str = -90;
                v13 = strlen(&Str);
                ScFixedString::replace(str, j, 3u, &Str, v13);
                LOBYTE(v23) = v23 + 1;
                break;
              case 'K':
                Str = -88;
                v14 = strlen(&Str);
                ScFixedString::replace(str, j, 3u, &Str, v14);
                LOBYTE(v23) = v23 + 1;
                break;
              case 'D':
                Str = -78;
                v15 = strlen(&Str);
                ScFixedString::replace(str, j, 3u, &Str, v15);
                LOBYTE(v23) = v23 + 1;
                break;
              case '!':
                Str = -32;
                v16 = strlen(&Str);
                ScFixedString::replace(str, j, 3u, &Str, v16);
                LOBYTE(v23) = v23 + 1;
                break;
              case '?':
                Str = -33;
                v17 = strlen(&Str);
                ScFixedString::replace(str, j, 3u, &Str, v17);
                LOBYTE(v23) = v23 + 1;
                break;
              // As listed, this case cannot be reached: a 'B' selector is
              // already consumed by the else-if branch above, and this switch
              // only runs when the selector is neither 'A' nor 'B'.
              case 'B':
                if ( str->str.cstr[j + 3] == 'e' )
                  Str = -86;
                else
                  Str = 88;
                v18 = strlen(&Str);
                ScFixedString::replace(str, j, 4u, &Str, v18);
                LOBYTE(v23) = v23 + 1;
                break;
              // Unknown selector after ESC N: three bytes are replaced with
              // asc_74AE32 and the code is still counted as handled.
              default:
                v19 = strlen(asc_74AE32);
                ScFixedString::replace(str, j, 3u, asc_74AE32, v19);
                LOBYTE(v23) = v23 + 1;
                break;
            }
          }
          break;
      }
    }
  }
  return result;
}

JrMasterModelBuilder commented 5 years ago

If I'm reading this right, these are all the codes (I don't know what the integer values map to):

(
    5 = "..."
)
    1 = "'"
    2 = "'"
    6 = "-"
    7 = "-"
N
    A
        A = -96
        E = -88
        I = -84
        O = -78
        U = -72
        a = -63
        e = -55
        i = -51
        o = -45
        u = -39
    B
        A = -95
        E = -87
        I = -83
        O = -77
        U = -71
        a = -62
        e = -54
        i = -50
        o = -44
        u = -38
    C
        A = -94
        E = -86
        I = -82
        O = -76
        U = -70
        a = -61
        e = -53
        i = -49
        o = -43
        u = -37
    H
        A = -92
        E = -85
        I = -81
        O = -74
        U = -68
        a = -59
        e = -52
        i = -48
        o = -41
        u = -35
    { = -65
    y = -72
    q = -89
    a = -122
    J = -90
    K = -88
    D = -78
    ! = -32
    ? = -33
    B
        e = -86

Vahkiti commented 3 years ago

@OndrikB How much of this is still relevant?

OndrikB commented 3 years ago

The codes themselves are still necessary to display foreign characters, so I'd say all of it is still relevant. Some of them have been changed; those changes are documented in the writing channel.

OndrikB commented 2 years ago

Big updates:

The bytes are actually unsigned and refer to this font table
Whatever is causing this issue is within GcStringTableLoader::CleanString itself, as NOPing the calls to it resolved the bug. All characters can now be used with one byte each.

TheLegendOfMataNui / game-issues

Certain foreign characters don't work in stringtables and the ones that do work require complicated codes. #225