Here is the patch; it changes SCE_RB_WORD to use the new isSafeAlphaOrHigh() function so that it matches Ruby's is_identchar().
ruby-here-doc-0403.zip
```diff
diff --git a/lexers/LexRuby.cxx b/lexers/LexRuby.cxx
index d4bf314c..f5e1aec3 100644
--- a/lexers/LexRuby.cxx
+++ b/lexers/LexRuby.cxx
@@ -48,6 +48,10 @@ inline bool isSafeAlpha(char ch) noexcept {
return (isSafeASCII(ch) && isalpha(ch)) || ch == '_';
}
+inline bool isSafeAlphaOrHigh(char ch) noexcept {
+ return isHighBitChar(ch) || isalpha(ch) || ch == '_';
+}
+
inline bool isSafeAlnum(char ch) noexcept {
return (isSafeASCII(ch) && isalnum(ch)) || ch == '_';
}
@@ -613,7 +617,7 @@ bool sureThisIsNotHeredoc(Sci_Position lt2StartPos, Accessor &styler) {
j += 1;
}
- if (isSafeAlnum(styler[j])) {
+ if (isSafeAlnumOrHigh(styler[j])) {
// Init target_end because some compilers think it won't
// be initialized by the time it's used
target_start = target_end = j;
@@ -622,7 +626,7 @@ bool sureThisIsNotHeredoc(Sci_Position lt2StartPos, Accessor &styler) {
return definitely_not_a_here_doc;
}
for (; j < lengthDoc; j++) {
- if (!isSafeAlnum(styler[j])) {
+ if (!isSafeAlnumOrHigh(styler[j])) {
if (target_quote && styler[j] != target_quote) {
// unquoted end
return definitely_not_a_here_doc;
@@ -877,7 +881,7 @@ void ColouriseRbDoc(Sci_PositionU startPos, Sci_Position length, int initStyle,
styler.ColourTo(i - 1, state);
state = SCE_RB_NUMBER;
is_real_number = true;
- } else if (isHighBitChar(ch) || iswordstart(ch)) {
+ } else if (isSafeAlphaOrHigh(ch)) {
styler.ColourTo(i - 1, state);
state = SCE_RB_WORD;
} else if (ch == '#') {
@@ -945,7 +949,7 @@ void ColouriseRbDoc(Sci_PositionU startPos, Sci_Position length, int initStyle,
chNext = chNext2;
styler.ColourTo(i, SCE_RB_OPERATOR);
- if (!(strchr("\"\'`_-~", chNext2) || isSafeAlpha(chNext2))) {
+ if (!(strchr("\"\'`_-~", chNext2) || isSafeAlphaOrHigh(chNext2))) {
// It's definitely not a here-doc,
// based on Ruby's lexer/parser in the
// heredoc_identifier routine.
```
Note: with the patch applied, when the test case is in a DBCS code page, the heredoc is not terminated because DBCS characters are being skipped, as in the following screenshot:
The heredoc test case in issue #233 in UTF-8 mode is not highlighted correctly.
Following the comment "heredoc_identifier routine" at https://github.com/ScintillaOrg/lexilla/blob/cd5d8661d5b5c72f77e5ea2785e03ba8559f9912/lexers/LexRuby.cxx#L948-L952, the relevant Ruby sources are:

- `heredoc_identifier` source at https://github.com/ruby/ruby/blob/master/parse.y#L8999
- `parser_is_identchar` at https://github.com/ruby/ruby/blob/master/parse.y#L9044
- `is_identchar` at https://github.com/ruby/ruby/blob/master/parse.y#L7255

Here is the patch; it changes `SCE_RB_WORD` to use the new `isSafeAlphaOrHigh()` function to match `is_identchar()` above.
ruby-here-doc-0403.zip

Note: with the patch applied, when the test case is in a DBCS code page, the heredoc is not terminated because DBCS characters are being skipped, as in the following screenshot:

![image](https://github.com/ScintillaOrg/lexilla/assets/2289926/bab577a9-3add-4e28-a259-00ce99b11b34)