Closed techee closed 1 month ago
You can replace ugly PackLineState()
and UnpackLineState()
with my BacktrackToStart()
(used by many lexers in Notepad4, Bash, Perl and Ruby lexers in Lexilla also uses backtracking), the code is more readable, following is diff against my current LexDart.cxx. interpolatingStack
code is taken from issue #94.
diff --git a/scintilla/lexers/LexDart.cxx b/scintilla/lexers/LexDart.cxx
index 087350a7..70a7c096 100644
--- a/scintilla/lexers/LexDart.cxx
+++ b/scintilla/lexers/LexDart.cxx
@@ -20,7 +20,6 @@
#include "CharacterSet.h"
#include "StringUtils.h"
#include "LexerModule.h"
-#include "LexerUtils.h"
using namespace Lexilla;
@@ -50,6 +49,7 @@ struct EscapeSequence {
enum {
DartLineStateMaskLineComment = 1, // line comment
DartLineStateMaskImport = (1 << 1), // import
+ DartLineStateMaskInterpolation = (1 << 2),
};
//KeywordIndex++Autogenerated -- start of section automatically generated
@@ -70,10 +70,32 @@ enum class KeywordType {
While = 0x41,
};
-static_assert(DefaultNestedStateBaseStyle + 1 == SCE_DART_STRING_SQ);
-static_assert(DefaultNestedStateBaseStyle + 2 == SCE_DART_STRING_DQ);
-static_assert(DefaultNestedStateBaseStyle + 3 == SCE_DART_TRIPLE_STRING_SQ);
-static_assert(DefaultNestedStateBaseStyle + 4 == SCE_DART_TRIPLE_STRING_DQ);
+// string interpolating state
+struct InterpolatingState {
+ int state;
+ int braceCount;
+};
+
+void BacktrackToStart(const LexAccessor &styler, int stateMask, Sci_PositionU &startPos, Sci_Position &lengthDoc, int &initStyle) noexcept {
+ const Sci_Line currentLine = styler.GetLine(startPos);
+ if (currentLine != 0) {
+ Sci_Line line = currentLine - 1;
+ int lineState = styler.GetLineState(line);
+ while ((lineState & stateMask) != 0 && line != 0) {
+ --line;
+ lineState = styler.GetLineState(line);
+ }
+ if ((lineState & stateMask) == 0) {
+ ++line;
+ }
+ if (line != currentLine) {
+ const Sci_PositionU endPos = startPos + lengthDoc;
+ startPos = (line == 0)? 0 : styler.LineStart(line);
+ lengthDoc = endPos - startPos;
+ initStyle = (startPos == 0)? 0 : styler.StyleAt(startPos - 1);
+ }
+ }
+}
constexpr bool IsDartIdentifierStart(int ch) noexcept {
return IsIdentifierStart(ch) || ch == '$';
@@ -112,7 +134,7 @@ void ColouriseDartDoc(Sci_PositionU startPos, Sci_Position lengthDoc, int initSt
KeywordType kwType = KeywordType::None;
int chBeforeIdentifier = 0;
- std::vector<int> nestedState; // string interpolation "${}"
+ std::vector<InterpolatingState> interpolatingStack;
int visibleChars = 0;
int chBefore = 0;
@@ -120,20 +142,15 @@ void ColouriseDartDoc(Sci_PositionU startPos, Sci_Position lengthDoc, int initSt
int chPrevNonWhite = 0;
EscapeSequence escSeq;
+ if (startPos != 0) {
+ // backtrack to the line starts expression inside interpolated string.
+ BacktrackToStart(styler, DartLineStateMaskInterpolation, startPos, lengthDoc, initStyle);
+ }
+
StyleContext sc(startPos, lengthDoc, initStyle, styler);
if (sc.currentLine > 0) {
- int lineState = styler.GetLineState(sc.currentLine - 1);
- /*
- 2: lineStateLineType
- 6: commentLevel
- 3: nestedState count
- 3*4: nestedState
- */
- commentLevel = (lineState >> 2) & 0x3f;
- lineState >>= 8;
- if (lineState) {
- UnpackLineState(lineState, nestedState);
- }
+ const int lineState = styler.GetLineState(sc.currentLine - 1);
+ commentLevel = lineState >> 4;
}
if (startPos == 0) {
if (sc.Match('#', '!')) {
@@ -306,7 +323,7 @@ void ColouriseDartDoc(Sci_PositionU startPos, Sci_Position lengthDoc, int initSt
sc.SetState(SCE_DART_OPERATOR2);
sc.Forward();
if (sc.ch == '{') {
- nestedState.push_back(escSeq.outerState);
+ interpolatingStack.push_back({sc.state, 1});
} else if (sc.ch != '$' && IsDartIdentifierStart(sc.ch)) {
sc.SetState(SCE_DART_SIMPLE_IDENTIFIER);
} else { // error
@@ -388,14 +405,18 @@ void ColouriseDartDoc(Sci_PositionU startPos, Sci_Position lengthDoc, int initSt
sc.SetState(SCE_DART_SYMBOL_OPERATOR);
} else if (IsAGraphic(sc.ch)) {
sc.SetState(SCE_DART_OPERATOR);
- if (!nestedState.empty()) {
- sc.ChangeState(SCE_DART_OPERATOR2);
+ if (!interpolatingStack.empty() && AnyOf(sc.ch, '{', '}')) {
+ InterpolatingState ¤t = interpolatingStack.back();
if (sc.ch == '{') {
- nestedState.push_back(SCE_DART_DEFAULT);
- } else if (sc.ch == '}') {
- const int outerState = TakeAndPop(nestedState);
- sc.ForwardSetState(outerState);
- continue;
+ current.braceCount += 1;
+ } else {
+ current.braceCount -= 1;
+ if (current.braceCount == 0) {
+ sc.ChangeState(SCE_DART_OPERATOR2);
+ sc.ForwardSetState(current.state);
+ interpolatingStack.pop_back();
+ continue;
+ }
}
}
}
@@ -408,9 +429,9 @@ void ColouriseDartDoc(Sci_PositionU startPos, Sci_Position lengthDoc, int initSt
}
}
if (sc.atLineEnd) {
- int lineState = (commentLevel << 2) | lineStateLineType;
- if (!nestedState.empty()) {
- lineState |= PackLineState(nestedState) << 8;
+ int lineState = (commentLevel << 4) | lineStateLineType;
+ if (!interpolatingStack.empty()) {
+ lineState |= DartLineStateMaskInterpolation;
}
styler.SetLineState(sc.currentLine, lineState);
lineStateLineType = 0;
You can replace ugly PackLineState() and UnpackLineState() with my BacktrackToStart() (used by many lexers in Notepad4, Bash, Perl and Ruby lexers in Lexilla also uses backtracking), the code is more readable, following is diff against my current LexDart.cxx. interpolatingStack code is taken from issue https://github.com/ScintillaOrg/lexilla/issues/94.
OK, thanks, will have a look at it.
@zufuliu Looks good in principle, but I expect some problem in the implementation - for
var s = """This is also a
${foo(
"$bar"
)}
multi-line string.""";
which I hope is a valid Dart code testing this case I get
where the last line isn't colorized as a string while with the previous version it was. I haven't investigated more yet (also, it's possible I made some mistake when porting your patch).
For portability, #include <algorithm>
for std::min
and std::max
.
the last line isn't colorized as a string while with the previous version it was.
It's my copy paste error:
- interpolatingStack.push_back({sc.state, 1});
+ interpolatingStack.push_back({escSeq.outerState, 1});
It might better to rename IsDeclarableOperator
to IsDefinableOperator
, the latter is what currently used in the doc at https://github.com/dart-lang/sdk/blob/main/sdk/lib/core/symbol.dart#L31
It's my copy paste error
@zufuliu Wonderful, thanks! (I'm getting really lazy with you around :-)
I've also added the code above to the unit test.
It might better to rename IsDeclarableOperator to IsDefinableOperator, the latter is what currently used in the doc at https://github.com/dart-lang/sdk/blob/main/sdk/lib/core/symbol.dart#L31
Done.
For portability, #include
for std::min and std::max.
@nyamatongwe Done.
Some of the Notepad4 features are reasonable depending on implementation clarity. I haven't added some of the library functions from Notepad4 as their behaviour seemed too complex to explain to new lexer authors. Even including more functions and methods can make it more difficult to find the methods they really need. Many lexer authors have limited C++ experience.
folding has been modified to use simple folding like the rest of Scintilla lexers and not folding of the previous line based on brace presence on the next line like Notepad4
This may be an OK feature to add if it can be done cleanly since some people like this brace style and folding behaviour. However, it seems the Dart convention is braces on the keyword line so this won't matter.
"semi-syntactic" coloring of Notepad4 which colors words following a keyword (such as coloring Foo in "class Foo") has been removed as this is not performed in other Scintilla lexers
Some lexers have some of these features. LexPython
knows that class
and def
are followed by class names and function names.
highlighting of tasks such as TODOs in comments has been removed as it isn't present in other Scintilla lexers
Again, some lexers like LexCPP
do this.
It is not worthwhile making every lexer have all the features (and it is a lot of work) but it's OK to include these if the lexer author wants.
LexDart.cxx(159): warning C26447: The function is declared 'noexcept'
but calls function 'GetLine()' which may throw exceptions (f.6).
...
LexDart.cxx(185): warning C26447: The function is declared 'noexcept'
but calls function 'operator[]()' which may throw exceptions (f.6).
As there are non-ASCII characters "···" in the file, this should be communicated to SciTE (and thus Lexilla) with an initial comment:
// coding:utf-8
This may be an OK feature to add if it can be done cleanly since some people like this brace style and folding behaviour. However, it seems the Dart convention is braces on the keyword line so this won't matter.
It's a matter of adding this function
I eventually didn't include it because I'd have to study which of the cases are applicable to Dart and which are for other lexers and I think if such a feature is added, it should be applied to all the lexers for behavior consistency.
Some lexers have some of these features. LexPython knows that class and def are followed by class names and function names.
If desired, I can add it back - I think for function/class definitions it might be good. But the Notepad4 lexer also colorizes every function call (to be precise, anything followed by (
) which I found "too colorful" to my taste (of course one can map it to the default color but I think other lexers really don't do such a thing).
Again, some lexers like LexCPP do this.
It would mean adding
(plus the two small functions above it) to both the Dart and Zig lexers which would mean some code duplication. Similarly to the folding case, if this feature is added, I believe the function should be usable by all the lexers.
As there are non-ASCII characters "···" in the file, this should be communicated to SciTE (and thus Lexilla) with an initial comment:
Done. I guess it should be also added to the already merged TOML lexer test, right?
It would mean adding ... HighlightTaskMarker
That has a hard-coded list of task markers which seems very limited compared to the cpp
lexer's user chosen task markers.
There were some potential character encoding problems but it looks like, while Dart source encoding is a bit under-specified, non-ASCII characters should only be in string literals and comments. That is, according to this issue.
There are inconsistent types for styles, mostly storing into an int
(once unsigned
) from a char
value from LexAccessor::StyleAt
. If int
is to be the type for styles, then LexAccessor::StyleIndexAt
should be called.
I guess it should be also added to the already merged TOML lexer test, right?
OK, 6ac5d7e.
Changes similar to following can fix code folding for triple quoted string when escape sequence or interpolation occurs at line start, complex change is required when SCE_DART_IDENTIFIER_STRING
is classified (e.g. $this
, $ClassName
), so the bug remains in Notepad4.
case SCE_DART_TRIPLE_RAWSTRING_SQ:
case SCE_DART_TRIPLE_RAWSTRING_DQ:
case SCE_DART_TRIPLE_STRING_SQ:
case SCE_DART_TRIPLE_STRING_DQ:
if (style != stylePrev && !AnyOf(stylePrev, SCE_DART_ESCAPECHAR, SCE_DART_OPERATOR_STRING, SCE_DART_IDENTIFIER_STRING)) {
levelNext++;
}
if (style != styleNext && !AnyOf(stylePrev, SCE_DART_ESCAPECHAR, SCE_DART_OPERATOR_STRING, SCE_DART_IDENTIFIER_STRING)) {
levelNext--;
}
break;
var s1 = """multi-line
\n
strings
""";
var s2 = """multi-line
$x
strings
""";
var s3 = """multi-line
${x}
strings
""";
Original code folding code come from issue #132, so Ruby still has same bug, but it's hard to fix.
%W(
#{1 + 1}
)
There are inconsistent types for styles, mostly storing into an int (once unsigned) from a char value from LexAccessor::StyleAt. If int is to be the type for styles, then LexAccessor::StyleIndexAt should be called.
Fixed.
@nyamatongwe Should I squash the commits and rebase the branch on top of master to resolve the conflicts caused by previous troff lexer merge?
Changes similar to following can fix code folding for triple quoted string when escape sequence or interpolation occurs at line start, complex change is required when SCE_DART_IDENTIFIER_STRING is classified (e.g. $this, $ClassName), so the bug remains in Notepad4.
That still produces incorrect output when you e.g. insert some text lines before the escape sequence:
var s1 = """multi-line
foo
bar
baz
\n
strings
""";
I think without some complex logic correctly taking care of interpolation nesting you'll always run into problems like this. So I kept the original version for simplicity.
Should I squash the commits and rebase the branch on top of master ...
While that can make things easier, I'll almost always make at least a couple more changes - a change log entry and add warning suppressions to cppcheck.suppress
. So the pull request doesn't get applied directly and I commonly use WinMerge to apply a freshly checked out copy of the pull request against my main development directory.
That still produces incorrect output when you e.g. insert some text lines before the escape sequence:
- if (style != styleNext && !AnyOf(stylePrev,
+ if (style != styleNext && !AnyOf(styleNext,
case SCE_DART_TRIPLE_RAWSTRING_SQ:
case SCE_DART_TRIPLE_RAWSTRING_DQ:
case SCE_DART_TRIPLE_STRING_SQ:
case SCE_DART_TRIPLE_STRING_DQ:
if (style != stylePrev && !AnyOf(stylePrev, SCE_DART_ESCAPECHAR, SCE_DART_OPERATOR_STRING, SCE_DART_IDENTIFIER_STRING)) {
levelNext++;
}
if (style != styleNext && !AnyOf(styleNext, SCE_DART_ESCAPECHAR, SCE_DART_OPERATOR_STRING, SCE_DART_IDENTIFIER_STRING)) {
levelNext--;
}
break;
@zufuliu Thanks! I've just added a commit with this version.
This patch adds the Dart lexer from the Notepad4 editor originally created by Zufu Liu (@zufuliu).
Some changes have been made to make it compile and work in Scintilla since Notepad4 contains a modified Scintilla version. Also, some features of the Notepad4 lexer have been removed/changed:
The full diff is here:
Full diff
```diff --- notepad4/scintilla/lexers/LexDart.cxx 2024-08-21 11:44:51.924318156 +0200 +++ lexilla/lexers/LexDart.cxx 2024-08-20 23:22:14.215713622 +0200 @@ -1,6 +1,10 @@ -// This file is part of Notepad4. -// See License.txt for details about distribution and modification. -//! Lexer for Dart. +// Scintilla source code edit control +/** @file LexDart.cxx + ** Lexer for Dart. + **/ +// Based on Zufu Liu's Notepad4 Dart lexer +// Modified for Scintilla by Jiri Techet, 2024 +// The License.txt file describes the conditions under which this software may be distributed. #includePlease let me know your opinion and what you think should be modified. For more discussion and clarification of the above points, see https://github.com/zufuliu/notepad4/issues/806#issuecomment-2286862292 and below.
If the troff lexer gets merged first, the lexer ID in this patch will have to be modified.
Fixes #58.