jacobslusser / ScintillaNET

A Windows Forms control, wrapper, and bindings for the Scintilla text editor.
MIT License
963 stars 243 forks source link

Start of an updated CSharpLexer #444

Closed tobeypeters closed 5 years ago

tobeypeters commented 5 years ago

Edit: A newer, better version of the CSharp Lexer is on my GitHub.

So, I've started updating the CSharp Lexer code from the Wiki page. Looking for help, now, to make it better.

Form_Load:

        // Configures the Scintilla control "sc" for container-based lexing, applies the
        // test colour scheme to every CSharpLexer style slot, then hooks up the lexer
        // and hands it the keyword lists.
        private void Form1_Load(object sender, EventArgs e)
        {
            // Assigns a foreground colour to a single style slot.
            void SetFore(int styleIndex, Color color) => sc.Styles[styleIndex].ForeColor = color;

            sc.CaretStyle = CaretStyle.Block;

            // Container mode: styling is supplied by our own StyleNeeded handler.
            sc.Lexer = Lexer.Container;

            // Base colours first, then propagate them before the per-style overrides.
            sc.StyleResetDefault();
            sc.Styles[Style.Default].BackColor = IntToColor(0x1E1E1E);
            sc.Styles[Style.Default].ForeColor = IntToColor(0xEFEAEF);
            sc.StyleClearAll();

            //Ugly Test Colors

            // The caret and the line-number margin share one colour.
            Color caretColor = IntToColor(0xEFEAEF);
            sc.CaretForeColor = caretColor;
            SetFore(Style.LineNumber, caretColor);

            SetFore(CSharpLexer.StyleDefault, IntToColor(0xEFEAEF));
            SetFore(CSharpLexer.StyleKeyword, IntToColor(0x35aec6));
            SetFore(CSharpLexer.StyleContainerProcedure, Color.HotPink);

            // Contextual keywords and procedure containers share one colour.
            Color contextualColor = IntToColor(0xb4ceaf);
            SetFore(CSharpLexer.StyleContextual, contextualColor);
            SetFore(CSharpLexer.StyleProcedureContainer, contextualColor);

            SetFore(CSharpLexer.StyleIdentifier, IntToColor(0xEFEAEF));
            SetFore(CSharpLexer.StyleNumber, Color.Purple);
            SetFore(CSharpLexer.StyleString, Color.Red);
            SetFore(CSharpLexer.StyleComment, Color.Orange);
            SetFore(CSharpLexer.StyleProcedure, IntToColor(0x3ac190));
            SetFore(CSharpLexer.StyleVerbatim, Color.YellowGreen);
            SetFore(CSharpLexer.StylePreprocessor, Color.DarkSlateGray);
            SetFore(CSharpLexer.StyleEscapeSequence, Color.Yellow);
            SetFore(CSharpLexer.StyleOperator, Color.HotPink);
            SetFore(CSharpLexer.StyleBraces, Color.GreenYellow);
            SetFore(CSharpLexer.StyleError, Color.DarkRed);
            SetFore(CSharpLexer.StyleUser, Color.Olive);

            // Hook the lexer's event handlers onto the control, then supply the word lists.
            CSharpLexer.Init_Lexer(sc);
            CSharpLexer.SetKeyWords(
                "abstract add as ascending async await base bool break by byte case catch char checked class const continue decimal default delegate descending do double dynamic else enum equals explicit extern false finally fixed float for foreach from get global global goto goto group if implicit in int interface internal into is join let lock long namespace new null object on operator orderby out override params partial private protected public readonly ref remove return sbyte sealed select set short sizeof stackalloc static string struct switch this throw true try typeof uint ulong unchecked unsafe ushort using value var virtual void volatile where while yield",
                inUserKeywords: "Goblin Hammer",
                AutoFillContextual: true);
        }

CSharp Lexer:

using ScintillaNET;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;

/// <summary>
/// Hand-written C# syntax highlighter for a ScintillaNET control whose lexer is set to
/// <c>Lexer.Container</c>. Call <see cref="Init_Lexer"/> once per control to hook the
/// required events and <see cref="SetKeyWords"/> to supply the word lists; the actual
/// styling is done by <see cref="Style"/>.
/// </summary>
public static class CSharpLexer
{
    //Characters that keep a numeric literal going once one has started (digits, hex digits, 'x', sign, decimal point).
    private static readonly List<char> NumberTypes = new List<char>
    {
        '0', '1', '2', '3', '4', '5', '6','7', '8', '9',
        'a', 'b', 'c', 'd', 'e', 'f', 'x',
        'A', 'B', 'C', 'D', 'E', 'F', '-', '.'
    };

    //Characters that form a recognised escape sequence when they follow a backslash.
    private static readonly List<char> EscapeSequences = new List<char>
    {
        '\'', '"', '\\', '0', 'a', 'b', 'f',
        'n', 'r', 't', 'v'
    };

    //One and two character operators that get StyleOperator.
    private static readonly List<string> Operators = new List<string>
    {
        "<<", ">>", "<=", ">=", "+=", "-=", "*=", "&=",
        "|=", "!=", "^=", "->", "??", "=>", "++", "--",
        "==", "&&", "||", "+", "-", "*", "&", "!", "|",
        "^", "~", "=", "<", ">"
    };

    //Operator characters that char.IsSymbol() does not report as symbols.
    private static readonly List<char> OperatorStragglers = new List<char>
    {
        '*', '&', '?', '-', '!'
    };

    //A character directly after an identifier that marks it as a "container" (generic, indexer or member access).
    private static readonly List<char> IdentifierMarkers = new List<char>
    {
        '<', '[', '.'
    };

    //Typing or deleting one of these can change styling far from the edit, so it forces a whole-document pass.
    private static readonly List<string> FullDocument = new List<string>
    {
        "*", "/", "{", "}"
    };

    //Few of these might need renamed
    public const int StyleDefault = 0,
                     StyleKeyword = 1,
                  StyleIdentifier = 2,
                      StyleNumber = 3,
                      StyleString = 4,
                     StyleComment = 5,
                   StyleProcedure = 6,
                  StyleContextual = 7,
                    StyleVerbatim = 8,
                StylePreprocessor = 9,
              StyleEscapeSequence = 10,
                    StyleOperator = 11,
                      StyleBraces = 12,
                       StyleError = 13,
                        StyleUser = 14,
          StyleProcedureContainer = 15,
          StyleContainerProcedure = 16;

    //Scanner states used by Style().
    private const int STATE_UNKNOWN = 0,
                   STATE_IDENTIFIER = 1,
                       STATE_NUMBER = 2,
                       STATE_STRING = 3,
            STATE_MULTILINE_COMMENT = 4,
                 STATE_PREPROCESSOR = 5,
                     STATE_OPERATOR = 6;

    //Start out empty rather than null so Style() can run before SetKeyWords() has supplied every list.
    public static List<string> KEYWORDS = new List<string>(), //Primary keywords
                    CONTEXTUAL_KEYWORDS = new List<string>(), //Secondary keywords
                          USER_KEYWORDS = new List<string>(); //User-defined keywords

    //Set by the CharAdded/Delete handlers, consumed (and cleared) by the StyleNeeded handler.
    //NOTE(review): this flag is shared by every control passed to Init_Lexer, so an edit in one
    //control can trigger the full-document pass in another. Harmless, but worth knowing.
    private static bool IMPORTANT_KEY_DELETED = false;

    //True once SetKeyWords() has run, so Style() only falls back to the default word lists when nothing was configured.
    private static bool KEYWORDS_CONFIGURED = false;

    /// <summary>
    /// Replaces the keyword lists used for highlighting. Each list is a space separated string.
    /// </summary>
    /// <param name="inKeywords">Primary keywords; when null or empty the current list is kept.</param>
    /// <param name="inContextualKeywords">Contextual keywords; this list is always rebuilt from scratch.</param>
    /// <param name="inUserKeywords">User-defined keywords; when null or empty the current list is kept.</param>
    /// <param name="AutoFillContextual">
    /// When true, the names of the public types in the assembly that defines <see cref="string"/>
    /// are appended to the contextual list.
    /// </param>
    public static void SetKeyWords(string inKeywords = "", string inContextualKeywords = "", string inUserKeywords = "", bool AutoFillContextual = false)
    {
        //Public type names with the generic arity suffix ("List`1" -> "List") stripped.
        IEnumerable<string> AssemblyTypes()
        {
            return typeof(string).Assembly.GetTypes()
                                     .Where(t => t.IsPublic && t.IsVisible)
                                     .Select(t => new { t.Name, Length = t.Name.IndexOf('`') })
                                     .Select(x => x.Length == -1 ? x.Name : x.Name.Substring(0, x.Length))
                                     .Distinct();
        }

        //Build the contextual list locally and publish it in one assignment.
        List<string> contextual = new List<string>();

        if (!string.IsNullOrEmpty(inKeywords)) { KEYWORDS = new List<string>(inKeywords.Split(' ')); }
        if (!string.IsNullOrEmpty(inContextualKeywords)) { contextual.AddRange(inContextualKeywords.Split(' ')); }
        if (!string.IsNullOrEmpty(inUserKeywords)) { USER_KEYWORDS = new List<string>(inUserKeywords.Split(' ')); }

        if (AutoFillContextual) { contextual.AddRange(AssemblyTypes()); }

        CONTEXTUAL_KEYWORDS = contextual;
        KEYWORDS_CONFIGURED = true;
    }

    /// <summary>
    /// Hooks the CharAdded, Delete and StyleNeeded events of <paramref name="inScintilla"/> so the
    /// control is styled by this lexer.
    /// </summary>
    /// <param name="inScintilla">The control to attach to.</param>
    public static void Init_Lexer(Scintilla inScintilla)
    {
        //ae.Char is a numeric character code, so it has to be converted to a char before it can be
        //compared with the one-character strings in FullDocument (ToString() on the int gives "42", not "*").
        inScintilla.CharAdded += (s, ae) => { IMPORTANT_KEY_DELETED = FullDocument.Contains(((char)ae.Char).ToString()); };

        //PLEASE NOTE I'M ALLOWING THIS TO BE CALLED MULTIPLE TIMES.  JUST IN CASE, IT NEEDS TO BE USED ON MULTIPLE SCINTILLA CONTROLS.
        //NOTE(review): calling it twice for the SAME control subscribes every handler twice.
        inScintilla.Delete += (s, de) => { IMPORTANT_KEY_DELETED = (FullDocument.Contains(de.Text) || de.Text == @""""); };

        inScintilla.StyleNeeded += (s, se) =>
        {
            Style(inScintilla, inScintilla.GetEndStyled(), se.Position, IMPORTANT_KEY_DELETED);

            IMPORTANT_KEY_DELETED = false;
        };
    }

    /// <summary>
    /// Styles a range of the document, or all of it.
    /// </summary>
    /// <param name="scintilla">The control to style.</param>
    /// <param name="startPos">Position to start from; it is moved back to the start of its line. Ignored when <paramref name="fullDoc"/> is true.</param>
    /// <param name="endPos">Position to stop at (exclusive). Ignored when <paramref name="fullDoc"/> is true.</param>
    /// <param name="fullDoc">When true the whole document is restyled from position 0.</param>
    public static void Style(Scintilla scintilla, int startPos, int endPos, bool fullDoc = true)
    {
        startPos = (fullDoc ? 0 : scintilla.Lines[scintilla.LineFromPosition(startPos)].Position);

        //Lines.Count is one past the last valid index, so the last line is Count - 1.
        endPos = (fullDoc ? (scintilla.Lines[scintilla.Lines.Count - 1].EndPosition - 1) : endPos);

        //"length" counts characters consumed in the current state that have not been styled yet.
        int style, opLength, length = 0, state = STATE_UNKNOWN;

        bool VERBATIM = false,         //Current string uses StyleVerbatim (opened with @").
             VERBATIM_CONTENT = false, //Current string follows verbatim rules: "" is a quote, backslash is literal.
             INTERPOLATED = false,     //Current string was opened with $" or $@", so { } delimit code.
             PARENTHESIS = false;      //Inside the { } of an interpolated string.

        char c = '\0', d = '\0';

        bool SINGLE_LINE_COMMENT,
              MULTI_LINE_COMMENT;

        void ClearState() { length = state = STATE_UNKNOWN; }

        void DefaultStyle() => scintilla.SetStyling(1, StyleDefault);

        int StyleUntilEndOfLine(int inPosition, int inStyle)
        {
            int len = (scintilla.Lines[scintilla.LineFromPosition(inPosition)].EndPosition - inPosition);

            scintilla.SetStyling(len, inStyle);

            return --len; //We return the length, cause we'll have to adjust the startPos.
        }

        bool ContainsUsingStatement(int inPosition) => (scintilla.GetTextRange(scintilla.Lines[scintilla.LineFromPosition(inPosition)].Position, 5)).Contains("using");

        //The keyword lists are public fields, so a caller may have nulled one of them.
        bool InList(List<string> list, string word) => ((list != null) && list.Contains(word));

        //Length (2, 1 or 0) of the operator starting with "first". The two-character form is tried
        //first; if it is not an operator we fall back to the single character, so "a=-1" still
        //highlights the '='.
        int OperatorLength(char first, char second)
        {
            if (!(char.IsSymbol(first) || OperatorStragglers.Contains(first))) { return 0; }

            if ((char.IsSymbol(second) || OperatorStragglers.Contains(second)) && Operators.Contains($"{first}{second}")) { return 2; }

            return (Operators.Contains($"{first}") ? 1 : 0);
        }

        //Only fall back to the default contextual list when the caller never configured anything.
        //Doing this on every pass would throw away the caller's contextual keywords and repeat the
        //assembly reflection for every StyleNeeded event.
        if (!KEYWORDS_CONFIGURED) { SetKeyWords(AutoFillContextual: true); }

        scintilla.StartStyling(startPos);
        {
            for (; startPos < endPos; startPos++)
            {
                //Read one character at a time; going through scintilla.Text here would fetch the
                //whole document text for every character scanned.
                c = (char)scintilla.GetCharAt(startPos);
                d = (char)scintilla.GetCharAt(startPos + 1);

                if (state == STATE_UNKNOWN)
                {
                    bool bFormattedVerbatim = ((c == '$') && (d == '@')),
                                 bFormatted = ((c == '$') && ((d == '"'))),
                               bNegativeNum = ((c == '-') && (char.IsDigit(d))),
                                  bFraction = ((c == '.') && (char.IsDigit(d))),
                                    bString = (c == '"');

                    VERBATIM = ((c == '@') && (d == '"'));

                    SINGLE_LINE_COMMENT = ((c == '/') && (d == '/'));
                    MULTI_LINE_COMMENT = ((c == '/') && (d == '*'));

                    //I always want braces to be highlighted 
                    if ((c == '{') || (c == '}'))
                    {
                        scintilla.SetStyling(1, ((scintilla.BraceMatch(startPos) > -1) ? StyleBraces : StyleError)); //Only works if I load my external lexer.
                    }
                    else if (char.IsLetter(c) || (c == '_')) //Indentifier - Keywords, procedures, etc ...
                    {
                        //'_' has to start an identifier too; otherwise the word fetched in
                        //STATE_IDENTIFIER begins one character before the styling position.
                        state = STATE_IDENTIFIER;
                        continue;
                    }
                    else if (bString || VERBATIM || bFormatted || bFormattedVerbatim) //String
                    {
                        //Length of the opening token: "  @"  $"  $@"
                        int len = ((VERBATIM || bFormatted || bFormattedVerbatim) ? ((bFormattedVerbatim) ? 3 : 2) : 1);

                        scintilla.SetStyling(len, (!VERBATIM ? StyleString : StyleVerbatim));

                        startPos += (len - 1);

                        VERBATIM_CONTENT = (VERBATIM || bFormattedVerbatim);
                        INTERPOLATED = (bFormatted || bFormattedVerbatim);

                        state = STATE_STRING;
                        continue;
                    }
                    else if (char.IsDigit(c) || bNegativeNum || bFraction) //Number
                    {
                        state = STATE_NUMBER;
                        continue;
                    }
                    else if (SINGLE_LINE_COMMENT || MULTI_LINE_COMMENT) //Comment
                    {
                        if (SINGLE_LINE_COMMENT)
                        {
                            startPos += StyleUntilEndOfLine(startPos, StyleComment);
                        }
                        else
                        {
                            scintilla.SetStyling(2, StyleComment);

                            //Step onto the '*' only; the loop increment then lands on the first
                            //character after "/*", so "/**/" is seen as closed.
                            startPos += 1;

                            state = STATE_MULTILINE_COMMENT;
                            continue;
                        }
                    }
                    else if (c == '#') //Preprocessor
                    {
                        startPos += StyleUntilEndOfLine(startPos, StylePreprocessor);
                    }
                    else if ((opLength = OperatorLength(c, d)) > 0) //Operators
                    {
                        scintilla.SetStyling(opLength, StyleOperator);

                        startPos += (opLength - 1);
                        continue;
                    }
                    else { DefaultStyle(); }
                }

                switch (state)
                {
                    case STATE_IDENTIFIER:
                        //We get here one character AFTER the identifier's first character.
                        string identifier = scintilla.GetWordFromPosition(startPos);

                        if (string.IsNullOrEmpty(identifier))
                        {
                            //No word here (presumably a letter Scintilla does not treat as a word
                            //character). Without this guard "Length - 2" would move startPos
                            //backwards and the loop would never finish. Style the pending
                            //character as default and rescan the current one.
                            scintilla.SetStyling(1, StyleDefault);

                            ClearState();

                            startPos--;

                            break;
                        }

                        style = StyleIdentifier;

                        int s = startPos;

                        //Move to the identifier's last character (we are already on its second one).
                        startPos += (identifier.Length - 2);

                        d = (char)scintilla.GetCharAt(startPos + 1);

                        bool OPEN_PAREN = (d == '(');

                        if (!OPEN_PAREN && InList(KEYWORDS, identifier)) { style = StyleKeyword; } //Keywords
                        else if (!OPEN_PAREN && InList(CONTEXTUAL_KEYWORDS, identifier)) { style = StyleContextual; } //Contextual Keywords
                        else if (!OPEN_PAREN && InList(USER_KEYWORDS, identifier)) { style = StyleUser; } //User Keywords
                        else if (OPEN_PAREN) { style = StyleProcedure; } //Procedures
                        else if (IdentifierMarkers.Contains(d) && !(ContainsUsingStatement(startPos))) { style = StyleProcedureContainer; } //Procedure Containers "classes?"
                        else
                        {
                            //s - 2 is the character just before the identifier.
                            if ( ((char)scintilla.GetCharAt(s - 2) == '.') && !(ContainsUsingStatement(s))) { style = StyleContainerProcedure; } //Container "procedures"
                        }

                        ClearState();

                        scintilla.SetStyling(identifier.Length, style);

                        break;

                    case STATE_NUMBER:
                        length++;

                        if (!NumberTypes.Contains(c))
                        {
                            scintilla.SetStyling(length, StyleNumber);

                            ClearState();

                            startPos--; //Rescan the character that ended the number.
                        }

                        break;

                    case STATE_STRING:
                        length++;

                        style = (VERBATIM ? StyleVerbatim : StyleString);

                        //Only interpolated strings give braces a meaning. Tracking them in ordinary
                        //strings made "{" swallow everything up to the next '}', and testing the
                        //NEXT character for '}' hid the closing quote in code such as {"a"}.
                        if (INTERPOLATED && (PARENTHESIS || (c == '{') || (c == '}'))) //Formatted strings that are using braces
                        {
                            if (c == '{') { PARENTHESIS = true; }
                            if (c == '}') { PARENTHESIS = false; }
                        }
                        else if (VERBATIM_CONTENT && ((c == '"') && (d == '"'))) //Skip over embedded quotation marks 
                        {
                            length++;
                            startPos++;
                        }
                        else if ((c == '"') && (d != '"')) //End of our string?
                        {
                            scintilla.SetStyling(length, style);
                            ClearState();

                            VERBATIM = VERBATIM_CONTENT = INTERPOLATED = false;
                        }
                        else
                        {
                            //Backslash is an ordinary character under verbatim rules, so @"C:\" must
                            //still end at its quote.
                            if (!VERBATIM_CONTENT && (c == '\\') && EscapeSequences.Contains(d)) //Escape Sequences
                            {
                                //Flush the text before the backslash. "length" already counts the
                                //backslash, so it is always one too many here, for "\\" as well.
                                scintilla.SetStyling((length - 1), style);
                                {
                                    startPos++; length = 0;
                                }
                                scintilla.SetStyling(2, StyleEscapeSequence);
                            }
                        }

                        break;

                    case STATE_MULTILINE_COMMENT:
                        length++;

                        if ((c == '*') && (d == '/'))
                        {
                            length += 1; //The closing '/'.

                            scintilla.SetStyling(length, StyleComment);

                            ClearState();

                            startPos++;
                        }

                        break;
                }
            }
        }
    }
}
tobeypeters commented 5 years ago

Really want to get all this working flawlessly.

Side note: What is really needed is a lexer that tokenizes. I can break the styling pretty easily with this one, even excluding my own code. I can tell this Jacob guy is a really great coder; I'm just saying that a normal lexer would make things easier and more fail-safe. I AM still trying to figure out how to adapt this one.

Update: If you style the entire document every time, things work much, much better. But we shouldn't have to do that. Or do the lexer and tokenizer approaches do that anyway?

Update 2: Changed the lexer so you can tell it to style either the entire document or just the StyleNeeded text range. I'm not scanning the entire document unless one of the following characters is deleted: [ ", /, * ]

Stumpii commented 5 years ago

Any reason you are not using the built in C++ lexer, or using the C++ lexer as a starting point?

Stumpii commented 5 years ago

My ScintillaNET.WPF project uses the C++ lexer for C# by default if you want to play with it.

tobeypeters commented 5 years ago

I know how to use the other lexers. They're limited and can't be extended. Even plain formatted and verbatim strings don't display the way I want. I'm after Visual Studio style highlighting, with a few extras. I can't just use StyleNeeded() and so on alongside a built-in lexer, because you have to set the Lexer to Container. I originally thought you could use UpdateUI as a hack, but depending on what you do, it causes the CPU to peg at about 25%.