Pass custom grammars - Githubissues

JaKXz commented 1 year ago

juliusv commented 1 year ago

I'd also be interested in either passing a custom grammar or some guidance for how to go about adding languages "officially". I'd love to be able to use Code Hike for PromQL educational stuff and there's an old PromQL TextMate grammar at https://github.com/prometheus-community/vscode-promql/blob/master/syntaxes/promql.tmlanguage.yml that I could revive. Is Shiki the right place to contribute, or would https://github.com/code-hike/lighter be (if there is interest in adding new languages at all)?

lachieh commented 10 months ago

Any thoughts on implementing this? I'm happy to assist, but didn't want to work my way toward a solution if maintainers have a specific implementation in mind.

pomber commented 10 months ago

This is something I want to add. I'm working on v1.0, so it will have to wait until I release that, which may take some time.

lachieh commented 10 months ago

As in, work shouldn't start until v1 is released because of churn? Or you'd like to consider this for v1?

pomber commented 10 months ago

> As in, work shouldn't start until v1 is released because of churn? Or you'd like to consider this for v1?

As in: I want to include it in v1.0

pomber commented 10 months ago

@lachieh @juliusv can you share your custom grammars and sample code so I can test this with real world scenarios?

lachieh commented 9 months ago

Sure thing! Here's a grammar for the WIT WebAssembly Interface Types language, and a code sample to match:

WIT Grammar

```json { "$schema": "https://raw.githubusercontent.com/martinring/tmlanguage/master/tmlanguage.json", "name": "WIT", "fileTypes": [ "wit" ], "uuid": "73554272-ff1a-4515-879e-39a6dcec955d", "foldingStartMarker": "(\\{|\\[)\\s*", "foldingStopMarker": "\\s*(\\}|\\])", "scopeName": "source.wit", "patterns": [ { "include": "#comment" }, { "include": "#package" }, { "include": "#toplevel-use" }, { "include": "#world" }, { "include": "#interface" }, { "include": "#whitespace" } ], "repository": { "whitespace": { "name": "meta.whitespace.wit", "comment": "whitespace token", "match": "\\s+" }, "comment": { "patterns": [ { "include": "#block-comments" }, { "include": "#doc-comment" }, { "include": "#line-comment" } ] }, "doc-comment": { "name": "comment.line.documentation.wit", "comment": "documentation comments", "begin": "^\\s*///", "end": "$", "patterns": [ { "include": "#markdown" } ] }, "line-comment": { "name": "comment.line.double-slash.wit", "comment": "line comments", "match": "\\s*//.*" }, "block-comments": { "patterns": [ { "name": "comment.block.empty.wit", "comment": "empty block comments", "match": "/\\*\\*/" }, { "name": "comment.block.documentation.wit", "comment": "block documentation comments", "begin": "/\\*\\*", "end": "\\*/", "applyEndPatternLast": 1, "patterns": [ { "include": "#block-comments" }, { "include": "#markdown" }, { "include": "#whitespace" } ] }, { "name": "comment.block.wit", "comment": "block comments", "begin": "/\\*(?!\\*)", "end": "\\*/", "applyEndPatternLast": 1, "patterns": [ { "include": "#block-comments" }, { "include": "#whitespace" } ] } ] }, "markdown": { "patterns": [ { "match": "\\G\\s*(#+.*)$", "captures": { "1": { "name": "markup.heading.markdown" } } }, { "match": "\\G\\s*((\\>)\\s+)+", "captures": { "2": { "name": "punctuation.definition.quote.begin.markdown" } } }, { "match": "\\G\\s*(\\-)\\s+", "captures": { "1": { "name": "punctuation.definition.list.begin.markdown" } } }, { "match": "\\G\\s*(([0-9]+\\.)\\s+)", 
"captures": { "1": { "name": "markup.list.numbered.markdown" }, "2": { "name": "punctuation.definition.list.begin.markdown" } } }, { "match": "(`.*?`)", "captures": { "1": { "name": "markup.italic.markdown" } } }, { "match": "\\b(__.*?__)", "captures": { "1": { "name": "markup.bold.markdown" } } }, { "match": "\\b(_.*?_)", "captures": { "1": { "name": "markup.italic.markdown" } } }, { "match": "(\\*\\*.*?\\*\\*)", "captures": { "1": { "name": "markup.bold.markdown" } } }, { "match": "(\\*.*?\\*)", "captures": { "1": { "name": "markup.italic.markdown" } } } ] }, "operator": { "patterns": [ { "name": "punctuation.equal.wit", "match": "\\=" }, { "name": "punctuation.comma.wit", "match": "\\," }, { "name": "keyword.operator.key-value.wit", "match": "\\:" }, { "name": "punctuation.semicolon.wit", "match": "\\;" }, { "name": "punctuation.brackets.round.begin.wit", "match": "\$" }, { "name": "punctuation.brackets.round.end.wit", "match": "\$" }, { "name": "punctuation.brackets.curly.begin.wit", "match": "\\{" }, { "name": "punctuation.brackets.curly.end.wit", "match": "\\}" }, { "name": "punctuation.brackets.angle.begin.wit", "match": "\\<" }, { "name": "punctuation.brackets.angle.end.wit", "match": "\\>" }, { "name": "keyword.operator.star.wit", "match": "\\*" }, { "name": "keyword.operator.arrow.skinny.wit", "match": "\\-\\>" } ] }, "package": { "name": "meta.package-decl.wit", "match": "^(package)\\s+([^\\s]+)\\s*", "captures": { "1": { "name": "storage.modifier.package-decl.wit" }, "2": { "name": "meta.id.package-decl.wit", "patterns": [ { "name": "meta.package-identifier.wit", "match": "([^\\:]+)(\\:)([^\\@]+)((\\@)([^\\s]+))?", "captures": { "1": { "name": "entity.name.namespace.package-identifier.wit", "patterns": [ { "include": "#identifier" } ] }, "2": { "name": "keyword.operator.namespace.package-identifier.wit" }, "3": { "name": "entity.name.type.package-identifier.wit", "patterns": [ { "include": "#identifier" } ] }, "5": { "name": 
"keyword.operator.versioning.package-identifier.wit" }, "6": { "name": "constant.numeric.versioning.package-identifier.wit" } } } ] } } }, "toplevel-use": { "name": "meta.toplevel-use-item.wit", "match": "^(use)\\s+([^\\s]+)(\\s+(as)\\s+([^\\s]+))?\\s*", "captures": { "1": { "name": "keyword.other.use.toplevel-use-item.wit" }, "2": { "name": "meta.interface.toplevel-use-item.wit", "patterns": [ { "name": "entity.name.type.declaration.interface.toplevel-use-item.wit", "match": "\\b%?((?)", "applyEndPatternLast": 1, "endCaptures": { "1": { "name": "punctuation.brackets.angle.end.wit" } } }, "list": { "name": "meta.list.ty.wit", "comment": "Syntax for WIT types such as list", "begin": "\\b(list)\\b(\\<)", "beginCaptures": { "1": { "name": "entity.name.type.list.wit" }, "2": { "name": "punctuation.brackets.angle.begin.wit" } }, "patterns": [ { "include": "#comment" }, { "name": "meta.types.list.wit", "include": "#types" }, { "include": "#whitespace" } ], "end": "(\\>)", "applyEndPatternLast": 1, "endCaptures": { "1": { "name": "punctuation.brackets.angle.end.wit" } } }, "option": { "name": "meta.option.ty.wit", "comment": "Syntax for WIT types such as option", "begin": "\\b(option)\\b(\\<)", "beginCaptures": { "1": { "name": "entity.name.type.option.wit" }, "2": { "name": "punctuation.brackets.angle.begin.wit" } }, "patterns": [ { "include": "#comment" }, { "name": "meta.types.option.wit", "include": "#types" }, { "include": "#whitespace" } ], "end": "(\\>)", "applyEndPatternLast": 1, "endCaptures": { "1": { "name": "punctuation.brackets.angle.end.wit" } } }, "result": { "name": "meta.result.ty.wit", "comment": "Syntax for WIT types such as result", "begin": "\\b(result)\\b", "beginCaptures": { "1": { "name": "entity.name.type.result.wit" }, "2": { "name": "punctuation.brackets.angle.begin.wit" } }, "patterns": [ { "include": "#comment" }, { "name": "meta.inner.result.wit", "begin": "(\\<)", "beginCaptures": { "1": { "name": "punctuation.brackets.angle.begin.wit" } }, 
"patterns": [ { "include": "#comment" }, { "name": "variable.other.inferred-type.result.wit", "match": "(?)", "applyEndPatternLast": 1, "endCaptures": { "1": { "name": "punctuation.brackets.angle.end.wit" } } }, { "include": "#whitespace" } ], "end": "((?<=\\n)|(?=\\,)|(?=\\}))", "applyEndPatternLast": 1 }, "handle": { "name": "meta.handle.ty.wit", "comment": "Syntax for WIT types such as handle", "match": "\\b(borrow)\\b(\\<)\\s*%?((?)", "captures": { "1": { "name": "entity.name.type.borrow.handle.wit" }, "2": { "name": "punctuation.brackets.angle.begin.wit" }, "3": { "name": "entity.name.type.id.handle.wit" }, "8": { "name": "punctuation.brackets.angle.end.wit" } } }, "identifier": { "name": "entity.name.type.id.wit", "comment": "Syntax for WIT types based on its identifier", "match": "\\b%?((?)", "beginCaptures": { "1": { "name": "keyword.operator.arrow.skinny.wit" } }, "patterns": [ { "include": "#comment" }, { "include": "#types" }, { "include": "#parameter-list" }, { "include": "#whitespace" } ], "end": "((?<=\\n)|(?=\\}))", "applyEndPatternLast": 1 }, "named-type-list": { "name": "meta.named-type-list.wit", "begin": "\\b%?((?

Language Sample

```wit package acme:space-station@0.1.0; interface space-station { type astronaut-id = u64; variant pods { none, list, } flags locations { bridge, nacelle, jefferies-tubes, } enum error-code { access, deadlock, } record astronaut { id: astronaut-id, name: string, ship-access: locations, manager: option, addresses: pods, } record inventory { name: string, stock: u32, tags: list, } resource planetary-scanner { read-via-stream: func() -> result; } /// Initiate scan on planet surface run-scan: func(in: borrow); } interface directory { use space-station.{astronaut-id, astronaut}; get-astronaut: func(id: astronaut-id) -> result; update-astronaut: func(id: astronaut-id, changes: droid) -> result; } world astronauts { import wasi:logging; export directory.{get-astronaut, update-astronaut}; } world reporting { include astronauts; use types.{inventory}; export get-inventory: func(item: option) -> list; } ```

The sample is my own but the grammar comes from the vscode-wit extension here: https://github.com/bytecodealliance/vscode-wit/blob/main/syntaxes/wit.tmLanguage.json

They also have a boatload of test wit files in their repo if you need more: https://github.com/bytecodealliance/vscode-wit/tree/main/tests/grammar/integration

FlippieCoetser commented 9 months ago

@pomber I am also very much interested in custom grammars. Currently the R grammar is rather incomplete. I have also played around with the theme editor, which is very helpful, but before I continue with a custom theme I would first like to work on the custom grammar. Generally speaking, is there any kind of tool or process you know of that can be used to create a custom grammar? It seems a rather manual process of trial and error.

lachieh commented 9 months ago

@FlippieCoetser the grammar for R comes from the shiki source which is a copy from the R VSCode extension source. I'd suggest making an issue and/or contributing a PR to the REditorSupport/vscode-R repo if you want to make improvements to the R grammar.

As for writing your own custom grammars, I have found the VSCode Syntax Highlight Guide to have a lot of helpful resources. Notably, the scope inspector tool is great for debugging.

FlippieCoetser commented 9 months ago

@lachieh thanks for the help and direction! I will do as suggested.

vkarpov15 commented 2 months ago

I managed to get this working using the following steps. I added all these to my postinstall script in package.json

Copy my custom grammar JSON file into node_modules/@code-hike/lighter/grammars
Add entry to LANG_NAMES using the following: require("@code-hike/lighter").LANG_NAMES.push("polar"); in my next.config.js
Run the following script to modify @code-hike/lighter's source code to reference the new grammar, there are a couple of internal objects that list all supported languages that need to be modified:

// Patches @code-hike/lighter's bundled CommonJS build so that it knows about
// the custom "polar" grammar. Intended to be run from a postinstall script,
// so it is safe to run repeatedly: entries that are already present are not
// inserted a second time.
const fs = require("fs");

const sourceFile = "./node_modules/@code-hike/lighter/dist/index.cjs.js";
const lighterCode = fs.readFileSync(sourceFile, "utf8");
const lines = lighterCode.split("\n");

/**
 * Insert `entry` as a new line directly below the first line whose trimmed
 * text equals `header`.
 *
 * @param {string} header - Exact (trimmed) text of the line that opens the object literal.
 * @param {string} entry - Source line to insert as the first property of that object.
 * @returns {boolean} `true` if a line was inserted, `false` if `entry` was already there.
 * @throws {Error} If no line matching `header` exists in the file.
 */
function insertBelowHeader(header, entry) {
  const headerIndex = lines.findIndex((line) => line.trim() === header);
  if (headerIndex === -1) {
    throw new Error(`Target line not found: ${header}`);
  }
  // A previous run leaves `entry` on the line right after the header;
  // skip it so repeated installs do not pile up duplicate keys.
  if (lines[headerIndex + 1] === entry) {
    return false;
  }
  lines.splice(headerIndex + 1, 0, entry);
  return true;
}

// Add polar to `aliasOrIdToScope` entry, which isn't exported
const addedAlias = insertBelowHeader(
  "const aliasOrIdToScope = {",
  "'polar': 'source.polar',"
);

// Add polar to `scopeToLanguageData`, which isn't exported
const addedLanguageData = insertBelowHeader(
  "const scopeToLanguageData = {",
  "'source.polar':{id:'polar',path:'polar.tmLanguage.json',embeddedScopes: []},"
);

// Only touch the file on disk when something actually changed.
if (addedAlias || addedLanguageData) {
  fs.writeFileSync(sourceFile, lines.join("\n"));
}

code-hike / codehike

Pass custom grammars #297