siefkenj / unified-latex

Utilities for parsing and manipulating LaTeX ASTs with the Unified.js framework
MIT License
85 stars 20 forks source link

Parbreak between item environment has no position #73

Closed pddg closed 6 months ago

pddg commented 6 months ago

Environment

What happened

Sample tex source is here.

\begin{itemize}
    \item First item
    \item Second item
\end{itemize}

The line break between \item First item and \item Second item is parsed as a Parbreak. However, that node does not have a position.

Generated AST ```json { "type": "root", "content": [ { "type": "environment", "env": "itemize", "content": [ { "type": "macro", "content": "item", "position": { "start": { "offset": 20, "line": 2, "column": 5 }, "end": { "offset": 25, "line": 2, "column": 10 } }, "_renderInfo": { "hangingIndent": true, "namedArguments": [ null, "label", null ], "inParMode": true }, "args": [ { "type": "argument", "content": [], "openMark": "", "closeMark": "" }, { "type": "argument", "content": [], "openMark": "", "closeMark": "" }, { "type": "argument", "content": [], "openMark": "", "closeMark": "" }, { "type": "argument", "content": [ { "type": "whitespace" }, { "type": "string", "content": "First", "position": { "start": { "offset": 26, "line": 2, "column": 11 }, "end": { "offset": 31, "line": 2, "column": 16 } } }, { "type": "whitespace", "position": { "start": { "offset": 31, "line": 2, "column": 16 }, "end": { "offset": 32, "line": 2, "column": 17 } } }, { "type": "string", "content": "item", "position": { "start": { "offset": 32, "line": 2, "column": 17 }, "end": { "offset": 36, "line": 2, "column": 21 } } } ], "openMark": "", "closeMark": "" } ] }, { "type": "parbreak" }, { "type": "macro", "content": "item", "position": { "start": { "offset": 41, "line": 3, "column": 5 }, "end": { "offset": 46, "line": 3, "column": 10 } }, "_renderInfo": { "hangingIndent": true, "namedArguments": [ null, "label", null ], "inParMode": true }, "args": [ { "type": "argument", "content": [], "openMark": "", "closeMark": "" }, { "type": "argument", "content": [], "openMark": "", "closeMark": "" }, { "type": "argument", "content": [], "openMark": "", "closeMark": "" }, { "type": "argument", "content": [ { "type": "whitespace" }, { "type": "string", "content": "Second", "position": { "start": { "offset": 47, "line": 3, "column": 11 }, "end": { "offset": 53, "line": 3, "column": 17 } } }, { "type": "whitespace", "position": { "start": { "offset": 53, "line": 3, "column": 17 }, "end": { "offset": 
54, "line": 3, "column": 18 } } }, { "type": "string", "content": "item", "position": { "start": { "offset": 54, "line": 3, "column": 18 }, "end": { "offset": 58, "line": 3, "column": 22 } } } ], "openMark": "", "closeMark": "" } ] } ], "position": { "start": { "offset": 0, "line": 1, "column": 1 }, "end": { "offset": 72, "line": 4, "column": 14 } }, "args": [ { "type": "argument", "content": [], "openMark": "", "closeMark": "" } ] } ], "position": { "start": { "offset": 0, "line": 1, "column": 1 }, "end": { "offset": 73, "line": 5, "column": 1 } } } ```

Expected behavior

The parbreak has a position.

How to reproduce

Use getParser to parse the sample tex source.

import * as fs from "fs";
import { getParser } from '@unified-latex/unified-latex-util-parse';

const content = fs.readFileSync("sample.tex").toString();
const ast = getParser().parse(content);
console.log(JSON.stringify(ast, undefined, "  "));
siefkenj commented 6 months ago

Hi @pddg ! The parbreak is inserted by the parser but isn't actually present in the source. That's why it has no position information. The body of enumerate environments is parsed and then cleaned to ensure uniform printing.

If you want to prevent unified-latex from normalizing the itemize environment, you can use unifiedLatexFromStringMinimal or pass itemize into the environments option, which will override the default environment normalization for that environment.

pddg commented 6 months ago

Thank you very much. I think I can get what I want with parseMinimal.