前端模板之`mustache.js`源码解析

前端模板`mustache.js`源码解析

近些年各种前端模板引擎层出不穷，mustache就是其中比较出名的一种。mustache是一种弱逻辑的模板语法，mustache.js是它的JS实现。

为什么关注`mustache.js`并去解析源码？

underscore template和Micro-Templating等等模板是基于原生JS语法，解析基本是运用正则拼接字符串；相比它们，mustache.js基于自定义语法，解析更为复杂。解读mustache.js可以学习自定义语法的解析（简单的手写解析器）。
mustache.js本身代码精简（v2.2.1只有600+行），结构清晰，易于理解。
mustache.js的进阶版有handlebars等等，可以在mustache.js基础上自己定制/增强前端模板。
源码分析

600多行的mustache.js大致可以分为以下几部分：

├── context.js   # Context类
├── index.js     # 入口，暴露API
├── parser.js    # 主要是 parseTemplate 函数
├── scanner.js   # Scanner类
├── utils.js     # 工具函数：isArray, escapeHtml等等
└── writer.js    # Writer类

以上是我对代码按功能分块重构（ES6）后的文件组织形式，其中index.js主要暴露API，utils.js是一些工具函数，这里都省略掉，重点要讲的就是context.js/parser.js/scanner.js/writer.js四部分。完整的代码可以在项目sugar-template中查看。

`scanner.js`

Scanner类很简单，主要功能是扫描字符串，按指定正则分割字符串。

/**
 * Minimal string scanner: consumes a text from left to right, cutting it
 * at positions described by regular expressions.
 *
 * `text` is the full input, `tail` is the part not consumed yet and `pos`
 * is the number of characters consumed so far.
 */
class Scanner {
    constructor(text) {
        this.text = text
        this.tail = text
        this.pos = 0
    }

    /** @returns {boolean} true once the whole text has been consumed. */
    eos() {
        return this.tail === ''
    }

    /**
     * Consume and return everything in front of the first match of `re`.
     * Without any match the whole remaining tail is consumed; when the match
     * sits at the very beginning nothing is consumed and '' is returned.
     */
    scanUntil(re) {
        const hit = this.tail.search(re)

        // Match at the start: nothing to cut off, the state stays untouched.
        if (hit === 0) return ''

        let consumed
        if (hit === -1) {
            consumed = this.tail
            this.tail = ''
        } else {
            consumed = this.tail.slice(0, hit)
            this.tail = this.tail.slice(hit)
        }

        this.pos += consumed.length
        return consumed
    }

    /**
     * Consume and return the match of `re`, but only if it starts exactly at
     * the current position; otherwise return '' and leave the state alone.
     */
    scan(re) {
        const found = this.tail.match(re)

        if (found && found.index === 0) {
            const lexeme = found[0]
            this.tail = this.tail.substring(lexeme.length)
            this.pos += lexeme.length
            return lexeme
        }

        return ''
    }
}

类只有3个方法，都很简短，稍微讲解scanUntil和scan。两者都接受一个正则作为参数，其中

scanUntil是把符合正则部分之前的字符串切分出来。假设this.tail.search(re) --> index，函数返回this.tail.slice(0, index)；如果没有匹配（index为-1），则返回剩余的全部字符串。
scan是把符合正则部分（必须从开头符合正则）的字符串切分出来。

两个方法都用来按正则截取字符串，并内部处理pos标记位置。

`parser.js`

接下来讲parser.js，它是Scanner类的使用者，用于把模板解析为token树。

由于这段代码较长，所以解析放在代码注释里。

/**
 * Parse a template string into tokens.
 *
 * Every token built here has the form [type, value, startIndex, endIndex].
 * The flat token list is finally passed through squashTokens and nestTokens
 * and that result is returned.
 *
 * @param {string} template - template source; a falsy value yields [].
 * @param {string|Array} [tags=sugar.tags] - opening/closing tag pair; a string
 *     is split on spaceRe into the two tags.
 * @returns {Array} the result of nestTokens(squashTokens(tokens)).
 * @throws {Error} on an unclosed tag, an unopened or unclosed section, or
 *     when the tags do not resolve to exactly two entries.
 */
function parseTemplate(template, tags = sugar.tags) {
    if (!template) return []

    let sections = [] // Stack to hold section tokens
    let tokens = [] // Buffer to hold the tokens
    let spaces = [] // Indices of whitespace tokens on the current line
    let hasTag = false // Is there a {{tag}} on the current line?
    let nonSpace = false // Is there a non-space char on the current line?

    let openingTagRe
    let closingTagRe
    let closingCurlyRe
    // Compile `tags` into the three regexes openingTagRe, closingTagRe and closingCurlyRe,
    // which match the opening tag (usually "{{"), the closing tag (usually "}}") and the
    // closing tag of a raw-output tag (usually "}}}")
    compileTags(tags)

    // Create a Scanner over the template
    const scanner = new Scanner(template)

    let start, type, value, chr, token, openSection
    // Loop as long as the template has not been fully consumed
    while (!scanner.eos()) {
        start = scanner.pos // Current position, initially 0

        // (1) Cut off everything in front of the next opening tag (that part is plain text)
        value = scanner.scanUntil(openingTagRe)

        if (value) {
            for (let i = 0, valueLength = value.length; i < valueLength; ++i) {
                chr = value.charAt(i)
                // For a whitespace char, remember its index in `tokens` in the `spaces` array
                if (isWhitespace(chr)) {
                    spaces.push(tokens.length)
                } else {
                    nonSpace = true // The current line has a non-space char
                }
                // Every single char becomes its own 'text' token
                tokens.push(['text', chr, start, ++start])

                // At a line break, check the finished line and strip its whitespace if needed
                if (chr === '\n') stripSpace()
            }
        }

        // (2) Consume the opening tag; if there is none, leave the while loop
        if (!scanner.scan(openingTagRe)) {
            break
        }
        // What follows the opening tag is the tag type and content, so the line has a tag
        hasTag = true

        // (3) Consume the tag type matched by tagRe (one of `#,^,/,>,{,&,=,!` according to
        // the original note); if nothing matches, the tag is a plain 'name'
        type = scanner.scan(tagRe) || 'name'
        // (4) Consume (drop) optional whitespace
        scanner.scan(whiteRe)

        // (5) Read the tag content depending on the tag type
        // Type '=' switches the opening/closing tags, e.g. {{=<% %>=}}
        if (type === '=') {
            // The content is whatever precedes the closing '='
            value = scanner.scanUntil(equalsRe)
            scanner.scan(equalsRe)
            scanner.scanUntil(closingTagRe)
        }
        // Type '{' means the content is output as is, without escaping, e.g. {{{name}}}
        else if (type === '{') {
            // The content is whatever precedes '}}}'
            value = scanner.scanUntil(closingCurlyRe)
            scanner.scan(curlyRe)
            scanner.scanUntil(closingTagRe)
            // From here on the token is handled exactly like an '&' token
            type = '&'
        }
        // For every other type the content is whatever precedes the closing tag '}}'
        else {
            value = scanner.scanUntil(closingTagRe)
        }

        // (6) Consume (drop) the closing tag
        if (!scanner.scan(closingTagRe))
            throw new Error('Unclosed tag at ' + scanner.pos)
        // Build the token and push it
        token = [type, value, start, scanner.pos]
        tokens.push(token)

        // Extra handling depending on the type
        if (type === '#' || type === '^') {
            sections.push(token) // Section start (#,^): push onto the sections stack
        }
        // Section end (/): pop the sections stack and verify the nesting
        else if (type === '/') {
            // Check section nesting.
            openSection = sections.pop()

            if (!openSection)
                throw new Error('Unopened section "' + value + '" at ' + start)

            if (openSection[1] !== value)
                throw new Error('Unclosed section "' + openSection[1] + '" at ' + start)
        }
        // name and & produce output, so the current line counts as nonSpace
        // NOTE(review): type '{' was already rewritten to '&' above, so the '{' test never matches
        else if (type === 'name' || type === '{' || type === '&') {
            nonSpace = true
        }
        // For '=', recompile the tag regexes so the rest of the template is parsed with the new tags
        else if (type === '=') {
            compileTags(value)
        }
    }

    // No section may be left open once the template is consumed; otherwise the
    // template contains an unclosed section
    openSection = sections.pop()
    if (openSection)
        throw new Error('Unclosed section "' + openSection[1] + '" at ' + scanner.pos)

    return nestTokens(squashTokens(tokens))

    // Sets the three regexes openingTagRe, closingTagRe and closingCurlyRe.
    // A string argument is split on spaceRe into at most two tags.
    function compileTags(tagsToCompile) {
        if (typeof tagsToCompile === 'string')
            tagsToCompile = tagsToCompile.split(spaceRe, 2)

        if (!isArray(tagsToCompile) || tagsToCompile.length !== 2)
            throw new Error('Invalid tags: ' + tagsToCompile)

        openingTagRe = new RegExp(escapeRegExp(tagsToCompile[0]) + '\\s*')
        closingTagRe = new RegExp('\\s*' + escapeRegExp(tagsToCompile[1]))
        closingCurlyRe = new RegExp('\\s*' + escapeRegExp('}' + tagsToCompile[1]))
    }

    // If a line holds a tag but no non-space content (e.g. only a section start or end
    // like 1. {{#tag}}  2. {{/tag}}), drop all whitespace tokens of that line: the
    // whitespace is meaningless there and must not affect the generated string.
    // Resets the per-line flags afterwards.
    function stripSpace() {
        if (hasTag && !nonSpace) {
            while (spaces.length) {
                tokens[spaces.pop()] = null
            }
        } else {
            spaces = []
        }

        hasTag = false
        nonSpace = false
    }
}

parseTemplate应该算比较长了，但总体来说并不复杂，就是完成一个template string ---> tokens的转换。

token的格式是：[type, value, startIndex, endIndex]。

另外：

squashTokens函数的作用是合并text token；
而nestTokens函数的作用是把tokens转化成tokens tree，其中section类型的token格式为：[type, value, startIndex, endIndex, innerTokens, closingTagStartIndex]。

总体来说两个函数都不难，这里只讲解下nestTokens：

/**
 * Turn a flat token list into a token tree.
 *
 * A section opener ('#' or '^') receives an array of child tokens at index 4
 * and, once its closing '/' token is met, the start index of that closing
 * token at index 5. The closing '/' tokens themselves are dropped.
 *
 * @param {Array} tokens - flat list of tokens.
 * @returns {Array} the top-level tokens, with sections nested.
 */
function nestTokens(tokens) {
    const root = []
    const openSections = [] // stack of sections that are not closed yet
    let target = root // array that currently receives tokens

    for (const token of tokens) {
        const kind = token[0]

        if (kind === '#' || kind === '^') {
            // Opening a section: from now on tokens go into its child list.
            target.push(token)
            openSections.push(token)
            token[4] = []
            target = token[4]
        } else if (kind === '/') {
            // Closing a section: record where the closing tag starts, then
            // continue collecting into the enclosing section (or the root).
            const closed = openSections.pop()
            closed[5] = token[2]

            const depth = openSections.length
            target = depth > 0 ? openSections[depth - 1][4] : root
        } else {
            target.push(token)
        }
    }

    return root
}

`context.js`

到这里我们已经获得了tokens，那么怎么从tokens + data ---> html？

别急，先完成一个依赖任务，怎么处理这个data？

有人会问，data要处理吗？不同模板引擎有不同的态度，mustache处理data后可以做到：

组成一个data chains，内层可以访问外层的数据
提供API，简化/缓存数据访问

// 作为渲染上下文，包装data，并且有一个父上下文的引用
/**
 * Rendering context: wraps a data object and keeps a reference to an
 * optional parent context, so names that are missing in the inner data can
 * be resolved in the outer ones.
 */
class Context {
    /**
     * @param {*} data - the data of this context.
     * @param {Context} [parentContext] - enclosing context, if any.
     */
    constructor(data, parentContext) {
        this.data = data
        // Per-context lookup cache; '.' always resolves to the data itself.
        this.cache = {
            '.': this.data
        }
        this.parent = parentContext
    }

    /**
     * Create a child context.
     * @param {*} data - data of the new context.
     * @returns {Context} a new context whose parent is this context.
     */
    push(data) {
        return new Context(data, this)
    }

    /**
     * Resolve `name` in this context, walking up the parent chain while it
     * is not found. `name` may be a dotted path such as "prop1.prop2.prop3".
     * A resolved function is called with this context's data as `this` and
     * its return value is returned instead.
     *
     * @param {string} name - plain name or dotted path.
     * @returns {*} the resolved value, or undefined/null when nothing matches.
     */
    lookup(name) {
        const cache = this.cache
        let value

        // Go through Object.prototype: once a name such as "hasOwnProperty"
        // has been cached, calling cache.hasOwnProperty() directly would
        // invoke the cached value instead of the built-in method.
        if (Object.prototype.hasOwnProperty.call(cache, name)) {
            value = cache[name]
        } else {
            let context = this
            let lookupHit = false

            while (context) {
                const data = context.data

                if (name.indexOf('.') > 0) {
                    const names = name.split('.')
                    let index = 0
                    value = data

                    // Descend one path segment at a time; only the last
                    // segment decides whether the lookup counts as a hit.
                    while (value != null && index < names.length) {
                        if (index === names.length - 1)
                            lookupHit = hasProperty(value, names[index])

                        value = value[names[index++]]
                    }
                } else if (data != null) {
                    value = data[name]
                    lookupHit = hasProperty(data, name)
                } else {
                    // Contexts without data (e.g. a null array item) hold no
                    // names; fall through to the parent instead of throwing.
                    value = undefined
                }

                // Found: stop walking up
                if (lookupHit) break
                // Not found: continue with the parent context
                context = context.parent
            }

            cache[name] = value
        }

        if (isFunction(value)) {
            value = value.call(this.data)
        }

        return value
    }
}

怎么说呢，Context的作用清晰简单，并没有什么需要特别讲解。下面以一个例子明确下它的作用：

// Example: looking up a dotted name in a context
const ctx = new Context({
    title: 'welcome',
    user: {
        age: 18
    }
})
ctx.lookup('user.age')  // ---> 18; handy in templates, where names like {{user.age}} are common

`writer.js`

writer.js主要是Writer类，负责tokens + data ---> html。

Writer类主要需要关注的是render**形式的方法：

/**
 * Turns tokens + data into the output string. Parsed templates are cached
 * per Writer instance, keyed by the template source.
 */
class Writer {
    constructor() {
        this.cache = {}
    }

    /** Drop all cached token trees. */
    clearCache() {
        this.cache = {}
    }

    /**
     * Cached wrapper around parseTemplate.
     * @param {string} template - template source, also used as cache key.
     * @param {string|Array} [tags] - forwarded to parseTemplate on a cache miss.
     * @returns {Array} the tokens of the template.
     */
    parse(template, tags) {
        const cache = this.cache
        let tokens = cache[template]

        if (tokens == null) {
            tokens = cache[template] = parseTemplate(template, tags)
        }

        return tokens
    }

    /**
     * template ---> tokens, view ---> context, then renderTokens.
     * @param {string} template - template source.
     * @param {*} view - data, or an already built Context.
     * @param {Object|Function} [partials] - partial templates by name, or a
     *     function returning the partial for a name.
     * @returns {string} the rendered output.
     */
    render(template, view, partials) {
        const tokens = this.parse(template)
        const context = (view instanceof Context) ? view : new Context(view)
        return this.renderTokens(tokens, context, partials, template)
    }

    /**
     * Render each token with the method matching its type and concatenate
     * the results. Token types without a handler, and handlers returning
     * undefined, contribute nothing.
     * @returns {string} the concatenated output.
     */
    renderTokens(tokens, context, partials, originalTemplate) {
        let buffer = ''

        let token, symbol, value
        for (let i = 0, numTokens = tokens.length; i < numTokens; ++i) {
            value = undefined
            token = tokens[i]
            symbol = token[0]

            if (symbol === '#') {
                value = this.renderSection(token, context, partials, originalTemplate)
            } else if (symbol === '^') {
                value = this.renderInverted(token, context, partials, originalTemplate)
            } else if (symbol === '>') {
                value = this.renderPartial(token, context, partials, originalTemplate)
            } else if (symbol === '&') {
                value = this.unescapedValue(token, context)
            } else if (symbol === 'name') {
                value = this.escapedValue(token, context)
            } else if (symbol === 'text') {
                value = this.rawValue(token)
            }

            if (value !== undefined) {
                buffer += value
            }
        }

        return buffer
    }

    /** 'text' token: return the token's text unchanged. */
    rawValue(token) {
        return token[1]
    }

    /** {{name}}: return escapeHtml(context.lookup(name)), nothing for null/undefined. */
    escapedValue(token, context) {
        const value = context.lookup(token[1])
        if (value != null)
            return escapeHtml(value)
    }

    /** {{{name}}} and {{&name}}: return context.lookup(name) as is, nothing for null/undefined. */
    unescapedValue(token, context) {
        const value = context.lookup(token[1])
        if (value != null)
            return value
    }

    /**
     * {{>name}}: take partials(name) or partials[name] as a new template and
     * render it with the current context.
     */
    renderPartial(token, context, partials) {
        if (!partials) return

        const value = isFunction(partials) ? partials(token[1]) : partials[token[1]]
        if (value != null)
            return this.renderTokens(this.parse(value), context, partials, value)
    }

    /**
     * {{^name}}: render the inner tokens only when context.lookup(name) is
     * falsy or an empty array.
     */
    renderInverted(token, context, partials, originalTemplate) {
        const value = context.lookup(token[1])

        if (!value || (isArray(value) && value.length === 0))
            return this.renderTokens(token[4], context, partials, originalTemplate)
    }

    /**
     * {{#name}}: render the inner tokens only when context.lookup(name) is
     * truthy. How they are rendered depends on the type of the value.
     * @throws {Error} for a function value when no original template string
     *     is available.
     */
    renderSection(token, context, partials, originalTemplate) {
        let buffer = ''
        // `let`, not `const`: the function branch below replaces the value
        // with the function's return value.
        let value = context.lookup(token[1])
        const subRender = (template) => {
            return this.render(template, context, partials)
        }

        if (!value) return

        // Array: render the inner tokens once per item, each in a child context
        if (isArray(value)) {
            for (let j = 0, valueLength = value.length; j < valueLength; ++j) {
                buffer += this.renderTokens(token[4], context.push(value[j]), partials, originalTemplate)
            }
        }
        // Object, string or number: render the inner tokens in a child context built from the value
        else if (typeof value === 'object' || typeof value === 'string' || typeof value === 'number') {
            buffer += this.renderTokens(token[4], context.push(value), partials, originalTemplate)
        }
        // Function: call it with the raw section source and a render helper;
        // its return value is used directly as the output
        else if (isFunction(value)) {
            if (typeof originalTemplate !== 'string')
                throw new Error('Cannot use higher-order sections without the original template')

            // The Context class of this file keeps its data on `data`.
            value = value.call(context.data, originalTemplate.slice(token[3], token[5]), subRender)

            if (value != null)
                buffer += value
        }
        // `true`: render the inner tokens in the current context, no child context needed
        else {
            buffer += this.renderTokens(token[4], context, partials, originalTemplate)
        }
        return buffer
    }
}

来一个例子加深理解：

// Example: render a template with a section nested inside another section.
// Inside {{#user}} the names user.name / user.age are not found on the user
// object itself, so the lookup falls back to the enclosing context.
new Writer().render(
`
<h1>{{title}}</h1>
{{#user}}
    <p>{{user.name}},{{user.age}}</p>
    {{#user.hobbies}}
        * <a>{{.}}</a>
    {{/user.hobbies}}
{{/user}}
`, {
    title: 'Info',
    user: {
        name: 'Jack',
        age: 18,
        hobbies: ['football', 'badminton', 'tennis']
    }
})

// tokens: 
// (the token tree parsed from the template above; section tokens carry their
// inner tokens at index 4 and the start index of their closing tag at index 5)
[
    ["text", "\n<h1>", 0, 5],
    ["name", "title", 5, 14],
    ["text", "</h1>\n", 14, 20],
    ["#", "user", 20, 29, [
        ["text", "    <p>", 30, 37],
        ["name", "user.name", 37, 50],
        ["text", ",", 50, 51],
        ["name", "user.age", 51, 63],
        ["text", "</p>\n", 63, 68],
        ["#", "user.hobbies", 72, 89, [
            ["text", "        * <a>", 90, 103],
            ["name", ".", 103, 108],
            ["text", "</a>\n", 108, 113]
        ], 117]
    ], 135]
]

// html:
// (lines holding only a section tag leave no trace in the output)
// 
// <h1>Info</h1>
//     <p>Jack,18</p>
//         * <a>football</a>
//         * <a>badminton</a>
//         * <a>tennis</a>
//

render**基本也没复杂逻辑，只要注意以下3点：

对于section，主要是构造子context（true时不用构造）去递归render；
对于partial，其实是把partial作为新的template去渲染出来；
最终还是调用 rawValue | escapedValue | unescapedValue 这3个基础方法。
结语

对mustache.js的源码解析到这里结束，希望看文章的各位没被我误导；如有不对或有疑问，也请直接回复 😄

creeperyang / blog