WuyiUniversity / forum

五邑大学程序员专用论坛
https://github.com/WuyiUniversity/forum/issues
12 stars 1 forks source link

判断网页是不是cjk语言系-摘取自evernote悦读插件 #26

Open skyhacker2 opened 9 years ago

skyhacker2 commented 9 years ago
//  language {
//  ==========

    //  Heuristically decides whether the current page is written in a CJK
    //  (Chinese / Japanese / Korean) language. It builds a short text sample
    //  from the document title plus randomly picked <p>, <span>, <div> and
    //  <body> contents, then looks for CJK-only punctuation in that sample.
    //  On a hit it sets $D.language to 'cjk'; otherwise $D.language is left
    //  untouched. Takes no arguments and returns nothing.
    $D.detectLanguage = function ()
    {
        //  text <- title
        var _text = $D.document.title;

        //  add text: appends up to 5 samples (max 150 chars each, tags
        //  stripped, whitespace collapsed) taken from the given element list
        var _add_text = function (_elements)
        {
            //  too much -- checked once per list, not once per sample
            if (_text.length > 500) { return; }

            //  add
            for (var _l=_elements.length, _i=0, _ii=Math.min(5, _l); _i<_ii; _i++)
            {
                //  $D.rand is defined elsewhere; presumably it returns a
                //  random integer in [1, _l] -- TODO confirm. The same
                //  element may be picked more than once.
                var _element = _elements[$D.rand(1, _l)-1];

                //  document.body is null for documents without a body;
                //  skip instead of throwing on .innerHTML
                if (!_element) { continue; }

                _text += ' ' + _element.innerHTML
                    .replace(/<([^>]+?)>/gi, '')
                    .replace(/([ \n\r\t]+)/gi, ' ')
                    .slice(0, 150);
            }
        };

        //  elements
        var _paragraphs = $D.document.getElementsByTagName('p'),
            _spans = $D.document.getElementsByTagName('span'),
            _divs = $D.document.getElementsByTagName('div'),
            _body = [$D.document.body];

        //  remove our own divs (ids starting with 'evernote_clearly__')
        var _divs2 = [];
        for (var _i=0, _ii=_divs.length; _i<_ii; _i++)
        {
            var _id = _divs[_i].id;
            if (_id && _id.indexOf && _id.indexOf('evernote_clearly__') === 0) { continue; }
            _divs2.push(_divs[_i]);
        }
        _divs = _divs2;

        //  add
        _add_text(_paragraphs);
        _add_text(_spans);
        _add_text(_divs);
        _add_text(_body);

        //  check: U+3000 ideographic space, U+3001 ideographic comma,
        //  U+3002 ideographic full stop, U+301C wave dash
        if (/[\u3000\u3001\u3002\u301C]/.test(_text))
        {
            $D.language = 'cjk';
        }
    };

//  language }
Jayin commented 9 years ago

cjk语言系是什么东西。。

fritx commented 9 years ago

chinese japanese korean?? 什么原理?

skyhacker2 commented 9 years ago

@fritx 其实就是判断文本里面是否有中文的句号(。)、顿号(、)、全角空格,还有波浪号(〜)