cnlog / warehouse

就是个仓库
4 stars 0 forks source link

分词函数如何配置到方案中? #2

Open chzhsh opened 3 years ago

chzhsh commented 3 years ago

怎么使用的,能否指导一下需要怎么配置

cnlog commented 3 years ago

怎么使用的,能否指导一下需要怎么配置 你看一下 rime-easy-en 主页的说明,先安装好rime-easy-en,能在f4选单中看到,说明安装好了,然后再按这个分词功能说明修改easy_en.lua的函数即可

chzhsh commented 3 years ago

下载并安装了https://github.com/BlindingDark/rime-easy-en,还不会修改easy_en.lua的函数,方便贴出修改哪里吗? local is_split_sentence local wordninja_split

--- Run a shell command and return everything it wrote to standard output.
-- @tparam string cmd command line handed to `io.popen`
-- @treturn string the complete captured output
-- Raises (via `assert`) if the pipe cannot be opened or read.
local function capture(cmd)
   local pipe = assert(io.popen(cmd, 'r'))
   local output = assert(pipe:read('*a'))
   pipe:close()
   return output
end

--- Initialise the filter from the schema configuration.
-- Reads the `easy_en/*` options and binds the file-level `wordninja_split`
-- to the selected backend (also sets the file-level `is_split_sentence`):
--   * splitting disabled  -> identity function
--   * `use_wordninja_rs_lua_module` (default when no backend is chosen)
--                          -> `require("wordninja").split`
--   * `use_wordninja_rs`  -> external executable at `easy_en/wordninja_rs_path`
--   * `use_wordninja_py`  -> `python -c ...` using the `wordninja` package
-- @param env filter environment; only `env.engine.schema.config` is used
local function init(env)
   local config = env.engine.schema.config

   is_split_sentence = config:get_bool('easy_en/split_sentence')
   if not is_split_sentence then
      -- Splitting is off: hand the text back untouched.
      wordninja_split = function(sentence)
         return sentence
      end
      return
   end

   local use_wordninja_rs_lua_module = config:get_bool('easy_en/use_wordninja_rs_lua_module')
   local use_wordninja_rs = config:get_bool('easy_en/use_wordninja_rs')
   local use_wordninja_py = config:get_bool('easy_en/use_wordninja_py')
   if (not use_wordninja_rs_lua_module) and (not use_wordninja_rs) and (not use_wordninja_py) then
      -- default use wordninja_rs_lua_module
      use_wordninja_rs_lua_module = true
   end

   if use_wordninja_rs_lua_module then
      local module_path = config:get_string('easy_en/wordninja_rs_lua_module_path')
      -- Guard against a missing option: string.find raises on a nil pattern.
      if module_path and module_path ~= ""
         and not string.find(package.cpath, module_path, 1, true) then
         package.cpath = package.cpath .. ";" .. module_path
      end
      wordninja_split = require("wordninja").split
      return
   end

   if use_wordninja_rs then
      local wordninja_rs_path = config:get_string('easy_en/wordninja_rs_path')
      wordninja_split = function(sentence)
         -- Escape embedded single quotes so text such as "don't" can neither
         -- break the quoting nor inject shell commands. Input without an
         -- apostrophe produces exactly the command line it did before.
         local escaped = sentence:gsub("'", "'\\''")
         return capture(wordninja_rs_path .. " -n '" .. escaped .. "'")
      end
      return
   end

   if use_wordninja_py then
      wordninja_split = function(sentence)
         -- Pass the text as hex digits: they are inert in every shell and in
         -- a Python string literal, so no quoting/escaping issue can arise.
         local hex = sentence:gsub(".", function(ch)
            return string.format("%02x", ch:byte())
         end)
         return capture([[python -c "import sys, binascii; import wordninja; sys.stdout.write(' '.join(wordninja.split(binascii.unhexlify(']] .. hex .. [[').decode('utf-8'))))"]])
      end
      return
   end
end

--- Filter that post-processes easy_en candidates.
-- Candidates whose comment contains "☯" are replaced by sentence candidates
-- built from `wordninja_split(cand.text)`: a lower-cased variant is emitted
-- first when it differs from the split text, then the split text itself,
-- each with a trailing space and a "💡" comment. All other candidates are
-- re-emitted as "word" candidates with a trailing space appended.
-- NOTE(review): when `is_split_sentence` is falsy, "☯" candidates are not
-- yielded at all; behaviour kept as in the original — confirm it is intended.
-- @param input candidate stream; iterated with `input:iter()`
-- @param env   filter environment (not used here)
local function enhance_filter(input, env)
   for cand in input:iter() do
      if cand.comment:find("☯") then
         if is_split_sentence then
            -- Locals: the original assigned these as accidental globals.
            local sentence = wordninja_split(cand.text)
            local lower_sentence = string.lower(sentence)

            if lower_sentence ~= sentence then
               yield(Candidate("sentence", cand.start, cand._end, lower_sentence .. " ", "💡"))
            end

            yield(Candidate("sentence", cand.start, cand._end, sentence .. " ", "💡"))
         end
      else
         yield(Candidate("word", cand.start, cand._end, cand.text .. " ", cand.comment))
      end
   end
end

-- Module export: the filter is published as a table holding its init hook
-- and its filter function.
local filter = {
   init = init,
   func = enhance_filter,
}

return { enhance_filter = filter }

cnlog commented 3 years ago

呃,他们工程升级了,和我用的版本不一样,我用的是这个版本的,你对照看看 easy_en.lua

--- Run a shell command and return everything it wrote to standard output.
-- @tparam string cmd command line handed to `io.popen`
-- @treturn string the complete captured output
-- Raises (via `assert`) if the pipe cannot be opened or read.
local function capture(cmd)
   local pipe = assert(io.popen(cmd, 'r'))
   local output = assert(pipe:read('*a'))
   pipe:close()
   return output
end

--- Debug hook; every statement is commented out, so this is a no-op.
-- The disabled lines are kept one per line so that the `--` markers no
-- longer swallow the closing `end`.
-- @param info value that the disabled code would write out / print
local function l_debuger(info)
   --local file = assert(io.open('out.txt', 'w+'))
   --assert(file:write(info))
   --file:close()
   --print( "ERROR:", info )
   --print(_G)
end

--- Split a run-together sentence with the Python `wordninja` package.
-- Runs `python -c ...` through `capture` and returns its output (the words
-- joined by single spaces).
-- The text is passed as hex digits and decoded inside Python: hex is inert
-- in every shell and inside a Python string literal, so an apostrophe or
-- other special character in `sentence` can neither break the command nor
-- inject code (the original spliced the raw text into the command line).
-- @tparam string sentence text to split
-- @treturn string whatever the Python process printed
local function split_sentence_wordninja_py(sentence)
   local hex = sentence:gsub(".", function(ch)
      return string.format("%02x", ch:byte())
   end)
   return capture([[python -c "import sys, binascii; import wordninja; sys.stdout.write(' '.join(wordninja.split(binascii.unhexlify(']] .. hex .. [[').decode('utf-8'))))"]])
end

--- Split a sentence through `l_word_split`.
-- The earlier implementation that shelled out to the wordninja_rs
-- executable is kept below as a disabled line; the live code calls
-- `l_word_split(0, sentence)` instead.
-- NOTE(review): `l_word_split` is not defined in this file — presumably a
-- global supplied by the host application; the meaning of the leading `0`
-- argument is not visible here. Confirm both.
-- Line breaks are restored so the `--` comments no longer swallow the live
-- call, the `return` and the closing `end`.
-- @tparam string sentence text to split
-- @param wordninja_rs_path unused by the live code (only the disabled line
--        referenced it); kept for call compatibility
-- @return the value returned by `l_word_split`
local function split_sentence_wordninja_rs(sentence, wordninja_rs_path)
   --return capture(wordninja_rs_path .. " -n " .. sentence)
   --here
   local result = l_word_split(0, sentence)
   --local result = xpcall( l_word_split(0,sentence), l_debuger )
   return result
end

--- Split a sentence with the backend selected in the schema configuration.
-- When `easy_en/use_wordninja_rs` is set, the sentence and the configured
-- `easy_en/wordninja_rs_path` go to `split_sentence_wordninja_rs`; otherwise
-- the sentence goes to `split_sentence_wordninja_py`.
-- @tparam string sentence text to split
-- @param env filter environment; only `env.engine.schema.config` is used
-- @return the selected backend's result
local function split_sentence(sentence, env)
   if not env.engine.schema.config:get_bool('easy_en/use_wordninja_rs') then
      return split_sentence_wordninja_py(sentence)
   end

   local rs_path = env.engine.schema.config:get_string('easy_en/wordninja_rs_path')
   return split_sentence_wordninja_rs(sentence, rs_path)
end

--- Filter that post-processes easy_en candidates.
-- Candidates whose comment contains "☯" are replaced by sentence candidates
-- built from `split_sentence(cand.text, env)`: a lower-cased variant is
-- emitted first when it differs from the split text, then the split text
-- itself, each with a trailing space and a "💡" comment. All other
-- candidates are re-emitted as "word" candidates with a trailing space.
-- NOTE(review): when `easy_en/split_sentence` is falsy, "☯" candidates are
-- not yielded at all; behaviour kept as in the original — confirm intended.
-- @param input candidate stream; iterated with `input:iter()`
-- @param env   filter environment; `env.engine.schema.config` is read here
local function enhance_filter(input, env)
   local is_split_sentence = env.engine.schema.config:get_bool('easy_en/split_sentence')

   for cand in input:iter() do
      if cand.comment:find("☯") then
         if is_split_sentence then
            -- Locals: the original assigned these as accidental globals.
            local sentence = split_sentence(cand.text, env)
            local lower_sentence = string.lower(sentence)

            if lower_sentence ~= sentence then
               yield(Candidate("sentence", cand.start, cand._end, lower_sentence .. " ", "💡"))
            end

            yield(Candidate("sentence", cand.start, cand._end, sentence .. " ", "💡"))
         end
      else
         yield(Candidate("word", cand.start, cand._end, cand.text .. " ", cand.comment))
      end
   end
end

-- Module export: publishes the filter function under the key `enhance_filter`.
local exports = {
   enhance_filter = enhance_filter,
}

return exports

chzhsh commented 3 years ago

期待你出个新的兼容的lua,使用wordninja_words.txt维护方便些。 另外,安装试用昨日版weasel-0.14.5.0-installer.exe发现如果patch自定义字体候选框内字符显示不全,请看看能否修复 QQ图片20210423161035

chzhsh commented 3 years ago

image 这是不使用自定义字体的显示,只是B行的最后个虎显示不了

cnlog commented 3 years ago

这应该不是cache不一致的问题,因为已经增加字体大小做hash了,你得告诉我你的配置和使用什么字体,要不然我没法测试的。你可以把配置打包放上来。

chzhsh commented 3 years ago

我重装了系统没有安装任何自定义字体,问题依旧。我确定bug的操作过程: 建一个weasel.custom.yaml有自定义内容但无“font_face:”项或者为空(必须有这个文件才能调用输入法设定窗口的介面风格设定),重新部署,此时候选框中文显示正常。如果要使用自定义字体,无论修改weasel.yaml文件“font_face:”项还是在weasel.custom.yaml添加“font_face:”为自定义字体,只要不是默认的“Microsoft YaHei”,即便是win10系统自带的字体都会重现。

chzhsh commented 3 years ago

style: color_scheme: aqua font_face: Microsoft YaHei <----