JuliaInterop / NBInclude.jl

import code from IJulia Jupyter notebooks into Julia programs
Other
117 stars 17 forks source link

Filtering based on Tags #27

Open arikheinss opened 2 years ago

arikheinss commented 2 years ago

Hi there.

With JupyterLab's ability to add arbitrary metadata tags to cells, I think it would be useful if we could filter the included cells based on these tags.

I have already made a modified version of the module that implements this feature through two new keyword arguments of the `nbinclude` function, `excludeTags` and `includeOnlyTags`, complete with documentation and tests. However, since I don't know much about how GitHub works, I am not sure how to properly propose these changes. For now, I will include the changed function in this issue.

Any feedback is appreciated.

# Normalize a user-supplied tag filter to a `Set{String}`; `nothing` means "no filter".
_tagset(::Nothing) = nothing
# A lone tag must be wrapped explicitly: `Set("abc")` would be a set of `Char`s.
_tagset(tag::Union{AbstractString,Symbol}) = Set{String}((string(tag),))
_tagset(tags) = Set{String}(string(t) for t in tags)

# Return the tags stored in a cell's metadata, or an empty list when the cell has
# no "metadata" or no "tags" entry.
function _cell_tags(cell)
    metadata = get(cell, "metadata", nothing)
    metadata === nothing && return String[]
    return get(metadata, "tags", String[])
end

# Decide whether a cell passes the tag filters: it must carry none of the `excluded`
# tags and all of the `required` tags (a filter that is `nothing` is ignored).
function _tags_allow_cell(cell, required, excluded)
    tags = _cell_tags(cell)
    if excluded !== nothing && any(t -> t in excluded, tags)
        return false
    end
    if required !== nothing && !all(t -> t in tags, required)
        return false
    end
    return true
end

"""
    nbinclude(m::Module, path::AbstractString; renumber=false, counters=1:typemax(Int),
              regex=r"", anshook=identity, softscope=false,
              includeOnlyTags=nothing, excludeTags=nothing)

Evaluate the code cells of the Jupyter notebook at `path` in module `m` and return the
result of the last evaluated cell (`nothing` if no cell was evaluated).

The notebook must have `nbformat == 4` and declare `julia` as its language; otherwise
an error is thrown.

# Keywords
- `renumber`: label each cell with its running position among the non-blank code cells
  instead of the `execution_count` stored in the notebook.
- `counters`: only cells whose running position is `in counters` are evaluated.
- `regex`: only cells whose source matches `regex` are evaluated.
- `anshook`: called with the result of every evaluated cell.
- `softscope`: forwarded unchanged to the cell evaluator.
- `includeOnlyTags`: if not `nothing`, only cells carrying *all* of these tags are
  evaluated.
- `excludeTags`: if not `nothing`, cells carrying *any* of these tags are skipped.

A cell without a `"tags"` metadata entry is treated like a cell with an empty tag list.
"""
function nbinclude(m::Module, path::AbstractString;
                   renumber::Bool=false,
                   counters = 1:typemax(Int),
                   regex::Regex = r"",
                   anshook = identity,
                   softscope::Bool=false,
                   includeOnlyTags::_TagListType = nothing,
                   excludeTags::_TagListType =nothing,
    )
    # act like include(path), in that path is relative to current file:
    # for precompilation, invalidate the cache if the notebook changes:
    path, prev = @static if VERSION >= v"0.7.0-DEV.3483" # julia#25455
        Base._include_dependency(m, path)
    else
        Base._include_dependency(path)
    end

    # Use fresh locals rather than rebinding the keyword arguments to a new type.
    required = _tagset(includeOnlyTags)
    excluded = _tagset(excludeTags)

    # similar to julia#22588, we assume that all nodes
    # where you are running nbinclude can access the filesystem
    nb = open(JSON.parse, path, "r")

    # check for an acceptable notebook:
    nb["nbformat"] == 4 || error("unrecognized notebook format ", nb["nbformat"])
    lang = lowercase(nb["metadata"]["language_info"]["name"])
    lang == "julia" || error("notebook is for unrecognized language $lang")

    ret = nothing
    counter = 0 # keep our own cell counter to handle un-executed notebooks.
    for cell in nb["cells"]
        cell["cell_type"] == "code" || continue
        isempty(cell["source"]) && continue
        s = join(cell["source"])
        isempty(strip(s)) && continue # Jupyter doesn't number empty cells
        # The counter advances before any filtering, so `counters` refers to the
        # position among all non-blank code cells, including skipped ones.
        counter += 1
        occursin(shell_or_help, s) && continue
        counter in counters && occursin(regex, s) || continue
        _tags_allow_cell(cell, required, excluded) || continue

        cellnum = renumber ? string(counter) :
                  cell["execution_count"] === nothing ? string('+', counter) :
                  string(cell["execution_count"])

        ret = Base.task_local_storage(_in_nbinclude, true) do
            my_include_string(m, s, string(path, ":In[", cellnum, "]"), prev, softscope)
        end
        anshook(ret)
    end
    return ret
end