ropensci / tinkr

Convert (R)Markdown files to XML, edit them, write them back as (R)Markdown
https://docs.ropensci.org/tinkr
GNU General Public License v3.0
57 stars 3 forks source link

Improve show method; add nodelist show functions #108

Closed zkamvar closed 5 months ago

zkamvar commented 6 months ago

This PR improves quality of life for people subsetting documents by allowing them to see the context of the nodes they create.

I'm not terribly set on the names, but I've got `show_list()`, `show_block()`, and `show_censor()` (all demonstrated under "Show Functions" below).

I've also included to_md_vec(), allowing users to get markdown vectors from nodelists, which is useful for things like headings that contain markdown elements.

I'm hoping to release this as tinkr 0.3.0 (which does not include #107).

to_md_vec()

library("tinkr")
path <- system.file("extdata", "example1.md", package = "tinkr")
yaml_xml_list <- to_xml(path)
names(yaml_xml_list)
#> [1] "yaml" "body"
# extract the level 3 headers from the body
headers3 <- xml2::xml_find_all(
  yaml_xml_list$body,
  xpath = './/md:heading[@level="3"]', 
  ns = md_ns()
)
# show the headers
print(h3 <- to_md_vec(headers3))
#> [1] "### Getting a list of 50 species from occurrence data"
#> [2] "### Querying the scientific literature"               
#> [3] "### Querying scientific open data"                    
#> [4] "### Conclusion"

Created on 2024-06-14 with reprex v2.1.0

Show Functions

library("tinkr")
path <- system.file("extdata", "show-example.md", package = "tinkr")
y <- yarn$new(path, sourcepos = TRUE)
y$protect_math()$protect_curly()
items <- xml2::xml_find_all(y$body, ".//md:item", tinkr::md_ns())
imgs <- xml2::xml_find_all(y$body, ".//md:image | .//node()[@curly]", 
  tinkr::md_ns())
links <- xml2::xml_find_all(y$body, ".//md:link", tinkr::md_ns())
code <- xml2::xml_find_all(y$body, ".//md:code", tinkr::md_ns())
blocks <- xml2::xml_find_all(y$body, ".//md:code_block", tinkr::md_ns())
# show a list of items 
show_list(links)
#> 
#> 
#> [relative links](#links)
#> 
#> [anchor links]
#> 
#> [anchor links]: https://example.com/anchor
show_list(code)
#> 
#> 
#> `utils::strcapture()`
show_list(blocks)
#> 
#> 
#> ```r
#> sourcepos <- c("2:1-2:33", "4:1-7:7")
#> pattern <- "([[:digit:]]+):([[:digit:]]+)-([[:digit:]]+):([[:digit:]]+)"
#> proto <- data.frame(
#>   linestart = integer(), colstart = integer(),
#>   lineend = integer(), colend = integer()
#> )
#> utils::strcapture(pattern, sourcepos, proto)
#> ```

# show the items in their local structure
show_block(items)
#> 
#> 
#> - kittens
#>   - are
#>     - super
#>     - cute
#>   - have
#>     - teef
#>     - murder mittens
#> - brains
#>   - are
#>     - wrinkly
show_block(links, mark = TRUE)
#> 
#> 
#> [...] [relative links](#links) [...][...] [anchor links] [...]
#> 
#> [anchor links]: https://example.com/anchor
# show the items in the context of the full document, with everything else
# censored (all characters except whitespace are replaced):
show_censor(imgs)
#> 
#> 
#> ## ▇▇▇▇▇
#> 
#> ### ▇▇▇▇▇▇▇▇
#> 
#> ▇▇▇▇ ▇▇▇ ▇▇▇▇ [▇▇▇▇▇▇▇▇ ▇▇▇▇▇](▇▇▇▇▇▇) ▇▇▇ [▇▇▇▇▇▇ ▇▇▇▇▇]▇
#> 
#> ### ▇▇▇▇▇▇
#> 
#> ![kittens are cute](https://loremflickr.com/320/240){alt='a random picture of a kitten'}
#> 
#> ## ▇▇▇▇▇
#> 
#> - ▇▇▇▇▇▇▇
#>   - ▇▇▇
#>     - ▇▇▇▇▇
#>     - ▇▇▇▇
#>   - ▇▇▇▇
#>     - ▇▇▇▇
#>     - ▇▇▇▇▇▇ ▇▇▇▇▇▇▇
#> - ▇▇▇▇▇▇
#>   - ▇▇▇
#>     - ▇▇▇▇▇▇▇
#> 
#> ## ▇▇▇▇
#> 
#> ▇▇▇▇ ▇▇ ▇▇ ▇▇▇▇▇▇▇ ▇▇ ▇▇▇ `▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇` ▇▇▇▇▇▇▇▇
#> 
#> ```r
#> ▇▇▇▇▇▇▇▇▇ ▇▇ ▇▇▇▇▇▇▇▇▇▇▇▇▇ ▇▇▇▇▇▇▇▇▇▇
#> ▇▇▇▇▇▇▇ ▇▇ ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇
#> ▇▇▇▇▇ ▇▇ ▇▇▇▇▇▇▇▇▇▇▇
#>   ▇▇▇▇▇▇▇▇▇ ▇ ▇▇▇▇▇▇▇▇▇▇ ▇▇▇▇▇▇▇▇ ▇ ▇▇▇▇▇▇▇▇▇▇
#>   ▇▇▇▇▇▇▇ ▇ ▇▇▇▇▇▇▇▇▇▇ ▇▇▇▇▇▇ ▇ ▇▇▇▇▇▇▇▇▇
#> ▇
#> ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ ▇▇▇▇▇▇▇▇▇▇ ▇▇▇▇▇▇
#> ```
#> 
#> ## ▇▇▇▇
#> 
#> ▇▇▇▇▇▇ ▇▇▇▇ ▇▇▇ ▇▇ ▇▇▇▇▇▇▇ ▇▇ ▇▇ ▇ ▇▇ ▇ ▇▇ ▇▇▇▇▇ ▇▇▇▇▇ ▇▇▇▇ ▇▇▇▇▇▇ ▇▇▇
#> 
#> ▇▇
#> ▇ ▇ ▇▇ ▇ ▇
#> ▇▇
#> 
#> [▇▇▇▇▇▇ ▇▇▇▇▇]: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇

# You can also adjust the censorship parameters. There are two parameters
# available: the mark, which chooses what character you want to use to
# replace characters (default: `\u2587`); and the regex, which specifies
# characters to replace (default: `[^[:space:]]`, which replaces all
# non-whitespace characters).
# 
# The following will replace everything that is not a whitespace
# or punctuation character with "o" for a very ghostly document
op <- options()
options(tinkr.censor.regex = "[^[:space:][:punct:]]")
options(tinkr.censor.mark = "o")
show_censor(links)
#> 
#> 
#> ## ooooo
#> 
#> ### oooooooo
#> 
#> oooo ooo oooo [relative links](#links) ooo [anchor links].
#> 
#> ### oooooo
#> 
#> ![ooooooo ooo oooo](ooooo://ooooooooooo.ooo/ooo/ooo){ooo='o oooooo ooooooo oo o oooooo'}
#> 
#> ## ooooo
#> 
#> - ooooooo
#>   - ooo
#>     - ooooo
#>     - oooo
#>   - oooo
#>     - oooo
#>     - oooooo ooooooo
#> - oooooo
#>   - ooo
#>     - ooooooo
#> 
#> ## oooo
#> 
#> oooo oo oo ooooooo oo ooo `ooooo::oooooooooo()` oooooooo
#> 
#> ```r
#> ooooooooo <- o("o:o-o:oo", "o:o-o:o")
#> ooooooo <- "([[:ooooo:]]+):([[:ooooo:]]+)-([[:ooooo:]]+):([[:ooooo:]]+)"
#> ooooo <- oooo.ooooo(
#>   ooooooooo = ooooooo(), oooooooo = ooooooo(),
#>   ooooooo = ooooooo(), oooooo = ooooooo()
#> )
#> ooooo::oooooooooo(ooooooo, ooooooooo, ooooo)
#> ```
#> 
#> ## oooo
#> 
#> oooooo oooo ooo oo ooooooo oo $o = oo + o$ ooooo ooooo oooo oooooo oo:
#> 
#> $$
#> o = oo + o
#> $$
#> 
#> [anchor links]: https://example.com/anchor
options(tinkr.censor.regex = NULL)
options(tinkr.censor.mark = NULL)

Created on 2024-06-14 with reprex v2.1.0