jgm / pandoc

Universal markup converter
https://pandoc.org
Other
33.87k stars 3.34k forks source link

More consistent superscript range for in-text citation between docx-to-md and md-to-md. #9784

Open jiucenglou opened 3 months ago

jiucenglou commented 3 months ago

Explain the problem. I am using an md->docx->md workflow. The original md is normalized by calling `./pandoc manuscript.md -F ./pandoc-crossref --citeproc --bibliography=topic.bib --csl=acs.csl --wrap=preserve -t markdown-citations -s -o manuscript_reference.md`. In the normalized md, an in-text citation appears as `statement^[11](#ref-author-year-title-key)^`. In the docx->md output, the in-text citation appears as `statement[^11^](#ref-author-year-title-key)`. Could these two superscript styles be made more consistent, so that when I diff the files I can see the actual revisions more easily? :D

Pandoc version? Latest 3.2.

jiucenglou commented 3 months ago

For now, the following Lua script can be used to re-superscript citations from `[^11^](#...)` to `^[11](#...)^`:

lpeg = require('lpeg')

-- For better performance we put these functions in local variables:
local P, S, R, Cf, Cc, Ct, V, Cs, Cg, Cb, B, C, Cmt =
  lpeg.P, lpeg.S, lpeg.R, lpeg.Cf, lpeg.Cc, lpeg.Ct, lpeg.V,
  lpeg.Cs, lpeg.Cg, lpeg.Cb, lpeg.B, lpeg.C, lpeg.Cmt

--- Make a pattern optional.
-- Returns `p^-1`; for an LPeg pattern this matches at most one occurrence
-- of `p`.
local function maybe(p)
  local optional = p^-1
  return optional
end
-- Building blocks for numeric literals.
local digits = R('09')^1      -- one or more decimal digits
local mpm = maybe(S('+-'))    -- optional leading '+' or '-'
local dot = '.'               -- plain string; coerced to a pattern when multiplied
local exp = S('eE')           -- exponent marker
-- Integer: optional sign followed by digits.
local int = mpm * digits
-- Float: optional sign, digits, optional ".digits" fraction,
-- optional exponent with its own optional sign.
local float = mpm * digits
  * maybe(dot * digits)
  * maybe(exp * mpm * digits)

-- A single horizontal whitespace character (space or tab).
local sp = S(' \t')

--- Surround `pat` with optional runs of spaces/tabs on both sides.
local function space(pat)
  local padding = sp^0
  return padding * pat * padding
end

-- Whitespace-padded integer and float patterns; the matched text is handed
-- to `tonumber`, so the capture is a number rather than a string.
local intc = space(int / tonumber)
local floatc = space(float / tonumber)

--- Build a pattern for one or more `pat` items separated by commas.
-- Each item may be surrounded by spaces/tabs (via `space`).
local function list_separated_by_comma(pat)
  local item = space(pat)
  local more_items = (',' * item)^0
  return item * more_items
end

--- Build a pattern for one or more `pat` items, each optionally
-- surrounded by spaces/tabs (via `space`).
local function list_separated_by_space(pat)
  local item = space(pat)
  local one_or_more = item^1
  return one_or_more
end

--- Remove every entry from `tbl` in place.
-- Assigning nil to existing keys during a `pairs` traversal is permitted,
-- so the table can be emptied while it is being iterated.
local function clear(tbl)
  for key, _ in pairs(tbl) do
    tbl[key] = nil
  end
end

-- Re-superscript in-text citations
-- from [^11^](#...) to ^[11](#...)^
--
-- Usage: lua -llpeg resuperscript_in_text_citation.lua FILE.md
-- The file named by the first command-line argument is rewritten in place.

-- Citation number: one or more digits.
local cite_number = lpeg.C(lpeg.R"09"^1)
-- Link target after the '#'. NOTE: "-_" is a two-character *range* for
-- lpeg.R (every byte from '-' through '_'), not just those two characters.
-- It is kept unchanged so that every key matched before still matches.
local cite_target = lpeg.C(lpeg.R("09","az","AZ","-_")^1)
-- One citation of the form [^N^](#key), replaced by ^[N](#key)^.
-- In the replacement string, %1 and %2 stand for the two captures above.
local single_cite = (lpeg.P"[^" * cite_number * lpeg.P"^]"
  * lpeg.P"(#" * cite_target * lpeg.P")") / "^[%1](#%2)^"
-- Scan the whole line: at each position either rewrite a citation or copy
-- one character through. Unlike a start-anchored match, an unrelated '['
-- (e.g. an ordinary link) no longer prevents later citations on the same
-- line from being rewritten. This pattern always matches and yields the
-- line with substitutions applied.
local pattern = lpeg.Cs((single_cite + lpeg.P(1))^0)

local file_path = arg and arg[1]
if not file_path then
  io.stderr:write("usage: lua -llpeg resuperscript_in_text_citation.lua FILE.md\n")
  os.exit(1)
end

-- Read and convert every line before reopening the file for writing, so the
-- input is fully consumed before it is truncated.
local lines = {}
local f = assert(io.open(file_path))
for line in f:lines() do
  lines[#lines + 1] = pattern:match(line)
end
f:close()

f = assert(io.open(file_path, "w"))
for _, line in ipairs(lines) do
  f:write(line, "\n")
end
f:close()

Run it with the command line `lua -llpeg resuperscript_in_text_citation.lua draft_sentence_per_line.md`.