com-lihaoyi / upickle

uPickle: a simple, fast, dependency-free JSON & Binary (MessagePack) serialization library for Scala
https://com-lihaoyi.github.io/upickle
MIT License
722 stars 165 forks source link

Add a column-compacted indented output JSON format #503

Open lihaoyi opened 1 year ago

lihaoyi commented 1 year ago

Currently the only options for rendering JSON are minified:

{"a":{"a":1,"b":2,"c":3,"d":4,"e":5,"f":6},"b":{"a":1,"b":2,"c":3,"d":4,"e":5,"f":6},"c":{"a":1,"b":2,"c":3,"d":4,"e":5,"f":6},"d":{"a":1,"b":2,"c":3,"d":4,"e":5,"f":6},"e":{"a":1,"b":2,"c":3,"d":4,"e":5,"f":6},"f":{"a":1,"b":2,"c":3,"d":4,"e":5,"f":6}}

and indented/fully-vertically-expanded:

{
  "a": {
    "a": 1,
    "b": 2,
    "c": 3,
    "d": 4,
    "e": 5,
    "f": 6
  },
  "b": {
    "a": 1,
    "b": 2,
    "c": 3,
    "d": 4,
    "e": 5,
    "f": 6
  },
  "c": {
    "a": 1,
    "b": 2,
    "c": 3,
    "d": 4,
    "e": 5,
    "f": 6
  },
  "d": {
    "a": 1,
    "b": 2,
    "c": 3,
    "d": 4,
    "e": 5,
    "f": 6
  },
  "e": {
    "a": 1,
    "b": 2,
    "c": 3,
    "d": 4,
    "e": 5,
    "f": 6
  },
  "f": {
    "a": 1,
    "b": 2,
    "c": 3,
    "d": 4,
    "e": 5,
    "f": 6
  }
}

We should add a third option where you provide a max column width, and it one-lines the output JSON until it reaches the column width before wrapping:

# max columns = 60                                         |
{
  "a": {"a": 1, "b": 2, "c": 3, "d": 4, "e": 5, "f": 6},
  "b": {"a": 1, "b": 2, "c": 3, "d": 4, "e": 5, "f": 6},
  "c": {"a": 1, "b": 2, "c": 3, "d": 4, "e": 5, "f": 6},
  "d": {"a": 1, "b": 2, "c": 3, "d": 4, "e": 5, "f": 6},
  "e": {"a": 1, "b": 2, "c": 3, "d": 4, "e": 5, "f": 6},
  "f": {"a": 1, "b": 2, "c": 3, "d": 4, "e": 5, "f": 6}
}

This would be the trickiest of the three to implement efficiently compared to minification or naive multi-lining, but it would be the superior format for anything meant for human reading (e.g. logs, config files, etc.). This can probably be done with the same algorithm we use in com-lihaoyi/pprint, described at http://www.lihaoyi.com/post/CompactStreamingPrettyPrintingofHierarchicalData.html, though it would need to be adapted to work in a "push" style to fit into a upickle.core.Visitor.

lihaoyi commented 1 year ago

Here's an old implementation I found that attempts to do this, for a 7-year-old version of uPickle. It is probably not fully correct, but it worked well enough at the time.


/**
  * Mutable builder for a `fansi.Str`.
  *
  * Plain characters are collected in `sb` while `state` receives exactly one
  * color-state entry for every character appended, so the two stay the same
  * length and can be zipped back together by `toStr`.
  */
class FansiBuilder {
  val sb = new StringBuilder
  val state = new Aggregator[fansi.Str.State]()

  /** Appends `s` with no attributes applied. */
  def append(s: String): Unit = append(s, fansi.Attrs.Empty)

  /** Appends `s`, recording `strState` once per character of `s`. */
  def append(s: String, strState: fansi.Attrs): Unit = {
    sb.append(s)
    s.foreach(_ => state.append(strState.applyMask))
  }

  /** Appends the single character `c` with no attributes applied. */
  def append(c: Char): Unit = append(c, fansi.Attrs.Empty)

  /** Appends the single character `c`, recording `strState` for it. */
  def append(c: Char, strState: fansi.Attrs): Unit = {
    sb.append(c)
    state.append(strState.applyMask)
  }

  /** Appends an already-colored string, carrying over its per-character colors. */
  def append(s: fansi.Str) = {
    sb.append(s.plainText)
    s.getColors.foreach(color => state.append(color))
  }

  /** Builds a `fansi.Str` from the characters and color states gathered so far. */
  def toStr = fansi.Str.fromArrays(sb.toArray, state.toArray)
}

/**
  * Fork of the renderer in upickle.json's renderer, except...
  *
  * - It renders colored fansi.Str's instead of normal Strings
  *
  * - It always puts the `key` key first in a dictionary, since that's the
  *   most important in our logs, and sorts the other keys for consistency
  *
  * - It attempts to be clever with wrapping so if a JSON object can fit in
  *   one line (considering indentation etc), it one-lines it. Arrays are
  *   always rendered one element per line when `indent` is non-zero.
  *
  * @param literalColor attributes applied to numbers and to strings (object keys included)
  * @param keywordColor attributes applied to `null`, `true` and `false`
  * @param maxWidth     widest line, in characters, an object may occupy and still be
  *                     one-lined; defaults to the previously hard-coded 80
  */
class ColorJsonRenderer(literalColor: fansi.Attrs = fansi.Color.Green,
                        keywordColor: fansi.Attrs = fansi.Color.Yellow,
                        maxWidth: Int = 80) {

  /**
    * Renders `jv` into `sb`.
    *
    * @param sb          builder receiving the colored output
    * @param depth       nesting depth of `jv`; multiplied by `indent` to get its indentation
    * @param jv          value to render
    * @param indent      spaces per nesting level; 0 disables line breaks entirely
    * @param startOffset characters already on the current line ahead of `jv`,
    *                    not counting indentation
    */
  final def render(sb: FansiBuilder, depth: Int, jv: Js.Value, indent: Int, startOffset: Int): Unit =
    jv match {
      case Js.Null => sb.append(keywordColor("null"))
      case Js.True => sb.append(keywordColor("true"))
      case Js.False => sb.append(keywordColor("false"))
      // Whole numbers are printed without a trailing ".0"
      case Js.Num(n) => sb.append(if (n == n.toLong) n.toLong.toString else n.toString, literalColor)
      case Js.Str(s) => renderString(sb, s)
      case Js.Arr(vs@_*) => renderArray(sb, depth, vs, indent)
      case Js.Obj(vs@_*) => renderObject(sb, depth, canonicalizeObject(vs), indent, startOffset)
    }

  /**
    * Orders the entries of an object: entries named `key` first, then the
    * rest sorted by name. Every entry is kept, including repeated `key`
    * entries, so nothing present in the input is dropped from the output.
    */
  def canonicalizeObject(vs: Seq[(String, Js.Value)]): Iterator[(String, Js.Value)] = {
    val (keyEntries, others) = vs.partition(_._1 == "key")
    (keyEntries ++ others.sortBy(_._1)).iterator
  }

  /** Renders `s` as a quoted, escaped JSON string without escaping non-ASCII characters. */
  def renderString(sb: FansiBuilder, s: String): Unit =
    escape(sb, s, false)

  /**
    * Starts a new line indented by `indent * depth` spaces.
    * Does nothing when `indent` is 0, which is how one-lined output is produced.
    */
  final def renderIndent(sb: FansiBuilder, depth: Int, indent: Int): Unit = {
    if (indent != 0) {
      sb.append('\n')
      for(_ <- 0 until (indent * depth)) sb.append(' ')
    }
  }

  /**
    * Renders an array with each element on its own line (when `indent` is
    * non-zero). Elements start their line, so they are rendered with a
    * start offset of 0.
    */
  final def renderArray(sb: FansiBuilder, depth: Int, vs: Seq[Js.Value], indent: Int): Unit = {
    if (vs.isEmpty) sb.append("[]")
    else {
      sb.append('[')
      // Iterate rather than index so linear sequences are not traversed repeatedly
      var first = true
      for (v <- vs) {
        if (first) first = false else sb.append(',')
        renderIndent(sb, depth + 1, indent)
        render(sb, depth + 1, v, indent, 0)
      }
      renderIndent(sb, depth, indent)
      sb.append(']')
    }
  }

  /**
    * Renders an object, on a single line if it fits within `maxWidth` and
    * one entry per line otherwise.
    *
    * Every entry is rendered into its own builder first so that the width of
    * the one-lined form is known before anything is written to `sb`.
    *
    * @param sb          builder receiving the colored output
    * @param depth       nesting depth of the object
    * @param it0         entries to render, in output order
    * @param indent      spaces per nesting level; 0 disables line breaks entirely
    * @param startOffset characters already on the current line ahead of the
    *                    opening brace, not counting indentation
    */
  final def renderObject(sb: FansiBuilder,
                         depth: Int,
                         it0: Iterator[(String, Js.Value)],
                         indent: Int,
                         startOffset: Int): Unit = {
    if (!it0.hasNext) sb.append("{}")
    else {
      val items = it0.map { case (k, v) =>
        val keyBuilder = new FansiBuilder
        renderString(keyBuilder, k)
        val fansiK = keyBuilder.toStr
        val valueBuilder = new FansiBuilder
        render(valueBuilder, depth + 1, v, indent, fansiK.length + 4)
        (fansiK, valueBuilder.toStr)
      }.toArray

      // Each entry costs its rendered key (quotes and escapes included), its
      // value, and 4 more characters: ": " plus ", ". The two braces take the
      // place of the separator the last entry does not need, so the sum is
      // the exact width of the one-lined form.
      val inlineLength = items.iterator.map { case (k, v) => k.length + v.length + 4 }.sum
      // The opening brace sits after this level's indentation and whatever
      // precedes it on the line, so both count against the width limit.
      val indent2 = if (indent * depth + startOffset + inlineLength > maxWidth) indent else 0

      sb.append('{')
      var first = true
      for ((k, v) <- items) {
        if (first) first = false
        else {
          sb.append(',')
          if (indent2 == 0) sb.append(' ')
        }
        renderIndent(sb, depth + 1, indent2)
        sb.append(k)
        sb.append(':')
        if (indent != 0) sb.append(' ')
        sb.append(v)
      }
      renderIndent(sb, depth, indent2)
      sb.append('}')
    }
  }

  /**
    * Writes `s` to `sb` as a double-quoted JSON string in `literalColor`.
    *
    * Quotes, backslashes and the common control characters get their short
    * escapes; any other character below space is written as a four-digit
    * hex escape.
    *
    * @param unicode when true, characters above `~` are hex-escaped as well
    */
  final def escape(sb: FansiBuilder, s: String, unicode: Boolean): Unit = {
    sb.append("\"", literalColor)
    var i = 0
    val len = s.length
    while (i < len) {
      (s.charAt(i): @switch) match {
        case '"' => sb.append("\\\"", literalColor)
        case '\\' => sb.append("\\\\", literalColor)
        case '\b' => sb.append("\\b", literalColor)
        case '\f' => sb.append("\\f", literalColor)
        case '\n' => sb.append("\\n", literalColor)
        case '\r' => sb.append("\\r", literalColor)
        case '\t' => sb.append("\\t", literalColor)
        case c =>
          if (c < ' ' || (c > '~' && unicode)) {
            sb.append("\\u%04x" format c.toInt, literalColor)
          } else {
            sb.append(c, literalColor)
          }
      }
      i += 1
    }
    sb.append("\"", literalColor)
  }
}