acd / lua-yaml

LibYAML binding for Lua
MIT License
21 stars 7 forks source link

When a string value is too long it's being serialized as "binary" #4

Open subnetmarco opened 9 years ago

subnetmarco commented 9 years ago

I have a YAML file (attached) with a multiline nginx property. When I parse it and dump it back unchanged, the multiline nginx property is serialized as "binary" instead of as a string:

local yaml = require "yaml"

--- Read the entire content of a file, opened in binary mode.
-- @tparam string path path of the file to read
-- @treturn string|nil the file content, or nil when the file cannot be opened
local function read_file(path)
  local handle = io.open(path, "rb")
  if not handle then
    return nil
  end

  local data = handle:read("*all")
  handle:close()
  return data
end

--- Write a value to a file opened in "w" mode.
-- Raises an error carrying the system message when the file cannot be
-- opened or when the write fails.
-- @tparam string path path of the file to write
-- @tparam string|number value content to write
local function write_to_file(path, value)
  -- io.open returns nil plus a message on failure; assert turns that into a
  -- readable error instead of "attempt to index a nil value" further down.
  local file = assert(io.open(path, "w"))
  local ok, err = file:write(value)
  -- Close before reporting so the handle is not left open on a failed write.
  file:close()
  if not ok then
    error(err, 2)
  end
end

-- Load the YAML document and dump it straight back to the same file.
-- read_file returns nil when the file cannot be opened, so fail here with a
-- clear message rather than handing nil to yaml.load.
local yaml_path = "kong_TEST.yml"
local yaml_source = assert(read_file(yaml_path), "cannot read " .. yaml_path)
local yaml_value = yaml.load(yaml_source)
write_to_file(yaml_path, yaml.dump(yaml_value))

This is the original YAML file:

# Enabled plugins, in this order
plugins_enabled:
  - authentication
  - ratelimiting
  - networklog

# Specify the DAO to use
database: cassandra

# Databases configuration
databases_available:
  cassandra:
    properties:
      hosts: "127.0.0.1"
      port: 9042
      timeout: 1000
      keyspace: kong_tests
      keepalive: 60000

# Cache configuration
cache:
  expiration: 5 # In seconds

nginx: |
  worker_processes auto;
  error_log logs/error.log debug;
  worker_rlimit_nofile 84280;
  daemon on;
  pid nginx.pid;

  env KONG_CONF;
  env KONG_HOME;

  events {
    worker_connections 20480;
  }

  http {
    lua_package_path "/Users/marco/git/kong/src/?.lua;;";
    lua_code_cache on;

    access_log logs/access.log;

    underscores_in_headers on;
    access_log on;
    tcp_nopush on;

    # Timeouts
    keepalive_timeout 60s;
    client_header_timeout 60s;
    client_body_timeout 60s;
    send_timeout 60s;
    reset_timedout_connection on;

    # Max Client request size
    client_max_body_size 50m;

    # Proxy buffers
    proxy_buffer_size 128k;
    proxy_buffers 4 256k;
    proxy_busy_buffers_size 256k;

    # Proxy SSL
    proxy_ssl_server_name on;

    # Timer properties
    lua_max_running_timers 4096;
    lua_max_pending_timers 16384;

    resolver 8.8.8.8;
    charset UTF-8;

    lua_shared_dict cache 512m;

    init_by_lua "kong = require 'kong'; kong.init()";

    server {
      listen 8000;

      location /robots.txt {
        return 200 "User-agent: *\nDisallow: /";
      }

      location / {
        # Assigns the default MIME-type to be used for files where the
        # standard MIME map doesn't specify anything.
        default_type 'text/plain';

        # This property will be used later by proxy_pass
        set $backend_url nil;
        set $querystring nil;

        # Authenticate the user and load the API info
        access_by_lua "kong.access()";

        # Proxy the request
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_pass $backend_url;

        # Add additional response headers
        header_filter_by_lua "kong.header_filter()";

        # Change the response body
        body_filter_by_lua "kong.body_filter()";

        # Log the request
        log_by_lua "kong.log()";
      }

      error_page 500 /500.html;
      location = /500.html {
        internal;
        content_by_lua '
          local utils = require "kong.tools.utils"
          utils.show_error(ngx.status, "Ops, an error occurred (╯°□°)╯")';
      }
    }

    server {
      listen 8001;

      location / {
        default_type application/json;
        content_by_lua '
          require("lapis").serve("kong.web.app")
        ';
      }

      location /static/ {
        alias static/;
      }

      location /admin/ {
        alias admin/;
      }

      location /favicon.ico {
        alias static/favicon.ico;
      }
    }
  }

And this is the output. As you can see, the nginx property is now binary:


---
plugins_enabled:
- authentication
- ratelimiting
- networklog
nginx: !!binary d29ya2VyX3Byb2Nlc3NlcyBhdXRvOyBlcnJvcl9sb2cgbG9ncy9lcnJvci5sb2cgZGVidWc7IHdvcmtlcl9ybGltaXRfbm9maWxlIDg0MjgwOyBkYWVtb24gb247IHBpZCBuZ2lueC5waWQ7CmVudiBLT05HX0NPTkY7IGVudiBLT05HX0hPTUU7CmV2ZW50cyB7CiAgd29ya2VyX2Nvbm5lY3Rpb25zIDIwNDgwOwp9Cmh0dHAgewogIGx1YV9wYWNrYWdlX3BhdGggIi9Vc2Vycy9tYXJjby9naXQva29uZy9zcmMvPy5sdWE7OyI7CiAgbHVhX2NvZGVfY2FjaGUgb247CgogIGFjY2Vzc19sb2cgbG9ncy9hY2Nlc3MubG9nOwoKICB1bmRlcnNjb3Jlc19pbl9oZWFkZXJzIG9uOwogIGFjY2Vzc19sb2cgb247CiAgdGNwX25vcHVzaCBvbjsKCiAgIyBUaW1lb3V0cwogIGtlZXBhbGl2ZV90aW1lb3V0IDYwczsKICBjbGllbnRfaGVhZGVyX3RpbWVvdXQgNjBzOwogIGNsaWVudF9ib2R5X3RpbWVvdXQgNjBzOwogIHNlbmRfdGltZW91dCA2MHM7CiAgcmVzZXRfdGltZWRvdXRfY29ubmVjdGlvbiBvbjsKCiAgIyBNYXggQ2xpZW50IHJlcXVlc3Qgc2l6ZQogIGNsaWVudF9tYXhfYm9keV9zaXplIDUwbTsKCiAgIyBQcm94eSBidWZmZXJzCiAgcHJveHlfYnVmZmVyX3NpemUgMTI4azsKICBwcm94eV9idWZmZXJzIDQgMjU2azsKICBwcm94eV9idXN5X2J1ZmZlcnNfc2l6ZSAyNTZrOwoKICAjIFByb3h5IFNTTAogIHByb3h5X3NzbF9zZXJ2ZXJfbmFtZSBvbjsKCiAgIyBUaW1lciBwcm9wZXJ0aWVzCiAgbHVhX21heF9ydW5uaW5nX3RpbWVycyA0MDk2OwogIGx1YV9tYXhfcGVuZGluZ190aW1lcnMgMTYzODQ7CgogIHJlc29sdmVyIDguOC44Ljg7CiAgY2hhcnNldCBVVEYtODsKCiAgbHVhX3NoYXJlZF9kaWN0IGNhY2hlIDUxMm07CgogIGluaXRfYnlfbHVhICJrb25nID0gcmVxdWlyZSAna29uZyc7IGtvbmcuaW5pdCgpIjsKCiAgc2VydmVyIHsKICAgIGxpc3RlbiA4MDAwOwoKICAgIGxvY2F0aW9uIC9yb2JvdHMudHh0IHsKICAgICAgcmV0dXJuIDIwMCAiVXNlci1hZ2VudDogKlxuRGlzYWxsb3c6IC8iOwogICAgfQoKICAgIGxvY2F0aW9uIC8gewogICAgICAjIEFzc2lnbnMgdGhlIGRlZmF1bHQgTUlNRS10eXBlIHRvIGJlIHVzZWQgZm9yIGZpbGVzIHdoZXJlIHRoZQogICAgICAjIHN0YW5kYXJkIE1JTUUgbWFwIGRvZXNuJ3Qgc3BlY2lmeSBhbnl0aGluZy4KICAgICAgZGVmYXVsdF90eXBlICd0ZXh0L3BsYWluJzsKCiAgICAgICMgVGhpcyBwcm9wZXJ0eSB3aWxsIGJlIHVzZWQgbGF0ZXIgYnkgcHJveHlfcGFzcwogICAgICBzZXQgJGJhY2tlbmRfdXJsIG5pbDsKICAgICAgc2V0ICRxdWVyeXN0cmluZyBuaWw7CgogICAgICAjIEF1dGhlbnRpY2F0ZSB0aGUgdXNlciBhbmQgbG9hZCB0aGUgQVBJIGluZm8KICAgICAgYWNjZXNzX2J5X2x1YSAia29uZy5hY2Nlc3MoKSI7CgogICAgICAjIFByb3h5IHRoZSByZXF1ZXN0CiAgICAgIHByb3h5X3NldF9oZWFkZXIgWC1SZWFsLUlQICRyZW1vdGVfYWRkcjsKICAgICAgcHJveHlfc2V0X2hl
YWRlciBYLUZvcndhcmRlZC1Gb3IgJHByb3h5X2FkZF94X2ZvcndhcmRlZF9mb3I7CiAgICAgIHByb3h5X3Bhc3MgJGJhY2tlbmRfdXJsOwoKICAgICAgIyBBZGQgYWRkaXRpb25hbCByZXNwb25zZSBoZWFkZXJzCiAgICAgIGhlYWRlcl9maWx0ZXJfYnlfbHVhICJrb25nLmhlYWRlcl9maWx0ZXIoKSI7CgogICAgICAjIENoYW5nZSB0aGUgcmVzcG9uc2UgYm9keQogICAgICBib2R5X2ZpbHRlcl9ieV9sdWEgImtvbmcuYm9keV9maWx0ZXIoKSI7CgogICAgICAjIExvZyB0aGUgcmVxdWVzdAogICAgICBsb2dfYnlfbHVhICJrb25nLmxvZygpIjsKICAgIH0KCiAgICBlcnJvcl9wYWdlIDUwMCAvNTAwLmh0bWw7CiAgICBsb2NhdGlvbiA9IC81MDAuaHRtbCB7CiAgICAgIGludGVybmFsOwogICAgICBjb250ZW50X2J5X2x1YSAnCiAgICAgICAgbG9jYWwgdXRpbHMgPSByZXF1aXJlICJrb25nLnRvb2xzLnV0aWxzIgogICAgICAgIHV0aWxzLnNob3dfZXJyb3Iobmd4LnN0YXR1cywgIk9wcywgYW4gZXJyb3Igb2NjdXJyZWQgKOKVr8Kw4pahwrDvvInila8iKSc7CiAgICB9CiAgfQoKICBzZXJ2ZXIgewogICAgbGlzdGVuIDgwMDE7CgogICAgbG9jYXRpb24gLyB7CiAgICAgIGRlZmF1bHRfdHlwZSBhcHBsaWNhdGlvbi9qc29uOwogICAgICBjb250ZW50X2J5X2x1YSAnCiAgICAgICAgcmVxdWlyZSgibGFwaXMiKS5zZXJ2ZSgia29uZy53ZWIuYXBwIikKICAgICAgJzsKICAgIH0KCiAgICBsb2NhdGlvbiAvc3RhdGljLyB7CiAgICAgIGFsaWFzIHN0YXRpYy87CiAgICB9CgogICAgbG9jYXRpb24gL2FkbWluLyB7CiAgICAgIGFsaWFzIGFkbWluLzsKICAgIH0KCiAgICBsb2NhdGlvbiAvZmF2aWNvbi5pY28gewogICAgICBhbGlhcyBzdGF0aWMvZmF2aWNvbi5pY287CiAgICB9CiAgfQp9Cg==
cache:
  expiration: 5
databases_available:
  cassandra:
    properties:
      keepalive: 60000
      port: 9042
      timeout: 1000
      hosts: 127.0.0.1
      keyspace: kong_tests
database: cassandra
subnetmarco commented 9 years ago

I found out why it behaves like this: when the library finds a non-ASCII byte in a string value (a byte with a decimal value of 128 / hex 0x80 or above, as occurs in multi-byte UTF-8 characters), it saves the data in a Base64-encoded binary format instead of dumping the value as is. If the system supports UTF-8 encoding, there would be no need to Base64-encode the value.

https://github.com/acd/lua-yaml/blob/master/lyaml.c#L371

Maybe adding a configuration option would make it possible to enable/disable this check and would fix this edge case?