basho-labs / riak_explorer

Riak dev-mode and admin GUI
Apache License 2.0
61 stars 15 forks source link

Implement 'riak config effective' API endpoint per node #95

Closed dmitrizagidulin closed 8 years ago

dmitrizagidulin commented 8 years ago

Implement a /explore/nodes/$node/config API endpoint, to return the equivalent of doing a riak config effective on the command-line. (See Retrieving a Configuration Listing docs.)

The result can be either in text format (key = value config format like the CLI command), or (preferably) in JSON format.

travisbhartwell commented 8 years ago

@dmitrizagidulin I've started looking into this issue and I have a question. I ran riak config effective against a Riak EE node and got the following:

anti_entropy = active
anti_entropy.bloomfilter = on
anti_entropy.concurrency_limit = 2
anti_entropy.data_dir = $(platform_data_dir)/anti_entropy
anti_entropy.max_open_files = 20
anti_entropy.throttle = on
anti_entropy.tree.build_limit.number = 1
anti_entropy.tree.build_limit.per_timespan = 1h
anti_entropy.tree.expiry = 1w
anti_entropy.trigger_interval = 15s
anti_entropy.use_background_manager = off
anti_entropy.write_buffer_size = 4MB
background_manager = off
bitcask.data_root = $(platform_data_dir)/bitcask
bitcask.expiry = off
bitcask.expiry.grace_time = 0
bitcask.fold.max_age = unlimited
bitcask.fold.max_puts = 0
bitcask.hintfile_checksums = strict
bitcask.io_mode = erlang
bitcask.max_file_size = 2GB
bitcask.max_merge_size = 100GB
bitcask.merge.policy = always
bitcask.merge.thresholds.dead_bytes = 128MB
bitcask.merge.thresholds.fragmentation = 40
bitcask.merge.thresholds.small_file = 10MB
bitcask.merge.triggers.dead_bytes = 512MB
bitcask.merge.triggers.fragmentation = 60
bitcask.merge.window.end = 23
bitcask.merge.window.start = 0
bitcask.merge_check_interval = 3m
bitcask.merge_check_jitter = 30%
bitcask.open_timeout = 4s
bitcask.sync.strategy = none
buckets.default.allow_mult = false
buckets.default.basic_quorum = false
buckets.default.dw = quorum
buckets.default.last_write_wins = false
buckets.default.merge_strategy = 1
buckets.default.n_val = 3
buckets.default.notfound_ok = true
buckets.default.pr = 0
buckets.default.pw = 0
buckets.default.r = quorum
buckets.default.rw = quorum
buckets.default.w = quorum
check_crl = on
datatypes.compression_level = 1
distributed_cookie = riak
dtrace = off
erlang.K = on
erlang.W = w
erlang.async_threads = 64
erlang.crash_dump = /var/log/riak/erl_crash.dump
erlang.distribution_buffer_size = 32MB
erlang.fullsweep_after = 0
erlang.max_ets_tables = 256000
erlang.max_ports = 65536
erlang.process_limit = 256000
erlang.schedulers.compaction_of_load = false
erlang.schedulers.force_wakeup_interval = 500
erlang.smp = enable
handoff.inbound = on
handoff.ip = 0.0.0.0
handoff.max_rejects = 6
handoff.outbound = on
handoff.port = 8099
handoff.use_background_manager = off
honor_cipher_order = on
javascript.hook_pool_size = 2
javascript.map_pool_size = 8
javascript.maximum_heap_size = 8MB
javascript.maximum_stack_size = 16MB
javascript.reduce_pool_size = 6
jmx = off
jmx.port = 41110
jmx.refresh_rate = 30s
jmx.restart_check = 10m
leveldb.block.restart_interval = 16
leveldb.block.size = 4KB
leveldb.block.size_steps = 16
leveldb.block_cache_threshold = 32MB
leveldb.bloomfilter = on
leveldb.compaction.trigger.tombstone_count = 1000
leveldb.compression = on
leveldb.data_root = $(platform_data_dir)/leveldb
leveldb.fadvise_willneed = false
leveldb.limited_developer_mem = off
leveldb.maximum_memory.percent = 70
leveldb.sync_on_write = off
leveldb.threads = 71
leveldb.tiered = off
leveldb.verify_checksums = on
leveldb.verify_compaction = on
leveldb.write_buffer_size_max = 60MB
leveldb.write_buffer_size_min = 30MB
listener.http.internal = 127.0.0.1:8098
listener.protobuf.internal = 127.0.0.1:8087
log.console = file
log.console.file = $(platform_log_dir)/console.log
log.console.level = info
log.crash = on
log.crash.file = $(platform_log_dir)/crash.log
log.crash.maximum_message_size = 64KB
log.crash.rotation = $D0
log.crash.rotation.keep = 5
log.crash.size = 10MB
log.error.file = $(platform_log_dir)/error.log
log.error.messages_per_second = 100
log.error.redirect = on
log.syslog = off
log.syslog.facility = daemon
log.syslog.ident = riak
log.syslog.level = info
max_concurrent_requests = 50000
metadata_cache_size = off
nodename = riak@127.0.0.1
object.format = 1
object.siblings.maximum = 100
object.siblings.warning_threshold = 25
object.size.maximum = 50MB
object.size.warning_threshold = 5MB
platform_bin_dir = /usr/sbin
platform_data_dir = /var/lib/riak
platform_etc_dir = /etc/riak
platform_lib_dir = /usr/lib/riak/lib
platform_log_dir = /var/log/riak
protobuf.backlog = 128
protobuf.nagle = off
retry_put_coordinator_failure = on
riak_control = off
riak_control.auth.mode = off
ring.state_dir = $(platform_data_dir)/ring
ring_size = 64
runtime_health.thresholds.busy_ports = 2
runtime_health.thresholds.busy_processes = 30
runtime_health.triggers.distribution_port = on
runtime_health.triggers.port = on
runtime_health.triggers.process.garbage_collection = off
runtime_health.triggers.process.heap_size = 160444000
runtime_health.triggers.process.long_schedule = off
sasl = off
search = off
search.anti_entropy.data_dir = $(platform_data_dir)/yz_anti_entropy
search.root_dir = $(platform_data_dir)/yz
search.solr.jmx_port = 8985
search.solr.jvm_options = -d64 -Xms1g -Xmx1g -XX:+UseStringCache -XX:+UseCompressedOops
search.solr.port = 8093
search.solr.start_timeout = 30s
secure_referer_check = on
snmp.database_dir = /var/lib/riak/snmp/agent/db
snmp.force_reload = on
snmp.nodeGetTime100Threshold = off
snmp.nodeGetTime95Threshold = off
snmp.nodeGetTime99Threshold = off
snmp.nodeGetTimeMeanThreshold = off
snmp.nodeGetTimeMedianThreshold = off
snmp.nodePutTime100Threshold = off
snmp.nodePutTime95Threshold = off
snmp.nodePutTime99Threshold = off
snmp.nodePutTimeMeanThreshold = off
snmp.nodePutTimeMedianThreshold = off
snmp.refresh_frequency = 1m
snmp.traps.replication = off
storage_backend = bitcask
strong_consistency = off
tls_protocols.sslv3 = off
tls_protocols.tlsv1 = off
tls_protocols.tlsv1.1 = off
tls_protocols.tlsv1.2 = on
transfer_limit = 2
vnode_management_timer = 10s
## The following advanced.config was used in generating the 
## configuration and may have overridden some options that were 
## commented out above.
## [{riak_core,[{cluster_mgr,{"0.0.0.0",9080}}]},
##  {riak_repl,[{data_root,"/var/lib/riak/riak_repl/"},
##              {max_fssource_cluster,5},
##              {max_fssource_node,1},
##              {max_fssink_node,1},
##              {fullsync_on_connect,true},
##              {fullsync_interval,30},
##              {rtq_max_bytes,104857600},
##              {proxy_get,disabled},
##              {rt_heartbeat_interval,15},
##              {rt_heartbeat_timeout,15},
##              {fullsync_use_background_manager,true}]}] 

Once I figure out how to reproduce this from Erlang, the question is how would you like me to handle the commented out lines at the end of the output? The prior lines are easy, as they are simple key/value pairs. Perhaps have the entire commented out section as a string with the key advanced.config? What would be most useful to you?

Edit: To be clear, this is with the output as JSON, not plain text.

dmitrizagidulin commented 8 years ago

Wow, how utterly strange. I didn't realize that happened (the commented-out bit at the end). So what are the semantics there? Are the settings in the commented-out section (for example, max_fssink_node) just not included in the effective config section? (I wonder -- Did the implementors just punt on parsing those correctly and including them in the listings?)

Let me look into this and get back to you...

dmitrizagidulin commented 8 years ago

Ok, so, digging into it a bit with Engineering, the details seem to have been lost in the mists of time (and in the brains of developers who are no longer at the company). (Although John Daily said he might be able to shed more light later this week, after the current crunch time).

But, from what I understand -- so, the whole point of Cuttlefish is to parse the key = value format of sysconf files, transform it via a schema, and turn it into Erlang config terms that the app can understand. And the advanced.config file is for supplementary settings that don't have schema entries, and don't have a place in riak.conf. So I suspect the riak config effective command just punts on formatting the advanced config's Erlang terms into sysconf format, and just appends them in the comments.

However, from our standpoint, we actually want to access some of those settings in the advanced config, and so would love to have them converted into JSON. See if you can figure out how to access the Erlang settings that are read in from advanced.config, so we can include them properly in the effective config output.

macintux commented 8 years ago

Dmitri's assessment is correct. A couple of points worth noting about this endeavor...

First, does it make sense to use cuttlefish at all? If you're going to parse advanced.config, why not parse the generated app.config too?

Second, and this is probably evident to everyone involved, but I like to remind myself in case someday we can actually make this visible: the effective configuration for a node is a combination of code defaults + library app.config files + cuttlefish + advanced.config + vm.args + capabilities + admins setting environment values via riak attach (and I may have missed a layer). So if there's a chance to probe Riak more thoroughly than cuttlefish can, please feel free to do so.

dmitrizagidulin commented 8 years ago

@macintux - thank you for looking into it, muchly appreciated. Let me see if I can address your points.

If you're going to parse advanced.config, why not parse the generated app.config too?

Our main reason for using riak.conf/cuttlefish (instead of parsing the generated app.config), is for consistency of experience from the user's perspective. Since Riak 2.0 and the switch to the new config file, the names of most of the settings in riak.conf (the settings as the user experiences them, and as documented in our online docs) are different than what appears in the generated app.config. For example, conf's search = on becomes yokozuna.enabled = true, and a bunch of small changes like that.

Since the Explorer project aims to make dev (and ops) lives easier, we figured that the 'view node configuration' capability should match what they see in their riak.conf files and the online docs.

the effective configuration for a node is a combination of code defaults + library app.config files + cuttlefish + advanced.config + vm.args + capabilities + admins setting environment values via riak attach (and I may have missed a layer).

Yeah, I totally hear you there. And I would love to have that, some day! (With proper renaming of the cuttlefish schema-related settings, too, so as not to confuse the users). I think that awesome task might be outside of our scope for the moment :)

@travisbhartwell - Ok, so, for the moment, let's just go with the following:

  1. All the non-commented-out lines of riak config effective in JSON format
  2. Read in the contents of advanced.config, if applicable, and throw them (after terms-to-json conversion) under the advanced_config: key, in the json response.

Let me know if you have any further questions.

travisbhartwell commented 8 years ago

@dmitrizagidulin Thanks! I think I've figured out how to get what you need done, hopefully I'll have something to be reviewed today.

travisbhartwell commented 8 years ago

@dmitrizagidulin One other question. I was reviewing the code for cuttlefish_escript:effective/1 which is what gets called by riak config effective. In it, it doesn't use cuttlefish if vm.args and/or app.config exist. If I'm understanding https://github.com/basho/cuttlefish/blob/develop/src/cuttlefish_escript.erl#L131-L146 correctly, it will just print out the file names of one or both, if they exist.

What kind of results do you want returned? I see in the discussion in #105 you mention possibly being able to return the contents of those files. Perhaps I could just return a marker indicating which of those files exist, when they do?

dmitrizagidulin commented 8 years ago

@travisbhartwell good catch. Yeah, it just gives the error message and lists those legacy config files.

Ok, so.. hmm. If you can catch that situation (presence of legacy app.config and vm.args), let's return a 404 Not Found, and also a 'Content-Type: application/json' and JSON body error message in the response:

{ "error": "Legacy configuration files found, effective config not available." }

(No need to list the files, since it automatically implies those specific two, app.config and vm.args.)

dmitrizagidulin commented 8 years ago

(Also let me know if you need some sample legacy files, to test this.)

travisbhartwell commented 8 years ago

@dmitrizagidulin Apparently webmachine doesn't allow me to set the content when giving a 404, so will the 404 alone be sufficient?

travisbhartwell commented 8 years ago

@dmitrizagidulin Oh, my bad, I figured it out with help from @drewkerrigan. I can return content with the 404.

dmitrizagidulin commented 8 years ago

Tested, working as expected.

dmitrizagidulin commented 8 years ago

Reopening.

Currently, when trying to fetch the effective config of an invalid node, or a valid node that's not currently running, it results in a 500 server error:

{error,
    {error,function_clause,
        [{re_wm_jsonapi,res,
             [{wm_reqdata,'GET',http,
                  {1,1},
                  "127.0.0.1","127.0.0.1",
                  {wm_reqstate,#Port<0.5519>,
                      [{'content-encoding',"identity"},
                       {'content-type',"application/json"},
                       {resource_module,re_wm_node}],
                      undefined,undefined,"127.0.0.1",
                      {wm_reqdata,'GET',http,
                          {1,1},
                          "127.0.0.1","127.0.0.1",undefined,[],
                          "/explore/clusters/default/nodes/riak@127.0.0.1/config",
                          "/explore/clusters/default/nodes/riak@127.0.0.1/config",
                          [{cluster,"default"},
                           {node,"riak@127.0.0.1"},
                           {resource,"config"}],
                          [],"../../../../../..",500,1073741824,67108864,[],
                          [],
                          {3,
                           {"host",
                            {'Host',"localhost:9000"},
                            {"accept",{'Accept',"*/*"},nil,nil},
                            {"user-agent",
                             {'User-Agent',"curl/7.43.0"},
                             nil,nil}}},
                          not_fetched_yet,false,
                          {2,
                           {"content-type",
                            {"Content-Type","application/json"},
                            nil,
                            {"vary",{"Vary","Accept"},nil,nil}}},
                          <<>>,follow_request,
                          ["localhost"],
                          9000,[]},
                      undefined,undefined,
                      {wm_log_data,undefined,
                          {1449,785386,228879},
                          'GET',
                          {3,
                           {"host",
                            {'Host',"localhost:9000"},
                            {"accept",{'Accept',"*/*"},nil,nil},
                            {"user-agent",
                             {'User-Agent',"curl/7.43.0"},
                             nil,nil}}},
                          "127.0.0.1","127.0.0.1",
                          "/explore/clusters/default/nodes/riak@127.0.0.1/config",
                          {1,1},
                          404,0,undefined,undefined,undefined}},
                  [],"/explore/clusters/default/nodes/riak@127.0.0.1/config",
                  "/explore/clusters/default/nodes/riak@127.0.0.1/config",
                  [{cluster,"default"},
                   {node,"riak@127.0.0.1"},
                   {resource,"config"}],
                  [],"../../../../../..",500,1073741824,67108864,[],[],
                  {3,
                   {"host",
                    {'Host',"localhost:9000"},
                    {"accept",{'Accept',"*/*"},nil,nil},
                    {"user-agent",{'User-Agent',"curl/7.43.0"},nil,nil}}},
                  not_fetched_yet,false,
                  {2,
                   {"content-type",
                    {"Content-Type","application/json"},
                    nil,
                    {"vary",{"Vary","Accept"},nil,nil}}},
                  <<>>,follow_request,
                  ["localhost"],
                  9000,[]},
              [],
              {badrpc,nodedown},
              [],[]],
             [{file,"src/re_wm_jsonapi.erl"},{line,67}]},
         {re_wm_node,provide_content,2,
             [{file,"src/re_wm_node.erl"},{line,133}]},
         {webmachine_resource,resource_call,3,
             [{file,"src/webmachine_resource.erl"},{line,186}]},
         {webmachine_resource,do,3,
             [{file,"src/webmachine_resource.erl"},{line,142}]},
         {webmachine_decision_core,resource_call,1,
             [{file,"src/webmachine_decision_core.erl"},{line,48}]},
         {webmachine_decision_core,decision,1,
             [{file,"src/webmachine_decision_core.erl"},{line,562}]},
         {webmachine_decision_core,handle_request,2,
             [{file,"src/webmachine_decision_core.erl"},{line,33}]},
         {webmachine_mochiweb,loop,2,
             [{file,"src/webmachine_mochiweb.erl"},{line,72}]}]}}

It should instead return a 404 Not Found and a JSON error:

{ "error": "Invalid node id or node not available." }