Closed: gerardba closed this issue 11 years ago
Hello,
Are you using ganglia::gmetad as well? If so, are you comfortable sending me a pull request to fix the puppet >= 3 compatibility issues?
Also, it looks like you've added some ACLs. I'd be happy to merge a [separate] pull request to add class parameters/erb code for that.
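A rough sketch of how those ACLs could be parameterized, for reference. Everything below is illustrative: the tcp_accept_channel_acl parameter name and the rule-hash structure are assumptions, not part of the module's current API.

class ganglia::gmond (
  # hypothetical parameter; each rule is a hash with 'ip', 'mask', and
  # 'action' keys, e.g. [{'ip' => '127.0.0.1', 'mask' => 32, 'action' => 'allow'}]
  $tcp_accept_channel_acl = [],
) {
  # the erb template would consume this via scope.lookupvar, the same way
  # the existing channel parameters are consumed
}

And the corresponding erb inside each tcp_accept_channel block (default stays hardcoded to "deny" in this sketch):

  acl {
    default = "deny"
    <%- scope.lookupvar('ganglia::gmond::tcp_accept_channel_acl').each do |rule| -%>
    access {
      ip = <%= rule['ip'] %>
      mask = <%= rule['mask'] %>
      action = "<%= rule['action'] %>"
    }
    <%- end -%>
  }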
Thanks for reporting.
-Josh
On 12/05/2012 12:28 PM, Gerard Bernabeu wrote:
Hi,
I'm using your ganglia::gmond module under SLF6, and I had to update ganglia/templates/gmond.conf.el6.erb to get the right variable scope. I updated the code so it looks like this:
name = "<%= scope.lookupvar('ganglia::gmond::cluster_name') %>"
I've not updated the other files. This is how the ganglia/templates/gmond.conf.el6.erb that works with puppet-3.0.1 looks:
/* This configuration is as close to 2.5.x default behavior as possible
   The values closely match ./gmond/metric.h definitions in 2.5.x */
globals {
  daemonize = yes
  setuid = yes
  user = ganglia
  debug_level = 0
  max_udp_msg_len = 1472
  mute = no
  deaf = no
  allow_extra_data = yes
  host_dmax = 3600 /* secs */
  cleanup_threshold = 300 /* secs */
  gexec = no
  send_metadata_interval = 30 /* secs */
}
/*
 * The cluster attributes specified will be used as part of the <CLUSTER>
 * tag that will wrap all hosts collected by this instance.
 */
cluster {
  name = "<%= scope.lookupvar('ganglia::gmond::cluster_name') %>"
  owner = "<%= scope.lookupvar('ganglia::gmond::cluster_owner') %>"
  latlong = "<%= scope.lookupvar('ganglia::gmond::cluster_latlong') %>"
  url = "<%= scope.lookupvar('ganglia::gmond::cluster_url') %>"
}

/* The host section describes attributes of the host, like the location */
host {
  location = "<%= scope.lookupvar('ganglia::gmond::host_location') %>"
}
/* Feel free to specify as many udp_send_channels as you like.  Gmond
   used to only support having a single channel */
<% scope.lookupvar('ganglia::gmond::udp_send_channel').each do |channel| -%>
udp_send_channel {
  <%- if channel['mcast_join'] then -%>
  mcast_join = <%= channel['mcast_join'] %>
  <%- end -%>
  <%- if channel['host'] then -%>
  host = <%= channel['host'] %>
  <%- end -%>
  <%- if channel['port'] then -%>
  port = <%= channel['port'] %>
  <%- end -%>
  <%- if channel['ttl'] then -%>
  ttl = <%= channel['ttl'] %>
  <%- end -%>
}
<% end -%>

/* You can specify as many udp_recv_channels as you like as well. */
<% scope.lookupvar('ganglia::gmond::udp_recv_channel').each do |channel| -%>
udp_recv_channel {
  <%- if channel['mcast_join'] then -%>
  mcast_join = <%= channel['mcast_join'] %>
  <%- end -%>
  <%- if channel['port'] then -%>
  port = <%= channel['port'] %>
  <%- end -%>
  <%- if channel['bind'] then -%>
  bind = <%= channel['bind'] %>
  <%- end -%>
}
<% end -%>

/* You can specify as many tcp_accept_channels as you like to share
   an xml description of the state of the cluster */
<% scope.lookupvar('ganglia::gmond::tcp_accept_channel').each do |channel| -%>
tcp_accept_channel {
  <%- if channel['port'] then -%>
  port = <%= channel['port'] %>
  <%- end -%>
  acl {
    default = "deny"
    access {
      ip = 131.225.152.143
      mask = 32
      action = "allow"
    }
    access {
      ip = 131.225.152.37
      mask = 32
      action = "allow"
    }
    access {
      ip = 127.0.0.1
      mask = 32
      action = "allow"
    }
  }
}
<% end -%>

/* Each metrics module that is referenced by gmond must be specified and
   loaded.  If the module has been statically linked with gmond, it does
   not require a load path.  However all dynamically loadable modules must
   include a load path. */
modules {
  module { name = "core_metrics" }
  module { name = "cpu_module" path = "modcpu.so" }
  module { name = "disk_module" path = "moddisk.so" }
  module { name = "load_module" path = "modload.so" }
  module { name = "mem_module" path = "modmem.so" }
  module { name = "net_module" path = "modnet.so" }
  module { name = "proc_module" path = "modproc.so" }
  module { name = "sys_module" path = "modsys.so" }
}

include ('/etc/ganglia/conf.d/*.conf')
/* The old internal 2.5.x metric array has been replaced by the following
   collection_group directives.  What follows is the default behavior for
   collecting and sending metrics that is as close to 2.5.x behavior as
   possible. */

/* This collection group will cause a heartbeat (or beacon) to be sent every
   20 seconds.  In the heartbeat is the GMOND_STARTED data which expresses
   the age of the running gmond. */
collection_group {
  collect_once = yes
  time_threshold = 20
  metric { name = "heartbeat" }
}
/* This collection group will send general info about this host every
   1200 secs.  This information doesn't change between reboots and is
   only collected once. */
collection_group {
  collect_once = yes
  time_threshold = 1200
  metric { name = "cpu_num" title = "CPU Count" }
  metric { name = "cpu_speed" title = "CPU Speed" }
  metric { name = "mem_total" title = "Memory Total" }
  /* Should this be here? Swap can be added/removed between reboots. */
  metric { name = "swap_total" title = "Swap Space Total" }
  metric { name = "boottime" title = "Last Boot Time" }
  metric { name = "machine_type" title = "Machine Type" }
  metric { name = "os_name" title = "Operating System" }
  metric { name = "os_release" title = "Operating System Release" }
  metric { name = "location" title = "Location" }
}
/* This collection group will send the status of gexecd for this host
   every 300 secs. */
/* Unlike 2.5.x the default behavior is to report gexecd OFF. */
collection_group {
  collect_once = yes
  time_threshold = 300
  metric { name = "gexec" title = "Gexec Status" }
}
/* This collection group will collect the CPU status info every 20 secs.
   The time threshold is set to 90 seconds.  In honesty, this
   time_threshold could be set significantly higher to reduce
   unnecessary network chatter. */
collection_group {
  collect_every = 20
  time_threshold = 90
  /* CPU status */
  metric { name = "cpu_user" value_threshold = "1.0" title = "CPU User" }
  metric { name = "cpu_system" value_threshold = "1.0" title = "CPU System" }
  metric { name = "cpu_idle" value_threshold = "5.0" title = "CPU Idle" }
  metric { name = "cpu_nice" value_threshold = "1.0" title = "CPU Nice" }
  metric { name = "cpu_aidle" value_threshold = "5.0" title = "CPU aidle" }
  metric { name = "cpu_wio" value_threshold = "1.0" title = "CPU wio" }
  /* The next two metrics are optional if you want more detail...
     ... since they are accounted for in cpu_system.
  metric { name = "cpu_intr" value_threshold = "1.0" title = "CPU intr" }
  metric { name = "cpu_sintr" value_threshold = "1.0" title = "CPU sintr" }
  */
}
collection_group {
  collect_every = 20
  time_threshold = 90
  /* Load Averages */
  metric { name = "load_one" value_threshold = "1.0" title = "One Minute Load Average" }
  metric { name = "load_five" value_threshold = "1.0" title = "Five Minute Load Average" }
  metric { name = "load_fifteen" value_threshold = "1.0" title = "Fifteen Minute Load Average" }
}
/* This group collects the number of running and total processes */
collection_group {
  collect_every = 80
  time_threshold = 950
  metric { name = "proc_run" value_threshold = "1.0" title = "Total Running Processes" }
  metric { name = "proc_total" value_threshold = "1.0" title = "Total Processes" }
}
/* This collection group grabs the volatile memory metrics every 40 secs and
   sends them at least every 180 secs.  This time_threshold can be increased
   significantly to reduce unneeded network traffic. */
collection_group {
  collect_every = 40
  time_threshold = 180
  metric { name = "mem_free" value_threshold = "1024.0" title = "Free Memory" }
  metric { name = "mem_shared" value_threshold = "1024.0" title = "Shared Memory" }
  metric { name = "mem_buffers" value_threshold = "1024.0" title = "Memory Buffers" }
  metric { name = "mem_cached" value_threshold = "1024.0" title = "Cached Memory" }
  metric { name = "swap_free" value_threshold = "1024.0" title = "Free Swap Space" }
}
collection_group {
  collect_every = 40
  time_threshold = 300
  metric { name = "bytes_out" value_threshold = 4096 title = "Bytes Sent" }
  metric { name = "bytes_in" value_threshold = 4096 title = "Bytes Received" }
  metric { name = "pkts_in" value_threshold = 256 title = "Packets Received" }
  metric { name = "pkts_out" value_threshold = 256 title = "Packets Sent" }
}
/* Different than 2.5.x default since the old config made no sense */
collection_group {
  collect_every = 1800
  time_threshold = 3600
  metric { name = "disk_total" value_threshold = 1.0 title = "Total Disk Space" }
}
collection_group {
  collect_every = 40
  time_threshold = 180
  metric { name = "disk_free" value_threshold = 1.0 title = "Disk Space Available" }
  metric { name = "part_max_used" value_threshold = 1.0 title = "Maximum Disk Space Used" }
}
I believe v0.0.2 resolves the puppet >= 3 compatibility issue, based on my own testing and the travis-ci tests. Please reopen this issue if you're still having templating issues.
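As a usage sketch: the template reads its settings from ganglia::gmond class variables, with each channel given as an array of hashes whose keys match the template's conditionals. Assuming the class exposes them as parameters, a declaration along these lines should drive it; every value below is a placeholder, not a module default.

class { 'ganglia::gmond':
  cluster_name       => 'my-cluster',
  cluster_owner      => 'nobody@example.org',
  cluster_latlong    => 'unspecified',
  cluster_url        => 'unspecified',
  host_location      => 'unspecified',
  udp_send_channel   => [{'mcast_join' => '239.2.11.71', 'port' => 8649, 'ttl' => 1}],
  udp_recv_channel   => [{'mcast_join' => '239.2.11.71', 'port' => 8649, 'bind' => '239.2.11.71'}],
  tcp_accept_channel => [{'port' => 8649}],
}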