# status-feeder.properties
#
# Copyright 2011 GroundWork Open Source, Inc. ("GroundWork")
# All rights reserved. This program is free software; you can
# redistribute it and/or modify it under the terms of the GNU
# General Public License version 2 as published by the Free
# Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.

######################################################################
## GroundWork Status Feeder Configuration Properties
######################################################################

# The values specified here are used to control the behavior of the
# nagios2collage_socket.pl script.

# Possible debug_level values:
# 0 = no info of any kind printed, except for startup/shutdown
#     messages and major errors
# 1 = print just error info and summary statistical data
# 2 = also print basic debug info
# 3 = also print XML messages
# 4 = also print detailed debug info
# 5 = also print ridiculously detailed debug info
debug_level = 1

# Where the log file is to be written.
logfile = /usr/local/groundwork/foundation/container/logs/nagios2collage_socket.log

# An identifier for the particular instance of Nagios we are tracking.
# This should be set to the output of `hostname -s`, though in a pinch,
# localhost will do.
thisnagios = localhost

# Major version number of the Nagios release being used.
# Currently, should be 3.
nagios_version = 3

# Absolute pathname of the Nagios status file.
statusfile = /usr/local/groundwork/nagios/var/status.log

# Wait time in seconds between checks of the Nagios status.log file.
cycle_sleep_time = 15

# Time between full updates to the local Foundation, in seconds. This
# is the longest you want to wait for updates to the LastCheckTime in
# Foundation. Set this to a longer time on busy systems. Suggested 90
# second minimum, 300 second maximum. The longer the time, the larger
# the bundles of updates. Setting this too long could result in a
# "bumpy" performance curve, as the system processes large bundles. Old
# advice: If you set this near the maximum, you might also want to
# increase the max_xml_bundle_size below.
local_full_update_time = 90

# If set to true, then send only state changes and heartbeats.
# If set to false, then send full data on every processing cycle,
# regardless of whether the state has changed. (You are unlikely
# to ever want this set to false.)
# [true/false]
smart_update = true

# Selection of optional fields.
#
# Certain data fields are processed by the status feeder but are not
# forwarded by default to Foundation because most customers never access
# these values. Not sending data that will never be used significantly
# reduces the network, processing, and storage load. A few customers
# have uses for these values, though, so here we provide a means to
# control whether particular non-default fields are sent.
#
# Here are the non-default fields in question:
#
# Object   Nagios status.log field     Foundation field name       Category
# -------  --------------------------  --------------------------  ---------------------------------------------------------------
# host     check_type                  CheckType                   data change    [setup; 'ACTIVE' or 'PASSIVE']
# host     last_time_down              TimeDown                    timing change  [status; UNIX epoch timestamp]
# host     last_time_unreachable       TimeUnreachable             timing change  [status; UNIX epoch timestamp]
# host     last_time_up                TimeUp                      timing change  [status; UNIX epoch timestamp]
# host     failure_prediction_enabled  isFailurePredictionEnabled  data change    [setup; 0 => disabled, 1 => enabled]
# host     is_flapping                 isHostFlapping              data change    [status; 0 => stable, 1 => flapping]
# host     obsess_over_host            isObsessOverHost            data change    [setup; 0 => dismiss, 1 => obsess]
# host     process_performance_data    isProcessPerformanceData    data change    [setup; 0 => ignore, 1 => process]
# service  check_type                  CheckType                   data change    [setup; 'ACTIVE' or 'PASSIVE']
# service  last_hard_state             LastHardState               data change    [status; 'OK', 'WARNING', 'CRITICAL', or 'UNKNOWN']
# service  last_time_critical          TimeCritical                timing change  [status; UNIX epoch timestamp]
# service  last_time_ok                TimeOK                      timing change  [status; UNIX epoch timestamp]
# service  last_time_unknown           TimeUnknown                 timing change  [status; UNIX epoch timestamp]
# service  last_time_warning           TimeWarning                 timing change  [status; UNIX epoch timestamp]
# service  failure_prediction_enabled  isFailurePredictionEnabled  data change    [setup; 0 => disabled, 1 => enabled]
# service  obsess_over_service         isObsessOverService         data change    [setup; 0 => dismiss, 1 => obsess]
# service  process_performance_data    isProcessPerformanceData    data change    [setup; 0 => ignore, 1 => process]
# service  is_flapping                 isServiceFlapping           data change    [status; 0 => stable, 1 => flapping]
#
# Data fields are classified into different sending categories, described
# below.
# If you do not want to send any optional fields for a given sending
# category, leave the corresponding setting as an empty string (""). If you
# do want to send some optional fields in a given sending category, list the
# Foundation field names here in a quoted, space-separated list. For example,
# you might configure the following:
#
#     send_on_service_timing_change = "TimeOK TimeWarning TimeCritical TimeUnknown"
#
# IMPORTANT: To keep the system load down, only configure the non-default
# fields you know you will actually use.
#
# The sending categories are as follows:
#
#     send_in_any_host_message
#     send_in_any_service_message
#         All of these fields are always included in any message sent for the
#         object. A change in any of these fields will force a message for the
#         host or service to be sent. These categories have no support for any
#         non-default fields, so they are not configurable below.
#
#     send_on_host_data_change
#     send_on_service_data_change
#         These fields are only sent if their individual values have changed.
#         A change in any of these fields will force a message for the host or
#         service to be sent. The non-default fields that can be specified
#         for these categories are marked above as "data change".
#
#     send_on_host_timing_change
#     send_on_service_timing_change
#         These fields are only sent if their individual values have changed.
#         However, a change in any of these fields will by itself only force
#         a message for the host or service to be sent if it is time for a
#         heartbeat. Otherwise, the change will not be posted to Foundation
#         until either a data change occurs for this host or service, or the
#         next heartbeat cycle occurs. The non-default fields that can be
#         specified for these categories are marked above as "timing change".
#
send_on_host_data_change = ""
send_on_host_timing_change = ""
send_on_service_data_change = ""
send_on_service_timing_change = ""

# Send a console message when Nagios and Foundation are out of sync?
# true => send warning; false => don't send warning.
# [true/false]
send_sync_warning = true

# Whether to send pending-to-ok transition events, or just skip them.
# [true/false]
send_events_for_pending_to_ok = true

# Seconds to sleep before restarting after failure,
# to prevent tight looping.
failure_sleep_time = 10

# ----------------------------------------------------------------
# Options for sending to Foundation
# ----------------------------------------------------------------

# Where to send results to (the local) Foundation. Other mechanisms
# (NSCA, or GDMA spooling; see below) are used to send results to
# parent or parent-standby systems.
foundation_host = localhost
foundation_port = 4913

# Typical number of messages to send in each bundle, for efficiency.
# This is NOT the absolute minimum size, as there may occasionally
# be smaller bundles sent.
xml_bundle_size = 5

# Maximum number of XML messages sent in a bundle.
# 150 seems to work reasonably well in testing.
max_xml_bundle_size = 150

# When a new XML message is generated, send any accumulated XML messages
# if this much time has elapsed since we last sent any messages, even if
# we haven't yet gathered enough for a full "xml_bundle_size" bundle.
# This helps prevent messages from waiting too long for companions
# before an entire bundle is sent.
sync_timeout_seconds = 5

# This is the actual SO_SNDBUF value, as set by setsockopt(). This is
# therefore the actual size of the data buffer available for writing,
# irrespective of additional kernel bookkeeping overhead. This will
# have no effect without the companion as-yet-undocumented patch to
# IO::Socket::INET. Set this to 0 to use the system default socket send
# buffer size. A typical value to set here is 262144. (Note that the
# value specified here is likely to be limited to something like 131071
# by the sysctl net.core.wmem_max parameter.)
send_buffer_size = 262144

# Socket timeout (in seconds), to address GWMON-7407. Typical value
# is 60.
# Set to 0 to disable.
#
# This timeout is here only for use in emergencies, when Foundation
# has completely frozen up and is no longer reading (will never read)
# a socket we have open. We don't want to set this value so low that
# it will interfere with normal communication, even given the fact that
# Foundation may wait a rather long time between sips from this straw
# as it processes a large bundle of messages that we sent it, or is
# otherwise busy and just cannot get back around to reading the socket
# in a reasonably short time.
socket_send_timeout = 60

# Maximum number of events to accumulate before sending them all as
# a bundle.
max_event_bundle_size = 50

# syncwait is a multiplier of cycle_sleep_time to wait on updates while
# Foundation processes a sync. Typical value is 20. In theory, you
# might need to increase this if you see deadlocks after commit in the
# framework.log file. In practice, though, the need for this should
# have completely disappeared now that we have proper synchronization
# with pre-flight and commit operations in place.
syncwait = 20

# ----------------------------------------------------------------
# Options for sending state data to parent/standby server(s)
# ----------------------------------------------------------------

# State data may be sent to one or more parent/parent-standby servers in
# addition to a local Foundation database. Such data is sent in three
# possible forms, representing escalating levels of insistence. The
# exact logic is somewhat more complex, but this gives you the idea.
#
# * State changes are sent to the remote system whenever a state change
#   within Nagios is detected, and thereafter roughly as long as the
#   state is still "soft" within Nagios. This last condition allows the
#   parent system to mirror the local Nagios logic that determines when
#   soft states turn into hard states.
#
# * Heartbeats are sent to the remote system on a periodic basis, and
#   include any state data which has not been sent since the last
#   heartbeat (roughly, because it has not changed or not been in a soft
#   state after a change, and thus has not been part of a state-change
#   message), but which has been checked since the last heartbeat (and
#   found to be still in the same state).
#
# * Full dumps may be sent to the remote system on a periodic basis.
#   These dumps include all state data, regardless of whether it has
#   been checked or has changed since the last full dump. Such dumps
#   are used as a kind of fallback measure to ensure that the local and
#   remote systems stay synchronized over the long term, no matter what
#   kinds of transient failures might occur in the short term.
#
# Sending of state changes and heartbeats is enabled in tandem, because
# it only makes sense to send both kinds of data or neither. Sending
# of full dumps is enabled independently, via the full-dump interval
# (though such dumps will only be sent if sending of state changes and
# heartbeats is also enabled).
#
# State data may be sent via either direct NSCA (send_nsca) invocations,
# or indirectly by using the GDMA spooler. These channels may be
# independently controlled here, using options below. Most of the
# configuration for the GDMA spooler, including where it will send the
# state data, is done separately in the spooler's own configuration
# file.
#
# Finally, note that sending state data to parent/standby server(s)
# is only supported if smart_update (configured above) is true.

# ----------------------------------------------------------------
# Options for sending state data via direct NSCA invocations
# ----------------------------------------------------------------

# Send state changes and heartbeats via NSCA?
# true => send, in which case you must at least define primary_parent;
# false => do not send.
# [true/false]
send_state_changes_by_nsca = false

# Where to send primary heartbeats via NSCA. Must be a valid hostname
# or IP address, if send_state_changes_by_nsca is true. Otherwise, set
# this to a quoted empty string.
primary_parent = ""

# Also send heartbeats via NSCA to a second parent?
# true => send, in which case you must define secondary_parent;
# false => do not send to secondary_parent.
# [true/false]
send_to_secondary_NSCA = false

# Where to send secondary heartbeats via NSCA. Must be a valid hostname
# or IP address, if send_state_changes_by_nsca and send_to_secondary_NSCA
# are true. Otherwise, set this to a quoted empty string.
secondary_parent = ""

# Seconds between NSCA heartbeats (approximate; will be at least
# this, possibly this + remote_full_update_time).
# NOTE(review): no remote_full_update_time option appears among the
# settings visible in this file -- confirm which setting is meant.
# NOTE(review): the value is written as an arithmetic expression
# (60 * 60 would be 3600); presumably the reader evaluates it -- confirm.
nsca_heartbeat_interval = 60 * 60

# Seconds between NSCA full dumps (approximate).
# Set to zero to disable, if desired.
# (8 * 60 * 60 would be 28800, assuming the expression is evaluated.)
nsca_full_dump_interval = 8 * 60 * 60

# Port the parent (and secondary parent) NSCA is listening on
# (normally 5667).
nsca_port = 5667

# Limit the size of batched NSCA sends to avoid overloads
# (typical value 100).
max_messages_per_send_nsca = 100

# Sleep this many seconds between sending batches of
# max_messages_per_send_nsca results. This is used to throttle the
# sending of data, so as not to overwhelm the receiver with large data
# pulses and increase the risk of data loss due to a transient sending
# failure.
nsca_batch_delay = 2

# When sending a heartbeat via NSCA, if we get no answer from the
# parent after this many seconds, give up on sending the heartbeat.
nsca_timeout = 10

# ----------------------------------------------------------------
# Options for sending state data via the GDMA spooler
# ----------------------------------------------------------------

# Whether to send state data and heartbeats via the GDMA spooler.
# [true/false]
send_state_changes_by_gdma = false

# Seconds between GDMA heartbeats (approximate).
gdma_heartbeat_interval = 60 * 60

# Seconds between GDMA full dumps (approximate).
# Set to zero to disable, if desired.
# NOTE(review): the value is written as an arithmetic expression
# (8 * 60 * 60 would be 28800); presumably the reader evaluates it -- confirm.
gdma_full_dump_interval = 8 * 60 * 60

# Absolute path to the base of the GDMA software installation
# (typically, "/usr/local/groundwork/gdma"). This will be used to
# locate the spool file the status feeder will write into. Can be set
# to a quoted empty string if send_state_changes_by_gdma is false.
gdma_install_base = /usr/local/groundwork/gdma

# The maximum number of as-yet-unspooled results to save for a
# following attempt at spooling, if an attempt to spool fails.
# This is mainly a fail-safe mechanism, to avoid a potentially
# infinite memory leak. You may set this to 0 to discard all
# such unspooled messages.
max_unspooled_results_to_save = 20000