# File: //opt/netdata/usr/lib/netdata/conf.d/health.d/snmp_cisco_nexus.conf
# Health alerts for Cisco Nexus SNMP monitoring.
# These alerts fire on metrics from the cisco-nexus SNMP profile.
# Chart contexts use the pattern: snmp.device_prof_<metric_name>
# --- vPC (Virtual Port Channel) alerts ---
# Alert: vPC peer-keepalive link state.
# Applied to every chart whose context is snmp.device_prof_cVpcPeerKeepAliveStatus.
# Raises WARNING whenever the 'alive' dimension accounted for less than 100%
# of the chart over the last minute. No critical threshold is defined.
template: snmp_cisco_nexus_vpc_peer_keepalive
on: snmp.device_prof_cVpcPeerKeepAliveStatus
class: Errors
type: Switch
component: VPC
# $this = 1-minute average of the 'alive' dimension, expressed as a percentage
# of the total of all dimensions on the chart ('percentage' lookup option).
# Presumably each dimension is one status value reported as 0/1 - verify
# against the cisco-nexus SNMP profile.
lookup: average -1m unaligned percentage of alive
units: %
# Re-evaluate the alert every 10 seconds.
every: 10s
warn: $this < 100
# Delay notifications for a lowering status (recovery) by 1m; the delay is
# multiplied by 1.5 when the status changes again while a notification is
# pending, up to a maximum of 5m.
delay: down 1m multiplier 1.5 max 5m
# ${label:vpc_domain_id} is substituted with the chart's vpc_domain_id label.
summary: vPC peer-keepalive not alive (domain ${label:vpc_domain_id})
info: vPC peer-keepalive link is not in 'alive' state on domain ${label:vpc_domain_id}
# Notification recipient role.
to: sysadmin
# Alert: vPC dual-active (split-brain) detection.
# Applied to every chart whose context is
# snmp.device_prof_cVpcDualActiveDetectionStatus.
# Raises CRITICAL as soon as the 'detected' dimension has any share of the
# chart over the last minute. This is the only stanza in this file that uses
# 'crit' instead of 'warn'.
template: snmp_cisco_nexus_vpc_dual_active
on: snmp.device_prof_cVpcDualActiveDetectionStatus
class: Errors
type: Switch
component: VPC
# $this = 1-minute average of the 'detected' dimension, expressed as a
# percentage of the total of all dimensions on the chart.
lookup: average -1m unaligned percentage of detected
units: %
# Re-evaluate the alert every 10 seconds.
every: 10s
crit: $this > 0
# Delay recovery notifications by only 15s (shorter than the other vPC
# alerts); multiplied by 1.5 on repeated status changes, capped at 5m.
delay: down 15s multiplier 1.5 max 5m
# ${label:vpc_domain_id} is substituted with the chart's vpc_domain_id label.
summary: vPC dual-active (split-brain) detected (domain ${label:vpc_domain_id})
info: vPC dual-active condition detected on domain ${label:vpc_domain_id} — both switches may be forwarding independently, risking loops
# Notification recipient role.
to: sysadmin
# Alert: vPC member (host link) configuration consistency.
# Applied to every chart whose context is
# snmp.device_prof_cVpcStatusHostLinkConsistencyStatus.
# Raises WARNING when the 'failed' dimension has any share of the chart over
# the last minute. No critical threshold is defined.
template: snmp_cisco_nexus_vpc_member_consistency
on: snmp.device_prof_cVpcStatusHostLinkConsistencyStatus
class: Errors
type: Switch
component: VPC
# $this = 1-minute average of the 'failed' dimension, expressed as a
# percentage of the total of all dimensions on the chart.
lookup: average -1m unaligned percentage of failed
units: %
# Re-evaluate the alert every 10 seconds.
every: 10s
warn: $this > 0
# Delay recovery notifications by 1m; multiplied by 1.5 on repeated status
# changes, capped at 5m.
delay: down 1m multiplier 1.5 max 5m
# The messages use two chart labels: vpc_domain_id and vpc_id.
summary: vPC member consistency check failed (domain ${label:vpc_domain_id} vPC ${label:vpc_id})
info: vPC member ${label:vpc_id} on domain ${label:vpc_domain_id} has a configuration consistency check failure
# Notification recipient role.
to: sysadmin
# --- HSRP (Hot Standby Router Protocol) alerts ---
# Alert: HSRP group state.
# Applied to every chart whose context is snmp.device_prof_cHsrpGrpStandbyState.
# Raises WARNING when the 'active' and 'standby' dimensions together accounted
# for less than 100% of the chart over the last minute.
# NOTE(review): any other state counts against the 100% threshold, including
# transitional or listening states - confirm this is intended for HSRP groups
# with more than two routers.
template: snmp_cisco_nexus_hsrp_not_active_standby
on: snmp.device_prof_cHsrpGrpStandbyState
class: Errors
type: Switch
component: HSRP
# $this = 1-minute average of the combined 'active' and 'standby' dimensions,
# expressed as a percentage of the total of all dimensions on the chart.
lookup: average -1m unaligned percentage of active,standby
units: %
# Re-evaluate the alert every 10 seconds.
every: 10s
warn: $this < 100
# Delay recovery notifications by 1m; multiplied by 1.5 on repeated status
# changes, capped at 5m.
delay: down 1m multiplier 1.5 max 5m
# The messages use two chart labels: interface and hsrp_group.
summary: HSRP group not active/standby (${label:interface} group ${label:hsrp_group})
info: HSRP group ${label:hsrp_group} on ${label:interface} is not in active or standby state
# Notification recipient role.
to: sysadmin
# --- BFD (Bidirectional Forwarding Detection) alerts ---
# Alert: BFD session state.
# Applied to every chart whose context is snmp.device_prof_ciscoBfdSessState.
# Raises WARNING when the 'down' dimension has any share of the chart over the
# last minute. No critical threshold is defined.
template: snmp_cisco_nexus_bfd_session_down
on: snmp.device_prof_ciscoBfdSessState
class: Errors
type: Switch
component: BFD
# $this = 1-minute average of the 'down' dimension, expressed as a percentage
# of the total of all dimensions on the chart.
lookup: average -1m unaligned percentage of down
units: %
# Re-evaluate the alert every 10 seconds.
every: 10s
warn: $this > 0
# Delay recovery notifications by 30s; multiplied by 1.5 on repeated status
# changes, capped at 5m.
delay: down 30s multiplier 1.5 max 5m
# ${label:bfd_session_index} is substituted with the chart's
# bfd_session_index label.
summary: BFD session down (session ${label:bfd_session_index})
info: BFD session ${label:bfd_session_index} is in 'down' state — sub-second failure detection triggered
# Notification recipient role.
to: sysadmin
# --- NTP alerts ---
# Alert: NTP synchronization status.
# Applied to every chart whose context is snmp.device_prof_cntpSysSrvStatus.
# Raises WARNING when the three "synchronized" dimensions (sync_to_refclock,
# sync_to_remote_server, sync_to_local) together accounted for less than 100%
# of the chart over the last 5 minutes. Note that sync_to_local is treated as
# a healthy state by this lookup.
template: snmp_cisco_nexus_ntp_not_synchronized
on: snmp.device_prof_cntpSysSrvStatus
class: Errors
type: Switch
component: NTP
# $this = 5-minute average (a longer window than the other alerts in this
# file) of the listed dimensions combined, expressed as a percentage of the
# total of all dimensions on the chart.
lookup: average -5m unaligned percentage of sync_to_refclock,sync_to_remote_server,sync_to_local
units: %
# Re-evaluate the alert every 10 seconds.
every: 10s
warn: $this < 100
# Delay recovery notifications by 5m; multiplied by 1.5 on repeated status
# changes, capped at 15m.
delay: down 5m multiplier 1.5 max 15m
# No chart labels are interpolated into these messages.
summary: NTP service not synchronized
info: Cisco NTP service is not synchronized to any reference clock or remote server
# Notification recipient role.
to: sysadmin
# --- NX-OS Feature Control alerts ---
# Alert: NX-OS feature operational status.
# Applied to every chart whose context is
# snmp.device_prof_cfcFeatureCtrlOpStatus2.
# Raises WARNING when the 'enabled_not_running' dimension has any share of the
# chart over the last minute. No critical threshold is defined.
template: snmp_cisco_nexus_feature_enabled_not_running
on: snmp.device_prof_cfcFeatureCtrlOpStatus2
class: Errors
type: Switch
component: NX-OS
# $this = 1-minute average of the 'enabled_not_running' dimension, expressed
# as a percentage of the total of all dimensions on the chart.
lookup: average -1m unaligned percentage of enabled_not_running
units: %
# Re-evaluate the alert every 10 seconds.
every: 10s
warn: $this > 0
# Delay recovery notifications by 2m; multiplied by 1.5 on repeated status
# changes, capped at 10m.
delay: down 2m multiplier 1.5 max 10m
# ${label:feature_name} is substituted with the chart's feature_name label.
summary: NX-OS feature enabled but not running (${label:feature_name})
info: NX-OS feature '${label:feature_name}' is enabled but not running — the feature process may have crashed
# Notification recipient role.
to: sysadmin