# File: //opt/netdata/netdata-configs/orig/health.d/db2.conf
# IBM DB2 health alerts
# Service health: connectivity
# Evaluates the `connection` dimension of db2.service_health every 10s and
# turns CRITICAL when it reads 0. No warn threshold and no delay line are set.
 template: db2_connection_failed
       on: db2.service_health
    class: Errors
     type: Database
component: DB2
     calc: $connection
    units: status
    every: 10s
     crit: $this == 0
  summary: DB2 connection check
     info: Cannot connect to DB2 database
       to: dba
# Service health: database state
# Evaluates the `database` dimension of db2.service_health every 10s:
# a value of 1 raises WARNING, 2 or higher raises CRITICAL.
# The info text is static on purpose: alert text is not an expression context,
# so an inline ternary such as ${this == 1 ? ... : ...} would be shown verbatim.
 template: db2_database_critical_state
       on: db2.service_health
    class: Errors
     type: Database
component: DB2
     calc: $database
    units: status
    every: 10s
     warn: $this == 1
     crit: $this >= 2
    delay: down 5m multiplier 1.5 max 1h
  summary: DB2 database state
     info: DB2 database is in a warning or critical state (quiesced or unknown)
       to: dba
# Connection pool utilization
# Computes active * 100 / max_allowed on db2.connections, falling back to 0
# when max_allowed is not positive so the division can never hit zero.
# Thresholds use hysteresis: WARNING is raised above 85 and kept while above 80;
# CRITICAL is raised above 95 and kept while above 90.
# NOTE(review): verify that ${value} in info is expanded by the agent, not shown literally.
 template: db2_connection_usage
       on: db2.connections
    class: Utilization
     type: Database
component: DB2
     calc: ($max_allowed > 0) ? ($active * 100 / $max_allowed) : 0
    units: %
    every: 10s
     warn: $this > (($status >= $WARNING) ? (80) : (85))
     crit: $this > (($status == $CRITICAL) ? (90) : (95))
    delay: down 5m multiplier 1.5 max 1h
  summary: DB2 connection usage
     info: Active connections are at ${value}% of maximum on DB2 database
       to: dba
# Lock timeouts per minute
# Sums the `timeouts` dimension of db2.locking over the last minute.
# WARNING is raised above 10 and kept while above 5;
# CRITICAL is raised above 30 and kept while above 20.
 template: db2_lock_timeout_rate
       on: db2.locking
    class: Errors
     type: Database
component: DB2
   lookup: sum -1m unaligned of timeouts
    units: timeouts
    every: 10s
     warn: $this > (($status >= $WARNING) ? (5) : (10))
     crit: $this > (($status == $CRITICAL) ? (20) : (30))
    delay: down 5m multiplier 1.5 max 1h
  summary: DB2 lock timeouts
     info: Lock timeouts in the last minute on DB2 database
       to: dba
# Deadlocks per minute
# Sums the `deadlocks` dimension of db2.deadlocks over the last minute.
# Any deadlock at all raises WARNING; more than 5 raises CRITICAL.
 template: db2_deadlock_rate
       on: db2.deadlocks
    class: Errors
     type: Database
component: DB2
   lookup: sum -1m unaligned of deadlocks
    units: deadlocks
    every: 10s
     warn: $this > 0
     crit: $this > 5
    delay: down 5m multiplier 1.5 max 1h
  summary: DB2 deadlocks
     info: Deadlocks detected in the last minute on DB2 database
       to: dba
# Overall buffer pool hit ratio (low is bad)
# Averages the `hit_ratio` dimension of db2.bufferpool_hit_ratio over 5 minutes.
# WARNING is raised below 80 and kept while below 85;
# CRITICAL is raised below 70 and kept while below 75.
# NOTE(review): verify that ${value} in info is expanded by the agent, not shown literally.
 template: db2_bufferpool_hit_ratio_low
       on: db2.bufferpool_hit_ratio
    class: Performance
     type: Database
component: DB2
   lookup: average -5m unaligned of hit_ratio
    units: %
    every: 10s
     warn: $this < (($status >= $WARNING) ? (85) : (80))
     crit: $this < (($status == $CRITICAL) ? (75) : (70))
    delay: down 15m multiplier 1.5 max 1h
  summary: DB2 buffer pool hit ratio
     info: Buffer pool hit ratio is ${value}% on DB2 database (poor performance)
       to: dba
# Sort overflow ratio
# Computes overflows * 100 / (sorts + overflows) on db2.sorting.
# The ternary guard yields 0 when no sorts were recorded, so an idle database
# does not trigger a division by zero (same pattern as db2_connection_usage).
# WARNING is raised above 5 and kept while above 3;
# CRITICAL is raised above 10 and kept while above 8.
# NOTE(review): if `sorts` already includes overflowed sorts, the denominator
# double-counts them - confirm against the collector's dimension semantics.
# NOTE(review): verify that ${value} in info is expanded by the agent, not shown literally.
 template: db2_sort_overflow_ratio
       on: db2.sorting
    class: Performance
     type: Database
component: DB2
     calc: (($sorts + $overflows) > 0) ? ($overflows * 100 / ($sorts + $overflows)) : 0
    units: %
    every: 10s
     warn: $this > (($status >= $WARNING) ? (3) : (5))
     crit: $this > (($status == $CRITICAL) ? (8) : (10))
    delay: down 15m multiplier 1.5 max 1h
  summary: DB2 sort overflows
     info: Sort overflow ratio is ${value}% on DB2 database
       to: dba
# Transaction log space usage
# Computes used * 100 / (used + available) on db2.log_space.
# The ternary guard yields 0 when both dimensions are zero, so the calc cannot
# divide by zero (same pattern as db2_connection_usage).
# WARNING is raised above 75 and kept while above 70;
# CRITICAL is raised above 90 and kept while above 85.
# NOTE(review): verify that ${value} in info is expanded by the agent, not shown literally.
 template: db2_log_space_usage
       on: db2.log_space
    class: Utilization
     type: Database
component: DB2
     calc: (($used + $available) > 0) ? ($used * 100 / ($used + $available)) : 0
    units: %
    every: 10s
     warn: $this > (($status >= $WARNING) ? (70) : (75))
     crit: $this > (($status == $CRITICAL) ? (85) : (90))
    delay: down 5m multiplier 1.5 max 1h
  summary: DB2 log space usage
     info: Log space usage is ${value}% on DB2 database
       to: dba
# Transaction log utilization percentage
# Reads the `utilization` dimension of db2.log_utilization as-is.
# Fixed thresholds without hysteresis: WARNING above 70, CRITICAL above 85.
# NOTE(review): verify that ${value} in info is expanded by the agent, not shown literally.
 template: db2_log_utilization_high
       on: db2.log_utilization
    class: Utilization
     type: Database
component: DB2
     calc: $utilization
    units: %
    every: 10s
     warn: $this > 70
     crit: $this > 85
    delay: down 5m multiplier 1.5 max 1h
  summary: DB2 log utilization
     info: Transaction log utilization is ${value}%
       to: dba
# Average lock wait time
# Reads the `wait_time` dimension of db2.lock_wait_time (units: milliseconds).
# Fixed thresholds without hysteresis: WARNING above 5000, CRITICAL above 10000.
# NOTE(review): verify that ${value} in info is expanded by the agent, not shown literally.
 template: db2_lock_wait_time_high
       on: db2.lock_wait_time
    class: Latency
     type: Database
component: DB2
     calc: $wait_time
    units: milliseconds
    every: 10s
     warn: $this > 5000
     crit: $this > 10000
    delay: down 5m multiplier 1.5 max 1h
  summary: DB2 average lock wait time
     info: Average lock wait time is ${value} milliseconds
       to: dba
# Lock contention (agents waiting on locks)
# Reads the `waiting_agents` dimension of db2.lock_details.
# Fixed thresholds without hysteresis: WARNING above 10, CRITICAL above 20.
# NOTE(review): verify that ${value} in info is expanded by the agent, not shown literally.
 template: db2_high_lock_contention
       on: db2.lock_details
    class: Workload
     type: Database
component: DB2
     calc: $waiting_agents
    units: agents
    every: 10s
     warn: $this > 10
     crit: $this > 20
    delay: down 5m multiplier 1.5 max 1h
  summary: DB2 lock contention
     info: ${value} agents are waiting for locks
       to: dba
# Detailed buffer pool hit ratio: data pages
# Reads the `data` dimension of db2.bufferpool_detailed_hit_ratio.
# WARNING below 80, CRITICAL below 70 (no hysteresis).
# The recipient is `silent`, so the alert is evaluated without notifying anyone.
# NOTE(review): verify that ${value} in info is expanded by the agent, not shown literally.
 template: db2_buffer_pool_data_hit_ratio_low
       on: db2.bufferpool_detailed_hit_ratio
    class: Utilization
     type: Database
component: DB2
     calc: $data
    units: %
    every: 10s
     warn: $this < 80
     crit: $this < 70
    delay: down 5m multiplier 1.5 max 1h
  summary: DB2 buffer pool data hit ratio
     info: Buffer pool data page hit ratio is ${value}%
       to: silent
# Detailed buffer pool hit ratio: index pages
# Reads the `index` dimension of db2.bufferpool_detailed_hit_ratio.
# WARNING below 80, CRITICAL below 70 (no hysteresis).
# The recipient is `silent`, so the alert is evaluated without notifying anyone.
# NOTE(review): verify that ${value} in info is expanded by the agent, not shown literally.
 template: db2_buffer_pool_index_hit_ratio_low
       on: db2.bufferpool_detailed_hit_ratio
    class: Utilization
     type: Database
component: DB2
     calc: $index
    units: %
    every: 10s
     warn: $this < 80
     crit: $this < 70
    delay: down 5m multiplier 1.5 max 1h
  summary: DB2 buffer pool index hit ratio
     info: Buffer pool index page hit ratio is ${value}%
       to: silent
# Per-tablespace usage (applies only where db2.tablespace_usage charts exist)
# Averages the `used` dimension over the last minute.
# WARNING is raised above 85 and kept while above 80;
# CRITICAL is raised above 95 and kept while above 90.
# The `tablespace` chart label is interpolated into summary and info.
# NOTE(review): verify that ${value} in info is expanded by the agent, not shown literally.
 template: db2_tablespace_usage
       on: db2.tablespace_usage
    class: Utilization
     type: Database
component: DB2
   lookup: average -1m unaligned of used
    units: %
    every: 10s
     warn: $this > (($status >= $WARNING) ? (80) : (85))
     crit: $this > (($status == $CRITICAL) ? (90) : (95))
    delay: down 15m multiplier 1.5 max 1h
  summary: DB2 tablespace ${label:tablespace} usage
     info: Tablespace ${label:tablespace} usage is ${value}% on DB2 database
       to: dba
# Per-buffer-pool hit ratio (applies only where db2.bufferpool_instance_hit_ratio charts exist)
# Averages the `overall` dimension over the last 5 minutes.
# WARNING is raised below 80 and kept while below 85;
# CRITICAL is raised below 70 and kept while below 75.
# The `bufferpool` chart label is interpolated into summary and info.
# NOTE(review): verify that ${value} in info is expanded by the agent, not shown literally.
 template: db2_bufferpool_instance_hit_ratio
       on: db2.bufferpool_instance_hit_ratio
    class: Performance
     type: Database
component: DB2
   lookup: average -5m unaligned of overall
    units: %
    every: 10s
     warn: $this < (($status >= $WARNING) ? (85) : (80))
     crit: $this < (($status == $CRITICAL) ? (75) : (70))
    delay: down 15m multiplier 1.5 max 1h
  summary: DB2 buffer pool ${label:bufferpool} hit ratio
     info: Buffer pool ${label:bufferpool} hit ratio is ${value}% on DB2 database
       to: dba
# Per-connection CPU usage alert removed - db2.connection_cpu_time context does not exist
# The connection instance metrics only include state, executing_queries, rows_read, rows_written, total_cpu_time
# but there's no chart context for per-connection CPU time monitoring
# Long running queries flagged as critical
# Averages the `critical` dimension of db2.long_running_queries over the last
# minute. WARNING when the average is above 0, CRITICAL when above 2.
# NOTE(review): verify that ${value} in info is expanded by the agent, not shown literally.
 template: db2_long_running_queries
       on: db2.long_running_queries
    class: Performance
     type: Database
component: DB2
   lookup: average -1m unaligned of critical
    units: queries
    every: 10s
     warn: $this > 0
     crit: $this > 2
    delay: down 5m multiplier 1.5 max 1h
  summary: DB2 long running queries
     info: ${value} critical long running queries on DB2 database
       to: dba
# Age of the last full backup
# Averages the `full` dimension of db2.backup_age (units: hours) over the last
# minute; the alert itself is evaluated once per hour.
# WARNING is raised above 36 and kept while above 24;
# CRITICAL is raised above 72 and kept while above 48.
# NOTE(review): verify that ${value} in info is expanded by the agent, not shown literally.
 template: db2_backup_age
       on: db2.backup_age
    class: Backup
     type: Database
component: DB2
   lookup: average -1m unaligned of full
    units: hours
    every: 1h
     warn: $this > (($status >= $WARNING) ? (24) : (36))
     crit: $this > (($status == $CRITICAL) ? (48) : (72))
    delay: down 1h multiplier 1.5 max 6h
  summary: DB2 last full backup age
     info: Last full backup was ${value} hours ago on DB2 database
       to: dba
# Result of the last backup
# Takes the maximum of the `status` dimension of db2.backup_status over the
# last minute, evaluated once per hour. Any value above 0 raises WARNING;
# no CRITICAL level is defined.
 template: db2_backup_status
       on: db2.backup_status
    class: Backup
     type: Database
component: DB2
   lookup: max -1m unaligned of status
    units: status
    every: 1h
     warn: $this > 0
    delay: down 1h multiplier 1.5 max 6h
  summary: DB2 last backup failed
     info: Last backup failed on DB2 database
       to: dba
# Full scan ratio per index (applies only where db2.index_usage charts exist)
# Computes full * 100 / (full + index) once per minute.
# The ternary guard yields 0 when no scans of either kind were recorded, so an
# unused index does not trigger a division by zero (same pattern as
# db2_connection_usage).
# WARNING is raised above 30 and kept while above 20;
# CRITICAL is raised above 50 and kept while above 40.
# The `index` chart label is interpolated into summary and info.
# NOTE(review): verify that ${value} in info is expanded by the agent, not shown literally.
 template: db2_table_scan_ratio
       on: db2.index_usage
    class: Performance
     type: Database
component: DB2
     calc: (($full + $index) > 0) ? (($full * 100) / ($full + $index)) : 0
    units: %
    every: 1m
     warn: $this > (($status >= $WARNING) ? (20) : (30))
     crit: $this > (($status == $CRITICAL) ? (40) : (50))
    delay: down 15m multiplier 1.5 max 1h
  summary: DB2 index ${label:index} high full scan ratio
     info: Index ${label:index} has a full scan ratio of ${value}%, which may indicate poor query performance.
       to: dba