# File: //opt/netdata/netdata-configs/orig/health.d/azure_monitor_iot_hub.conf
# you can disable an alarm notification by setting the 'to' line to: silent
# --- D2C Telemetry Throttling ---
# Alerts on the 5-minute average of the `throttled` dimension of charts with
# context azure_monitor.iot_hub.d2c_telemetry_throttle; evaluated every minute.
template: am_iot_hub_d2c_telemetry_throttle
on: azure_monitor.iot_hub.d2c_telemetry_throttle
class: Errors
type: Messaging
component: IoT Hub
lookup: average -5m unaligned of throttled
units: errors/s
every: 1m
# Hysteresis: WARNING is raised above 5 and held while the value stays above 1.
warn: $this > (($status >= $WARNING) ? (1) : (5))
# Hysteresis: CRITICAL is raised above 50 and held while the value stays above 10.
crit: $this > (($status == $CRITICAL) ? (10) : (50))
# Notifications for a decreasing status are delayed 5m; the delay grows x1.5, capped at 1h.
delay: down 5m multiplier 1.5 max 1h
summary: IoT Hub telemetry throttling on ${label:resource_name}
info: Device-to-cloud telemetry throttling errors on IoT Hub ${label:resource_name} \
in ${label:resource_group} (${label:region}). \
Throttling indicates the hub is exceeding its message rate limits
# Notification recipient role; set to `silent` to disable notifications.
to: sysadmin
# --- C2D Messages Expired ---
# Alerts on the 5-minute average of the `expired` dimension of charts with
# context azure_monitor.iot_hub.c2d_messages_expired; evaluated every minute.
template: am_iot_hub_c2d_messages_expired
on: azure_monitor.iot_hub.c2d_messages_expired
class: Errors
type: Messaging
component: IoT Hub
lookup: average -5m unaligned of expired
units: messages/s
every: 1m
# Hysteresis: WARNING is raised above 1 and held while the value stays above 0.
warn: $this > (($status >= $WARNING) ? (0) : (1))
# Hysteresis: CRITICAL is raised above 10 and held while the value stays above 5.
crit: $this > (($status == $CRITICAL) ? (5) : (10))
# Notifications for a decreasing status are delayed 5m; the delay grows x1.5, capped at 1h.
delay: down 5m multiplier 1.5 max 1h
summary: IoT Hub C2D messages expiring on ${label:resource_name}
info: Cloud-to-device messages expiring before delivery on IoT Hub ${label:resource_name} \
in ${label:resource_group} (${label:region}). \
Expired messages indicate devices are not receiving commands in time
# Notification recipient role; set to `silent` to disable notifications.
to: sysadmin
# --- C2D Direct Method Failures ---
# Alerts on the 5-minute average of the `failed` dimension of charts with
# context azure_monitor.iot_hub.c2d_methods; evaluated every minute.
template: am_iot_hub_c2d_methods_failed
on: azure_monitor.iot_hub.c2d_methods
class: Errors
type: Messaging
component: IoT Hub
lookup: average -5m unaligned of failed
units: invocations/s
every: 1m
# Hysteresis: WARNING is raised above 5 and held while the value stays above 1.
warn: $this > (($status >= $WARNING) ? (1) : (5))
# Hysteresis: CRITICAL is raised above 50 and held while the value stays above 10.
crit: $this > (($status == $CRITICAL) ? (10) : (50))
# Notifications for a decreasing status are delayed 5m; the delay grows x1.5, capped at 1h.
delay: down 5m multiplier 1.5 max 1h
summary: IoT Hub direct method failures on ${label:resource_name}
info: Failed cloud-to-device direct method invocations on IoT Hub ${label:resource_name} \
in ${label:resource_group} (${label:region})
# Notification recipient role; set to `silent` to disable notifications.
to: sysadmin
# --- C2D Twin Read Failures ---
# Alerts on the 5-minute average of the `failed` dimension of charts with
# context azure_monitor.iot_hub.c2d_twin_reads; evaluated every minute.
template: am_iot_hub_c2d_twin_read_failures
on: azure_monitor.iot_hub.c2d_twin_reads
class: Errors
type: Messaging
component: IoT Hub
lookup: average -5m unaligned of failed
units: operations/s
every: 1m
# Hysteresis: WARNING is raised above 5 and held while the value stays above 1.
warn: $this > (($status >= $WARNING) ? (1) : (5))
# Hysteresis: CRITICAL is raised above 50 and held while the value stays above 10.
crit: $this > (($status == $CRITICAL) ? (10) : (50))
# Notifications for a decreasing status are delayed 5m; the delay grows x1.5, capped at 1h.
delay: down 5m multiplier 1.5 max 1h
summary: IoT Hub backend twin read failures on ${label:resource_name}
info: Failed backend twin read operations on IoT Hub ${label:resource_name} \
in ${label:resource_group} (${label:region})
# Notification recipient role; set to `silent` to disable notifications.
to: sysadmin
# --- C2D Twin Update Failures ---
# Alerts on the 5-minute average of the `failed` dimension of charts with
# context azure_monitor.iot_hub.c2d_twin_updates; evaluated every minute.
template: am_iot_hub_c2d_twin_update_failures
on: azure_monitor.iot_hub.c2d_twin_updates
class: Errors
type: Messaging
component: IoT Hub
lookup: average -5m unaligned of failed
units: operations/s
every: 1m
# Hysteresis: WARNING is raised above 5 and held while the value stays above 1.
warn: $this > (($status >= $WARNING) ? (1) : (5))
# Hysteresis: CRITICAL is raised above 50 and held while the value stays above 10.
crit: $this > (($status == $CRITICAL) ? (10) : (50))
# Notifications for a decreasing status are delayed 5m; the delay grows x1.5, capped at 1h.
delay: down 5m multiplier 1.5 max 1h
summary: IoT Hub backend twin update failures on ${label:resource_name}
info: Failed backend twin update operations on IoT Hub ${label:resource_name} \
in ${label:resource_group} (${label:region})
# Notification recipient role; set to `silent` to disable notifications.
to: sysadmin
# --- D2C Twin Read Failures ---
# Alerts on the 5-minute average of the `failed` dimension of charts with
# context azure_monitor.iot_hub.d2c_twin_reads; evaluated every minute.
template: am_iot_hub_d2c_twin_read_failures
on: azure_monitor.iot_hub.d2c_twin_reads
class: Errors
type: Messaging
component: IoT Hub
lookup: average -5m unaligned of failed
units: operations/s
every: 1m
# Hysteresis: WARNING is raised above 5 and held while the value stays above 1.
warn: $this > (($status >= $WARNING) ? (1) : (5))
# Hysteresis: CRITICAL is raised above 50 and held while the value stays above 10.
crit: $this > (($status == $CRITICAL) ? (10) : (50))
# Notifications for a decreasing status are delayed 5m; the delay grows x1.5, capped at 1h.
delay: down 5m multiplier 1.5 max 1h
summary: IoT Hub device twin read failures on ${label:resource_name}
info: Failed device-initiated twin read operations on IoT Hub ${label:resource_name} \
in ${label:resource_group} (${label:region})
# Notification recipient role; set to `silent` to disable notifications.
to: sysadmin
# --- D2C Twin Update Failures ---
# Alerts on the 5-minute average of the `failed` dimension of charts with
# context azure_monitor.iot_hub.d2c_twin_updates; evaluated every minute.
template: am_iot_hub_d2c_twin_update_failures
on: azure_monitor.iot_hub.d2c_twin_updates
class: Errors
type: Messaging
component: IoT Hub
lookup: average -5m unaligned of failed
units: operations/s
every: 1m
# Hysteresis: WARNING is raised above 5 and held while the value stays above 1.
warn: $this > (($status >= $WARNING) ? (1) : (5))
# Hysteresis: CRITICAL is raised above 50 and held while the value stays above 10.
crit: $this > (($status == $CRITICAL) ? (10) : (50))
# Notifications for a decreasing status are delayed 5m; the delay grows x1.5, capped at 1h.
delay: down 5m multiplier 1.5 max 1h
summary: IoT Hub device twin update failures on ${label:resource_name}
info: Failed device-initiated twin update operations on IoT Hub ${label:resource_name} \
in ${label:resource_group} (${label:region})
# Notification recipient role; set to `silent` to disable notifications.
to: sysadmin
# --- Routing Dropped Messages ---
# Alerts on the 5-minute average of the `dropped` dimension of charts with
# context azure_monitor.iot_hub.routing_deliveries; evaluated every minute.
template: am_iot_hub_routing_dropped
on: azure_monitor.iot_hub.routing_deliveries
class: Errors
type: Messaging
component: IoT Hub
lookup: average -5m unaligned of dropped
units: messages/s
every: 1m
# Hysteresis: WARNING is raised above 1 and held while the value stays above 0.
warn: $this > (($status >= $WARNING) ? (0) : (1))
# Hysteresis: CRITICAL is raised above 10 and held while the value stays above 5.
crit: $this > (($status == $CRITICAL) ? (5) : (10))
# Notifications for a decreasing status are delayed 5m; the delay grows x1.5, capped at 1h.
delay: down 5m multiplier 1.5 max 1h
summary: IoT Hub routing dropped messages on ${label:resource_name}
info: Messages dropped by the routing engine on IoT Hub ${label:resource_name} \
in ${label:resource_group} (${label:region}). \
Dropped messages indicate dead endpoints or misconfigured routes
# Notification recipient role; set to `silent` to disable notifications.
to: sysadmin
# --- Routing Orphaned Messages ---
# Alerts on the 5-minute average of the `orphaned` dimension of charts with
# context azure_monitor.iot_hub.routing_deliveries; evaluated every minute.
template: am_iot_hub_routing_orphaned
on: azure_monitor.iot_hub.routing_deliveries
class: Errors
type: Messaging
component: IoT Hub
lookup: average -5m unaligned of orphaned
units: messages/s
every: 1m
# Hysteresis: WARNING is raised above 1 and held while the value stays above 0.
warn: $this > (($status >= $WARNING) ? (0) : (1))
# Hysteresis: CRITICAL is raised above 10 and held while the value stays above 5.
crit: $this > (($status == $CRITICAL) ? (5) : (10))
# Notifications for a decreasing status are delayed 5m; the delay grows x1.5, capped at 1h.
delay: down 5m multiplier 1.5 max 1h
summary: IoT Hub routing orphaned messages on ${label:resource_name}
info: Orphaned messages with no matching routing rule on IoT Hub ${label:resource_name} \
in ${label:resource_group} (${label:region}). \
Orphaned messages indicate missing or incomplete routing configuration
# Notification recipient role; set to `silent` to disable notifications.
to: sysadmin
# --- Routing Invalid Messages ---
# Alerts on the 5-minute average of the `invalid` dimension of charts with
# context azure_monitor.iot_hub.routing_deliveries; evaluated every minute.
template: am_iot_hub_routing_invalid
on: azure_monitor.iot_hub.routing_deliveries
class: Errors
type: Messaging
component: IoT Hub
lookup: average -5m unaligned of invalid
units: messages/s
every: 1m
# Hysteresis: WARNING is raised above 1 and held while the value stays above 0.
warn: $this > (($status >= $WARNING) ? (0) : (1))
# Hysteresis: CRITICAL is raised above 10 and held while the value stays above 5.
crit: $this > (($status == $CRITICAL) ? (5) : (10))
# Notifications for a decreasing status are delayed 5m; the delay grows x1.5, capped at 1h.
delay: down 5m multiplier 1.5 max 1h
summary: IoT Hub routing invalid messages on ${label:resource_name}
info: Invalid messages rejected by the routing engine on IoT Hub ${label:resource_name} \
in ${label:resource_group} (${label:region})
# Notification recipient role; set to `silent` to disable notifications.
to: sysadmin
# --- Routing Latency ---
# Alerts on the 5-minute maximum of charts with context
# azure_monitor.iot_hub.routing_latency; evaluated every minute.
template: am_iot_hub_routing_latency
on: azure_monitor.iot_hub.routing_latency
class: Latency
type: Messaging
component: IoT Hub
# NOTE(review): there is no `of` clause, so every dimension of the chart feeds this lookup.
# If the chart exposes more than one dimension, confirm how they are combined before
# trusting the "maximum across all endpoints" wording in `info` - TODO verify.
lookup: max -5m unaligned
units: milliseconds
every: 1m
# The `$this != nan` guard keeps the alert from firing when the lookup yields no value.
# Hysteresis: WARNING is raised above 5000 and held while the value stays above 3000.
warn: $this != nan AND $this > (($status >= $WARNING) ? (3000) : (5000))
# Hysteresis: CRITICAL is raised above 10000 and held while the value stays above 5000.
crit: $this != nan AND $this > (($status == $CRITICAL) ? (5000) : (10000))
# Notifications for a decreasing status are delayed 5m; the delay grows x1.5, capped at 1h.
delay: down 5m multiplier 1.5 max 1h
summary: IoT Hub routing latency on ${label:resource_name}
info: Maximum message routing latency across all endpoints on IoT Hub ${label:resource_name} \
in ${label:resource_group} (${label:region})
# Notification recipient role; set to `silent` to disable notifications.
to: sysadmin
# --- Event Grid Latency ---
# Alerts on the 5-minute average of the `average` dimension of charts with
# context azure_monitor.iot_hub.event_grid_latency; evaluated every minute.
template: am_iot_hub_event_grid_latency
on: azure_monitor.iot_hub.event_grid_latency
class: Latency
type: Messaging
component: IoT Hub
lookup: average -5m unaligned of average
units: milliseconds
every: 1m
# The `$this != nan` guard keeps the alert from firing when the lookup yields no value.
# Hysteresis: WARNING is raised above 5000 and held while the value stays above 3000.
warn: $this != nan AND $this > (($status >= $WARNING) ? (3000) : (5000))
# Hysteresis: CRITICAL is raised above 10000 and held while the value stays above 5000.
crit: $this != nan AND $this > (($status == $CRITICAL) ? (5000) : (10000))
# Notifications for a decreasing status are delayed 5m; the delay grows x1.5, capped at 1h.
delay: down 5m multiplier 1.5 max 1h
summary: IoT Hub Event Grid latency on ${label:resource_name}
info: Average Event Grid delivery latency on IoT Hub ${label:resource_name} \
in ${label:resource_group} (${label:region})
# Notification recipient role; set to `silent` to disable notifications.
to: sysadmin
# --- Job Failures ---
# Alerts on the 5-minute average of the `failed` dimension of charts with
# context azure_monitor.iot_hub.jobs_status; evaluated every minute.
template: am_iot_hub_jobs_failed
on: azure_monitor.iot_hub.jobs_status
class: Errors
type: Messaging
component: IoT Hub
lookup: average -5m unaligned of failed
units: operations/s
every: 1m
# Hysteresis: WARNING is raised above 1 and held while the value stays above 0.
warn: $this > (($status >= $WARNING) ? (0) : (1))
# Hysteresis: CRITICAL is raised above 10 and held while the value stays above 5.
crit: $this > (($status == $CRITICAL) ? (5) : (10))
# Notifications for a decreasing status are delayed 5m; the delay grows x1.5, capped at 1h.
delay: down 5m multiplier 1.5 max 1h
summary: IoT Hub job failures on ${label:resource_name}
info: Failed jobs on IoT Hub ${label:resource_name} \
in ${label:resource_group} (${label:region})
# Notification recipient role; set to `silent` to disable notifications.
to: sysadmin
# --- Twin Query Failures ---
# Alerts on the 5-minute average of the `failed` dimension of charts with
# context azure_monitor.iot_hub.twin_queries; evaluated every minute.
template: am_iot_hub_twin_query_failures
on: azure_monitor.iot_hub.twin_queries
class: Errors
type: Messaging
component: IoT Hub
lookup: average -5m unaligned of failed
units: queries/s
every: 1m
# Hysteresis: WARNING is raised above 5 and held while the value stays above 1.
warn: $this > (($status >= $WARNING) ? (1) : (5))
# Hysteresis: CRITICAL is raised above 50 and held while the value stays above 10.
crit: $this > (($status == $CRITICAL) ? (10) : (50))
# Notifications for a decreasing status are delayed 5m; the delay grows x1.5, capped at 1h.
delay: down 5m multiplier 1.5 max 1h
summary: IoT Hub twin query failures on ${label:resource_name}
info: Failed twin queries on IoT Hub ${label:resource_name} \
in ${label:resource_group} (${label:region})
# Notification recipient role; set to `silent` to disable notifications.
to: sysadmin
# --- C2D Commands Abandoned ---
# Alerts on the 5-minute average of the `abandoned` dimension of charts with
# context azure_monitor.iot_hub.c2d_commands; evaluated every minute.
template: am_iot_hub_c2d_commands_abandoned
on: azure_monitor.iot_hub.c2d_commands
class: Errors
type: Messaging
component: IoT Hub
lookup: average -5m unaligned of abandoned
units: messages/s
every: 1m
# Hysteresis: WARNING is raised above 5 and held while the value stays above 1.
warn: $this > (($status >= $WARNING) ? (1) : (5))
# Hysteresis: CRITICAL is raised above 50 and held while the value stays above 10.
crit: $this > (($status == $CRITICAL) ? (10) : (50))
# Notifications for a decreasing status are delayed 5m; the delay grows x1.5, capped at 1h.
delay: down 5m multiplier 1.5 max 1h
summary: IoT Hub C2D commands abandoned on ${label:resource_name}
info: Cloud-to-device commands abandoned by devices on IoT Hub ${label:resource_name} \
in ${label:resource_group} (${label:region}). \
Abandoned commands indicate devices are not properly handling received messages
# Notification recipient role; set to `silent` to disable notifications.
to: sysadmin
# --- C2D Commands Rejected ---
# Alerts on the 5-minute average of the `rejected` dimension of charts with
# context azure_monitor.iot_hub.c2d_commands; evaluated every minute.
template: am_iot_hub_c2d_commands_rejected
on: azure_monitor.iot_hub.c2d_commands
class: Errors
type: Messaging
component: IoT Hub
lookup: average -5m unaligned of rejected
units: messages/s
every: 1m
# Hysteresis: WARNING is raised above 5 and held while the value stays above 1.
warn: $this > (($status >= $WARNING) ? (1) : (5))
# Hysteresis: CRITICAL is raised above 50 and held while the value stays above 10.
crit: $this > (($status == $CRITICAL) ? (10) : (50))
# Notifications for a decreasing status are delayed 5m; the delay grows x1.5, capped at 1h.
delay: down 5m multiplier 1.5 max 1h
summary: IoT Hub C2D commands rejected on ${label:resource_name}
info: Cloud-to-device commands rejected by devices on IoT Hub ${label:resource_name} \
in ${label:resource_group} (${label:region}). \
Rejected commands indicate devices are explicitly refusing messages
# Notification recipient role; set to `silent` to disable notifications.
to: sysadmin
# --- Routing Delivery Latency (Preview) ---
# Alerts on the 5-minute average of the `average` dimension of charts with
# context azure_monitor.iot_hub.routing_delivery_latency_preview; evaluated every minute.
template: am_iot_hub_routing_delivery_latency_preview
on: azure_monitor.iot_hub.routing_delivery_latency_preview
class: Latency
type: Messaging
component: IoT Hub
lookup: average -5m unaligned of average
units: milliseconds
every: 1m
# The `$this != nan` guard keeps the alert from firing when the lookup yields no value.
# Hysteresis: WARNING is raised above 5000 and held while the value stays above 3000.
warn: $this != nan AND $this > (($status >= $WARNING) ? (3000) : (5000))
# Hysteresis: CRITICAL is raised above 10000 and held while the value stays above 5000.
crit: $this != nan AND $this > (($status == $CRITICAL) ? (5000) : (10000))
# Notifications for a decreasing status are delayed 5m; the delay grows x1.5, capped at 1h.
delay: down 5m multiplier 1.5 max 1h
summary: IoT Hub routing delivery latency on ${label:resource_name}
info: Average routing delivery latency (preview metric) on IoT Hub ${label:resource_name} \
in ${label:resource_group} (${label:region})
# Notification recipient role; set to `silent` to disable notifications.
to: sysadmin