# check for common /proc/net/softnet_stat errors
- alarm: 1hour_netdev_backlog_exceeded
+ # Warn when the `dropped` dimension of system.softnet_stat, summed over the
+ # last 10 minutes, is above zero. The check is evaluated every minute and
+ # is addressed to the `sysadmin` recipient.
+ # NOTE(review): `delay: down 1h multiplier 1.5 max 2h` presumably postpones
+ # the notification that the alarm has cleared -- confirm against the health
+ # configuration reference.
+ alarm: 10min_netdev_backlog_exceeded
on: system.softnet_stat
- lookup: sum -1h unaligned absolute of dropped
+ lookup: sum -10m unaligned absolute of dropped
units: packets
- every: 5m
+ every: 1m
warn: $this > 0
- delay: down 30m multiplier 1.5 max 1h
- info: number of packets dropped because sysctl net.core.netdev_max_backlog was exceeded
+ delay: down 1h multiplier 1.5 max 2h
+ info: number of packets dropped in the last 10min, because sysctl net.core.netdev_max_backlog was exceeded (this can be a cause for dropped packets)
to: sysadmin
- alarm: 1hour_netdev_budget_ran_outs
+ # Warn on the `squeezed` dimension of system.softnet_stat, summed over the
+ # last 10 minutes and evaluated every minute. The threshold has hysteresis:
+ # the sum must exceed 10 to raise the warning, but once the status is
+ # already WARNING or higher it only has to stay above 0 to keep it raised.
+ # NOTE(review): the recipient is `silent`, which presumably means no
+ # notification is sent for this alarm -- confirm against the health
+ # configuration reference.
+ alarm: 10min_netdev_budget_ran_outs
on: system.softnet_stat
- lookup: sum -1h unaligned absolute of squeezed
+ lookup: sum -10m unaligned absolute of squeezed
units: events
- every: 5m
- warn: $this > 0
- delay: down 30m multiplier 1.5 max 1h
- info: number of times ksoftirq ran out of sysctl net.core.netdev_budget or time slice, with work remaining
- to: sysadmin
-
- alarm: 1hour_cpu_lock_collisions
- on: system.softnet_stat
- lookup: sum -1h unaligned absolute of collisions
- units: collisions
- every: 5m
- warn: $this > 0
- delay: down 30m multiplier 1.5 max 1h
- info: number of times two cpus collided trying to get a network device queue lock
- to: sysadmin
+ every: 1m
+ warn: $this > (($status >= $WARNING) ? (0) : (10))
+ delay: down 1h multiplier 1.5 max 2h
+ info: number of times, during the last 10min, ksoftirq ran out of sysctl net.core.netdev_budget or time slice, with work remaining (this can be a cause for dropped packets)
+ to: silent