[root@prometheus ~]# vim /etc/prometheus/rules/ceph_exporter.yaml
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107
---
# Prometheus alerting rules evaluated against ceph_* metrics.
#
# Conventions shared by every rule in this group:
#   - `for` is how long `expr` must keep returning results before the alert
#     fires; `0m` means it fires on the first evaluation that matches.
#   - `summary` carries the `instance` label of the series that matched.
#   - `description` ends with the sample value and the full label set, each on
#     its own line (hence the double-quoted scalars with `\n` escapes).
#   - Alert names are kept exactly as originally written.
groups:
  - name: Ceph status
    rules:
      # ceph_health_status is anything other than 0.
      - alert: Ceph 实例不健康
        expr: ceph_health_status != 0
        for: 0m
        labels:
          severity: critical
        annotations:
          summary: 'Ceph 实例不健康 (instance {{ $labels.instance }})'
          description: "Ceph instance unhealthy\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Absolute monitor clock skew stays above 0.2 for 2 minutes.
      - alert: 检测到Ceph监视器时钟偏差
        expr: abs(ceph_monitor_clock_skew_seconds) > 0.2
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: 'Ceph monitor clock skew (instance {{ $labels.instance }})'
          description: "Ceph monitor clock skew detected. Please check ntp and hardware clock settings\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # ceph_monitor_avail_percent stays below 10 for 2 minutes.
      - alert: Ceph监视器存储空间不足
        expr: ceph_monitor_avail_percent < 10
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: 'Ceph monitor low space (instance {{ $labels.instance }})'
          description: "Ceph monitor storage is low.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Any series of ceph_osd_up equals 0.
      - alert: Ceph对象存储守护进程关闭
        expr: ceph_osd_up == 0
        for: 0m
        labels:
          severity: critical
        annotations:
          summary: 'Ceph OSD Down (instance {{ $labels.instance }})'
          description: "Ceph Object Storage Daemon Down\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # ceph_osd_perf_apply_latency_seconds stays above 5 for 1 minute.
      - alert: Ceph高OSD延迟
        expr: ceph_osd_perf_apply_latency_seconds > 5
        for: 1m
        labels:
          severity: warning
        annotations:
          summary: 'Ceph high OSD latency (instance {{ $labels.instance }})'
          description: "Ceph Object Storage Daemon latency is high. Please check if it doesn't stuck in weird state.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # ceph_osd_utilization stays above 90 for 2 minutes.
      - alert: CephOSD空间不足
        expr: ceph_osd_utilization > 90
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: 'Ceph OSD low space (instance {{ $labels.instance }})'
          description: "Ceph Object Storage Daemon is going out of space. Please add more disks.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # ceph_osd_weight stays below 1 for 2 minutes.
      - alert: CephOSD重新加权
        expr: ceph_osd_weight < 1
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: 'Ceph OSD reweighted (instance {{ $labels.instance }})'
          description: "Ceph Object Storage Daemon takes too much time to resize.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # ceph_pg_down is greater than 0.
      - alert: CephPG下降
        expr: ceph_pg_down > 0
        for: 0m
        labels:
          severity: critical
        annotations:
          summary: 'Ceph PG down (instance {{ $labels.instance }})'
          description: "Some Ceph placement groups are down. Please ensure that all the data are available.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # ceph_pg_incomplete is greater than 0.
      - alert: CephPG不完整
        expr: ceph_pg_incomplete > 0
        for: 0m
        labels:
          severity: critical
        annotations:
          summary: 'Ceph PG incomplete (instance {{ $labels.instance }})'
          description: "Some Ceph placement groups are incomplete. Please ensure that all the data are available.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # ceph_pg_inconsistent is greater than 0.
      - alert: CephPG不一致
        expr: ceph_pg_inconsistent > 0
        for: 0m
        labels:
          severity: warning
        annotations:
          summary: 'Ceph PG inconsistent (instance {{ $labels.instance }})'
          description: "Some Ceph placement groups are inconsistent. Data is available but inconsistent across nodes.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # ceph_pg_activating stays above 0 for 2 minutes.
      - alert: CephPG激活时间长
        expr: ceph_pg_activating > 0
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: 'Ceph PG activation long (instance {{ $labels.instance }})'
          description: "Some Ceph placement groups are too long to activate.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # ceph_pg_backfill_toofull stays above 0 for 2 minutes.
      - alert: Ceph PG回填已满
        expr: ceph_pg_backfill_toofull > 0
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: 'Ceph PG backfill full (instance {{ $labels.instance }})'
          description: "Some Ceph placement groups are located on full Object Storage Daemon on cluster. Those PGs can be unavailable shortly. Please check OSDs, change weight or reconfigure CRUSH rules.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # ceph_pg_total exceeds ceph_pg_active (the difference is positive).
      - alert: Ceph PG不可用
        expr: ceph_pg_total - ceph_pg_active > 0
        for: 0m
        labels:
          severity: critical
        annotations:
          summary: 'Ceph PG unavailable (instance {{ $labels.instance }})'
          description: "Some Ceph placement groups are unavailable.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
|