0. blackbox_exporter 安装
验证安装
进入安装目录,执行 ./blackbox_exporter --version
配置自启动脚本
vim /usr/lib/systemd/system/blackbox_exporter.service
[Unit]
Description=blackbox_exporter
After=network.target
[Service]
User=root
Type=simple
ExecStart=/mnt/data/ELK/black-box/blackbox_exporter/blackbox_exporter --config.file=/mnt/data/ELK/black-box/blackbox_exporter/blackbox.yml
Restart=on-failure
[Install]
WantedBy=multi-user.target
启动 blackbox_exporter(新建或修改 service 文件后,需先执行 systemctl daemon-reload 使其生效)
systemctl enable blackbox_exporter.service
systemctl start blackbox_exporter.service
systemctl status blackbox_exporter.service
1.prometheus配置
vim /usr/local/prometheus/prometheus.yml
1.1 主机存活状态
- job_name: 'node_status'
metrics_path: /probe
params:
module: [icmp]
static_configs:
- targets: ['192.168.13.42','192.168.13.28']
labels:
instance: 'node_status'
group: 'node'
relabel_configs:
- source_labels: [__address__]
target_label: __param_target
- target_label: __address__
replacement: '127.0.0.1:9115' # blackbox_exporter 的地址和端口(默认端口 9115)
1.2 监控主机端口存活状态
- job_name: 'port_status'
metrics_path: /probe
params:
module: [tcp_connect]
static_configs:
- targets: ['192.168.13.29:8844','192.168.13.32:8330']
labels:
instance: 'port_status'
group: 'tcp'
relabel_configs:
- source_labels: [__address__]
target_label: __param_target
# - source_labels: [__param_target]
# target_label: instance
- target_label: __address__
replacement: '127.0.0.1:9115' # blackbox_exporter 的地址和端口(默认端口 9115)
1.3 监控网站状态
- job_name: web_status
metrics_path: /probe
params:
module: [http_2xx]
static_configs:
- targets: ['https://www.baidu.com']
labels:
instance: web_status
group: web
relabel_configs:
- source_labels: [__address__]
target_label: __param_target
- target_label: __address__
replacement: 127.0.0.1:9115
1.4 检查配置文件
#进入安装目录
./promtool check config prometheus.yml
#检查通过没有报错 可以刷新配置
curl -X POST http://127.0.0.1:9090/-/reload
#刷新配置的前提是:Prometheus 启动时指定了参数 --web.enable-lifecycle;配置告警时还需通过 --config.file= 指定配置文件路径
nohup /mnt/data/ELK/prometheus/prometheus --web.enable-lifecycle --config.file=/mnt/data/ELK/prometheus/prometheus.yml >/dev/null 2>&1 &
#或使用systemctl
1.5 完整配置示例
global:
scrape_interval: 15s
evaluation_interval: 15s
external_labels:
monitor: 'codelab-monitor'
scrape_configs:
#监控主机
- job_name: 'node_status'
metrics_path: /probe
params:
module: [ icmp ]
static_configs:
- labels:
group: 'node'
instance: 'node_status'
targets:
- "192.168.13.42"
- "192.168.13.28"
relabel_configs:
- source_labels: [ __address__ ]
target_label: __param_target
- source_labels: [ __param_target ]
target_label: instance
- target_label: __address__
replacement: localhost:9115
#监控端口 tcp
- job_name: 'port_status'
metrics_path: /probe
params:
module: [ tcp_connect ]
static_configs:
- labels:
group: 'tcp'
instance: 'gateway'
targets:
- "192.168.13.29:8844"
- labels:
group: 'tcp'
instance: 'home-service'
targets:
- "192.168.13.32:8330"
relabel_configs:
- source_labels: [ __address__ ]
target_label: __param_target
- target_label: __address__
replacement: localhost:9115
#监控网站
- job_name: 'web_status'
metrics_path: /probe
params:
module: [ http_2xx ]
static_configs:
- labels:
group: 'web'
instance: 'web_status'
targets:
- "https://www.baidu.com"
relabel_configs:
- source_labels: [ __address__ ]
target_label: __param_target
- source_labels: [ __param_target ]
target_label: instance
- target_label: __address__
replacement: localhost:9115
#配置告警
rule_files:
- "/mnt/data/ELK/prometheus/my_rule.yml"
1.6 告警规则配置
groups:
- name: blackbox_network_stats
rules:
- alert: 'url服务探测失败'
expr: probe_success == 0
for: 60s
labels:
severity: high
alertinfo: push_blackbox_alert
annotations:
summary: "{{ $labels.instance }}探测失败"
description: "url探测失败,请检查业务是否正常!!!"
1.7 查看targets
访问 http://127.0.0.1:9090/targets 查看加入的监控信息。
2. 配置grafana
2.1 安装饼图插件
grafana-cli plugins install grafana-piechart-panel
service grafana-server restart
如果页面报错提示找不到方法,可以尝试将 Grafana 升级到最新版本
2.2 导入blackbox_exporter模板
数据源选择Prometheus
模板号为 9965,模板下载地址:https://grafana.com/grafana/dashboards/9965 (1 Blackbox Exporter Dashboard 2022/04/12 | Grafana Labs)
配置成功后 如图所示
以上