一、部署prometheus-server
1.1 rabc
---
# 1. 创建命名空间
apiVersion: v1
kind: Namespace
metadata:
name: monitor
---
# 2. 创建 ServiceAccount
apiVersion: v1
kind: ServiceAccount
metadata:
name: prometheus
namespace: monitor
---
# 3. 创建自定义 ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: prometheus-custom-role
rules:
# 监控核心资源
- apiGroups: [""]
resources: ["nodes","nodes/metrics","services","endpoints","pods"]
verbs: ["get", "list", "watch"]
# 添加 nodes/proxy 权限
- apiGroups: [""]
resources: ["nodes/proxy"]
verbs: ["get", "list", "watch"]
# 添加非资源 URL 权限
- nonResourceURLs: ["/metrics"]
verbs: ["get"]
# 监控工作负载资源
- apiGroups: ["apps"]
resources: ["deployments","statefulsets","replicasets","daemonsets"]
verbs: ["get", "list", "watch"]
# 访问 metrics API
- apiGroups: ["metrics.k8s.io"]
resources: ["pods","nodes"]
verbs: ["get", "list", "watch"]
# 访问配置
- apiGroups: [""]
resources: ["configmaps"]
verbs: ["get"]
# 访问事件
- apiGroups: [""]
resources: ["events"]
verbs: ["list", "watch"]
---
# 4. 创建 ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: monitor-clusterrolebinding
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: prometheus-custom-role # 使用自定义角色
subjects:
- kind: ServiceAccount
name: prometheus
namespace: monitor
1.2 configmap
kind: ConfigMap
apiVersion: v1
metadata:
labels:
app: prometheus
name: prometheus-cm
namespace: monitor
data:
prometheus.yml: |
global:
scrape_interval: 15s
scrape_timeout: 10s
evaluation_interval: 1m
scrape_configs:
#node节点基本监控指标采集
- job_name: 'node-exporter'
kubernetes_sd_configs:
- role: node
relabel_configs:
- source_labels: [__address__]
regex: '(.*):10250'
replacement: '${1}:9100'
target_label: __address__
action: replace
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
#集群活跃采集
- job_name: 'kubernetes-node-cadvisor'
kubernetes_sd_configs:
- role: node
scheme: https
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
relabel_configs:
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
- target_label: __address__
replacement: kubernetes.default.svc:443
- source_labels: [__meta_kubernetes_node_name]
regex: (.+)
target_label: __metrics_path__
replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor
#k8s-apisver端点采集
- job_name: 'kubernetes-apiserver'
kubernetes_sd_configs:
- role: endpoints
scheme: https
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
relabel_configs:
- source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
action: keep
regex: default;kubernetes;https
#k8s-svc端点采集
- job_name: 'kubernetes-service-endpoints'
kubernetes_sd_configs:
- role: endpoints
relabel_configs:
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
action: keep
regex: true
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
action: replace
target_label: __scheme__
regex: (https?)
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
action: replace
target_label: __metrics_path__
regex: (.+)
- source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
action: replace
target_label: __address__
regex: ([^:]+)(?::\d+)?;(\d+)
replacement: $1:$2
- action: labelmap
regex: __meta_kubernetes_service_label_(.+)
- source_labels: [__meta_kubernetes_namespace]
action: replace
target_label: kubernetes_namespace
- source_labels: [__meta_kubernetes_service_name]
action: replace
target_label: kubernetes_name
- job_name: 'kube-state-metrics'
kubernetes_sd_configs:
- role: pod
relabel_configs:
# 只抓取 kube-state-metrics Pod
- source_labels: [__meta_kubernetes_pod_label_app_kubernetes_io_name]
action: keep
regex: kube-state-metrics
# 添加命名空间标签
- source_labels: [__meta_kubernetes_namespace]
action: replace
target_label: kubernetes_namespace
# 添加 Pod 名称标签
- source_labels: [__meta_kubernetes_pod_name]
action: replace
target_label: kubernetes_pod_name
- job_name: 'kubernetes-pods'
kubernetes_sd_configs:
- role: pod
relabel_configs:
# 只抓取带有 prometheus.io/scrape=true 注解的 Pod
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
action: keep
regex: true
# 从注解中获取抓取路径
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
action: replace
target_label: __metrics_path__
regex: (.+)
# 从注解中获取端口
- source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
action: replace
regex: ([^:]+)(?::\d+)?;(\d+)
replacement: $1:$2
target_label: __address__
# 添加命名空间标签
- source_labels: [__meta_kubernetes_namespace]
action: replace
target_label: kubernetes_namespace
# 添加 Pod 名称标签
- source_labels: [__meta_kubernetes_pod_name]
action: replace
target_label: kubernetes_pod_name
1.3 prometheus-server-deployment
apiVersion: apps/v1
kind: Deployment
metadata:
name: prometheus-server
namespace: monitor
labels:
app: prometheus
spec:
replicas: 1
selector:
matchLabels:
app: prometheus
component: server
#matchExpressions:
#- {key: app, operator: In, values: [prometheus]}
#- {key: component, operator: In, values: [server]}
template:
metadata:
labels:
app: prometheus
component: server
annotations:
#prometheus.io/scrape: 'false'
prometheus.io/scrape: "true"
prometheus.io/path: "/metrics"
prometheus.io/port: "9090"
spec:
serviceAccountName: prometheus
containers:
- name: prometheus
image: swr.cn-north-4.myhuaweicloud.com/ddn-k8s/docker.io/prom/prometheus:v3.5.0
imagePullPolicy: IfNotPresent
command:
- prometheus
- --config.file=/etc/prometheus/prometheus.yml
- --storage.tsdb.path=/prometheus
- --storage.tsdb.retention.time=720h
- --web.enable-lifecycle
ports:
- containerPort: 9090
protocol: TCP
volumeMounts:
- mountPath: /etc/prometheus
name: prometheus-cm
- mountPath: /prometheus/
name: prometheus-volume
volumes:
- name: prometheus-cm
configMap:
name: prometheus-cm
- name: prometheus-volume
hostPath:
path: /data/nfs-data/prometheus
type: Directory
1.4 prometheus-server-svc
apiVersion: v1
kind: Service
metadata:
name: prometheus
namespace: monitor
labels:
app: prometheus
spec:
type: NodePort
ports:
- port: 9090
targetPort: 9090
nodePort: 30090
protocol: TCP
selector:
app: prometheus
component: server
1.5 访问
评论区