基于Operator部署Prometheus实现K8S监控
1 下载源代码并导入镜像
1.下载源代码
wget https://github.com/prometheus-operator/kube-prometheus/archive/refs/tags/v0.11.0.tar.gz
[root@master231 02-prometheus]# wget http://192.168.14.253/Resources/Kubernetes/Project/Prometheus/manifests/kube-prometheus-0.11.0.tar.gz
2.解压目录
[root@master231 02-prometheus]# tar xf kube-prometheus-0.11.0.tar.gz
[root@master231 02-prometheus]#
[root@master231 02-prometheus]# cd kube-prometheus-0.11.0/
[root@master231 kube-prometheus-0.11.0]#
3.导入镜像
mkdir prometheus && cd prometheus
wget http://192.168.14.253/Resources/Kubernetes/Project/Prometheus/batch-load-prometheus-v0.11.0-images.sh
bash batch-load-prometheus-v0.11.0-images.sh 14
2 安装Prometheus-Operator
kubectl apply --server-side -f manifests/setup
kubectl wait \
--for condition=Established \
--all CustomResourceDefinition \
--namespace=monitoring
kubectl apply -f manifests/
5.检查Prometheus是否部署成功
[root@master231 kube-prometheus-0.11.0]# kubectl get pods -n monitoring -o wide
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
alertmanager-main-0 2/2 Running 0 35s 10.100.203.159 worker232 <none> <none>
alertmanager-main-1 2/2 Running 0 35s 10.100.140.77 worker233 <none> <none>
alertmanager-main-2 2/2 Running 0 35s 10.100.160.140 master231 <none> <none>
blackbox-exporter-746c64fd88-66ph5 3/3 Running 0 42s 10.100.203.158 worker232 <none> <none>
grafana-5fc7f9f55d-qnfwj 1/1 Running 0 41s 10.100.140.86 worker233 <none> <none>
kube-state-metrics-6c8846558c-pp5hf 3/3 Running 0 41s 10.100.203.173 worker232 <none> <none>
node-exporter-6z9kb 2/2 Running 0 40s 10.0.0.231 master231 <none> <none>
node-exporter-gx5dr 2/2 Running 0 40s 10.0.0.233 worker233 <none> <none>
node-exporter-rq8mn 2/2 Running 0 40s 10.0.0.232 worker232 <none> <none>
prometheus-adapter-6455646bdc-4fqcq 1/1 Running 0 39s 10.100.203.162 worker232 <none> <none>
prometheus-adapter-6455646bdc-n8flt 1/1 Running 0 39s 10.100.140.91 worker233 <none> <none>
prometheus-k8s-0 2/2 Running 0 35s 10.100.203.189 worker232 <none> <none>
prometheus-k8s-1 2/2 Running 0 35s 10.100.140.68 worker233 <none> <none>
prometheus-operator-f59c8b954-gm5ww 2/2 Running 0 38s 10.100.203.152 worker232 <none> <none>
[root@master231 kube-prometheus-0.11.0]#
3 修改service类型实现外部访问!!!
存在的问题:
可能会出现Grafana无法访问的情况,但是Grafana在哪个节点就用该节点的NodePort访问是可以的。
例如: Grafana运行在worker232节点且NodePort为49955时,如果通过其他节点的地址无法访问,可以直接使用"http://10.0.0.232:49955/login"访问。
后记:
我尝试过使用Traefik的Ingress和IngressRoute来解决这个问题,但也没有效果,因为svc都无法访问,而Traefik底层需要基于svc找到后端Pod。
如果说将来必须要搞定这个事情,可以学习一下iptables语法,手动将svc的解析规则做转换,在对应的worker节点添加相应的规则。
如果不会写iptables规则的话,也可以考虑使用"kubectl port-forward"实现 。
kubectl -n monitoring port-forward deploy/grafana 3000:3000 --address=0.0.0.0
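我的猜测: 可能是kubeadm部署方式下相关的转发规则没有生效,可以在同版本的二进制部署K8S集群中验证是否存在该问题。具体原因需要后期验证,希望各位道友一起搞定这个问题。
补充排查方向(待验证): kube-prometheus从v0.11.0开始在manifests目录中附带了NetworkPolicy清单(例如grafana-networkPolicy.yaml),默认只放行部分组件的访问,这与"只有Pod所在节点的NodePort可以访问"的现象相符。可以先执行"kubectl -n monitoring get networkpolicy"确认,再按需调整或删除对应策略后重试。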
3.1 基于nodeport
1.基于NodePort方式暴露
[root@master231 kube-prometheus-0.11.0]# cat manifests/prometheus-service.yaml
apiVersion: v1
kind: Service
metadata:
...
name: prometheus-k8s
namespace: monitoring
spec:
type: NodePort
ports:
- name: web
port: 9090
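# 注意: NodePort默认范围是30000-32767,使用9090需要先调整kube-apiserver的"--service-node-port-range"参数,否则会提示端口不在有效范围内。
nodePort: 9090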
targetPort: web
3.2 基于端口转发
nohup kubectl port-forward service/prometheus-k8s 9090:9090 --address=0.0.0.0 -n monitoring &
4 prometheus监控etcd
4.1 测试etcd metrics接口
1.1 查看etcd证书存储路径
[root@master231 cmy]# egrep "\--key-file|--cert-file" /etc/kubernetes/manifests/etcd.yaml
- --cert-file=/etc/kubernetes/pki/etcd/server.crt
- --key-file=/etc/kubernetes/pki/etcd/server.key
[root@master231 cmy]#
curl -s --cert /etc/kubernetes/pki/etcd/server.crt --key /etc/kubernetes/pki/etcd/server.key https://10.0.0.231:2379/metrics -k | tail
kubectl create secret generic etcd-tls --from-file=/etc/kubernetes/pki/etcd/server.crt --from-file=/etc/kubernetes/pki/etcd/server.key --from-file=/etc/kubernetes/pki/etcd/ca.crt -n monitoring
4.2 创建etcd证书的secrets并挂载到Prometheus server
2.1 查找需要挂载etcd的证书文件路径
[root@master231 cmy]# egrep "\--key-file|--cert-file|--trusted-ca-file" /etc/kubernetes/manifests/etcd.yaml
- --cert-file=/etc/kubernetes/pki/etcd/server.crt
- --key-file=/etc/kubernetes/pki/etcd/server.key
- --trusted-ca-file=/etc/kubernetes/pki/etcd/ca.crt
[root@master231 cmy]#
2.2 根据etcd的实际存储路径创建secrets
[root@master231 cmy]# kubectl create secret generic etcd-tls --from-file=/etc/kubernetes/pki/etcd/server.crt --from-file=/etc/kubernetes/pki/etcd/server.key --from-file=/etc/kubernetes/pki/etcd/ca.crt -n monitoring
secret/etcd-tls created
[root@master231 cmy]#
[root@master231 cmy]# kubectl -n monitoring get secrets etcd-tls
NAME TYPE DATA AGE
etcd-tls Opaque 3 12s
[root@master231 cmy]#
2.3 修改Prometheus的资源,修改后会自动重启
[root@master231 cmy]# kubectl -n monitoring edit prometheus k8s
...
spec:
secrets:
- etcd-tls
...
[root@master231 cmy]# kubectl -n monitoring get pods -l app.kubernetes.io/component=prometheus -o wide
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
prometheus-k8s-0 2/2 Running 0 74s 10.100.1.57 worker232 <none> <none>
prometheus-k8s-1 2/2 Running 0 92s 10.100.2.28 worker233 <none> <none>
[root@master231 cmy]#
2.4 查看证书是否挂载成功
[root@master231 cmy]# kubectl -n monitoring exec prometheus-k8s-0 -c prometheus -- ls -l /etc/prometheus/secrets/etcd-tls
total 0
lrwxrwxrwx 1 root 2000 13 Jan 24 14:07 ca.crt -> ..data/ca.crt
lrwxrwxrwx 1 root 2000 17 Jan 24 14:07 server.crt -> ..data/server.crt
lrwxrwxrwx 1 root 2000 17 Jan 24 14:07 server.key -> ..data/server.key
[root@master231 cmy]#
[root@master231 cmy]# kubectl -n monitoring exec prometheus-k8s-1 -c prometheus -- ls -l /etc/prometheus/secrets/etcd-tls
total 0
lrwxrwxrwx 1 root 2000 13 Jan 24 14:07 ca.crt -> ..data/ca.crt
lrwxrwxrwx 1 root 2000 17 Jan 24 14:07 server.crt -> ..data/server.crt
lrwxrwxrwx 1 root 2000 17 Jan 24 14:07 server.key -> ..data/server.key
[root@master231 cmy]#
4.3 编写资源清单
[root@master231 servicemonitors]# cat 01-smon-etcd.yaml
apiVersion: v1
kind: Endpoints
metadata:
name: etcd-k8s
namespace: kube-system
subsets:
- addresses:
- ip: 10.0.0.231
ports:
- name: https-metrics
port: 2379
protocol: TCP
---
apiVersion: v1
kind: Service
metadata:
name: etcd-k8s
namespace: kube-system
labels:
apps: etcd
spec:
ports:
- name: https-metrics
port: 2379
targetPort: 2379
type: ClusterIP
---
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
name: cmy-etcd-smon
namespace: monitoring
spec:
# 指定从目标Service上读取哪个标签的值作为job名称,可以不设置;若Service上没有该标签,则使用Service的名称作为job名称。
jobLabel: kubeadm-etcd-k8s-cmy
# 指定监控后端目标的策略
endpoints:
# 监控数据抓取的时间间隔
- interval: 30s
# 指定metrics端口,这个port对应Services.spec.ports.name
port: https-metrics
# Metrics接口路径
path: /metrics
# Metrics接口的协议
scheme: https
# 指定用于连接etcd的证书文件
tlsConfig:
# 指定etcd的CA的证书文件
caFile: /etc/prometheus/secrets/etcd-tls/ca.crt
# 指定etcd的证书文件
certFile: /etc/prometheus/secrets/etcd-tls/server.crt
# 指定etcd的私钥文件
keyFile: /etc/prometheus/secrets/etcd-tls/server.key
# 关闭证书校验,毕竟咱们是自建的证书,而非官方授权的证书文件。
insecureSkipVerify: true
# 监控目标Service所在的命名空间
namespaceSelector:
matchNames:
- kube-system
# 监控目标Service目标的标签。
selector:
# 注意,这个标签要和etcd的service的标签保持一致哟
matchLabels:
apps: etcd
[root@master231 servicemonitors]#
4.Prometheus查看数据
etcd_cluster_version
5.Grafana导入模板
3070
5 Prometheus监控非云原生应用MySQL
5.1 编写资源清单
[root@master231 servicemonitors]# cat 02-smon-mysqld.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
name: mysql80-deployment
spec:
replicas: 1
selector:
matchLabels:
apps: mysql80
template:
metadata:
labels:
apps: mysql80
spec:
containers:
- name: mysql
image: cmy.cmy.com/cmy-db/mysql:8.0.36-oracle
ports:
- containerPort: 3306
env:
- name: MYSQL_ROOT_PASSWORD
value: cmy
- name: MYSQL_USER
value: cmy
- name: MYSQL_PASSWORD
value: "cmy"
---
apiVersion: v1
kind: Service
metadata:
name: mysql80-service
spec:
selector:
apps: mysql80
ports:
- protocol: TCP
port: 3306
targetPort: 3306
---
apiVersion: v1
kind: ConfigMap
metadata:
name: my.cnf
data:
.my.cnf: |-
[client]
user = cmy
password = cmy
[client.servers]
user = linux96
password = cmy
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: mysql-exporter-deployment
spec:
replicas: 1
selector:
matchLabels:
apps: mysql-exporter
template:
metadata:
labels:
apps: mysql-exporter
spec:
volumes:
- name: data
configMap:
name: my.cnf
items:
- key: .my.cnf
path: .my.cnf
containers:
- name: mysql-exporter
image: registry.cn-hangzhou.aliyuncs.com/cmy-k8s/mysqld-exporter:v0.15.1
command:
- mysqld_exporter
- --config.my-cnf=/root/my.cnf
- --mysqld.address=mysql80-service.default.svc.cmy.com:3306
securityContext:
runAsUser: 0
ports:
- containerPort: 9104
#env:
#- name: DATA_SOURCE_NAME
# value: mysql_exporter:cmy@(mysql80-service.default.svc.cmy.com:3306)
volumeMounts:
- name: data
mountPath: /root/my.cnf
subPath: .my.cnf
---
apiVersion: v1
kind: Service
metadata:
name: mysql-exporter-service
labels:
apps: mysqld
spec:
selector:
apps: mysql-exporter
ports:
- protocol: TCP
port: 9104
targetPort: 9104
name: mysql80
---
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
name: cmy-mysql-smon
spec:
jobLabel: kubeadm-mysql-k8s-cmy
endpoints:
- interval: 3s
# 注意: 这里的port字段只能写svc端口的名称(对应Service.spec.ports.name),不能写svc的端口号。
# 使用svc端口名称的好处是: svc就算修改了端口号,只要不修改svc端口的名称,那么我们此处就不用再次修改哟。
# 错误示例(端口号不会被接受): port: 9104
port: mysql80
path: /metrics
scheme: http
namespaceSelector:
matchNames:
- default
selector:
matchLabels:
apps: mysqld
[root@master231 servicemonitors]#
5.2 Prometheus访问测试
2.Prometheus访问测试
mysql_up
3.Grafana导入模板
7362