25년도 AWS EKS Hands-on Study 스터디 정리 내용입니다.
프로메테우스 설치
# Install Prometheus 3.2.0 from the upstream release tarball and run it as a
# systemd service under a dedicated login-less user. Run as root.
wget https://github.com/prometheus/prometheus/releases/download/v3.2.0/prometheus-3.2.0.linux-amd64.tar.gz
tar -xvf prometheus-3.2.0.linux-amd64.tar.gz
cd prometheus-3.2.0.linux-amd64

# Binaries go to /usr/local/bin; config and TSDB data get their own directories.
mv prometheus /usr/local/bin/
mv promtool /usr/local/bin/
mkdir -p /etc/prometheus /var/lib/prometheus
mv prometheus.yml /etc/prometheus/

# Service account without home directory or login shell, owning config, data and binaries.
useradd --no-create-home --shell /sbin/nologin prometheus
chown -R prometheus:prometheus /etc/prometheus /var/lib/prometheus
chown prometheus:prometheus /usr/local/bin/prometheus /usr/local/bin/promtool

# The delimiter is quoted so the unit text is written verbatim (nothing in it
# is meant to be expanded by the shell); systemd itself joins the
# backslash-continued ExecStart lines.
tee /etc/systemd/system/prometheus.service > /dev/null <<'EOF'
[Unit]
Description=Prometheus
Wants=network-online.target
After=network-online.target

[Service]
User=prometheus
Group=prometheus
Type=simple
Restart=on-failure
ExecStart=/usr/local/bin/prometheus \
  --config.file=/etc/prometheus/prometheus.yml \
  --storage.tsdb.path=/var/lib/prometheus \
  --web.listen-address=0.0.0.0:9090

[Install]
WantedBy=multi-user.target
EOF

# Load the new unit, start it now and on every boot, then show its state.
systemctl daemon-reload
systemctl enable --now prometheus
systemctl status prometheus
노드 익스포터 설치
# Fetch and unpack node_exporter 1.9.0, then copy the binary into /usr/local/bin.
wget https://github.com/prometheus/node_exporter/releases/download/v1.9.0/node_exporter-1.9.0.linux-amd64.tar.gz
tar -xzvf node_exporter-1.9.0.linux-amd64.tar.gz
cd node_exporter-1.9.0.linux-amd64
cp node_exporter /usr/local/bin/

# Dedicated group plus a user with no home directory and no login shell; they own the binary.
groupadd -f node_exporter
useradd -g node_exporter -M -s /sbin/nologin node_exporter
chown node_exporter:node_exporter /usr/local/bin/node_exporter

# systemd unit: runs the exporter as that user and listens on port 9200.
cat > /etc/systemd/system/node_exporter.service <<EOF
[Unit]
Description=Node Exporter
Documentation=https://prometheus.io/docs/guides/node-exporter/
Wants=network-online.target
After=network-online.target
[Service]
User=node_exporter
Group=node_exporter
Type=simple
Restart=on-failure
ExecStart=/usr/local/bin/node_exporter \
--web.listen-address=:9200
[Install]
WantedBy=multi-user.target
EOF

# Pick up the new unit, enable it at boot, start it, and show its state.
systemctl daemon-reload
systemctl enable node_exporter
systemctl start node_exporter
systemctl status node_exporter
# Edit the Prometheus YAML file: append a scrape job for the local
# node_exporter (127.0.0.1:9200) with an extra "alias" label.
# NOTE(review): the two-space indent assumes /etc/prometheus/prometheus.yml
# ends with its scrape_configs list, as the shipped default does — confirm
# before appending to a customised file.
cat << 'EOF' >> /etc/prometheus/prometheus.yml
  - job_name: 'node_exporter'
    static_configs:
      - targets: ["127.0.0.1:9200"]
        labels:
          alias: 'myec2'
EOF

# Validate the edited file first; restart only if it parses so the new job is loaded.
promtool check config /etc/prometheus/prometheus.yml && systemctl restart prometheus
프로메테우스 스택 설치
# Install the kube-prometheus-stack chart (pinned to 69.3.1) into the
# "monitoring" namespace, with Prometheus and Grafana exposed through ALB ingresses.
#
# MyDomain and CERT_ARN must be set: the heredoc below is intentionally
# unquoted so the shell expands them into the values file. Fail loudly instead
# of silently writing empty host names / certificate ARNs.
: "${MyDomain:?MyDomain must be set}" "${CERT_ARN:?CERT_ARN must be set}"

helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
# Refresh the index so the pinned chart version is resolvable even if the repo was added earlier.
helm repo update

cat <<EOT > monitor-values.yaml
prometheus:
  prometheusSpec:
    scrapeInterval: "15s"
    evaluationInterval: "15s"
    podMonitorSelectorNilUsesHelmValues: false
    serviceMonitorSelectorNilUsesHelmValues: false
    retention: 5d
    retentionSize: "10GiB"
    storageSpec:
      volumeClaimTemplate:
        spec:
          storageClassName: gp3
          accessModes: ["ReadWriteOnce"]
          resources:
            requests:
              storage: 30Gi

  ingress:
    enabled: true
    ingressClassName: alb
    hosts:
      - prometheus.$MyDomain
    paths:
      - /*
    annotations:
      alb.ingress.kubernetes.io/scheme: internet-facing
      alb.ingress.kubernetes.io/target-type: ip
      alb.ingress.kubernetes.io/listen-ports: '[{"HTTPS":443}, {"HTTP":80}]'
      alb.ingress.kubernetes.io/certificate-arn: $CERT_ARN
      alb.ingress.kubernetes.io/success-codes: 200-399
      alb.ingress.kubernetes.io/load-balancer-name: myeks-ingress-alb
      alb.ingress.kubernetes.io/group.name: study
      alb.ingress.kubernetes.io/ssl-redirect: '443'

grafana:
  defaultDashboardsTimezone: Asia/Seoul
  adminPassword: prom-operator

  ingress:
    enabled: true
    ingressClassName: alb
    hosts:
      - grafana.$MyDomain
    paths:
      - /*
    annotations:
      alb.ingress.kubernetes.io/scheme: internet-facing
      alb.ingress.kubernetes.io/target-type: ip
      alb.ingress.kubernetes.io/listen-ports: '[{"HTTPS":443}, {"HTTP":80}]'
      alb.ingress.kubernetes.io/certificate-arn: $CERT_ARN
      alb.ingress.kubernetes.io/success-codes: 200-399
      alb.ingress.kubernetes.io/load-balancer-name: myeks-ingress-alb
      alb.ingress.kubernetes.io/group.name: study
      alb.ingress.kubernetes.io/ssl-redirect: '443'

  persistence:
    enabled: true
    type: sts
    storageClassName: "gp3"
    accessModes:
      - ReadWriteOnce
    size: 20Gi

alertmanager:
  enabled: false

defaultRules:
  create: false

kubeControllerManager:
  enabled: false

kubeEtcd:
  enabled: false

kubeScheduler:
  enabled: false

prometheus-windows-exporter:
  prometheus:
    monitor:
      enabled: false
EOT

# Review the rendered values (check the expanded host names and certificate ARN) before installing.
cat monitor-values.yaml

helm install kube-prometheus-stack prometheus-community/kube-prometheus-stack --version 69.3.1 \
  -f monitor-values.yaml --create-namespace --namespace monitoring
helm list -n monitoring
NAME NAMESPACE REVISION UPDATED STATUS CHART APP VERSION
kube-prometheus-stack monitoring 1 2025-03-01 22:04:29 +0900 KST deployed kube-prometheus-stack-69.3.1 v0.80.0
# 리소스
NAME READY AGE
statefulset.apps/kube-prometheus-stack-grafana 0/1 35s
statefulset.apps/prometheus-kube-prometheus-stack-prometheus 1/1 30s
NAME DESIRED CURRENT READY UP-TO-DATE AVAILABLE NODE SELECTOR AGE
daemonset.apps/kube-prometheus-stack-prometheus-node-exporter 3 3 3 3 3 kubernetes.io/os=linux 35s
NAME READY UP-TO-DATE AVAILABLE AGE
deployment.apps/kube-prometheus-stack-kube-state-metrics 1/1 1 1 35s
deployment.apps/kube-prometheus-stack-operator 1/1 1 1 35s
NAME READY STATUS RESTARTS AGE
pod/kube-prometheus-stack-grafana-0 0/3 PodInitializing 0 35s
pod/kube-prometheus-stack-kube-state-metrics-***** 1/1 Running 0 35s
pod/kube-prometheus-stack-operator-***** 1/1 Running 0 35s
pod/kube-prometheus-stack-prometheus-node-exporter-***** 1/1 Running 0 35s
pod/prometheus-kube-prometheus-stack-prometheus-0 2/2 Running 0 30s
# 서비스
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
service/kube-prometheus-stack-grafana ClusterIP 10.100.***.*** <none> 80/TCP 35s
service/kube-prometheus-stack-prometheus ClusterIP 10.100.***.*** <none> 9090/TCP,8080/TCP 35s
# 인그레스
NAME CLASS HOSTS ADDRESS PORTS AGE
ingress.networking.k8s.io/kube-prometheus-stack-grafana alb *****.$MyDomain myeks-ingress-alb-****.ap-northeast-2.elb.amazonaws.com 80 35s
ingress.networking.k8s.io/kube-prometheus-stack-prometheus alb *****.$MyDomain myeks-ingress-alb-****.ap-northeast-2.elb.amazonaws.com 80 35s
# pvc
NAME STATUS VOLUME CAPACITY ACCESS MODES STORAGECLASS AGE
persistentvolumeclaim/prometheus-kube-prometheus-stack-prometheus-db-prometheus-kube-prometheus-stack-prometheus-0 Bound pvc-********** 30Gi RWO gp3 30s
persistentvolumeclaim/storage-kube-prometheus-stack-grafana-0 Bound pvc-********** 20Gi RWO gp3 35s
# pv
NAME CAPACITY ACCESS MODES RECLAIM POLICY STATUS CLAIM STORAGECLASS AGE
persistentvolume/pvc-********** 20Gi RWO Delete Bound monitoring/storage-kube-prometheus-stack-grafana-0 gp3 33s
persistentvolume/pvc-********** 30Gi RWO Delete Bound monitoring/prometheus-kube-prometheus-stack-prometheus-db-prometheus-kube-prometheus-stack-prometheus-0 gp3 27s
# 프로메테우스 서비스 목록
NAME VERSION DESIRED READY RECONCILED AVAILABLE AGE
prometheus.monitoring.coreos.com/kube-prometheus-stack-prometheus v3.1.0 1 1 True True 79s
NAME AGE
servicemonitor.monitoring.coreos.com/kube-prometheus-stack-apiserver 79s
servicemonitor.monitoring.coreos.com/kube-prometheus-stack-coredns 79s
servicemonitor.monitoring.coreos.com/kube-prometheus-stack-grafana 79s
servicemonitor.monitoring.coreos.com/kube-prometheus-stack-kube-state-metrics 79s
servicemonitor.monitoring.coreos.com/kube-prometheus-stack-kubelet 79s
# crd
alertmanagerconfigs.monitoring.coreos.com 2025-03-01T12:47:40Z
alertmanagers.monitoring.coreos.com 2025-03-01T12:47:40Z
podmonitors.monitoring.coreos.com 2025-03-01T12:47:41Z
prometheuses.monitoring.coreos.com 2025-03-01T12:47:42Z
servicemonitors.monitoring.coreos.com 2025-03-01T12:47:43Z
thanosrulers.monitoring.coreos.com 2025-03-01T12:47:44Z
# pv 사용량
PV NAME PVC NAME NAMESPACE NODE NAME POD NAME VOLUME MOUNT NAME SIZE USED AVAILABLE %USED IUSED IFREE %IUSED
pvc-********** prometheus-kube-prometheus-stack-prometheus-db-prometheus-kube-prometheus-stack-prometheus-0 monitoring ip-**********.ap-northeast-2.compute.internal prometheus-kube-prometheus-stack-prometheus-0 prometheus-kube-prometheus-stack-prometheus-db 29Gi 250Mi 29Gi 0.82 9 15728631 0.00
pvc-********** storage-kube-prometheus-stack-grafana-0
파드 모니터
kubectl get podmonitor -n kube-system
NAME AGE
aws-cni-metrics 113s
kubectl get podmonitor -n kube-system aws-cni-metrics -o yaml | kubectl neat
# PodMonitor for pods labelled k8s-app=aws-node in kube-system:
# scrapes the "metrics" port at /metrics every 30s.
apiVersion: monitoring.coreos.com/v1
kind: PodMonitor
metadata:
  name: aws-cni-metrics
  namespace: kube-system
spec:
  jobLabel: k8s-app
  namespaceSelector:
    matchNames:
      - kube-system
  podMetricsEndpoints:
    - interval: 30s
      path: /metrics
      port: metrics
  selector:
    matchLabels:
      k8s-app: aws-node
그래프 확인
그라파나 대시보드
nginx 웹 서버 추가
kubectl get servicemonitor -n monitoring nginx -o yaml | kubectl neat
# ServiceMonitor stored in the monitoring namespace that targets Services
# labelled as the nginx release in the default namespace; scrapes the
# "metrics" port at /metrics every 10s.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  annotations:
    meta.helm.sh/release-name: nginx
    meta.helm.sh/release-namespace: default
  labels:
    app.kubernetes.io/instance: nginx
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/name: nginx
    app.kubernetes.io/version: 1.27.4
    helm.sh/chart: nginx-19.0.0
  name: nginx
  namespace: monitoring
spec:
  endpoints:
    - interval: 10s
      path: /metrics
      port: metrics
  jobLabel: ""
  namespaceSelector:
    matchNames:
      - default
  selector:
    matchLabels:
      app.kubernetes.io/instance: nginx
      app.kubernetes.io/name: nginx
PromQL(Prometheus Query Language)
PromQL(Prometheus Query Language)은 프로메테우스에서 메트릭 데이터를 조회하고 분석하기 위한 쿼리 언어로, 프로메테우스에서 수집된 데이터를 활용하여 메트릭을 조회하고 필터링, 집계 등을 수행하여 다양하게 활용할 수 있다.
프로메테우스에서 데이터를 저장하는 방식을 시계열 데이터(time-series data)라고 한다. 이는 데이터를 시간(time)과 값(value) 형태로 저장한다.
커스텀 대시보드
variables 추가하여 대시보드 필터링 동작 확인
# Inbound traffic per namespace, bytes/s averaged over 5m (Time series panel).
# $Namespace is the dashboard variable; it is applied as a regex match.
sum(rate(container_network_receive_bytes_total{namespace=~"$Namespace"}[5m])) by (namespace)
# Outbound traffic per namespace, bytes/s averaged over 5m (Time series panel).
sum(rate(container_network_transmit_bytes_total{namespace=~"$Namespace"}[5m])) by (namespace)
# Number of ReplicaSets per namespace (Stat panel): counts series, not replicas.
count(kube_replicaset_status_ready_replicas{namespace=~"$Namespace"}) by (namespace)
# Number of pods per namespace (Stat panel).
# NOTE: kube_pod_info has one series per pod in any phase, so this is not limited to Running pods.
count(kube_pod_info{namespace=~"$Namespace"}) by (namespace)
네임스페이스별로 인바운드/아웃바운드 트래픽, 레플리카셋 개수, 파드 갯수를 필터링하였다.