[AEWS] #4주차 프로메테우스 실습 (3)

25년도 AWS EKS Hands-on Study 스터디 정리 내용입니다.

 

프로메테우스 설치

# Download and unpack the Prometheus v3.2.0 release tarball (linux/amd64).
wget https://github.com/prometheus/prometheus/releases/download/v3.2.0/prometheus-3.2.0.linux-amd64.tar.gz

tar -xvf prometheus-3.2.0.linux-amd64.tar.gz
cd prometheus-3.2.0.linux-amd64

# Install the binaries, then create the config and TSDB data directories and
# move the bundled sample configuration into place.
mv prometheus /usr/local/bin/
mv promtool /usr/local/bin/
mkdir -p /etc/prometheus /var/lib/prometheus
mv prometheus.yml /etc/prometheus/

# Dedicated account (no home directory, no login shell) that runs the service
# and owns its config, data and binaries.
useradd --no-create-home --shell /sbin/nologin prometheus
chown -R prometheus:prometheus /etc/prometheus /var/lib/prometheus
chown prometheus:prometheus /usr/local/bin/prometheus /usr/local/bin/promtool

# Quoted delimiter: the unit text is written exactly as shown. With an
# unquoted delimiter the shell would expand `$` and remove each
# backslash-newline, joining the ExecStart lines before they reach the file.
tee /etc/systemd/system/prometheus.service > /dev/null <<'EOF'
[Unit]
Description=Prometheus
Wants=network-online.target
After=network-online.target

[Service]
User=prometheus
Group=prometheus
Type=simple
ExecStart=/usr/local/bin/prometheus \
  --config.file=/etc/prometheus/prometheus.yml \
  --storage.tsdb.path=/var/lib/prometheus \
  --web.listen-address=0.0.0.0:9090

[Install]
WantedBy=multi-user.target
EOF

# Make systemd pick up the new unit, then start Prometheus now and on boot.
systemctl daemon-reload
systemctl enable --now prometheus

 

 

노드 익스포터 설치

# Fetch the node_exporter v1.9.0 release (linux/amd64), unpack it and copy the
# binary into /usr/local/bin.
wget https://github.com/prometheus/node_exporter/releases/download/v1.9.0/node_exporter-1.9.0.linux-amd64.tar.gz
tar -xzvf node_exporter-1.9.0.linux-amd64.tar.gz
cd node_exporter-1.9.0.linux-amd64
cp node_exporter /usr/local/bin/

# Service account: create the group first, then a user in that group with no
# home directory and no login shell; hand the binary over to it.
groupadd -f node_exporter
useradd -g node_exporter --no-create-home --shell /sbin/nologin node_exporter
chown node_exporter:node_exporter /usr/local/bin/node_exporter

# Write the systemd unit; the exporter listens on port 9200.
cat > /etc/systemd/system/node_exporter.service <<EOF
[Unit]
Description=Node Exporter
Documentation=https://prometheus.io/docs/guides/node-exporter/
Wants=network-online.target
After=network-online.target

[Service]
User=node_exporter
Group=node_exporter
Type=simple
Restart=on-failure
ExecStart=/usr/local/bin/node_exporter \
  --web.listen-address=:9200

[Install]
WantedBy=multi-user.target
EOF

# Reload unit files, register the service for boot, start it, and show its state.
systemctl daemon-reload
systemctl enable node_exporter
systemctl start node_exporter
systemctl status node_exporter

# Edit the Prometheus YAML file: append a scrape job for the local
# node_exporter (port 9200) with an `alias` label.
# NOTE(review): the two-space indent assumes the file still ends with the
# stock `scrape_configs:` list, so the new entry becomes one more list item.
# The quoted delimiter keeps the YAML literal (no shell expansion).
cat <<'EOF' >> /etc/prometheus/prometheus.yml

  - job_name: 'node_exporter'
    static_configs:
      - targets: ["127.0.0.1:9200"]
        labels:
          alias: 'myec2'
EOF

# Validate the edited file first; restart only if it parses, so Prometheus
# actually loads the new job instead of keeping the old configuration.
promtool check config /etc/prometheus/prometheus.yml \
  && systemctl restart prometheus

 

프로메테우스 스택 설치

# Register the community chart repository and refresh the local chart index
# so the pinned chart version below can be resolved.
helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
helm repo update

# The values heredoc is intentionally unquoted so the shell substitutes these
# variables. Fail early with a message rather than silently writing empty
# ingress hosts / an empty certificate ARN into the values file.
: "${MyDomain:?set MyDomain to the base domain used for the ingress hosts}"
: "${CERT_ARN:?set CERT_ARN to the certificate ARN for the ALB HTTPS listener}"

# Helm values: Prometheus and Grafana with gp3-backed persistent storage, both
# exposed through the shared ALB ingress group "study"; Alertmanager, default
# rules and the control-plane scrapers listed at the bottom are disabled.
# Grafana's admin password can be overridden via GRAFANA_ADMIN_PASSWORD; it
# falls back to the previous hard-coded value. NOTE(review): that fallback is
# a well-known default and the ingress is internet-facing — set a real one.
cat <<EOT > monitor-values.yaml
prometheus:
  prometheusSpec:
    scrapeInterval: "15s"
    evaluationInterval: "15s"
    podMonitorSelectorNilUsesHelmValues: false
    serviceMonitorSelectorNilUsesHelmValues: false
    retention: 5d
    retentionSize: "10GiB"
    storageSpec:
      volumeClaimTemplate:
        spec:
          storageClassName: gp3
          accessModes: ["ReadWriteOnce"]
          resources:
            requests:
              storage: 30Gi

  ingress:
    enabled: true
    ingressClassName: alb
    hosts:
      - prometheus.$MyDomain
    paths:
      - /*
    annotations:
      alb.ingress.kubernetes.io/scheme: internet-facing
      alb.ingress.kubernetes.io/target-type: ip
      alb.ingress.kubernetes.io/listen-ports: '[{"HTTPS":443}, {"HTTP":80}]'
      alb.ingress.kubernetes.io/certificate-arn: $CERT_ARN
      alb.ingress.kubernetes.io/success-codes: 200-399
      alb.ingress.kubernetes.io/load-balancer-name: myeks-ingress-alb
      alb.ingress.kubernetes.io/group.name: study
      alb.ingress.kubernetes.io/ssl-redirect: '443'

grafana:
  defaultDashboardsTimezone: Asia/Seoul
  adminPassword: ${GRAFANA_ADMIN_PASSWORD:-prom-operator}

  ingress:
    enabled: true
    ingressClassName: alb
    hosts:
      - grafana.$MyDomain
    paths:
      - /*
    annotations:
      alb.ingress.kubernetes.io/scheme: internet-facing
      alb.ingress.kubernetes.io/target-type: ip
      alb.ingress.kubernetes.io/listen-ports: '[{"HTTPS":443}, {"HTTP":80}]'
      alb.ingress.kubernetes.io/certificate-arn: $CERT_ARN
      alb.ingress.kubernetes.io/success-codes: 200-399
      alb.ingress.kubernetes.io/load-balancer-name: myeks-ingress-alb
      alb.ingress.kubernetes.io/group.name: study
      alb.ingress.kubernetes.io/ssl-redirect: '443'

  persistence:
    enabled: true
    type: sts
    storageClassName: "gp3"
    accessModes:
      - ReadWriteOnce
    size: 20Gi

alertmanager:
  enabled: false
defaultRules:
  create: false
kubeControllerManager:
  enabled: false
kubeEtcd:
  enabled: false
kubeScheduler:
  enabled: false
prometheus-windows-exporter:
  prometheus:
    monitor:
      enabled: false
EOT
# Print the rendered file to confirm the variables were substituted.
cat monitor-values.yaml

# Install the pinned chart version into the "monitoring" namespace (created
# on demand), then list the releases there to confirm the deployment.
helm install kube-prometheus-stack prometheus-community/kube-prometheus-stack \
  --version 69.3.1 \
  --namespace monitoring --create-namespace \
  -f monitor-values.yaml
helm list -n monitoring
NAME                 	NAMESPACE  	REVISION	UPDATED                               	STATUS  	CHART                       	APP VERSION
kube-prometheus-stack	monitoring	1       	2025-03-01 22:04:29 +0900 KST	deployed	kube-prometheus-stack-69.3.1	v0.80.0

# 리소스
NAME                                                            READY   AGE
statefulset.apps/kube-prometheus-stack-grafana                 0/1     35s
statefulset.apps/prometheus-kube-prometheus-stack-prometheus   1/1     30s

NAME                                                            DESIRED   CURRENT   READY   UP-TO-DATE   AVAILABLE   NODE SELECTOR            AGE
daemonset.apps/kube-prometheus-stack-prometheus-node-exporter   3         3         3       3            3           kubernetes.io/os=linux   35s

NAME                                                       READY   UP-TO-DATE   AVAILABLE   AGE
deployment.apps/kube-prometheus-stack-kube-state-metrics   1/1     1            1           35s
deployment.apps/kube-prometheus-stack-operator             1/1     1            1           35s

NAME                                                           READY   STATUS            RESTARTS   AGE
pod/kube-prometheus-stack-grafana-0                            0/3     PodInitializing   0          35s
pod/kube-prometheus-stack-kube-state-metrics-*****            1/1     Running           0          35s
pod/kube-prometheus-stack-operator-*****                      1/1     Running           0          35s
pod/kube-prometheus-stack-prometheus-node-exporter-*****      1/1     Running           0          35s
pod/prometheus-kube-prometheus-stack-prometheus-0              2/2     Running           0          30s

# 서비스
NAME                                                     TYPE        CLUSTER-IP       EXTERNAL-IP   PORT(S)             AGE
service/kube-prometheus-stack-grafana                    ClusterIP   10.100.***.***   <none>        80/TCP              35s
service/kube-prometheus-stack-prometheus                 ClusterIP   10.100.***.***   <none>        9090/TCP,8080/TCP   35s

# 인그레스
NAME                                             CLASS   HOSTS                            ADDRESS                                                         PORTS   AGE
ingress.networking.k8s.io/kube-prometheus-stack-grafana      alb     *****.$MyDomain      myeks-ingress-alb-****.ap-northeast-2.elb.amazonaws.com   80      35s
ingress.networking.k8s.io/kube-prometheus-stack-prometheus   alb     *****.$MyDomain      myeks-ingress-alb-****.ap-northeast-2.elb.amazonaws.com   80      35s

# pvc
NAME                                                                 STATUS   VOLUME                                     CAPACITY   ACCESS MODES   STORAGECLASS   AGE
persistentvolumeclaim/prometheus-kube-prometheus-stack-prometheus-db-prometheus-kube-prometheus-stack-prometheus-0   Bound    pvc-**********   30Gi       RWO            gp3            30s
persistentvolumeclaim/storage-kube-prometheus-stack-grafana-0                                              Bound    pvc-**********   20Gi       RWO            gp3            35s

# pv
NAME                                      CAPACITY   ACCESS MODES   RECLAIM POLICY   STATUS   CLAIM                                                                                                     STORAGECLASS   AGE
persistentvolume/pvc-**********            20Gi       RWO            Delete           Bound    monitoring/storage-kube-prometheus-stack-grafana-0                                                        gp3            33s
persistentvolume/pvc-**********            30Gi       RWO            Delete           Bound    monitoring/prometheus-kube-prometheus-stack-prometheus-db-prometheus-kube-prometheus-stack-prometheus-0   gp3            27s

# 프로메테우스 서비스 목록
NAME                                                                VERSION   DESIRED   READY   RECONCILED   AVAILABLE   AGE
prometheus.monitoring.coreos.com/kube-prometheus-stack-prometheus   v3.1.0    1         1       True         True        79s

NAME                                                                                  AGE
servicemonitor.monitoring.coreos.com/kube-prometheus-stack-apiserver                  79s
servicemonitor.monitoring.coreos.com/kube-prometheus-stack-coredns                    79s
servicemonitor.monitoring.coreos.com/kube-prometheus-stack-grafana                    79s
servicemonitor.monitoring.coreos.com/kube-prometheus-stack-kube-state-metrics         79s
servicemonitor.monitoring.coreos.com/kube-prometheus-stack-kubelet                    79s

# crd
alertmanagerconfigs.monitoring.coreos.com    2025-03-01T12:47:40Z
alertmanagers.monitoring.coreos.com          2025-03-01T12:47:40Z
podmonitors.monitoring.coreos.com            2025-03-01T12:47:41Z
prometheuses.monitoring.coreos.com           2025-03-01T12:47:42Z
servicemonitors.monitoring.coreos.com        2025-03-01T12:47:43Z
thanosrulers.monitoring.coreos.com           2025-03-01T12:47:44Z

# pv 사용량
 PV NAME                                   PVC NAME                                                                                      NAMESPACE   NODE NAME                                         POD NAME                                       VOLUME MOUNT NAME                               SIZE  USED   AVAILABLE  %USED  IUSED  IFREE     %IUSED
 pvc-**********  prometheus-kube-prometheus-stack-prometheus-db-prometheus-kube-prometheus-stack-prometheus-0  monitoring  ip-**********.ap-northeast-2.compute.internal  prometheus-kube-prometheus-stack-prometheus-0  prometheus-kube-prometheus-stack-prometheus-db  29Gi  250Mi  29Gi       0.82   9      15728631  0.00
 pvc-**********  storage-kube-prometheus-stack-grafana-0

파드 모니터

kubectl get podmonitor -n kube-system
NAME              AGE
aws-cni-metrics   113s

kubectl get podmonitor -n kube-system aws-cni-metrics -o yaml | kubectl neat
apiVersion: monitoring.coreos.com/v1
kind: PodMonitor
metadata:
  name: aws-cni-metrics
  namespace: kube-system
spec:
  jobLabel: k8s-app
  namespaceSelector:
    matchNames:
    - kube-system
  podMetricsEndpoints:
  - interval: 30s
    path: /metrics
    port: metrics
  selector:
    matchLabels:
      k8s-app: aws-node

 

그래프 확인

 

 

그라파나 대시보드

nginx 웹 서버 추가

kubectl get servicemonitor -n monitoring nginx -o yaml | kubectl neat
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  annotations:
    meta.helm.sh/release-name: nginx
    meta.helm.sh/release-namespace: default
  labels:
    app.kubernetes.io/instance: nginx
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/name: nginx
    app.kubernetes.io/version: 1.27.4
    helm.sh/chart: nginx-19.0.0
  name: nginx
  namespace: monitoring
spec:
  endpoints:
  - interval: 10s
    path: /metrics
    port: metrics
  jobLabel: ""
  namespaceSelector:
    matchNames:
    - default
  selector:
    matchLabels:
      app.kubernetes.io/instance: nginx
      app.kubernetes.io/name: nginx

 

 

 

PromQL(Prometheus Query Language)

PromQL(Prometheus Query Language)은 프로메테우스에서 메트릭 데이터를 조회하고 분석하기 위한 쿼리 언어로, 프로메테우스에서 수집된 데이터를 활용하여 메트릭을 조회하고 필터링, 집계 등을 수행하여 다양하게 활용할 수 있다.

 

프로메테우스에서 데이터를 저장하는 방식은 시계열 데이터(time-series data)라고 한다. 이는 데이터를 시간(time)과 값(value) 형태로 저장한다.

 

 

커스텀 대시보드

 

variables 추가하여 대시보드 필터링 동작 확인

 

 

 

# Inbound traffic (Time series): per-namespace receive rate in bytes/s over 5m.
sum(rate(container_network_receive_bytes_total{namespace=~"$Namespace"}[5m])) by (namespace)

# Outbound traffic (Time series): per-namespace transmit rate in bytes/s over 5m.
sum(rate(container_network_transmit_bytes_total{namespace=~"$Namespace"}[5m])) by (namespace)

# Number of ReplicaSets (Stat): one series per ReplicaSet, counted per namespace.
count(kube_replicaset_status_ready_replicas{namespace=~"$Namespace"}) by (namespace)

# Number of running pods (Stat).
# kube_pod_info has a series for every pod regardless of phase, so counting it
# also includes Pending/Succeeded/Failed pods. kube_pod_status_phase is 1 only
# for a pod's current phase, so summing the Running series counts running pods.
sum(kube_pod_status_phase{namespace=~"$Namespace", phase="Running"}) by (namespace)

 

네임스페이스별로 인바운드/아웃바운드 트래픽, 레플리카셋 개수, 파드 개수를 필터링하였다.

 

알럿