updated for infra
This commit is contained in:
145
infra/monitoring/docker-compose.yml
Normal file
145
infra/monitoring/docker-compose.yml
Normal file
@@ -0,0 +1,145 @@
|
||||
# Secrets consumed by the vault-agent-monitoring service.
# Both are declared external, i.e. they must already exist outside this file
# before the stack is deployed.
# NOTE(review): names suggest a Vault AppRole role_id / secret_id pair — confirm.
secrets:
  monitoring_vault_role_id:
    external: true

  monitoring_vault_secret_id:
    external: true
|
||||
|
||||
# Every service in this file attaches to the pre-existing `cicd` network.
# `external: true` means this file does not create it.
networks:
  cicd:
    external: true
|
||||
|
||||
volumes:
  # Persistent data volumes (default driver, no options).
  loki_data:
  grafana_data:
  prometheus_data:
  alertmanager_data:

  # tmpfs-backed volume shared between vault-agent-monitoring (rw) and
  # alertmanager (ro); contents live in memory only.
  alertmanager_config:
    driver: local
    driver_opts:
      type: tmpfs
      device: tmpfs
      o: "size=8m,uid=0,gid=0,mode=0755"

  # tmpfs-backed volume shared between vault-agent-monitoring (rw) and
  # grafana (ro); owned by uid/gid 472.
  # NOTE(review): 472 is presumably the Grafana container user — confirm.
  vault_secrets:
    driver: local
    driver_opts:
      type: tmpfs
      device: tmpfs
      o: "size=32m,uid=472,gid=472,mode=0750"
|
||||
|
||||
# NOTE(review): every image below uses the floating `latest` tag; consider
# pinning explicit versions so redeploys are reproducible.
services:
  # Vault Agent sidecar. Reads its credentials from the two mounted secrets and
  # writes into the shared tmpfs volumes. The healthcheck passes once
  # /vault/secrets/grafana.env exists and is non-empty.
  # NOTE(review): the rendered file names are presumably defined by the
  # templates in ./vault-agent/templates — not visible here.
  vault-agent-monitoring:
    image: hashicorp/vault:latest
    networks: [cicd]
    cap_add: ["IPC_LOCK"]
    environment:
      VAULT_ADDR: "http://vault:8200"
    command: >
      sh -lc 'vault agent -config=/etc/vault/agent.hcl'
    secrets:
      - source: monitoring_vault_role_id
        target: /etc/vault/role_id
      - source: monitoring_vault_secret_id
        target: /etc/vault/secret_id
    volumes:
      - ./vault-agent/agent.hcl:/etc/vault/agent.hcl:ro
      - ./vault-agent/templates:/etc/vault/templates:ro
      - vault_secrets:/vault/secrets:rw
      - alertmanager_config:/vault/alertmanager:rw
    healthcheck:
      test: ["CMD-SHELL", "test -s /vault/secrets/grafana.env"]
      interval: 30s
      timeout: 5s
      retries: 3
    deploy:
      restart_policy:
        condition: any

  # Prometheus with 30-day TSDB retention, routed through Traefik.
  prometheus:
    image: prom/prometheus:latest
    networks: [cicd]
    command:
      - --config.file=/etc/prometheus/prometheus.yml
      - --storage.tsdb.path=/prometheus
      - --storage.tsdb.retention.time=30d
      # NOTE(review): the lifecycle API (/-/reload, /-/quit) is enabled while
      # the router below has no auth middleware attached — confirm access is
      # restricted elsewhere.
      - --web.enable-lifecycle
    volumes:
      - ./prometheus/config.yml:/etc/prometheus/prometheus.yml:ro
      - prometheus_data:/prometheus
    healthcheck:
      test: ["CMD", "wget", "--spider", "-q", "http://localhost:9090/-/ready"]
      interval: 30s
      timeout: 5s
      retries: 3
    # NOTE(review): if this stack is deployed with `docker stack deploy`,
    # Traefik's Swarm provider reads labels from `deploy.labels`, not from
    # container-level `labels` — confirm which provider is in use. The same
    # applies to the alertmanager and grafana labels.
    labels:
      - "traefik.enable=true"
      - "traefik.http.routers.prometheus.rule=Host(`prometheus.sendico.io`)"
      - "traefik.http.routers.prometheus.entrypoints=websecure"
      - "traefik.http.routers.prometheus.tls.certresolver=letsencrypt"
      - "traefik.http.services.prometheus.loadbalancer.server.port=9090"
    deploy:
      restart_policy:
        condition: any

  # Alertmanager. Waits for the config file on the shared tmpfs volume before
  # starting the real binary.
  alertmanager:
    image: prom/alertmanager:latest
    networks: [cicd]
    # The image's own ENTRYPOINT is /bin/alertmanager, so a shell wrapper given
    # only as `command` would be passed to the binary as arguments. Override
    # the entrypoint so the wait loop actually runs in a shell.
    entrypoint: ["/bin/sh", "-c"]
    command:
      - |
        while [ ! -s /vault/alertmanager/alertmanager.yml ]; do
          echo "⏳ waiting for alertmanager.yml"
          sleep 2
        done
        exec /bin/alertmanager --config.file=/vault/alertmanager/alertmanager.yml --storage.path=/alertmanager
    volumes:
      - alertmanager_data:/alertmanager
      - alertmanager_config:/vault/alertmanager:ro
    healthcheck:
      test: ["CMD", "wget", "--spider", "-q", "http://localhost:9093/-/ready"]
      interval: 30s
      timeout: 5s
      retries: 3
    labels:
      - "traefik.enable=true"
      - "traefik.http.routers.alertmanager.rule=Host(`alertmanager.sendico.io`)"
      - "traefik.http.routers.alertmanager.entrypoints=websecure"
      - "traefik.http.routers.alertmanager.tls.certresolver=letsencrypt"
      - "traefik.http.services.alertmanager.loadbalancer.server.port=9093"
    deploy:
      restart_policy:
        condition: any

  # Loki single binary, configured from ./loki/config.yml.
  loki:
    image: grafana/loki:latest
    networks: [cicd]
    command: ["-config.file=/etc/loki/config.yml"]
    volumes:
      - ./loki/config.yml:/etc/loki/config.yml:ro
      - loki_data:/loki
    healthcheck:
      test: ["CMD", "wget", "--spider", "-q", "http://localhost:3100/ready"]
      interval: 30s
      timeout: 5s
      retries: 5
    deploy:
      restart_policy:
        condition: any

  # Grafana. Waits for grafana.env on the shared tmpfs volume, exports every
  # variable it defines, then hands over to the image's /run.sh.
  grafana:
    image: grafana/grafana:latest
    networks: [cicd]
    # The image's own ENTRYPOINT is /run.sh, so a shell wrapper given only as
    # `command` would be forwarded to Grafana as arguments. Override the
    # entrypoint so the env file is sourced before /run.sh starts.
    entrypoint: ["/bin/sh", "-c"]
    command:
      - |
        while [ ! -s /vault/secrets/grafana.env ]; do
          echo "⏳ waiting for grafana.env"
          sleep 2
        done
        set -a
        . /vault/secrets/grafana.env
        set +a
        exec /run.sh
    volumes:
      - grafana_data:/var/lib/grafana
      - vault_secrets:/vault/secrets:ro
    healthcheck:
      test: ["CMD", "wget", "--spider", "-q", "http://localhost:3000/api/health"]
      interval: 30s
      timeout: 5s
      retries: 5
    labels:
      - "traefik.enable=true"
      - "traefik.http.routers.grafana.rule=Host(`grafana.sendico.io`)"
      - "traefik.http.routers.grafana.entrypoints=websecure"
      - "traefik.http.routers.grafana.tls.certresolver=letsencrypt"
      - "traefik.http.services.grafana.loadbalancer.server.port=3000"
    deploy:
      restart_policy:
        condition: any
|
||||
37
infra/monitoring/loki/config.yml
Normal file
37
infra/monitoring/loki/config.yml
Normal file
@@ -0,0 +1,37 @@
|
||||
# loki/config.yml — single-binary, filesystem-backed TSDB storage, 7-day retention
#
# NOTE(review): `auth_enabled` is not set, so Loki's default (true) applies and
# clients must send an X-Scope-OrgID header — confirm this is intended for a
# single-tenant setup.

server:
  http_listen_port: 3100

common:
  # instance_addr is an option of the `common` block, not of `server`; Loki
  # rejects unknown fields when parsing its config, so it must live here.
  instance_addr: 127.0.0.1
  path_prefix: /loki
  storage:
    filesystem:
      chunks_directory: /loki/chunks
      rules_directory: /loki/rules
  replication_factor: 1
  ring:
    kvstore:
      store: inmemory

schema_config:
  configs:
    # Date is quoted so it stays a string rather than a YAML timestamp.
    - from: "2025-01-01"
      store: tsdb
      object_store: filesystem
      schema: v13
      index:
        prefix: index_
        period: 24h

limits_config:
  # 168h = 7 days; queries are capped at the same window as retention.
  retention_period: 168h
  max_query_lookback: 168h
  allow_structured_metadata: true

compactor:
  working_directory: /loki/compactor
  compaction_interval: 5m
  # Retention is applied by the compactor; delete requests are stored on the
  # local filesystem alongside the rest of the data.
  retention_enabled: true
  delete_request_store: filesystem
|
||||
22
infra/monitoring/prometheus/config.yml
Normal file
22
infra/monitoring/prometheus/config.yml
Normal file
@@ -0,0 +1,22 @@
|
||||
# Scrape targets and evaluate rules every 15 seconds.
global:
  scrape_interval: 15s
  evaluation_interval: 15s

# Alerts are sent to the alertmanager service on port 9093.
alerting:
  alertmanagers:
    - static_configs:
        - targets:
            - 'alertmanager:9093'

scrape_configs:
  # Prometheus scraping itself.
  - job_name: prometheus
    static_configs:
      - targets:
          - 'localhost:9090'

  # Loki, addressed by its service name.
  - job_name: loki
    static_configs:
      - targets:
          - 'loki:3100'

  # Uncomment if Grafana metrics are enabled:
  # - job_name: grafana
  #   static_configs:
  #     - targets: ['grafana:3000']
|
||||
Reference in New Issue
Block a user