updated for infra

This commit is contained in:
Arseni
2026-03-10 20:40:20 +03:00
parent 9c2b3bf8bd
commit 840a7f85c8
13 changed files with 661 additions and 12 deletions

View File

@@ -0,0 +1,145 @@
# Credentials consumed by the vault-agent-monitoring service, which mounts
# them at /etc/vault/role_id and /etc/vault/secret_id. Both are declared
# external: they are not created by this file and must exist beforehand.
secrets:
  monitoring_vault_role_id:
    external: true

  monitoring_vault_secret_id:
    external: true
# Network that every service in this file attaches to. It is declared
# external, so this file expects it to exist and does not create it.
networks:
  cicd:
    external: true
volumes:
  # Persistent per-service state on the default volume driver.
  loki_data: {}
  grafana_data: {}
  prometheus_data: {}
  alertmanager_data: {}

  # tmpfs-backed volume shared between vault-agent-monitoring (read-write)
  # and alertmanager (read-only); its contents live in memory only.
  alertmanager_config:
    driver: local
    driver_opts:
      type: tmpfs
      device: tmpfs
      o: "size=8m,uid=0,gid=0,mode=0755"

  # tmpfs-backed volume shared between vault-agent-monitoring (read-write)
  # and grafana (read-only).
  # NOTE(review): uid/gid 472 presumably matches the user the Grafana
  # container runs as — confirm against the image in use.
  vault_secrets:
    driver: local
    driver_opts:
      type: tmpfs
      device: tmpfs
      o: "size=32m,uid=472,gid=472,mode=0750"
services:
  # Vault agent for the monitoring stack. It reads its credentials from the
  # two mounted secrets and writes into the vault_secrets and
  # alertmanager_config volumes. Judging by the healthcheck below and the
  # wait loops in the grafana/alertmanager services, it is expected to render
  # /vault/secrets/grafana.env and /vault/alertmanager/alertmanager.yml
  # (agent.hcl and the templates are not visible here — confirm).
  # NOTE(review): every image in this file uses the floating ":latest" tag;
  # consider pinning explicit versions for reproducible deployments.
  vault-agent-monitoring:
    image: hashicorp/vault:latest
    networks:
      - cicd
    # IPC_LOCK lets the process lock memory (mlock).
    cap_add:
      - IPC_LOCK
    environment:
      VAULT_ADDR: "http://vault:8200"
    command:
      - sh
      - "-lc"
      - vault agent -config=/etc/vault/agent.hcl
    secrets:
      - source: monitoring_vault_role_id
        target: /etc/vault/role_id
      - source: monitoring_vault_secret_id
        target: /etc/vault/secret_id
    volumes:
      - ./vault-agent/agent.hcl:/etc/vault/agent.hcl:ro
      - ./vault-agent/templates:/etc/vault/templates:ro
      - vault_secrets:/vault/secrets:rw
      - alertmanager_config:/vault/alertmanager:rw
    # Healthy once the Grafana env file exists and is non-empty.
    healthcheck:
      test: ["CMD-SHELL", "test -s /vault/secrets/grafana.env"]
      interval: 30s
      timeout: 5s
      retries: 3
    deploy:
      restart_policy:
        condition: any

  # Prometheus server with 30-day TSDB retention.
  prometheus:
    image: prom/prometheus:latest
    networks:
      - cicd
    # NOTE(review): --web.enable-lifecycle turns on the HTTP reload/shutdown
    # endpoints, and this service is published through Traefik with no auth
    # middleware visible in this file — confirm access is restricted.
    command:
      - --config.file=/etc/prometheus/prometheus.yml
      - --storage.tsdb.path=/prometheus
      - --storage.tsdb.retention.time=30d
      - --web.enable-lifecycle
    volumes:
      - ./prometheus/config.yml:/etc/prometheus/prometheus.yml:ro
      - prometheus_data:/prometheus
    healthcheck:
      test: ["CMD", "wget", "--spider", "-q", "http://localhost:9090/-/ready"]
      interval: 30s
      timeout: 5s
      retries: 3
    # NOTE(review): these are container-level labels. If Traefik discovers
    # services through its Swarm provider it reads `deploy.labels` instead —
    # confirm which provider is in use (applies to all routed services here).
    labels:
      - "traefik.enable=true"
      - "traefik.http.routers.prometheus.rule=Host(`prometheus.sendico.io`)"
      - "traefik.http.routers.prometheus.entrypoints=websecure"
      - "traefik.http.routers.prometheus.tls.certresolver=letsencrypt"
      - "traefik.http.services.prometheus.loadbalancer.server.port=9090"
    deploy:
      restart_policy:
        condition: any

  # Alertmanager, started only after its config file has appeared in the
  # shared alertmanager_config volume.
  alertmanager:
    image: prom/alertmanager:latest
    networks:
      - cicd
    # The official image's ENTRYPOINT is /bin/alertmanager, so a shell wrapper
    # supplied via `command` would be handed to alertmanager as arguments
    # instead of being executed. Overriding `entrypoint` makes the wait loop
    # actually run before alertmanager is exec'd.
    entrypoint:
      - sh
      - "-c"
      - |
        while [ ! -s /vault/alertmanager/alertmanager.yml ]; do
          echo "⏳ waiting for alertmanager.yml"
          sleep 2
        done
        exec /bin/alertmanager \
          --config.file=/vault/alertmanager/alertmanager.yml \
          --storage.path=/alertmanager
    volumes:
      - alertmanager_data:/alertmanager
      - alertmanager_config:/vault/alertmanager:ro
    healthcheck:
      test: ["CMD", "wget", "--spider", "-q", "http://localhost:9093/-/ready"]
      interval: 30s
      timeout: 5s
      retries: 3
    labels:
      - "traefik.enable=true"
      - "traefik.http.routers.alertmanager.rule=Host(`alertmanager.sendico.io`)"
      - "traefik.http.routers.alertmanager.entrypoints=websecure"
      - "traefik.http.routers.alertmanager.tls.certresolver=letsencrypt"
      - "traefik.http.services.alertmanager.loadbalancer.server.port=9093"
    deploy:
      restart_policy:
        condition: any

  # Loki, configured from the bind-mounted loki/config.yml.
  loki:
    image: grafana/loki:latest
    networks:
      - cicd
    command: ["-config.file=/etc/loki/config.yml"]
    volumes:
      - ./loki/config.yml:/etc/loki/config.yml:ro
      - loki_data:/loki
    # NOTE(review): assumes the image ships a `wget` binary — confirm for the
    # Loki version that ":latest" resolves to.
    healthcheck:
      test: ["CMD", "wget", "--spider", "-q", "http://localhost:3100/ready"]
      interval: 30s
      timeout: 5s
      retries: 5
    deploy:
      restart_policy:
        condition: any

  # Grafana, started only after the env file has appeared in the shared
  # vault_secrets volume; the file is sourced with auto-export so its
  # variables reach the Grafana process.
  grafana:
    image: grafana/grafana:latest
    networks:
      - cicd
    # The official image's ENTRYPOINT is /run.sh, so the shell wrapper must
    # replace the entrypoint (not the command) for the wait loop and the env
    # sourcing to run; it hands over to /run.sh at the end.
    entrypoint:
      - sh
      - "-c"
      - |
        while [ ! -s /vault/secrets/grafana.env ]; do
          echo "⏳ waiting for grafana.env"
          sleep 2
        done
        set -a
        . /vault/secrets/grafana.env
        set +a
        exec /run.sh
    volumes:
      - grafana_data:/var/lib/grafana
      - vault_secrets:/vault/secrets:ro
    healthcheck:
      test: ["CMD", "wget", "--spider", "-q", "http://localhost:3000/api/health"]
      interval: 30s
      timeout: 5s
      retries: 5
    labels:
      - "traefik.enable=true"
      - "traefik.http.routers.grafana.rule=Host(`grafana.sendico.io`)"
      - "traefik.http.routers.grafana.entrypoints=websecure"
      - "traefik.http.routers.grafana.tls.certresolver=letsencrypt"
      - "traefik.http.services.grafana.loadbalancer.server.port=3000"
    deploy:
      restart_policy:
        condition: any

View File

@@ -0,0 +1,37 @@
# loki/config.yml — single-binary, filesystem-backed TSDB storage, 7-day retention
# HTTP listener; port 3100 is what the Compose healthcheck and the Prometheus
# scrape job target.
server:
  http_listen_port: 3100

# Settings shared across Loki components in this single-instance setup.
common:
  # instance_addr is an option of the `common` block, not of `server`;
  # it is the address this instance advertises in the ring.
  instance_addr: 127.0.0.1
  path_prefix: /loki
  storage:
    filesystem:
      chunks_directory: /loki/chunks
      rules_directory: /loki/rules
  # Single instance: no replication, ring state kept in memory.
  replication_factor: 1
  ring:
    kvstore:
      store: inmemory
# Index/chunk schema: TSDB index on the local filesystem, schema v13,
# one index table per 24h, effective from 2025-01-01.
schema_config:
  configs:
    - from: "2025-01-01"
      store: tsdb
      object_store: filesystem
      schema: v13
      index:
        prefix: index_
        period: 24h
# 168h = 7 days: data older than this is eligible for deletion, and queries
# may not look back further than the same window.
limits_config:
  retention_period: 168h
  max_query_lookback: 168h
  allow_structured_metadata: true
# Compactor runs every 5 minutes with retention enabled; delete requests are
# stored on the filesystem.
compactor:
  working_directory: /loki/compactor
  compaction_interval: 5m
  retention_enabled: true
  delete_request_store: filesystem

View File

@@ -0,0 +1,22 @@
# Defaults applied to every scrape job and to rule evaluation.
global:
  scrape_interval: 15s
  evaluation_interval: 15s
# Alerts are sent to the alertmanager service on port 9093.
alerting:
  alertmanagers:
    - static_configs:
        - targets:
            - 'alertmanager:9093'
scrape_configs:
  # Prometheus scraping its own endpoint.
  - job_name: prometheus
    static_configs:
      - targets:
          - 'localhost:9090'

  # Loki's HTTP port.
  - job_name: loki
    static_configs:
      - targets:
          - 'loki:3100'

  # Uncomment if Grafana metrics are enabled:
  # - job_name: grafana
  #   static_configs:
  #     - targets: ['grafana:3000']