R2: load-test harness + contour resource observability
CI / changes (pull_request) Successful in 2s
CI / unit (pull_request) Successful in 9s
CI / integration (pull_request) Successful in 11s
CI / ui (pull_request) Successful in 38s
CI / gate (pull_request) Successful in 0s
CI / deploy (pull_request) Failing after 3s
CI / changes (pull_request) Successful in 2s
CI / unit (pull_request) Successful in 9s
CI / integration (pull_request) Successful in 11s
CI / ui (pull_request) Successful in 38s
CI / gate (pull_request) Successful in 0s
CI / deploy (pull_request) Failing after 3s
New scrabble/loadtest module (the pre-release stress harness): seeds 1000 guest + 10000 durable accounts with pre-created sessions directly in Postgres (token hash matches backend/internal/session), drives virtual players through the edge protocol (real 2-4p games assembled via invitations, mid-ranked legal moves generated locally by the embedded scrabble-solver — the edge carries no board, so the client replays history), plus nudge/chat/check-word/draft/profile/stats and a gateway-hammer that verifies the rate limiter. Prints a trip-report summary (per-op latency percentiles, result codes, live-event tally). Go unit tests cover the pure pieces; the DAWG-backed move test runs under BACKEND_DICT_DIR. Contour: add cAdvisor + postgres_exporter + a 'Scrabble - Resources' Grafana dashboard and the two Prometheus scrape jobs, for the R2/R7 stress-run resource baseline. CI: gate ./loadtest/... (path filter + vet/build/test). Docs: TESTING, ARCHITECTURE, project CLAUDE repo layout.
This commit is contained in:
@@ -225,6 +225,38 @@ services:
|
||||
- grafana-data:/var/lib/grafana
|
||||
networks: [internal]
|
||||
|
||||
# cAdvisor: per-container CPU / memory / network / disk metrics, collected as the
# resource baseline for the R2/R7 stress runs. Prometheus reaches it on :8080 via
# the internal network. The container needs read access to the host's cgroup and
# container state (hence the read-only host mounts below); --docker_only drops
# cgroup series that do not belong to a container.
cadvisor:
  image: gcr.io/cadvisor/cadvisor:v0.49.1
  container_name: scrabble-cadvisor
  restart: unless-stopped
  privileged: true
  command:
    - "--docker_only=true"
    - "--housekeeping_interval=15s"
  devices:
    - /dev/kmsg
  volumes:
    - /:/rootfs:ro
    - /var/run:/var/run:ro
    - /sys:/sys:ro
    - /var/lib/docker/:/var/lib/docker:ro
    - /dev/disk/:/dev/disk:ro
  networks:
    - internal
|
||||
|
||||
# postgres_exporter exports Postgres server metrics (connections, cache hit ratio,
# transactions, database size). Prometheus scrapes it at :9187. It reuses the
# contour Postgres credentials; sslmode=disable on the internal network.
#
# The connection is described with DATA_SOURCE_URI / DATA_SOURCE_USER /
# DATA_SOURCE_PASS instead of one DATA_SOURCE_NAME URL: interpolating the raw
# password into "postgresql://user:pass@host/..." breaks as soon as it contains a
# URL-reserved character (@ : / ? # %). With the split variables the exporter
# assembles the DSN itself.
postgres_exporter:
  container_name: scrabble-postgres-exporter
  image: prometheuscommunity/postgres-exporter:v0.16.0
  restart: unless-stopped
  depends_on: [postgres]
  environment:
    # host:port/database?params only, with no scheme and no credentials.
    DATA_SOURCE_URI: "postgres:5432/${POSTGRES_DB:-scrabble}?sslmode=disable"
    DATA_SOURCE_USER: "${POSTGRES_USER:-scrabble}"
    DATA_SOURCE_PASS: "${POSTGRES_PASSWORD}"
  networks: [internal]
|
||||
|
||||
networks:
|
||||
internal:
|
||||
name: scrabble-internal
|
||||
|
||||
@@ -0,0 +1,84 @@
|
||||
{
  "uid": "scrabble-resources",
  "title": "Scrabble — Resources",
  "tags": ["scrabble"],
  "timezone": "",
  "schemaVersion": 39,
  "version": 1,
  "refresh": "30s",
  "time": { "from": "now-1h", "to": "now" },
  "panels": [
    {
      "type": "stat",
      "title": "Postgres connections",
      "description": "Backends connected to the scrabble database (postgres_exporter).",
      "gridPos": { "h": 5, "w": 6, "x": 0, "y": 0 },
      "datasource": { "type": "prometheus", "uid": "prometheus" },
      "targets": [{ "refId": "A", "expr": "sum(pg_stat_database_numbackends{datname=\"scrabble\"})" }]
    },
    {
      "type": "stat",
      "title": "Postgres cache hit ratio",
      "description": "blks_hit / (blks_hit + blks_read) over 5m.",
      "gridPos": { "h": 5, "w": 6, "x": 6, "y": 0 },
      "fieldConfig": { "defaults": { "unit": "percentunit", "min": 0, "max": 1 }, "overrides": [] },
      "datasource": { "type": "prometheus", "uid": "prometheus" },
      "targets": [{ "refId": "A", "expr": "sum(rate(pg_stat_database_blks_hit{datname=\"scrabble\"}[5m])) / ((sum(rate(pg_stat_database_blks_hit{datname=\"scrabble\"}[5m])) + sum(rate(pg_stat_database_blks_read{datname=\"scrabble\"}[5m]))) > 0)" }]
    },
    {
      "type": "stat",
      "title": "Postgres commits/s",
      "gridPos": { "h": 5, "w": 6, "x": 12, "y": 0 },
      "datasource": { "type": "prometheus", "uid": "prometheus" },
      "targets": [{ "refId": "A", "expr": "sum(rate(pg_stat_database_xact_commit{datname=\"scrabble\"}[5m]))" }]
    },
    {
      "type": "stat",
      "title": "Database size",
      "gridPos": { "h": 5, "w": 6, "x": 18, "y": 0 },
      "fieldConfig": { "defaults": { "unit": "bytes" }, "overrides": [] },
      "datasource": { "type": "prometheus", "uid": "prometheus" },
      "targets": [{ "refId": "A", "expr": "max(pg_database_size_bytes{datname=\"scrabble\"})" }]
    },
    {
      "type": "timeseries",
      "title": "Container CPU (cores) by container",
      "description": "cAdvisor container_cpu_usage_seconds_total rate, per scrabble-* container (the load harness appears when run as --name scrabble-loadtest). Verify the metric name against live Prometheus if empty.",
      "gridPos": { "h": 8, "w": 12, "x": 0, "y": 5 },
      "datasource": { "type": "prometheus", "uid": "prometheus" },
      "targets": [{ "refId": "A", "expr": "sum(rate(container_cpu_usage_seconds_total{name=~\"scrabble-.+\"}[5m])) by (name)", "legendFormat": "{{name}}" }]
    },
    {
      "type": "timeseries",
      "title": "Container memory (working set) by container",
      "description": "cAdvisor container_memory_working_set_bytes, per scrabble-* container.",
      "gridPos": { "h": 8, "w": 12, "x": 12, "y": 5 },
      "fieldConfig": { "defaults": { "unit": "bytes" }, "overrides": [] },
      "datasource": { "type": "prometheus", "uid": "prometheus" },
      "targets": [{ "refId": "A", "expr": "max(container_memory_working_set_bytes{name=~\"scrabble-.+\"}) by (name)", "legendFormat": "{{name}}" }]
    },
    {
      "type": "timeseries",
      "title": "Container network I/O by container",
      "description": "cAdvisor receive (+) and transmit (-) byte rates per scrabble-* container.",
      "gridPos": { "h": 8, "w": 12, "x": 0, "y": 13 },
      "fieldConfig": { "defaults": { "unit": "Bps" }, "overrides": [] },
      "datasource": { "type": "prometheus", "uid": "prometheus" },
      "targets": [
        { "refId": "A", "expr": "sum(rate(container_network_receive_bytes_total{name=~\"scrabble-.+\"}[5m])) by (name)", "legendFormat": "rx {{name}}" },
        { "refId": "B", "expr": "-sum(rate(container_network_transmit_bytes_total{name=~\"scrabble-.+\"}[5m])) by (name)", "legendFormat": "tx {{name}}" }
      ]
    },
    {
      "type": "timeseries",
      "title": "Postgres transactions/s",
      "description": "Commit and rollback rates on the scrabble database (postgres_exporter).",
      "gridPos": { "h": 8, "w": 12, "x": 12, "y": 13 },
      "datasource": { "type": "prometheus", "uid": "prometheus" },
      "targets": [
        { "refId": "A", "expr": "sum(rate(pg_stat_database_xact_commit{datname=\"scrabble\"}[5m]))", "legendFormat": "commit" },
        { "refId": "B", "expr": "sum(rate(pg_stat_database_xact_rollback{datname=\"scrabble\"}[5m]))", "legendFormat": "rollback" }
      ]
    }
  ]
}
|
||||
@@ -12,3 +12,12 @@ scrape_configs:
|
||||
# Prometheus scraping its own metrics endpoint.
- job_name: prometheus
  static_configs:
    - targets:
        - "localhost:9090"
|
||||
# cAdvisor: CPU / memory / network / disk metrics for every contour container,
# used as the resource baseline for the R2/R7 stress runs.
- job_name: cadvisor
  static_configs:
    - targets:
        - "cadvisor:8080"
|
||||
# postgres_exporter: Postgres server metrics (connections, cache hit ratio,
# transactions, database size).
- job_name: postgres_exporter
  static_configs:
    - targets:
        - "postgres_exporter:9187"
|
||||
|
||||
Reference in New Issue
Block a user