R7: final stress run + tuning #38
+70
-20
@@ -39,6 +39,13 @@ services:
|
|||||||
retries: 30
|
retries: 30
|
||||||
volumes:
|
volumes:
|
||||||
- postgres-data:/var/lib/postgresql/data
|
- postgres-data:/var/lib/postgresql/data
|
||||||
|
# R7 starting limits: 512M leaves headroom over the default 128 MB shared_buffers +
|
||||||
|
# per-connection memory (R2 peaked at 28 backends / 69 MiB RSS); tighten after the run.
|
||||||
|
deploy:
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpus: "2.0"
|
||||||
|
memory: 512M
|
||||||
networks: [internal]
|
networks: [internal]
|
||||||
|
|
||||||
backend:
|
backend:
|
||||||
@@ -65,8 +72,19 @@ services:
|
|||||||
BACKEND_OTEL_METRICS_EXPORTER: otlp
|
BACKEND_OTEL_METRICS_EXPORTER: otlp
|
||||||
OTEL_EXPORTER_OTLP_ENDPOINT: http://otelcol:4317
|
OTEL_EXPORTER_OTLP_ENDPOINT: http://otelcol:4317
|
||||||
OTEL_EXPORTER_OTLP_INSECURE: "true"
|
OTEL_EXPORTER_OTLP_INSECURE: "true"
|
||||||
|
# GOMAXPROCS matches the CPU limit below so the Go scheduler aligns with the
|
||||||
|
# cgroup quota (the runtime otherwise sees all of the host's cores).
|
||||||
|
GOMAXPROCS: "2"
|
||||||
# No container healthcheck: the distroless image has no shell/wget. Readiness
|
# No container healthcheck: the distroless image has no shell/wget. Readiness
|
||||||
# is covered by the CI post-deploy probe (GET / through caddy).
|
# is covered by the CI post-deploy probe (GET / through caddy).
|
||||||
|
# R7 starting limits (generous over the R2 ~1-core / <=100 MiB peak); tightened to
|
||||||
|
# the agreed prod values after the final stress run. deploy.resources.limits is
|
||||||
|
# honoured by `docker compose up` (Compose v2), not only by swarm.
|
||||||
|
deploy:
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpus: "2.0"
|
||||||
|
memory: 512M
|
||||||
networks: [internal]
|
networks: [internal]
|
||||||
|
|
||||||
gateway:
|
gateway:
|
||||||
@@ -97,8 +115,17 @@ services:
|
|||||||
GATEWAY_OTEL_METRICS_EXPORTER: otlp
|
GATEWAY_OTEL_METRICS_EXPORTER: otlp
|
||||||
OTEL_EXPORTER_OTLP_ENDPOINT: http://otelcol:4317
|
OTEL_EXPORTER_OTLP_ENDPOINT: http://otelcol:4317
|
||||||
OTEL_EXPORTER_OTLP_INSECURE: "true"
|
OTEL_EXPORTER_OTLP_INSECURE: "true"
|
||||||
|
# GOMAXPROCS matches the CPU limit below (see backend).
|
||||||
|
GOMAXPROCS: "2"
|
||||||
# GATEWAY_ADMIN_* intentionally unset: in the deployed contour the front
|
# GATEWAY_ADMIN_* intentionally unset: in the deployed contour the front
|
||||||
# caddy owns the /_gm Basic-Auth and routes /_gm to the backend directly.
|
# caddy owns the /_gm Basic-Auth and routes /_gm to the backend directly.
|
||||||
|
# R7 starting limits (generous over the R2 ~1-core / <=100 MiB peak); tighten after
|
||||||
|
# the final stress run.
|
||||||
|
deploy:
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpus: "2.0"
|
||||||
|
memory: 512M
|
||||||
networks: [internal]
|
networks: [internal]
|
||||||
|
|
||||||
# --- Landing (static) -------------------------------------------------------
|
# --- Landing (static) -------------------------------------------------------
|
||||||
@@ -121,6 +148,10 @@ services:
|
|||||||
VITE_GATEWAY_URL: ${VITE_GATEWAY_URL:-}
|
VITE_GATEWAY_URL: ${VITE_GATEWAY_URL:-}
|
||||||
VITE_APP_VERSION: ${APP_VERSION:-dev}
|
VITE_APP_VERSION: ${APP_VERSION:-dev}
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
|
deploy:
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
memory: 128M
|
||||||
networks: [internal]
|
networks: [internal]
|
||||||
|
|
||||||
# --- Telegram connector (egress via the VPN sidecar) -----------------------
|
# --- Telegram connector (egress via the VPN sidecar) -----------------------
|
||||||
@@ -167,6 +198,13 @@ services:
|
|||||||
TELEGRAM_OTEL_METRICS_EXPORTER: otlp
|
TELEGRAM_OTEL_METRICS_EXPORTER: otlp
|
||||||
OTEL_EXPORTER_OTLP_ENDPOINT: http://otelcol:4317
|
OTEL_EXPORTER_OTLP_ENDPOINT: http://otelcol:4317
|
||||||
OTEL_EXPORTER_OTLP_INSECURE: "true"
|
OTEL_EXPORTER_OTLP_INSECURE: "true"
|
||||||
|
# The connector is light (the stress run does not drive Telegram); one P suffices.
|
||||||
|
GOMAXPROCS: "1"
|
||||||
|
deploy:
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpus: "1.0"
|
||||||
|
memory: 256M
|
||||||
|
|
||||||
# --- Edge reverse proxy (single /_gm Basic-Auth; SPA + Connect -> gateway;
|
# --- Edge reverse proxy (single /_gm Basic-Auth; SPA + Connect -> gateway;
|
||||||
# the catch-all incl. the landing -> the static landing container) -------
|
# the catch-all incl. the landing -> the static landing container) -------
|
||||||
@@ -183,6 +221,10 @@ services:
|
|||||||
volumes:
|
volumes:
|
||||||
- ${SCRABBLE_CONFIG_DIR:-.}/caddy/Caddyfile:/etc/caddy/Caddyfile:ro
|
- ${SCRABBLE_CONFIG_DIR:-.}/caddy/Caddyfile:/etc/caddy/Caddyfile:ro
|
||||||
- caddy-data:/data
|
- caddy-data:/data
|
||||||
|
deploy:
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
memory: 128M
|
||||||
networks:
|
networks:
|
||||||
internal: {}
|
internal: {}
|
||||||
edge:
|
edge:
|
||||||
@@ -194,8 +236,19 @@ services:
|
|||||||
image: otel/opentelemetry-collector-contrib:0.119.0
|
image: otel/opentelemetry-collector-contrib:0.119.0
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
command: ["--config=/etc/otelcol/config.yaml"]
|
command: ["--config=/etc/otelcol/config.yaml"]
|
||||||
|
# The docker_stats receiver reads per-container metrics from the Docker API, so the
|
||||||
|
# collector (image UID 10001) joins the host's docker group to read the socket —
|
||||||
|
# DOCKER_GID defaults to the contour host's 989; set it for other hosts (prod). The
|
||||||
|
# socket is mounted read-only. This replaces cAdvisor, whose per-container metrics
|
||||||
|
# are empty on this host (separate-XFS /var/lib/docker).
|
||||||
|
group_add: ["${DOCKER_GID:-989}"]
|
||||||
volumes:
|
volumes:
|
||||||
- ${SCRABBLE_CONFIG_DIR:-.}/otelcol/config.yaml:/etc/otelcol/config.yaml:ro
|
- ${SCRABBLE_CONFIG_DIR:-.}/otelcol/config.yaml:/etc/otelcol/config.yaml:ro
|
||||||
|
- /var/run/docker.sock:/var/run/docker.sock:ro
|
||||||
|
deploy:
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
memory: 512M
|
||||||
networks: [internal]
|
networks: [internal]
|
||||||
|
|
||||||
prometheus:
|
prometheus:
|
||||||
@@ -208,6 +261,10 @@ services:
|
|||||||
volumes:
|
volumes:
|
||||||
- ${SCRABBLE_CONFIG_DIR:-.}/prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro
|
- ${SCRABBLE_CONFIG_DIR:-.}/prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro
|
||||||
- prometheus-data:/prometheus
|
- prometheus-data:/prometheus
|
||||||
|
deploy:
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
memory: 512M
|
||||||
networks: [internal]
|
networks: [internal]
|
||||||
|
|
||||||
tempo:
|
tempo:
|
||||||
@@ -218,6 +275,11 @@ services:
|
|||||||
volumes:
|
volumes:
|
||||||
- ${SCRABBLE_CONFIG_DIR:-.}/tempo/tempo.yaml:/etc/tempo/tempo.yaml:ro
|
- ${SCRABBLE_CONFIG_DIR:-.}/tempo/tempo.yaml:/etc/tempo/tempo.yaml:ro
|
||||||
- tempo-data:/var/tempo
|
- tempo-data:/var/tempo
|
||||||
|
# tempo peaked at ~446 MiB in R2; 1G leaves headroom for the final run.
|
||||||
|
deploy:
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
memory: 1G
|
||||||
networks: [internal]
|
networks: [internal]
|
||||||
|
|
||||||
grafana:
|
grafana:
|
||||||
@@ -247,26 +309,10 @@ services:
|
|||||||
# provider logs "no such file or directory").
|
# provider logs "no such file or directory").
|
||||||
- ${SCRABBLE_CONFIG_DIR:-.}/grafana/dashboards:/etc/grafana/dashboards:ro
|
- ${SCRABBLE_CONFIG_DIR:-.}/grafana/dashboards:/etc/grafana/dashboards:ro
|
||||||
- grafana-data:/var/lib/grafana
|
- grafana-data:/var/lib/grafana
|
||||||
networks: [internal]
|
deploy:
|
||||||
|
resources:
|
||||||
# cAdvisor exports per-container resource metrics (CPU / memory / network / disk)
|
limits:
|
||||||
# for the R2/R7 stress runs' resource baseline. Prometheus scrapes it at :8080
|
memory: 512M
|
||||||
# over the internal network. It needs read access to the host's cgroup and
|
|
||||||
# container state; --docker_only trims non-container cgroup series.
|
|
||||||
cadvisor:
|
|
||||||
container_name: scrabble-cadvisor
|
|
||||||
image: gcr.io/cadvisor/cadvisor:v0.49.1
|
|
||||||
restart: unless-stopped
|
|
||||||
privileged: true
|
|
||||||
command: ["--docker_only=true", "--housekeeping_interval=15s"]
|
|
||||||
devices:
|
|
||||||
- /dev/kmsg
|
|
||||||
volumes:
|
|
||||||
- /:/rootfs:ro
|
|
||||||
- /var/run:/var/run:ro
|
|
||||||
- /sys:/sys:ro
|
|
||||||
- /var/lib/docker/:/var/lib/docker:ro
|
|
||||||
- /dev/disk/:/dev/disk:ro
|
|
||||||
networks: [internal]
|
networks: [internal]
|
||||||
|
|
||||||
# postgres_exporter exports Postgres server metrics (connections, cache hit ratio,
|
# postgres_exporter exports Postgres server metrics (connections, cache hit ratio,
|
||||||
@@ -279,6 +325,10 @@ services:
|
|||||||
depends_on: [postgres]
|
depends_on: [postgres]
|
||||||
environment:
|
environment:
|
||||||
DATA_SOURCE_NAME: postgresql://${POSTGRES_USER:-scrabble}:${POSTGRES_PASSWORD}@postgres:5432/${POSTGRES_DB:-scrabble}?sslmode=disable
|
DATA_SOURCE_NAME: postgresql://${POSTGRES_USER:-scrabble}:${POSTGRES_PASSWORD}@postgres:5432/${POSTGRES_DB:-scrabble}?sslmode=disable
|
||||||
|
deploy:
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
memory: 128M
|
||||||
networks: [internal]
|
networks: [internal]
|
||||||
|
|
||||||
networks:
|
networks:
|
||||||
|
|||||||
@@ -4,7 +4,7 @@
|
|||||||
"tags": ["scrabble"],
|
"tags": ["scrabble"],
|
||||||
"timezone": "",
|
"timezone": "",
|
||||||
"schemaVersion": 39,
|
"schemaVersion": 39,
|
||||||
"version": 1,
|
"version": 2,
|
||||||
"refresh": "30s",
|
"refresh": "30s",
|
||||||
"time": { "from": "now-1h", "to": "now" },
|
"time": { "from": "now-1h", "to": "now" },
|
||||||
"panels": [
|
"panels": [
|
||||||
@@ -43,30 +43,30 @@
|
|||||||
{
|
{
|
||||||
"type": "timeseries",
|
"type": "timeseries",
|
||||||
"title": "Container CPU (cores) by container",
|
"title": "Container CPU (cores) by container",
|
||||||
"description": "cAdvisor container_cpu_usage_seconds_total rate, per scrabble-* container (the load harness appears when run as --name scrabble-loadtest). Verify the metric name against live Prometheus if empty.",
|
"description": "docker_stats container.cpu.utilization (a gauge where 100 == one core) / 100, per scrabble-* container; the load harness appears when run as --name scrabble-loadtest. Verify the scaling against live Prometheus.",
|
||||||
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 5 },
|
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 5 },
|
||||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||||
"targets": [{ "refId": "A", "expr": "sum(rate(container_cpu_usage_seconds_total{name=~\"scrabble-.+\"}[5m])) by (name)", "legendFormat": "{{name}}" }]
|
"targets": [{ "refId": "A", "expr": "max(container_cpu_utilization{container_name=~\"scrabble-.+\"}) by (container_name) / 100", "legendFormat": "{{container_name}}" }]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"type": "timeseries",
|
"type": "timeseries",
|
||||||
"title": "Container memory (working set) by container",
|
"title": "Container memory (usage) by container",
|
||||||
"description": "cAdvisor container_memory_working_set_bytes, per scrabble-* container.",
|
"description": "docker_stats container.memory.usage.total bytes, per scrabble-* container.",
|
||||||
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 5 },
|
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 5 },
|
||||||
"fieldConfig": { "defaults": { "unit": "bytes" }, "overrides": [] },
|
"fieldConfig": { "defaults": { "unit": "bytes" }, "overrides": [] },
|
||||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||||
"targets": [{ "refId": "A", "expr": "max(container_memory_working_set_bytes{name=~\"scrabble-.+\"}) by (name)", "legendFormat": "{{name}}" }]
|
"targets": [{ "refId": "A", "expr": "max(container_memory_usage_total{container_name=~\"scrabble-.+\"}) by (container_name)", "legendFormat": "{{container_name}}" }]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"type": "timeseries",
|
"type": "timeseries",
|
||||||
"title": "Container network I/O by container",
|
"title": "Container network I/O by container",
|
||||||
"description": "cAdvisor receive (+) and transmit (-) byte rates per scrabble-* container.",
|
"description": "docker_stats receive (+) and transmit (-) byte rates per scrabble-* container (summed across interfaces).",
|
||||||
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 13 },
|
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 13 },
|
||||||
"fieldConfig": { "defaults": { "unit": "Bps" }, "overrides": [] },
|
"fieldConfig": { "defaults": { "unit": "Bps" }, "overrides": [] },
|
||||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||||
"targets": [
|
"targets": [
|
||||||
{ "refId": "A", "expr": "sum(rate(container_network_receive_bytes_total{name=~\"scrabble-.+\"}[5m])) by (name)", "legendFormat": "rx {{name}}" },
|
{ "refId": "A", "expr": "sum(rate(container_network_io_usage_rx_bytes{container_name=~\"scrabble-.+\"}[5m])) by (container_name)", "legendFormat": "rx {{container_name}}" },
|
||||||
{ "refId": "B", "expr": "-sum(rate(container_network_transmit_bytes_total{name=~\"scrabble-.+\"}[5m])) by (name)", "legendFormat": "tx {{name}}" }
|
{ "refId": "B", "expr": "-sum(rate(container_network_io_usage_tx_bytes{container_name=~\"scrabble-.+\"}[5m])) by (container_name)", "legendFormat": "tx {{container_name}}" }
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -6,6 +6,22 @@ receivers:
|
|||||||
protocols:
|
protocols:
|
||||||
grpc:
|
grpc:
|
||||||
endpoint: 0.0.0.0:4317
|
endpoint: 0.0.0.0:4317
|
||||||
|
# Per-container resource metrics (CPU / memory / network) read straight from the
|
||||||
|
# Docker API. This replaces cAdvisor, which on the contour host resolves only the
|
||||||
|
# root cgroup (its /var/lib/docker is a separate XFS mount), and works the same in
|
||||||
|
# prod. The collector reaches the socket via group_add in docker-compose.yml.
|
||||||
|
# collection_interval matches Prometheus' 30s scrape. container.cpu.utilization is a
|
||||||
|
# gauge where 100 == one core (it mirrors `docker stats` CPU%).
|
||||||
|
docker_stats:
|
||||||
|
endpoint: unix:///var/run/docker.sock
|
||||||
|
# The receiver defaults to Docker API 1.25, but the daemon's minimum is 1.40
|
||||||
|
# (server speaks up to 1.54); pin a version both the receiver's client and the
|
||||||
|
# daemon accept, else the receiver fails to start ("client version too old").
|
||||||
|
api_version: "1.44"
|
||||||
|
collection_interval: 30s
|
||||||
|
metrics:
|
||||||
|
container.cpu.utilization:
|
||||||
|
enabled: true
|
||||||
|
|
||||||
processors:
|
processors:
|
||||||
batch: {}
|
batch: {}
|
||||||
@@ -33,6 +49,6 @@ service:
|
|||||||
processors: [batch]
|
processors: [batch]
|
||||||
exporters: [otlp/tempo]
|
exporters: [otlp/tempo]
|
||||||
metrics:
|
metrics:
|
||||||
receivers: [otlp]
|
receivers: [otlp, docker_stats]
|
||||||
processors: [batch]
|
processors: [batch]
|
||||||
exporters: [prometheus]
|
exporters: [prometheus]
|
||||||
|
|||||||
@@ -6,17 +6,14 @@ global:
|
|||||||
evaluation_interval: 30s
|
evaluation_interval: 30s
|
||||||
|
|
||||||
scrape_configs:
|
scrape_configs:
|
||||||
|
# otelcol exposes both the services' OTLP metrics and the docker_stats receiver's
|
||||||
|
# per-container resource metrics (CPU/memory/network) on one endpoint.
|
||||||
- job_name: otelcol
|
- job_name: otelcol
|
||||||
static_configs:
|
static_configs:
|
||||||
- targets: ["otelcol:9464"]
|
- targets: ["otelcol:9464"]
|
||||||
- job_name: prometheus
|
- job_name: prometheus
|
||||||
static_configs:
|
static_configs:
|
||||||
- targets: ["localhost:9090"]
|
- targets: ["localhost:9090"]
|
||||||
# Container resource metrics (CPU/memory/network/disk) for every contour
|
|
||||||
# container, for the R2/R7 stress runs' resource baseline.
|
|
||||||
- job_name: cadvisor
|
|
||||||
static_configs:
|
|
||||||
- targets: ["cadvisor:8080"]
|
|
||||||
# Postgres server metrics (connections, cache hit ratio, transactions, db size).
|
# Postgres server metrics (connections, cache hit ratio, transactions, db size).
|
||||||
- job_name: postgres_exporter
|
- job_name: postgres_exporter
|
||||||
static_configs:
|
static_configs:
|
||||||
|
|||||||
@@ -24,7 +24,6 @@ import (
|
|||||||
"syscall"
|
"syscall"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"scrabble/loadtest/internal/edge"
|
|
||||||
"scrabble/loadtest/internal/moves"
|
"scrabble/loadtest/internal/moves"
|
||||||
"scrabble/loadtest/internal/report"
|
"scrabble/loadtest/internal/report"
|
||||||
"scrabble/loadtest/internal/scenario"
|
"scrabble/loadtest/internal/scenario"
|
||||||
@@ -114,7 +113,7 @@ func cmdRun(ctx context.Context, log *slog.Logger, args []string) error {
|
|||||||
log.Info("seeded", "durable", len(pool.Durables), "guest", len(pool.Guests))
|
log.Info("seeded", "durable", len(pool.Durables), "guest", len(pool.Guests))
|
||||||
|
|
||||||
rec := report.New()
|
rec := report.New()
|
||||||
drv := scenario.NewDriver(edge.New(*gateway), reg, rec, log)
|
drv := scenario.NewDriver(*gateway, reg, rec, log)
|
||||||
cfg := scenario.RealisticConfig{
|
cfg := scenario.RealisticConfig{
|
||||||
Steps: steps, StepDur: *stepDur, GamesPerPlayer: *gpp,
|
Steps: steps, StepDur: *stepDur, GamesPerPlayer: *gpp,
|
||||||
Tick: *tick, SecondaryProb: *secProb,
|
Tick: *tick, SecondaryProb: *secProb,
|
||||||
|
|||||||
@@ -41,16 +41,18 @@ const (
|
|||||||
msgEnqueue = "lobby.enqueue"
|
msgEnqueue = "lobby.enqueue"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Client speaks the edge protocol to a single gateway base URL over h2c. It is safe
|
// Client speaks the edge protocol to a single gateway base URL over h2c. The harness
|
||||||
// for concurrent use by many virtual players (the underlying http2.Transport pools
|
// builds one Client per virtual player, so each player owns its h2c connection (its
|
||||||
// and multiplexes connections).
|
// Subscribe stream and Execute calls share it) the way a real client does; a single
|
||||||
|
// Client is safe for that player's own concurrent goroutines.
|
||||||
type Client struct {
|
type Client struct {
|
||||||
rpc edgev1connect.GatewayClient
|
rpc edgev1connect.GatewayClient
|
||||||
}
|
}
|
||||||
|
|
||||||
// New builds a Client for baseURL (for example http://gateway:8081). The transport
|
// New builds a Client for baseURL (for example http://gateway:8081). The transport
|
||||||
// speaks HTTP/2 cleartext (h2c) to match the gateway, dialling plaintext TCP rather
|
// speaks HTTP/2 cleartext (h2c) to match the gateway, dialling plaintext TCP rather
|
||||||
// than TLS.
|
// than TLS. Each virtual player gets its own Client (hence its own connection), so the
|
||||||
|
// load mirrors real clients instead of multiplexing every player over one transport.
|
||||||
func New(baseURL string) *Client {
|
func New(baseURL string) *Client {
|
||||||
hc := &http.Client{
|
hc := &http.Client{
|
||||||
Transport: &http2.Transport{
|
Transport: &http2.Transport{
|
||||||
|
|||||||
@@ -37,6 +37,7 @@ func (d *Driver) assembleCohort(ctx context.Context, cohort []seed.Account, game
|
|||||||
if len(cohort) < 2 {
|
if len(cohort) < 2 {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
c := edge.New(d.gateway) // one client for the assembly burst; players play on their own
|
||||||
gamesOf := make(map[string]int, len(cohort))
|
gamesOf := make(map[string]int, len(cohort))
|
||||||
var games []*Game
|
var games []*Game
|
||||||
for i := range cohort {
|
for i := range cohort {
|
||||||
@@ -51,7 +52,7 @@ func (d *Driver) assembleCohort(ctx context.Context, cohort []seed.Account, game
|
|||||||
break
|
break
|
||||||
}
|
}
|
||||||
variant := moves.Variants()[rng.Intn(len(moves.Variants()))]
|
variant := moves.Variants()[rng.Intn(len(moves.Variants()))]
|
||||||
g, err := d.assemble(ctx, members, variant)
|
g, err := d.assemble(ctx, c, members, variant)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
d.log.Debug("assemble game", "err", err)
|
d.log.Debug("assemble game", "err", err)
|
||||||
break
|
break
|
||||||
@@ -85,7 +86,7 @@ func pickMembers(cohort []seed.Account, inviter seed.Account, rng *rand.Rand) []
|
|||||||
// assemble runs the invitation flow for one game: the inviter (members[0]) invites
|
// assemble runs the invitation flow for one game: the inviter (members[0]) invites
|
||||||
// the rest, each invitee accepts the pending invitation, and the completing accept
|
// the rest, each invitee accepts the pending invitation, and the completing accept
|
||||||
// starts the game, which is then located in the inviter's game list.
|
// starts the game, which is then located in the inviter's game list.
|
||||||
func (d *Driver) assemble(ctx context.Context, members []seed.Account, variant string) (*Game, error) {
|
func (d *Driver) assemble(ctx context.Context, c *edge.Client, members []seed.Account, variant string) (*Game, error) {
|
||||||
inviter := members[0]
|
inviter := members[0]
|
||||||
inviteeIDs := make([]string, len(members)-1)
|
inviteeIDs := make([]string, len(members)-1)
|
||||||
for i, m := range members[1:] {
|
for i, m := range members[1:] {
|
||||||
@@ -93,7 +94,7 @@ func (d *Driver) assemble(ctx context.Context, members []seed.Account, variant s
|
|||||||
}
|
}
|
||||||
|
|
||||||
t0 := time.Now()
|
t0 := time.Now()
|
||||||
code, err := d.edge.CreateInvitation(ctx, inviter.Token, inviteeIDs, variant)
|
code, err := c.CreateInvitation(ctx, inviter.Token, inviteeIDs, variant)
|
||||||
d.rec.Record("invitation.create", code, time.Since(t0))
|
d.rec.Record("invitation.create", code, time.Since(t0))
|
||||||
if err != nil || code != "ok" {
|
if err != nil || code != "ok" {
|
||||||
return nil, fmt.Errorf("invitation.create: %s", code)
|
return nil, fmt.Errorf("invitation.create: %s", code)
|
||||||
@@ -101,7 +102,7 @@ func (d *Driver) assemble(ctx context.Context, members []seed.Account, variant s
|
|||||||
|
|
||||||
for _, invitee := range members[1:] {
|
for _, invitee := range members[1:] {
|
||||||
t0 = time.Now()
|
t0 = time.Now()
|
||||||
list, lc, err := d.edge.ListInvitations(ctx, invitee.Token)
|
list, lc, err := c.ListInvitations(ctx, invitee.Token)
|
||||||
d.rec.Record("invitation.list", lc, time.Since(t0))
|
d.rec.Record("invitation.list", lc, time.Since(t0))
|
||||||
if err != nil || lc != "ok" {
|
if err != nil || lc != "ok" {
|
||||||
return nil, fmt.Errorf("invitation.list: %s", lc)
|
return nil, fmt.Errorf("invitation.list: %s", lc)
|
||||||
@@ -111,7 +112,7 @@ func (d *Driver) assemble(ctx context.Context, members []seed.Account, variant s
|
|||||||
return nil, fmt.Errorf("no pending invitation from %s", inviter.ID)
|
return nil, fmt.Errorf("no pending invitation from %s", inviter.ID)
|
||||||
}
|
}
|
||||||
t0 = time.Now()
|
t0 = time.Now()
|
||||||
ac, err := d.edge.AcceptInvitation(ctx, invitee.Token, invID)
|
ac, err := c.AcceptInvitation(ctx, invitee.Token, invID)
|
||||||
d.rec.Record("invitation.accept", ac, time.Since(t0))
|
d.rec.Record("invitation.accept", ac, time.Since(t0))
|
||||||
if err != nil || ac != "ok" {
|
if err != nil || ac != "ok" {
|
||||||
return nil, fmt.Errorf("invitation.accept: %s", ac)
|
return nil, fmt.Errorf("invitation.accept: %s", ac)
|
||||||
@@ -119,7 +120,7 @@ func (d *Driver) assemble(ctx context.Context, members []seed.Account, variant s
|
|||||||
}
|
}
|
||||||
|
|
||||||
t0 = time.Now()
|
t0 = time.Now()
|
||||||
games, gc, err := d.edge.GamesList(ctx, inviter.Token)
|
games, gc, err := c.GamesList(ctx, inviter.Token)
|
||||||
d.rec.Record("games.list", gc, time.Since(t0))
|
d.rec.Record("games.list", gc, time.Since(t0))
|
||||||
if err != nil || gc != "ok" {
|
if err != nil || gc != "ok" {
|
||||||
return nil, fmt.Errorf("games.list: %s", gc)
|
return nil, fmt.Errorf("games.list: %s", gc)
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ import (
|
|||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"scrabble/loadtest/internal/edge"
|
||||||
"scrabble/loadtest/internal/seed"
|
"scrabble/loadtest/internal/seed"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -29,6 +30,7 @@ func (d *Driver) Hammer(ctx context.Context, acc seed.Account, cfg HammerConfig)
|
|||||||
runCtx, cancel := context.WithTimeout(ctx, cfg.Duration)
|
runCtx, cancel := context.WithTimeout(ctx, cfg.Duration)
|
||||||
defer cancel()
|
defer cancel()
|
||||||
d.log.Info("gateway-hammer", "workers", cfg.Workers, "duration", cfg.Duration)
|
d.log.Info("gateway-hammer", "workers", cfg.Workers, "duration", cfg.Duration)
|
||||||
|
c := edge.New(d.gateway)
|
||||||
var wg sync.WaitGroup
|
var wg sync.WaitGroup
|
||||||
for w := 0; w < cfg.Workers; w++ {
|
for w := 0; w < cfg.Workers; w++ {
|
||||||
wg.Add(1)
|
wg.Add(1)
|
||||||
@@ -36,7 +38,7 @@ func (d *Driver) Hammer(ctx context.Context, acc seed.Account, cfg HammerConfig)
|
|||||||
defer wg.Done()
|
defer wg.Done()
|
||||||
for runCtx.Err() == nil {
|
for runCtx.Err() == nil {
|
||||||
t0 := time.Now()
|
t0 := time.Now()
|
||||||
_, code, _ := d.edge.GamesList(runCtx, acc.Token)
|
_, code, _ := c.GamesList(runCtx, acc.Token)
|
||||||
d.rec.Record("hammer:games.list", code, time.Since(t0))
|
d.rec.Record("hammer:games.list", code, time.Since(t0))
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
|
|||||||
@@ -9,6 +9,7 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"log/slog"
|
"log/slog"
|
||||||
"math/rand"
|
"math/rand"
|
||||||
|
"slices"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
@@ -18,18 +19,20 @@ import (
|
|||||||
"scrabble/loadtest/internal/seed"
|
"scrabble/loadtest/internal/seed"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Driver ties the edge client, the local move generator and the run recorder
|
// Driver ties the gateway endpoint, the local move generator and the run recorder
|
||||||
// together. All three are safe for concurrent use by many player goroutines.
|
// together. It builds one edge client per virtual player, so each player owns its
|
||||||
|
// h2c connection (its Subscribe stream and Execute calls share it) the way a real
|
||||||
|
// client does, rather than multiplexing every player over a single shared transport.
|
||||||
type Driver struct {
|
type Driver struct {
|
||||||
edge *edge.Client
|
gateway string // gateway base URL, e.g. http://gateway:8081
|
||||||
moves *moves.Registry
|
moves *moves.Registry
|
||||||
rec *report.Recorder
|
rec *report.Recorder
|
||||||
log *slog.Logger
|
log *slog.Logger
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewDriver builds a Driver.
|
// NewDriver builds a Driver targeting the gateway base URL.
|
||||||
func NewDriver(c *edge.Client, m *moves.Registry, rec *report.Recorder, log *slog.Logger) *Driver {
|
func NewDriver(gateway string, m *moves.Registry, rec *report.Recorder, log *slog.Logger) *Driver {
|
||||||
return &Driver{edge: c, moves: m, rec: rec, log: log}
|
return &Driver{gateway: gateway, moves: m, rec: rec, log: log}
|
||||||
}
|
}
|
||||||
|
|
||||||
// RealisticConfig parameterises the under-the-limit ramp.
|
// RealisticConfig parameterises the under-the-limit ramp.
|
||||||
@@ -98,11 +101,16 @@ func (d *Driver) RunRealistic(ctx context.Context, pool *seed.Pool, cfg Realisti
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// playerLoop runs one virtual player: a live-event subscription (loads the push hub,
|
// playerLoop runs one virtual player over its own edge client (its own h2c
|
||||||
// counts events) plus a round-robin turn loop over the player's games.
|
// connection): a live-event subscription (loads the push hub, counts events) plus a
|
||||||
|
// round-robin turn loop over the player's games. A game that has finished is dropped
|
||||||
|
// from the rotation so secondary ops stop hitting an ended game; once no active game
|
||||||
|
// remains the player idles, still holding its stream, until the run ends.
|
||||||
func (d *Driver) playerLoop(ctx context.Context, p seed.Account, games []*Game, cfg RealisticConfig, rng *rand.Rand) {
|
func (d *Driver) playerLoop(ctx context.Context, p seed.Account, games []*Game, cfg RealisticConfig, rng *rand.Rand) {
|
||||||
go d.subscribeLoop(ctx, p)
|
c := edge.New(d.gateway)
|
||||||
if len(games) == 0 {
|
go d.subscribeLoop(ctx, c, p)
|
||||||
|
active := games
|
||||||
|
if len(active) == 0 {
|
||||||
<-ctx.Done()
|
<-ctx.Done()
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
@@ -114,22 +122,30 @@ func (d *Driver) playerLoop(ctx context.Context, p seed.Account, games []*Game,
|
|||||||
case <-ctx.Done():
|
case <-ctx.Done():
|
||||||
return
|
return
|
||||||
case <-ticker.C:
|
case <-ticker.C:
|
||||||
g := games[gi%len(games)]
|
g := active[gi%len(active)]
|
||||||
gi++
|
gi++
|
||||||
if rng.Float64() < cfg.SecondaryProb {
|
if rng.Float64() < cfg.SecondaryProb {
|
||||||
d.secondaryOp(ctx, p, g, rng)
|
d.secondaryOp(ctx, c, p, g, rng)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
d.playTurn(ctx, p, g, rng)
|
if d.playTurn(ctx, c, p, g, rng) {
|
||||||
|
active = slices.DeleteFunc(active, func(x *Game) bool { return x == g })
|
||||||
|
gi = 0
|
||||||
|
if len(active) == 0 {
|
||||||
|
<-ctx.Done()
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// subscribeLoop holds the player's live-event stream open, counting events and
|
// subscribeLoop holds the player's live-event stream open on the player's client,
|
||||||
// reconnecting with a brief backoff after a drop, until the run ends.
|
// counting events and reconnecting with a brief backoff after a drop, until the run
|
||||||
func (d *Driver) subscribeLoop(ctx context.Context, p seed.Account) {
|
// ends.
|
||||||
|
func (d *Driver) subscribeLoop(ctx context.Context, c *edge.Client, p seed.Account) {
|
||||||
for ctx.Err() == nil {
|
for ctx.Err() == nil {
|
||||||
err := d.edge.Subscribe(ctx, p.Token, func(e edge.Event) { d.rec.Event(e.Kind) })
|
err := c.Subscribe(ctx, p.Token, func(e edge.Event) { d.rec.Event(e.Kind) })
|
||||||
if ctx.Err() != nil {
|
if ctx.Err() != nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
@@ -144,80 +160,90 @@ func (d *Driver) subscribeLoop(ctx context.Context, p seed.Account) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// playTurn plays one turn in g when it is the player's move: fetch state, replay
|
// playTurn plays one turn in g over the player's client when it is the player's
|
||||||
// history, pick a legal move and submit it (or exchange / pass).
|
// move: fetch state, replay history, pick a legal move and submit it (or exchange /
|
||||||
func (d *Driver) playTurn(ctx context.Context, p seed.Account, g *Game, rng *rand.Rand) {
|
// pass). It reports whether the game has finished, so the caller can drop it from the
|
||||||
|
// rotation.
|
||||||
|
func (d *Driver) playTurn(ctx context.Context, c *edge.Client, p seed.Account, g *Game, rng *rand.Rand) (finished bool) {
|
||||||
seat := g.seatOf(p.ID.String())
|
seat := g.seatOf(p.ID.String())
|
||||||
if seat < 0 {
|
if seat < 0 {
|
||||||
return
|
return false
|
||||||
}
|
}
|
||||||
t0 := time.Now()
|
t0 := time.Now()
|
||||||
st, code, err := d.edge.State(ctx, p.Token, g.ID)
|
st, code, err := c.State(ctx, p.Token, g.ID)
|
||||||
d.rec.Record("game.state", code, time.Since(t0))
|
d.rec.Record("game.state", code, time.Since(t0))
|
||||||
if err != nil || code != "ok" || !st.Game.Active() || st.Game.ToMove != seat {
|
if err != nil || code != "ok" {
|
||||||
return
|
return false
|
||||||
|
}
|
||||||
|
if !st.Game.Active() {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
if st.Game.ToMove != seat {
|
||||||
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
t0 = time.Now()
|
t0 = time.Now()
|
||||||
hist, hc, err := d.edge.History(ctx, p.Token, g.ID)
|
hist, hc, err := c.History(ctx, p.Token, g.ID)
|
||||||
d.rec.Record("game.history", hc, time.Since(t0))
|
d.rec.Record("game.history", hc, time.Since(t0))
|
||||||
if err != nil || hc != "ok" {
|
if err != nil || hc != "ok" {
|
||||||
return
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
action, err := d.moves.Pick(g.Variant, hist, st.Rack, st.BagLen, rng)
|
action, err := d.moves.Pick(g.Variant, hist, st.Rack, st.BagLen, rng)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
d.log.Debug("pick move", "variant", g.Variant, "err", err)
|
d.log.Debug("pick move", "variant", g.Variant, "err", err)
|
||||||
return
|
return false
|
||||||
}
|
}
|
||||||
switch action.Kind {
|
switch action.Kind {
|
||||||
case "play":
|
case "play":
|
||||||
t0 = time.Now()
|
t0 = time.Now()
|
||||||
_, c, _ := d.edge.SubmitPlay(ctx, p.Token, g.ID, action.Dir, action.Tiles)
|
_, code, _ := c.SubmitPlay(ctx, p.Token, g.ID, action.Dir, action.Tiles)
|
||||||
d.rec.Record("game.submit_play", c, time.Since(t0))
|
d.rec.Record("game.submit_play", code, time.Since(t0))
|
||||||
case "exchange":
|
case "exchange":
|
||||||
t0 = time.Now()
|
t0 = time.Now()
|
||||||
_, c, _ := d.edge.Exchange(ctx, p.Token, g.ID, action.Exchange)
|
_, code, _ := c.Exchange(ctx, p.Token, g.ID, action.Exchange)
|
||||||
d.rec.Record("game.exchange", c, time.Since(t0))
|
d.rec.Record("game.exchange", code, time.Since(t0))
|
||||||
default:
|
default:
|
||||||
t0 = time.Now()
|
t0 = time.Now()
|
||||||
_, c, _ := d.edge.Pass(ctx, p.Token, g.ID)
|
_, code, _ := c.Pass(ctx, p.Token, g.ID)
|
||||||
d.rec.Record("game.pass", c, time.Since(t0))
|
d.rec.Record("game.pass", code, time.Since(t0))
|
||||||
}
|
}
|
||||||
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
// secondaryOp exercises one of the non-move edge operations the plan calls out, so
|
// secondaryOp exercises one of the non-move edge operations the plan calls out, so
|
||||||
// the run touches nudge / chat / check-word / draft / profile / stats too.
|
// the run touches nudge / chat / check-word / draft / profile / stats too, over the
|
||||||
func (d *Driver) secondaryOp(ctx context.Context, p seed.Account, g *Game, rng *rand.Rand) {
|
// player's own client.
|
||||||
|
func (d *Driver) secondaryOp(ctx context.Context, c *edge.Client, p seed.Account, g *Game, rng *rand.Rand) {
|
||||||
t0 := time.Now()
|
t0 := time.Now()
|
||||||
switch rng.Intn(7) {
|
switch rng.Intn(7) {
|
||||||
case 0:
|
case 0:
|
||||||
c, _ := d.edge.Nudge(ctx, p.Token, g.ID)
|
code, _ := c.Nudge(ctx, p.Token, g.ID)
|
||||||
d.rec.Record("chat.nudge", c, time.Since(t0))
|
d.rec.Record("chat.nudge", code, time.Since(t0))
|
||||||
case 1:
|
case 1:
|
||||||
c, _ := d.edge.ChatPost(ctx, p.Token, g.ID, "gg")
|
code, _ := c.ChatPost(ctx, p.Token, g.ID, "gg")
|
||||||
d.rec.Record("chat.post", c, time.Since(t0))
|
d.rec.Record("chat.post", code, time.Since(t0))
|
||||||
case 2:
|
case 2:
|
||||||
c, _ := d.edge.CheckWord(ctx, p.Token, g.ID, []byte{0, 1, 2})
|
code, _ := c.CheckWord(ctx, p.Token, g.ID, []byte{0, 1, 2})
|
||||||
d.rec.Record("game.check_word", c, time.Since(t0))
|
d.rec.Record("game.check_word", code, time.Since(t0))
|
||||||
case 3:
|
case 3:
|
||||||
// rack_order is an opaque string and board_tiles a (here empty) array, per the
|
// rack_order is an opaque string and board_tiles a (here empty) array, per the
|
||||||
// backend draft DTO; a malformed shape is rejected as bad_request.
|
// backend draft DTO; a malformed shape is rejected as bad_request.
|
||||||
c, _ := d.edge.DraftSave(ctx, p.Token, g.ID, `{"rack_order":"","board_tiles":[]}`)
|
code, _ := c.DraftSave(ctx, p.Token, g.ID, `{"rack_order":"","board_tiles":[]}`)
|
||||||
d.rec.Record("draft.save", c, time.Since(t0))
|
d.rec.Record("draft.save", code, time.Since(t0))
|
||||||
case 4:
|
case 4:
|
||||||
c, _ := d.edge.DraftGet(ctx, p.Token, g.ID)
|
code, _ := c.DraftGet(ctx, p.Token, g.ID)
|
||||||
d.rec.Record("draft.get", c, time.Since(t0))
|
d.rec.Record("draft.get", code, time.Since(t0))
|
||||||
case 5:
|
case 5:
|
||||||
lang := "en"
|
lang := "en"
|
||||||
if rng.Intn(2) == 1 {
|
if rng.Intn(2) == 1 {
|
||||||
lang = "ru"
|
lang = "ru"
|
||||||
}
|
}
|
||||||
c, _ := d.edge.ProfileUpdate(ctx, p.Token, p.Name, lang)
|
code, _ := c.ProfileUpdate(ctx, p.Token, p.Name, lang)
|
||||||
d.rec.Record("profile.update", c, time.Since(t0))
|
d.rec.Record("profile.update", code, time.Since(t0))
|
||||||
default:
|
default:
|
||||||
c, _ := d.edge.Stats(ctx, p.Token)
|
code, _ := c.Stats(ctx, p.Token)
|
||||||
d.rec.Record("stats.get", c, time.Since(t0))
|
d.rec.Record("stats.get", code, time.Since(t0))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user