Stage 17: path-conditional CI behind an aggregate gate + connector liveness probe; Grafana move-duration panel
- #10 a `changes` job path-filters unit/integration/ui; an always-running `gate` job aggregates them (success-or-skipped) and becomes the only required check
- #9 deploy adds a Telegram-connector liveness probe (docker inspect: running, not restarting, stable restart count) with a VPN-handshake grace period
- #1a Game-domain dashboard gains a 'Move think-time by phase (p50/p95)' panel
- deploy README: branch protection now requires only CI / gate
This commit is contained in:
+117
-2
@@ -1,6 +1,6 @@
|
||||
name: CI
|
||||
|
||||
# Single gated pipeline for the test contour (Stage 16). Gitea cannot express
|
||||
# Single gated pipeline for the test contour (Stage 16/17). Gitea cannot express
|
||||
# cross-workflow `needs`, so the full test suite and the auto test-deploy live in
|
||||
# one workflow.
|
||||
#
|
||||
@@ -11,6 +11,12 @@ name: CI
|
||||
# (PR or merge), so a PR into `master` is test-only; the prod deploy is a manual
|
||||
# workflow (Stage 18).
|
||||
#
|
||||
# Path-conditional jobs (Stage 17): `unit`/`integration`/`ui` run only when their
|
||||
# code changed (the `changes` job decides). Because a skipped required check would
|
||||
# block a merge under branch protection, the always-running `gate` job aggregates
|
||||
# their results and is the ONLY required status check; it passes when every
|
||||
# upstream job either succeeded or was skipped.
|
||||
#
|
||||
# Console output is kept plain (NO_COLOR + `docker compose --ansi never` +
|
||||
# `--progress plain`) so the Gitea logs stay readable.
|
||||
|
||||
@@ -21,7 +27,57 @@ on:
|
||||
branches: [development]
|
||||
|
||||
jobs:
|
||||
# changes detects which areas a PR/push touched, so the test jobs can skip when
# irrelevant. It defaults to running everything when the diff cannot be computed.
# Outputs: `go` and `ui`, each the string 'true' or 'false'.
changes:
  runs-on: ubuntu-latest
  defaults:
    run:
      shell: bash
  outputs:
    go: ${{ steps.filter.outputs.go }}
    ui: ${{ steps.filter.outputs.ui }}
  steps:
    - name: Checkout
      uses: actions/checkout@v4
      with:
        # Full history, so the base branch / previous tip is available to diff against.
        fetch-depth: 0

    - name: Detect changed paths
      id: filter
      run: |
        # Choose the comparison range: the PR base for pull requests, otherwise the
        # previous tip of the pushed branch (falling back to the last commit when
        # that tip is unknown or not present in the clone).
        if [ "${{ github.event_name }}" = "pull_request" ]; then
          git fetch -q origin "${{ github.base_ref }}" || true
          range="origin/${{ github.base_ref }}...HEAD"
        else
          before="${{ github.event.before }}"
          if [ -z "$before" ] || [ "$before" = "0000000000000000000000000000000000000000" ] || ! git cat-file -e "${before}^{commit}" 2>/dev/null; then
            range="HEAD~1...HEAD"
          else
            range="${before}...HEAD"
          fi
        fi
        echo "comparison range: $range"
        # Default to running everything; narrow only when the diff is computable.
        go=true; ui=true
        # --no-renames lists BOTH sides of a move, so a file moved out of a watched
        # directory still counts as a change to that directory.
        if files="$(git diff --name-only --no-renames "$range" 2>/dev/null)"; then
          echo "changed files:"; echo "$files"
          go=false; ui=false
          # Here-strings instead of `echo | grep -q`: this shell runs with pipefail,
          # and on a large file list grep -q exits at the first match, echo gets
          # SIGPIPE, and the pipeline would report failure despite the match.
          if grep -qE '^(backend/|pkg/|gateway/|platform/|go\.work)' <<<"$files"; then go=true; fi
          if grep -qE '^ui/' <<<"$files"; then ui=true; fi
          # A workflow or deploy change re-runs everything as a safety net.
          if grep -qE '^(\.gitea/workflows/|deploy/)' <<<"$files"; then go=true; ui=true; fi
        else
          echo "diff failed; running all jobs"
        fi
        echo "selected: go=$go ui=$ui"
        echo "go=$go" >> "$GITHUB_OUTPUT"
        echo "ui=$ui" >> "$GITHUB_OUTPUT"
|
||||
|
||||
unit:
|
||||
needs: changes
|
||||
if: ${{ needs.changes.outputs.go == 'true' }}
|
||||
runs-on: ubuntu-latest
|
||||
defaults:
|
||||
run:
|
||||
@@ -67,6 +123,8 @@ jobs:
|
||||
run: go test -count=1 ./backend/... ./pkg/... ./gateway/... ./platform/telegram/...
|
||||
|
||||
integration:
|
||||
needs: changes
|
||||
if: ${{ needs.changes.outputs.go == 'true' }}
|
||||
runs-on: ubuntu-latest
|
||||
defaults:
|
||||
run:
|
||||
@@ -102,6 +160,8 @@ jobs:
|
||||
run: go test -tags=integration -count=1 -p=1 -parallel=1 -timeout=15m ./backend/...
|
||||
|
||||
ui:
|
||||
needs: changes
|
||||
if: ${{ needs.changes.outputs.ui == 'true' }}
|
||||
runs-on: ubuntu-latest
|
||||
defaults:
|
||||
run:
|
||||
@@ -142,10 +202,37 @@ jobs:
|
||||
run: pnpm run test:e2e
|
||||
timeout-minutes: 5
|
||||
|
||||
# gate is the single branch-protection required check. It always runs and passes
# only when the `changes` job succeeded AND each test job succeeded or was skipped
# (a path-filtered no-op), failing the merge if any actually failed or was
# cancelled. `changes` must succeed outright: when it fails, every test job is
# skipped because its dependency failed, not because of the path filter, and
# treating those skips as a pass would let an untested change through.
gate:
  needs: [changes, unit, integration, ui]
  if: always()
  runs-on: ubuntu-latest
  defaults:
    run:
      shell: bash
  steps:
    - name: Aggregate required checks
      run: |
        fail=
        # The path filter itself is not allowed to be skipped, failed or cancelled.
        changes_res="${{ needs.changes.result }}"
        echo "changes = $changes_res"
        if [ "$changes_res" != "success" ]; then
          echo "::error::changes=$changes_res"
          fail=1
        fi
        # Each entry is "<job>:<result>"; split on the first colon.
        for r in "unit:${{ needs.unit.result }}" "integration:${{ needs.integration.result }}" "ui:${{ needs.ui.result }}"; do
          name="${r%%:*}"; res="${r#*:}"
          echo "$name = $res"
          case "$res" in
            success|skipped) ;;
            *) echo "::error::$name=$res"; fail=1 ;;
          esac
        done
        [ -z "$fail" ] || { echo "one or more required jobs failed"; exit 1; }
        echo "all required jobs passed or were skipped"
|
||||
|
||||
deploy:
|
||||
# Auto test-deploy on a PR into development and on the push that merges it.
|
||||
# A PR into master is test-only (this job is skipped); prod deploy is manual.
|
||||
needs: [unit, integration, ui]
|
||||
# Gates on `gate` (so a real test failure blocks the deploy) but runs even when
|
||||
# some test jobs were path-skipped.
|
||||
needs: [gate]
|
||||
if: ${{ (github.event_name == 'push' && github.ref == 'refs/heads/development') || (github.event_name == 'pull_request' && github.base_ref == 'development') }}
|
||||
runs-on: ubuntu-latest
|
||||
defaults:
|
||||
@@ -215,6 +302,34 @@ jobs:
|
||||
docker logs --tail 50 scrabble-gateway || true
|
||||
exit 1
|
||||
|
||||
- name: Probe the Telegram connector liveness
  run: |
    set -u
    # The connector long-polls and egresses through the VPN sidecar with no public
    # ingress, so the gateway probe cannot notice it crash-looping. Ask Docker
    # about the container instead: healthy means status "running", not flagged as
    # restarting, and a restart counter that does not move across a short window.
    # The initial sleep is a grace period for the VPN handshake (the connector may
    # restart a few times before it settles).
    container=scrabble-telegram
    sleep 20
    attempt=0
    while [ "$attempt" -lt 20 ]; do
      attempt=$((attempt + 1))
      # A failed inspect is reported as the unhealthy value for each field.
      state="$(docker inspect -f '{{.State.Status}}' "$container" 2>/dev/null || echo missing)"
      in_restart="$(docker inspect -f '{{.State.Restarting}}' "$container" 2>/dev/null || echo true)"
      if [ "$state" = "running" ] && [ "$in_restart" = "false" ]; then
        # Sample the restart counter twice, five seconds apart.
        count_before="$(docker inspect -f '{{.RestartCount}}' "$container")"
        sleep 5
        count_after="$(docker inspect -f '{{.RestartCount}}' "$container")"
        if [ "$count_before" = "$count_after" ]; then
          echo "connector healthy: status=$state restarts=$count_after"
          exit 0
        fi
        echo "connector still restarting ($count_before -> $count_after); waiting"
      fi
      sleep 3
    done
    # Out of attempts: show recent container output to aid diagnosis, then fail.
    echo "connector not healthy; recent logs:"
    docker logs --tail 80 "$container" || true
    exit 1
|
||||
|
||||
# Remove dangling images without prompting (-f). `if: always()` makes this
# cleanup run even when an earlier step in the job failed.
- name: Prune dangling images
|
||||
if: always()
|
||||
run: docker image prune -f
|
||||
|
||||
Reference in New Issue
Block a user