From 32dc29359a6284a544cc50fca37398d851996cbe Mon Sep 17 00:00:00 2001 From: Ilia Denisov Date: Wed, 22 Apr 2026 08:49:45 +0200 Subject: [PATCH] feat: notification service --- ARCHITECTURE.md | 85 +- TESTING.md | 96 +- gateway/README.md | 20 + gateway/docs/examples.md | 8 +- .../events/push_grpc_integration_test.go | 47 +- geoprofile/PLAN.md | 7 +- geoprofile/README.md | 17 +- go.work | 3 + integration/README.md | 37 +- .../gateway_authsession_mail_test.go | 19 + .../gateway_authsession_user_mail_test.go | 691 +++++++++++++ integration/internal/harness/process.go | 21 +- .../notification_gateway_test.go | 526 ++++++++++ .../notification_mail_test.go | 622 ++++++++++++ .../notification_user_test.go | 391 ++++++++ mail/README.md | 20 + mail/api/delivery-commands-asyncapi.yaml | 2 +- mail/docs/examples.md | 2 +- .../adapters/templates/catalog_test.go | 38 +- .../templates/checked_in_assets_test.go | 58 ++ .../api/streamcommand/contract_test.go | 10 +- mail/internal/app/runtime_test.go | 6 +- mail/internal/domain/delivery/model_test.go | 2 +- .../acceptgenericdelivery/service_test.go | 6 +- .../service/executeattempt/service_test.go | 6 +- .../service/renderdelivery/service_test.go | 32 +- .../service/resenddelivery/service_test.go | 4 +- mail/internal/worker/command_consumer_test.go | 4 +- mail/templates/game.finished/en/subject.tmpl | 1 + mail/templates/game.finished/en/text.tmpl | 4 + .../game.generation_failed/en/subject.tmpl | 1 + .../game.generation_failed/en/text.tmpl | 4 + .../templates/game.turn.ready/en/subject.tmpl | 1 + mail/templates/game.turn.ready/en/text.tmpl | 4 + .../geo.review_recommended/en/subject.tmpl | 1 + .../geo.review_recommended/en/text.tmpl | 5 + .../en/subject.tmpl | 1 + .../lobby.application.submitted/en/text.tmpl | 4 + .../lobby.invite.created/en/subject.tmpl | 1 + .../lobby.invite.created/en/text.tmpl | 4 + .../lobby.invite.expired/en/subject.tmpl | 1 + .../lobby.invite.expired/en/text.tmpl | 4 + .../lobby.invite.redeemed/en/subject.tmpl 
| 1 + .../lobby.invite.redeemed/en/text.tmpl | 4 + .../lobby.membership.approved/en/subject.tmpl | 1 + .../lobby.membership.approved/en/text.tmpl | 3 + .../lobby.membership.rejected/en/subject.tmpl | 1 + .../lobby.membership.rejected/en/text.tmpl | 3 + .../en/subject.tmpl | 1 + .../en/text.tmpl | 3 + notification/PLAN.md | 365 +++++++ notification/README.md | 665 ++++++++++++ notification/api/intents-asyncapi.yaml | 556 ++++++++++ notification/cmd/notification/main.go | 45 + notification/contract_asyncapi_test.go | 591 +++++++++++ notification/docs/README.md | 25 + notification/docs/examples.md | 145 +++ notification/docs/flows.md | 130 +++ notification/docs/runbook.md | 167 ++++ notification/docs/runtime.md | 206 ++++ notification/documentation_contract_test.go | 57 ++ notification/go.mod | 90 ++ notification/go.sum | 195 ++++ .../intent_acceptance_contract_test.go | 41 + notification/internal/adapters/doc.go | 2 + .../internal/adapters/redis/client.go | 72 ++ .../adapters/redisstate/acceptance_store.go | 140 +++ .../redisstate/acceptance_store_test.go | 311 ++++++ .../adapters/redisstate/atomic_writer.go | 157 +++ .../internal/adapters/redisstate/codecs.go | 547 ++++++++++ .../internal/adapters/redisstate/doc.go | 3 + .../internal/adapters/redisstate/errors.go | 10 + .../internal/adapters/redisstate/keyspace.go | 105 ++ .../redisstate/malformed_intent_store.go | 59 ++ .../adapters/redisstate/route_state_store.go | 657 ++++++++++++ .../redisstate/route_state_store_test.go | 465 +++++++++ .../redisstate/stream_offset_store.go | 160 +++ .../internal/adapters/userservice/client.go | 243 +++++ .../adapters/userservice/client_test.go | 219 ++++ notification/internal/api/doc.go | 2 + .../internal/api/intentstream/contract.go | 147 +++ .../api/intentstream/contract_test.go | 145 +++ .../internal/api/internalhttp/server.go | 252 +++++ .../internal/api/internalhttp/server_test.go | 272 +++++ notification/internal/app/app.go | 168 ++++ notification/internal/app/runtime.go | 
229 +++++ .../internal/app/runtime_smoke_test.go | 72 ++ notification/internal/app/runtime_test.go | 581 +++++++++++ notification/internal/config/config.go | 839 ++++++++++++++++ notification/internal/config/config_test.go | 252 +++++ notification/internal/logging/logger.go | 112 +++ .../internal/service/acceptintent/service.go | 946 ++++++++++++++++++ .../service/acceptintent/service_test.go | 613 ++++++++++++ notification/internal/service/doc.go | 3 + .../internal/service/malformedintent/model.go | 135 +++ .../internal/service/publishmail/encoder.go | 178 ++++ .../service/publishmail/encoder_test.go | 275 +++++ .../internal/service/publishpush/encoder.go | 221 ++++ .../service/publishpush/encoder_test.go | 186 ++++ notification/internal/telemetry/runtime.go | 694 +++++++++++++ .../internal/telemetry/runtime_test.go | 228 +++++ notification/internal/worker/doc.go | 3 + .../internal/worker/email_publisher.go | 421 ++++++++ .../internal/worker/email_publisher_test.go | 232 +++++ .../internal/worker/intent_consumer.go | 331 ++++++ .../internal/worker/intent_consumer_test.go | 422 ++++++++ .../internal/worker/push_publisher.go | 499 +++++++++ .../internal/worker/push_publisher_test.go | 318 ++++++ .../internal/worker/telemetry_test.go | 184 ++++ notification/mail_template_contract_test.go | 185 ++++ .../observability_recovery_contract_test.go | 34 + notification/openapi.yaml | 106 ++ .../producer_integration_contract_test.go | 167 ++++ notification/push_payload_contract_test.go | 161 +++ notification/redis_state_contract_test.go | 87 ++ .../route_publication_contract_test.go | 71 ++ notification/runtime_contract_test.go | 81 ++ notification/user_enrichment_contract_test.go | 43 + pkg/notificationintent/go.mod | 24 + pkg/notificationintent/go.sum | 31 + pkg/notificationintent/intent.go | 872 ++++++++++++++++ pkg/notificationintent/intent_test.go | 298 ++++++ pkg/notificationintent/payloads.go | 162 +++ pkg/notificationintent/publisher.go | 73 ++ 
pkg/notificationintent/publisher_test.go | 44 + pkg/schema/fbs/notification.fbs | 38 + .../fbs/notification/GameFinishedEvent.go | 75 ++ .../fbs/notification/GameTurnReadyEvent.go | 75 ++ .../LobbyApplicationSubmittedEvent.go | 71 ++ .../notification/LobbyInviteCreatedEvent.go | 71 ++ .../notification/LobbyInviteRedeemedEvent.go | 71 ++ .../LobbyMembershipApprovedEvent.go | 60 ++ .../LobbyMembershipRejectedEvent.go | 60 ++ pkg/transcoder/notification.go | 390 ++++++++ pkg/transcoder/notification_test.go | 387 +++++++ 135 files changed, 21828 insertions(+), 130 deletions(-) create mode 100644 integration/gatewayauthsessionusermail/gateway_authsession_user_mail_test.go create mode 100644 integration/notificationgateway/notification_gateway_test.go create mode 100644 integration/notificationmail/notification_mail_test.go create mode 100644 integration/notificationuser/notification_user_test.go create mode 100644 mail/internal/adapters/templates/checked_in_assets_test.go create mode 100644 mail/templates/game.finished/en/subject.tmpl create mode 100644 mail/templates/game.finished/en/text.tmpl create mode 100644 mail/templates/game.generation_failed/en/subject.tmpl create mode 100644 mail/templates/game.generation_failed/en/text.tmpl create mode 100644 mail/templates/game.turn.ready/en/subject.tmpl create mode 100644 mail/templates/game.turn.ready/en/text.tmpl create mode 100644 mail/templates/geo.review_recommended/en/subject.tmpl create mode 100644 mail/templates/geo.review_recommended/en/text.tmpl create mode 100644 mail/templates/lobby.application.submitted/en/subject.tmpl create mode 100644 mail/templates/lobby.application.submitted/en/text.tmpl create mode 100644 mail/templates/lobby.invite.created/en/subject.tmpl create mode 100644 mail/templates/lobby.invite.created/en/text.tmpl create mode 100644 mail/templates/lobby.invite.expired/en/subject.tmpl create mode 100644 mail/templates/lobby.invite.expired/en/text.tmpl create mode 100644 
mail/templates/lobby.invite.redeemed/en/subject.tmpl create mode 100644 mail/templates/lobby.invite.redeemed/en/text.tmpl create mode 100644 mail/templates/lobby.membership.approved/en/subject.tmpl create mode 100644 mail/templates/lobby.membership.approved/en/text.tmpl create mode 100644 mail/templates/lobby.membership.rejected/en/subject.tmpl create mode 100644 mail/templates/lobby.membership.rejected/en/text.tmpl create mode 100644 mail/templates/lobby.runtime_paused_after_start/en/subject.tmpl create mode 100644 mail/templates/lobby.runtime_paused_after_start/en/text.tmpl create mode 100644 notification/PLAN.md create mode 100644 notification/README.md create mode 100644 notification/api/intents-asyncapi.yaml create mode 100644 notification/cmd/notification/main.go create mode 100644 notification/contract_asyncapi_test.go create mode 100644 notification/docs/README.md create mode 100644 notification/docs/examples.md create mode 100644 notification/docs/flows.md create mode 100644 notification/docs/runbook.md create mode 100644 notification/docs/runtime.md create mode 100644 notification/documentation_contract_test.go create mode 100644 notification/go.mod create mode 100644 notification/go.sum create mode 100644 notification/intent_acceptance_contract_test.go create mode 100644 notification/internal/adapters/doc.go create mode 100644 notification/internal/adapters/redis/client.go create mode 100644 notification/internal/adapters/redisstate/acceptance_store.go create mode 100644 notification/internal/adapters/redisstate/acceptance_store_test.go create mode 100644 notification/internal/adapters/redisstate/atomic_writer.go create mode 100644 notification/internal/adapters/redisstate/codecs.go create mode 100644 notification/internal/adapters/redisstate/doc.go create mode 100644 notification/internal/adapters/redisstate/errors.go create mode 100644 notification/internal/adapters/redisstate/keyspace.go create mode 100644 
notification/internal/adapters/redisstate/malformed_intent_store.go create mode 100644 notification/internal/adapters/redisstate/route_state_store.go create mode 100644 notification/internal/adapters/redisstate/route_state_store_test.go create mode 100644 notification/internal/adapters/redisstate/stream_offset_store.go create mode 100644 notification/internal/adapters/userservice/client.go create mode 100644 notification/internal/adapters/userservice/client_test.go create mode 100644 notification/internal/api/doc.go create mode 100644 notification/internal/api/intentstream/contract.go create mode 100644 notification/internal/api/intentstream/contract_test.go create mode 100644 notification/internal/api/internalhttp/server.go create mode 100644 notification/internal/api/internalhttp/server_test.go create mode 100644 notification/internal/app/app.go create mode 100644 notification/internal/app/runtime.go create mode 100644 notification/internal/app/runtime_smoke_test.go create mode 100644 notification/internal/app/runtime_test.go create mode 100644 notification/internal/config/config.go create mode 100644 notification/internal/config/config_test.go create mode 100644 notification/internal/logging/logger.go create mode 100644 notification/internal/service/acceptintent/service.go create mode 100644 notification/internal/service/acceptintent/service_test.go create mode 100644 notification/internal/service/doc.go create mode 100644 notification/internal/service/malformedintent/model.go create mode 100644 notification/internal/service/publishmail/encoder.go create mode 100644 notification/internal/service/publishmail/encoder_test.go create mode 100644 notification/internal/service/publishpush/encoder.go create mode 100644 notification/internal/service/publishpush/encoder_test.go create mode 100644 notification/internal/telemetry/runtime.go create mode 100644 notification/internal/telemetry/runtime_test.go create mode 100644 notification/internal/worker/doc.go create mode 
100644 notification/internal/worker/email_publisher.go create mode 100644 notification/internal/worker/email_publisher_test.go create mode 100644 notification/internal/worker/intent_consumer.go create mode 100644 notification/internal/worker/intent_consumer_test.go create mode 100644 notification/internal/worker/push_publisher.go create mode 100644 notification/internal/worker/push_publisher_test.go create mode 100644 notification/internal/worker/telemetry_test.go create mode 100644 notification/mail_template_contract_test.go create mode 100644 notification/observability_recovery_contract_test.go create mode 100644 notification/openapi.yaml create mode 100644 notification/producer_integration_contract_test.go create mode 100644 notification/push_payload_contract_test.go create mode 100644 notification/redis_state_contract_test.go create mode 100644 notification/route_publication_contract_test.go create mode 100644 notification/runtime_contract_test.go create mode 100644 notification/user_enrichment_contract_test.go create mode 100644 pkg/notificationintent/go.mod create mode 100644 pkg/notificationintent/go.sum create mode 100644 pkg/notificationintent/intent.go create mode 100644 pkg/notificationintent/intent_test.go create mode 100644 pkg/notificationintent/payloads.go create mode 100644 pkg/notificationintent/publisher.go create mode 100644 pkg/notificationintent/publisher_test.go create mode 100644 pkg/schema/fbs/notification.fbs create mode 100644 pkg/schema/fbs/notification/GameFinishedEvent.go create mode 100644 pkg/schema/fbs/notification/GameTurnReadyEvent.go create mode 100644 pkg/schema/fbs/notification/LobbyApplicationSubmittedEvent.go create mode 100644 pkg/schema/fbs/notification/LobbyInviteCreatedEvent.go create mode 100644 pkg/schema/fbs/notification/LobbyInviteRedeemedEvent.go create mode 100644 pkg/schema/fbs/notification/LobbyMembershipApprovedEvent.go create mode 100644 pkg/schema/fbs/notification/LobbyMembershipRejectedEvent.go create mode 
100644 pkg/transcoder/notification.go create mode 100644 pkg/transcoder/notification_test.go diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md index 6b4d80d..c1fa4fd 100644 --- a/ARCHITECTURE.md +++ b/ARCHITECTURE.md @@ -218,7 +218,7 @@ Important architectural rules: * public auth stays synchronous; * `confirm-email-code` returns a ready `device_session_id`; -* no async “pending session provisioning” stage exists; +* no async “pending session provisioning” step exists; * session source of truth and gateway-facing projection remain separate; * active-session limits are configuration-driven; * `send-email-code` stays success-shaped for existing, new, blocked, and throttled email flows. @@ -291,7 +291,12 @@ Transport rules: * `Auth / Session Service -> Mail Service` uses the dedicated synchronous trusted internal REST contract `POST /api/v1/internal/login-code-deliveries`; * `Notification Service -> Mail Service` is an asynchronous internal command - flow carried through the event bus or an equivalent queue-backed handoff. + flow carried through a dedicated queue-backed handoff after durable route + acceptance inside `Notification Service`. + +This split is covered by integration tests: auth-code delivery bypasses +`Notification Service`, while notification-generated mail uses template-mode +commands whose `template_id` equals `notification_type`. `Mail Service` may internally queue both flows. 
Its trusted operator read and resend APIs are part of the v1 service surface, @@ -353,7 +358,7 @@ It is the source of truth for: * game records before and after runtime existence; * public/private game type; * owner of a private game; -* invitations and invite code lifecycle; +* user-bound invitations and invite lifecycle; * applications and approvals; * membership and roster; * blocked/removed participants at platform level; @@ -410,8 +415,9 @@ Public games: Private games: * can be created only by eligible paid users; -* visible only to their owner and to invited users who used an invite code and were accepted; -* joining uses invite code plus owner approval; +* visible only to their owner and to invited users whose invitation is bound + to a concrete `user_id` and later accepted; +* joining uses a user-bound invite plus owner approval; * invite lifecycle belongs entirely to `Game Lobby`. Private-party owners get a limited owner-admin capability set, not full system admin power. @@ -527,20 +533,42 @@ It executes runtime jobs for `Game Lobby` and `Game Master`. This is a hard invariant. -## 10. Notification Service +## 10. [Notification Service](notification/README.md) `Notification Service` is the async delivery/orchestration layer for platform notifications. It has a deliberately minimal role: -* consume domain/integration events from services; -* decide whether a given event should result in push, email, or both; -* render and route notification payloads; -* send push-targeted events toward gateway; -* send email-targeted asynchronous commands toward `Mail Service`. 
+* consume normalized notification intents from services through dedicated + Redis Stream `notification:intents`; +* validate idempotency and persist durable notification route state; +* enrich user-targeted routes with `email` and `preferred_language` from + `User Service`; +* decide whether a given notification type results in `push`, `email`, or + both; +* send user-targeted `push` events toward gateway by `user_id`; +* send non-auth email asynchronous commands toward `Mail Service`. It is not a source of truth for user preferences in v1 unless a later feature requires it. +For user-targeted intents, upstream producers publish the concrete recipient +`user_id` values. `Notification Service` resolves user email and locale from +`User Service`, uses configured administrator email lists per +`notification_type` for admin-only notifications, keeps +`template_id == notification_type` for notification-generated email, and +treats private-game invite flows in v1 as user-bound by internal `user_id`. +Go producers use the shared `galaxy/notificationintent` module to build and +append compatible intents into `notification:intents`; a failed append is a +notification degradation signal and must not roll back already committed source +business state. +Acceptance of a user-targeted notification intent is complete only after every +published recipient `user_id` resolves through `User Service`; unresolved user +ids are treated as producer input defects and are recorded as malformed +notification intents rather than deferred publication failures. + +User-facing notifications use `push+email` unless a type explicitly opts out of +one channel. Administrator-facing notifications are `email`-only in v1. 
+ All platform notifications except auth-code delivery flow through this service, including: * game lifecycle notifications; @@ -548,6 +576,13 @@ All platform notifications except auth-code delivery flow through this service, * new turn notifications; * operational/admin notifications where appropriate. +The current process surface exposes only one private probe HTTP listener with +`GET /healthz` and `GET /readyz`; that probe surface is documented in +[`notification/openapi.yaml`](notification/openapi.yaml). The canonical +notification-intent stream contract remains +[`notification/api/intents-asyncapi.yaml`](notification/api/intents-asyncapi.yaml). +It does not expose an operator REST API. + ## 11. Billing Service (future) `Billing Service` is not part of the first implementation wave. @@ -611,6 +646,12 @@ The platform uses one simple rule: * `Lobby -> Runtime Manager` runtime jobs; * `Game Master -> Runtime Manager` runtime jobs; * all event-bus propagation; +* `Game Master -> Notification Service` notification intents through + `notification:intents`; +* `Game Lobby -> Notification Service` notification intents through + `notification:intents`; +* `Geo Profile Service -> Notification Service` notification intents through + `notification:intents`; * `Notification Service -> Gateway`; * `Notification Service -> Mail Service`; * geo auxiliary ingest from gateway to geo service; @@ -626,13 +667,14 @@ The main example is `Lobby -> Game Master`: ## Redis as Data and Event Infrastructure -Redis is the first-stage shared infrastructure for: +Redis is the initial shared infrastructure for: * main persistent data of services where no SQL backend is yet introduced; * gateway session cache backing data; * replay reservation store for gateway; * session lifecycle projection; * internal event bus using Redis Streams; +* notification-intent ingress through `notification:intents`; * notification fan-out; * runtime job completion events; * lobby/game-master propagation events; 
@@ -640,7 +682,7 @@ Redis is the first-stage shared infrastructure for: Redis Streams are therefore the platform event bus in v1. -This is an accepted trade-off for simpler early-stage infrastructure. +This is an accepted trade-off for simpler early infrastructure. Service boundaries must still stay storage-agnostic where future SQL migration is expected, especially in `Auth / Session Service`. ## Main End-to-End Flows @@ -791,7 +833,7 @@ sequenceDiagram Engine-->>GM: new turn result / maybe finished GM->>GM: update current_turn and runtime state GM->>Lobby: sync runtime snapshot - GM->>Notify: publish new-turn event + GM->>Notify: publish new-turn intent Notify->>Gateway: client-facing push events else generation failed Engine-->>GM: error / timeout @@ -820,7 +862,7 @@ sequenceDiagram GM->>GM: update runtime state GM->>Lobby: mark platform game finished Lobby->>Lobby: finalize game record - GM->>Notify: publish finish event + GM->>Notify: publish game-finished intent Notify->>Gateway: push user-facing/platform events ``` @@ -923,7 +965,7 @@ Uses the normal authenticated client protocol, not the separate system admin UI. Allowed owner-admin actions are limited to the owner’s own private games and include at least: * initiate enrollment; -* distribute invite codes outside the system; +* create and manage user-bound invites inside the system; * approve/reject applicants; * start game after enrollment; * force next turn while running; @@ -961,11 +1003,12 @@ Recommended order for implementation is: 3. **User Service** (implemented) Regular-user identity, profile/settings, tariffs/entitlements, user limits, sanctions, and current `declared_country`. -4. **Mail Service** - Internal email delivery for auth codes first, later for platform notifications. +4. **Mail Service** (implemented) + Internal email delivery for auth codes and platform notification mail. -5. **Notification Service** - Unified async delivery of push and non-auth email notifications. +5. 
**Notification Service** (implemented) + Unified async delivery of push and non-auth email notifications, with + real Gateway and Mail Service boundary coverage. 6. **Game Lobby Service** Platform game records, membership, invites, applications, approvals, schedules, user-facing lists, pre-start lifecycle. @@ -983,6 +1026,6 @@ Recommended order for implementation is: Auxiliary geo aggregation, review recommendation, suspicious-session blocking, declared-country workflow. 11. **Billing Service** - Late-stage payment and subscription source feeding entitlements into `User Service`. + Future payment and subscription source feeding entitlements into `User Service`. This order gives the platform a usable public perimeter first, then identity/auth, then core gameplay lifecycle, then runtime orchestration, and only afterward secondary auxiliary services. diff --git a/TESTING.md b/TESTING.md index 965de1a..96c05d4 100644 --- a/TESTING.md +++ b/TESTING.md @@ -2,7 +2,7 @@ ## Purpose -This document defines the testing strategy for the [Galaxy Game](ARCHITECTURE.md) platform and provides a staged testing matrix aligned with the agreed service implementation order. +This document defines the testing strategy for the [Galaxy Game](ARCHITECTURE.md) platform and provides an ordered testing matrix aligned with the agreed service implementation order. 
The strategy is built around the current architecture constraints: @@ -199,7 +199,7 @@ The testing plan follows this service order: * later-event-wins behavior for session snapshots * subscriber shutdown interrupts blocking reads -### Inter-service integration tests at this stage +### Inter-service integration tests for this boundary * `Gateway <-> Redis` @@ -218,7 +218,7 @@ The testing plan follows this service order: * verified authenticated command routing * signed response generation after downstream success -### Regression tests to keep from this stage onward +### Regression tests to keep * Authenticated request verification pipeline remains stable. * Public auth routes remain mounted and deterministic. @@ -344,7 +344,7 @@ The testing plan follows this service order: * ensure-user compatibility for confirm * exists/block compatibility for internal revoke/block flows -### Regression tests to keep from this stage onward +### Regression tests to keep * `confirm-email-code` always returns a ready `device_session_id`. * Gateway continues authenticating from cache rather than synchronous auth lookups. @@ -427,7 +427,7 @@ The testing plan follows this service order: * repeat login by same email without overwriting create-only settings * blocked email/user behavior -### Regression tests to keep from this stage onward +### Regression tests to keep * User resolution outcomes remain stable for auth flow. * User-facing profile APIs do not bypass auth/session rules. @@ -476,7 +476,7 @@ The testing plan follows this service order: * Keep `Mail Service` stubbed in most broader suites. * Add only a small dedicated smoke suite for the real mail adapter. -### Regression tests to keep from this stage onward +### Regression tests to keep * Auth code mail remains a direct dependency of auth flow. * Mail failures do not corrupt auth challenge/session state. 
@@ -488,11 +488,22 @@ The testing plan follows this service order: ### Service tests -* Event intake tests: +* Runtime-skeleton tests: - * accepted event types + * configuration loading and validation + * probe listener startup + * `GET /healthz` + * `GET /readyz` + * no `/metrics` + * Redis startup fast-fail + * graceful shutdown of the probe listener +* Intent intake tests: + + * accepted notification types * malformed event rejection * idempotent duplicate handling + * conflicting duplicate rejection + * AsyncAPI contract validation for `notification:intents` * Routing decision tests: * push only @@ -501,9 +512,9 @@ The testing plan follows this service order: * discard/no-delivery cases * Rendering tests: - * event-to-notification mapping - * payload shaping for push - * payload shaping for email + * intent-to-route mapping + * FlatBuffers payload shaping for push + * template-variable shaping for email * Failure isolation tests: * push failure does not corrupt email route decision @@ -511,7 +522,7 @@ The testing plan follows this service order: * retriable delivery behavior * Redis/event bus tests: - * consume domain/integration events + * consume normalized notification intents * publish client-facing events for gateway * enqueue mail commands for mail service @@ -520,20 +531,49 @@ The testing plan follows this service order: * `Notification <-> Gateway` * client-facing event publication and push delivery - * user-targeted vs session-targeted push routing + * user-targeted fan-out without session-targeted routing + * all seven user-facing push types + * `notificationgateway` runs real `Notification Service`, real + `User Service`, real `Edge Gateway`, and real Redis * `Notification <-> Mail` * non-auth email delivery * retry/failure isolation -* `Lobby/other fake producers <-> Notification` + * template-mode handoff for every notification email type, including both + user and administrator variants of `lobby.application.submitted` + * `notificationmail` runs 
real `Notification Service`, real `User Service`, + real `Mail Service`, and real Redis +* `Notification <-> User` - * domain event intake compatibility + * successful recipient enrichment + * `recipient_not_found` for missing users + * no stream-offset advancement while `User Service` is temporarily unavailable + * `notificationuser` runs real `Notification Service`, real `User Service`, + and real Redis +* `Gateway <-> Auth / Session <-> User <-> Mail` + + * public registration through the real mail path + * user creation through `User Service` + * gateway session projection + * regression that auth-code email bypasses `notification:intents` + * `gatewayauthsessionusermail` runs real `Edge Gateway`, real + `Auth / Session Service`, real `User Service`, real `Mail Service`, and + real Redis +* Producer contract compatibility + + * notification-intent contract compatibility + * Game Master, Game Lobby, and Geo Profile Service stay covered by + `galaxy/notificationintent` until those real producer boundaries exist * Assert explicitly that auth-code emails still bypass notification and go directly from auth to mail. -### Regression tests to keep from this stage onward +### Regression tests to keep * Notification stays delivery/orchestration-only and does not become source of truth. * Non-auth notifications consistently go through notification service. +* Producer-owned audience resolution remains stable: user-targeted producers + publish concrete recipient `user_id` values, while admin-only recipients + remain type-specific notification-service configuration. +* Private-game invite notifications remain user-bound by internal `user_id`. * Gateway push compatibility remains stable. 
--- @@ -560,10 +600,10 @@ The testing plan follows this service order: * visibility rules for private games * Invite lifecycle tests: - * invite code creation - * invite code redemption + * user-bound invite creation + * invite acceptance * invite approval/rejection - * invite expiration if applicable later + * invite expiration and revoke handling where applicable * Application and approval tests: * public game application @@ -615,7 +655,7 @@ The testing plan follows this service order: * authenticated context correctly propagated from gateway * Keep runtime launch boundaries stubbed until `Runtime Manager` exists. -### Regression tests to keep from this stage onward +### Regression tests to keep * `Lobby` remains source of truth for platform game metadata and membership. * `Lobby` user-facing game lists remain independent from `Game Master`. @@ -669,7 +709,7 @@ The testing plan follows this service order: * optional operational event routing if enabled * Use a fake or test runtime backend first, then a targeted smoke suite against a real local Docker backend. -### Regression tests to keep from this stage onward +### Regression tests to keep * Runtime Manager remains the only component talking to Docker API. * Runtime job event contracts remain stable for `Lobby` and later `Game Master`. @@ -752,7 +792,7 @@ The testing plan follows this service order: * setup call * finish callback -### Regression tests to keep from this stage onward +### Regression tests to keep * `Game Master` remains the only service allowed to call game engine containers. * Turn cutoff logic stays authoritative at platform level. @@ -805,7 +845,7 @@ The testing plan follows this service order: * admin-generated notifications where needed -### Regression tests to keep from this stage onward +### Regression tests to keep * Admin Service remains orchestration/backend only. * System admin capabilities remain separate from private-owner capabilities. 
@@ -872,7 +912,7 @@ The testing plan follows this service order: * optional admin notification flow * Keep geo processing fail-open relative to gameplay in all integration tests. -### Regression tests to keep from this stage onward +### Regression tests to keep * Geo processing never blocks the current gameplay request. * Review-recommended state remains queryable even when event/mail side effects fail. @@ -920,7 +960,7 @@ The testing plan follows this service order: * user-facing entitlement reads reflect billing-fed updates correctly -### Regression tests to keep from this stage onward +### Regression tests to keep * Other services never depend directly on billing for live entitlement decisions. * `User Service` remains the source of truth for current entitlement. @@ -979,8 +1019,8 @@ By default, they should use: ### D. Private game lifecycle * Eligible user creates private game. -* Owner creates invite code. -* Another user redeems invite code and applies. +* Owner creates a user-bound invite. +* The invited user accepts the invite and applies. * Owner approves application. * Owner starts game. * Running registration completes. @@ -1030,7 +1070,7 @@ By default, they should use: ### J. Notification routing flow -* Lobby emits invite/application/approval events. +* Lobby emits invite/application/approval notification intents. * Notification Service sends push through gateway. * Non-auth email notifications route through Notification Service to Mail Service. * Auth-code emails remain direct `Auth / Session -> Mail`. diff --git a/gateway/README.md b/gateway/README.md index 0f7bd7b..ebfefc2 100644 --- a/gateway/README.md +++ b/gateway/README.md @@ -477,6 +477,26 @@ payload only: `user_id`, optional `device_session_id`, `event_type`, gateway derives `timestamp_ms`, recomputes `payload_hash`, signs the event, and only then forwards it to the matching `SubscribeEvents` streams. 
+Notification-owned user-facing payloads are expected to use +`pkg/schema/fbs/notification.fbs`. The initial notification event vocabulary +in v1 is exactly: + +- `game.turn.ready` +- `game.finished` +- `lobby.application.submitted` +- `lobby.membership.approved` +- `lobby.membership.rejected` +- `lobby.invite.created` +- `lobby.invite.redeemed` + +`lobby.application.submitted` is published toward `Gateway` only for the +private-game owner flow. The public-game variant is email-only. +The real `Notification Service -> Gateway` integration suite verifies this +user-targeted fan-out path and asserts that notification-owned push events do +not include `device_session_id`, so Gateway delivers them to every active +stream for the target user. Auth-code email does not use this push path and +continues to bypass `Notification Service`. + ## Verification and Routing Pipeline The gateway applies the same strict verification order for authenticated gRPC diff --git a/gateway/docs/examples.md b/gateway/docs/examples.md index db80966..7b3ff8d 100644 --- a/gateway/docs/examples.md +++ b/gateway/docs/examples.md @@ -173,9 +173,9 @@ User-wide event: ```bash redis-cli XADD gateway:client-events '*' \ user_id user-123 \ - event_type fleet.updated \ - event_id event-123 \ - payload_bytes payload-v1 + event_type game.turn.ready \ + event_id notification-route-123 \ + payload_bytes flatbuffers-game-turn-ready ``` Session-targeted event with correlation: @@ -194,6 +194,8 @@ redis-cli XADD gateway:client-events '*' \ Notes: - `payload_bytes` in Redis Stream entries must be binary-safe payload data; +- notification-owned payload bytes should follow + `pkg/schema/fbs/notification.fbs`; - the gateway derives `timestamp_ms`, recomputes `payload_hash`, and signs the outgoing event at delivery time; - each gateway replica consumes streams with plain `XREAD`, so publishers must diff --git a/gateway/internal/events/push_grpc_integration_test.go 
b/gateway/internal/events/push_grpc_integration_test.go index 7cad073..4c59dda 100644 --- a/gateway/internal/events/push_grpc_integration_test.go +++ b/gateway/internal/events/push_grpc_integration_test.go @@ -15,8 +15,10 @@ import ( "galaxy/gateway/internal/push" "galaxy/gateway/internal/session" gatewayv1 "galaxy/gateway/proto/galaxy/gateway/v1" + notificationfbs "galaxy/schema/fbs/notification" "github.com/alicebob/miniredis/v2" + flatbuffers "github.com/google/flatbuffers/go" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "go.uber.org/zap" @@ -63,31 +65,37 @@ func TestSubscribeEventsFanOutsUserTargetedEventToAllUserSessions(t *testing.T) require.NoError(t, err) assertPushBootstrapEvent(t, recvPushEvent(t, unrelated), "request-3", "trace-device-session-3") + payloadBytes := buildGameTurnReadyPayload(t, "game-123", 54) addClientEvent(t, server, "gateway:client_events", map[string]any{ "user_id": "user-123", - "event_type": "fleet.updated", + "event_type": "game.turn.ready", "event_id": "event-123", - "payload_bytes": []byte("payload-123"), + "payload_bytes": payloadBytes, "request_id": "request-123", "trace_id": "trace-123", }) - assertSignedPushEvent(t, recvPushEvent(t, targetOne), push.Event{ + firstDelivered := recvPushEvent(t, targetOne) + assertSignedPushEvent(t, firstDelivered, push.Event{ UserID: "user-123", - EventType: "fleet.updated", + EventType: "game.turn.ready", EventID: "event-123", - PayloadBytes: []byte("payload-123"), + PayloadBytes: payloadBytes, RequestID: "request-123", TraceID: "trace-123", }) - assertSignedPushEvent(t, recvPushEvent(t, targetTwo), push.Event{ + assertDecodedGameTurnReadyPayload(t, firstDelivered.GetPayloadBytes(), "game-123", 54) + + secondDelivered := recvPushEvent(t, targetTwo) + assertSignedPushEvent(t, secondDelivered, push.Event{ UserID: "user-123", - EventType: "fleet.updated", + EventType: "game.turn.ready", EventID: "event-123", - PayloadBytes: []byte("payload-123"), + PayloadBytes: 
payloadBytes, RequestID: "request-123", TraceID: "trace-123", }) + assertDecodedGameTurnReadyPayload(t, secondDelivered.GetPayloadBytes(), "game-123", 54) assertNoPushEvent(t, unrelated, cancelUnrelated) } @@ -414,3 +422,26 @@ func pushResponseSignerPublicKey() ed25519.PublicKey { seed := sha256.Sum256([]byte("gateway-events-grpc-test-response")) return ed25519.NewKeyFromSeed(seed[:]).Public().(ed25519.PublicKey) } + +func buildGameTurnReadyPayload(t *testing.T, gameID string, turnNumber int64) []byte { + t.Helper() + + builder := flatbuffers.NewBuilder(64) + gameIDOffset := builder.CreateString(gameID) + + notificationfbs.GameTurnReadyEventStart(builder) + notificationfbs.GameTurnReadyEventAddGameId(builder, gameIDOffset) + notificationfbs.GameTurnReadyEventAddTurnNumber(builder, turnNumber) + offset := notificationfbs.GameTurnReadyEventEnd(builder) + notificationfbs.FinishGameTurnReadyEventBuffer(builder, offset) + + return builder.FinishedBytes() +} + +func assertDecodedGameTurnReadyPayload(t *testing.T, payload []byte, wantGameID string, wantTurnNumber int64) { + t.Helper() + + event := notificationfbs.GetRootAsGameTurnReadyEvent(payload, 0) + require.Equal(t, wantGameID, string(event.GameId())) + require.Equal(t, wantTurnNumber, event.TurnNumber()) +} diff --git a/geoprofile/PLAN.md b/geoprofile/PLAN.md index c7ffce1..dc412af 100644 --- a/geoprofile/PLAN.md +++ b/geoprofile/PLAN.md @@ -297,9 +297,10 @@ Goal: Tasks: -- Define the event payload for `country_review_recommended=true`. -- Implement event publication on transition to `true`. -- Implement configuration-driven notification handoff through +- Define the normalized notification-intent payload for + `geo.review_recommended`. +- Implement intent publication on transition to `true`. +- Implement configuration-driven administrator-notification handoff through `Notification Service`. - Add notification deduplication or transition-only logic to prevent spam. 
- Add failure metrics for both event publication and downstream notification diff --git a/geoprofile/README.md b/geoprofile/README.md index 6bf5034..b324212 100644 --- a/geoprofile/README.md +++ b/geoprofile/README.md @@ -806,10 +806,21 @@ Notification routing is triggered only when: - `country_review_recommended` transitions to `true` - Email notifications are enabled -`Notification Service` may then fan out e-mail delivery through -`Mail Service`. +`Geo Profile Service` publishes normalized notification intent +`geo.review_recommended` into `notification:intents` with +`audience_kind=admin_email`. +Go implementations should use the shared `galaxy/notificationintent` module +for this publication path. +`Notification Service` then resolves the administrator email list from its own +configuration and fans out e-mail delivery through `Mail Service`. Geo Profile Service itself never sends mail directly. -That path is auxiliary and must not be required for business correctness. +This path is unrelated to auth-code delivery, which remains a direct +`Auth / Session Service -> Mail Service` flow and bypasses +`Notification Service`. +That path is auxiliary and must not be required for business correctness. If +the notification intent append fails after the review state transition is +stored, the transition remains committed and the failure is handled as +notification degradation. 
## Event Bus Integration diff --git a/go.work b/go.work index 9558d56..e2b661f 100644 --- a/go.work +++ b/go.work @@ -7,11 +7,13 @@ use ( ./gateway ./integration ./mail + ./notification ./pkg/calc ./pkg/connector ./pkg/error ./pkg/geoip ./pkg/model + ./pkg/notificationintent ./pkg/schema ./pkg/storage ./pkg/transcoder @@ -25,6 +27,7 @@ replace ( galaxy/error v0.0.0 => ./pkg/error galaxy/geoip v0.0.0 => ./pkg/geoip galaxy/model v0.0.0 => ./pkg/model + galaxy/notificationintent v0.0.0 => ./pkg/notificationintent galaxy/schema v0.0.0 => ./pkg/schema galaxy/storage v0.0.0 => ./pkg/storage galaxy/transcoder v0.0.0 => ./pkg/transcoder diff --git a/integration/README.md b/integration/README.md index 5aab219..0074c53 100644 --- a/integration/README.md +++ b/integration/README.md @@ -14,6 +14,8 @@ integration/ ├── gatewayauthsessionmail/ │ ├── gateway_authsession_mail_test.go │ └── harness_test.go +├── gatewayauthsessionusermail/ +│ └── gateway_authsession_user_mail_test.go ├── authsessionuser/ │ ├── authsession_user_test.go │ └── harness_test.go @@ -26,6 +28,12 @@ integration/ ├── gatewayuser/ │ ├── gateway_user_test.go │ └── harness_test.go +├── notificationgateway/ +│ └── notification_gateway_test.go +├── notificationmail/ +│ └── notification_mail_test.go +├── notificationuser/ +│ └── notification_user_test.go ├── go.mod ├── go.sum └── internal/ @@ -60,12 +68,27 @@ integration/ - `gatewayauthsessionmail` verifies the public auth flow across real `Edge Gateway`, real `Auth / Session Service`, and real `Mail Service`. - `gatewayuser` verifies the direct authenticated self-service boundary between real `Edge Gateway` and real `User Service`. - `gatewayauthsessionuser` verifies the full public-auth plus authenticated-account chain across real `Edge Gateway`, real `Auth / Session Service`, and real `User Service`. 
+- `notificationgateway` verifies that real `Notification Service` push + publication is consumed and fanned out by real `Edge Gateway` for all + user-facing push types. +- `notificationmail` verifies that real `Notification Service` template-mode + mail publication is consumed by real `Mail Service` for all notification + email types. +- `notificationuser` verifies that real `Notification Service` enriches + recipients through real `User Service` and preserves Redis stream progress + semantics for missing or temporarily unavailable users. +- `gatewayauthsessionusermail` verifies the full public registration chain + across real `Edge Gateway`, real `Auth / Session Service`, real + `User Service`, and real `Mail Service`, including the regression that + auth-code mail bypasses `notification:intents`. The current fast suites still use one isolated `miniredis` instance plus either real downstream processes or external stateful HTTP stubs where appropriate. -`authsessionmail` and `gatewayauthsessionmail` are the deliberate exceptions: -they use one real Redis container through `testcontainers-go`, because those -boundaries must exercise the real Redis-backed `Mail Service` runtime. +`authsessionmail`, `gatewayauthsessionmail`, `notificationgateway`, +`notificationmail`, `notificationuser`, and `gatewayauthsessionusermail` are +the deliberate exceptions: they use one real Redis container through +`testcontainers-go`, because those boundaries must exercise real Redis stream, +persistence, or scheduling behavior. `authsessionmail` additionally contains one targeted SMTP-capture scenario for the real `smtp` provider path, while `gatewayauthsessionmail` keeps `Mail Service` in `stub` mode and extracts the confirmation code through the trusted @@ -83,6 +106,10 @@ go test ./authsessionmail/... go test ./gatewayauthsessionmail/... go test ./gatewayuser/... go test ./gatewayauthsessionuser/... +go test ./notificationgateway/... +go test ./notificationmail/... 
+go test ./notificationuser/... +go test ./gatewayauthsessionusermail/... ``` Useful regression commands after boundary changes: @@ -94,6 +121,10 @@ go test ./authsessionmail/... go test ./gatewayauthsessionmail/... go test ./gatewayuser/... go test ./gatewayauthsessionuser/... +go test ./notificationgateway/... +go test ./notificationmail/... +go test ./notificationuser/... +go test ./gatewayauthsessionusermail/... cd ../gateway && go test ./... cd ../authsession && go test ./... -run GatewayCompatibility cd ../user && go test ./... diff --git a/integration/gatewayauthsessionmail/gateway_authsession_mail_test.go b/integration/gatewayauthsessionmail/gateway_authsession_mail_test.go index 11e3648..4480198 100644 --- a/integration/gatewayauthsessionmail/gateway_authsession_mail_test.go +++ b/integration/gatewayauthsessionmail/gateway_authsession_mail_test.go @@ -85,3 +85,22 @@ func TestGatewayAuthsessionMailUnavailablePassesThroughGatewaySurface(t *testing require.Equal(t, http.StatusServiceUnavailable, response.StatusCode) require.JSONEq(t, `{"error":{"code":"service_unavailable","message":"service is unavailable"}}`, response.Body) } + +func TestGatewayAuthsessionMailAuthCodeBypassesNotificationStream(t *testing.T) { + h := newGatewayAuthsessionMailHarness(t) + + h.sendChallengeWithAcceptLanguage(t, testEmail, "en") + + list := h.eventuallyListDeliveries(t, url.Values{ + "source": []string{"authsession"}, + "recipient": []string{testEmail}, + "template_id": []string{"auth.login_code"}, + }) + require.Len(t, list.Items, 1) + require.Equal(t, "authsession", list.Items[0].Source) + require.Equal(t, "auth.login_code", list.Items[0].TemplateID) + + length, err := h.redis.XLen(context.Background(), "notification:intents").Result() + require.NoError(t, err) + require.Zero(t, length) +} diff --git a/integration/gatewayauthsessionusermail/gateway_authsession_user_mail_test.go b/integration/gatewayauthsessionusermail/gateway_authsession_user_mail_test.go new file mode 100644 
index 0000000..64a9b27 --- /dev/null +++ b/integration/gatewayauthsessionusermail/gateway_authsession_user_mail_test.go @@ -0,0 +1,691 @@ +package gatewayauthsessionusermail_test + +import ( + "bytes" + "context" + "crypto/ed25519" + "crypto/sha256" + "encoding/base64" + "encoding/json" + "errors" + "io" + "net/http" + "net/url" + "path/filepath" + "runtime" + "testing" + "time" + + gatewayv1 "galaxy/gateway/proto/galaxy/gateway/v1" + contractsgatewayv1 "galaxy/integration/internal/contracts/gatewayv1" + "galaxy/integration/internal/harness" + + "github.com/redis/go-redis/v9" + "github.com/stretchr/testify/require" + "google.golang.org/grpc" + "google.golang.org/grpc/credentials/insecure" +) + +const ( + gatewaySendEmailCodePath = "/api/v1/public/auth/send-email-code" + gatewayConfirmEmailCodePath = "/api/v1/public/auth/confirm-email-code" + mailDeliveriesPath = "/api/v1/internal/deliveries" + + testEmail = "pilot@example.com" + testTimeZone = "Europe/Kaliningrad" +) + +func TestGatewayAuthsessionUserMailRegistrationCreatesUserProjectsSessionAndBypassesNotification(t *testing.T) { + h := newGatewayAuthsessionUserMailHarness(t) + + clientPrivateKey := newClientPrivateKey("full-chain") + challengeID := h.sendChallengeWithAcceptLanguage(t, testEmail, "fr-FR, en;q=0.8") + + list := h.eventuallyListDeliveries(t, url.Values{ + "source": []string{"authsession"}, + "recipient": []string{testEmail}, + "template_id": []string{"auth.login_code"}, + }) + require.Len(t, list.Items, 1) + require.Equal(t, "authsession", list.Items[0].Source) + require.Equal(t, "auth.login_code", list.Items[0].TemplateID) + require.Equal(t, "fr-FR", list.Items[0].Locale) + require.Equal(t, []string{testEmail}, list.Items[0].To) + + detail := h.getDelivery(t, list.Items[0].DeliveryID) + code := templateVariableString(t, detail.TemplateVariables, "code") + + confirm := h.confirmCode(t, challengeID, code, clientPrivateKey) + require.Equal(t, http.StatusOK, confirm.StatusCode, confirm.Body) + + var 
confirmBody confirmEmailCodeResponse + require.NoError(t, decodeStrictJSONPayload([]byte(confirm.Body), &confirmBody)) + require.NotEmpty(t, confirmBody.DeviceSessionID) + + account := h.lookupUserByEmail(t, testEmail) + require.Equal(t, testEmail, account.User.Email) + require.Equal(t, "fr-FR", account.User.PreferredLanguage) + require.Equal(t, testTimeZone, account.User.TimeZone) + require.NotEmpty(t, account.User.UserID) + + record := h.waitForGatewaySession(t, confirmBody.DeviceSessionID) + require.Equal(t, gatewaySessionRecord{ + DeviceSessionID: confirmBody.DeviceSessionID, + UserID: account.User.UserID, + ClientPublicKey: encodePublicKey(clientPrivateKey.Public().(ed25519.PublicKey)), + Status: "active", + }, record) + + conn := h.dialGateway(t) + client := gatewayv1.NewEdgeGatewayClient(conn) + stream, err := client.SubscribeEvents(context.Background(), newSubscribeEventsRequest(confirmBody.DeviceSessionID, "request-bootstrap", clientPrivateKey)) + require.NoError(t, err) + assertBootstrapEvent(t, recvGatewayEvent(t, stream), h.responseSignerPublicKey, "request-bootstrap") + + length, err := h.redis.XLen(context.Background(), "notification:intents").Result() + require.NoError(t, err) + require.Zero(t, length) +} + +type gatewayAuthsessionUserMailHarness struct { + redis *redis.Client + + userServiceURL string + gatewayPublicURL string + gatewayGRPCAddr string + mailInternalURL string + + responseSignerPublicKey ed25519.PublicKey + + gatewayProcess *harness.Process + authsessionProcess *harness.Process + userServiceProcess *harness.Process + mailProcess *harness.Process +} + +type httpResponse struct { + StatusCode int + Body string + Header http.Header +} + +type sendEmailCodeResponse struct { + ChallengeID string `json:"challenge_id"` +} + +type confirmEmailCodeResponse struct { + DeviceSessionID string `json:"device_session_id"` +} + +type gatewaySessionRecord struct { + DeviceSessionID string `json:"device_session_id"` + UserID string `json:"user_id"` + 
ClientPublicKey string `json:"client_public_key"` + Status string `json:"status"` + RevokedAtMS *int64 `json:"revoked_at_ms,omitempty"` +} + +type mailDeliveryListResponse struct { + Items []mailDeliverySummary `json:"items"` +} + +type mailDeliverySummary struct { + DeliveryID string `json:"delivery_id"` + Source string `json:"source"` + TemplateID string `json:"template_id"` + Locale string `json:"locale"` + To []string `json:"to"` + Status string `json:"status"` +} + +type mailDeliveryDetailResponse struct { + DeliveryID string `json:"delivery_id"` + Source string `json:"source"` + TemplateID string `json:"template_id"` + Locale string `json:"locale"` + To []string `json:"to"` + IdempotencyKey string `json:"idempotency_key"` + Status string `json:"status"` + TemplateVariables map[string]any `json:"template_variables,omitempty"` +} + +type userLookupResponse struct { + User accountView `json:"user"` +} + +type accountView struct { + UserID string `json:"user_id"` + Email string `json:"email"` + PreferredLanguage string `json:"preferred_language"` + TimeZone string `json:"time_zone"` +} + +func newGatewayAuthsessionUserMailHarness(t *testing.T) *gatewayAuthsessionUserMailHarness { + t.Helper() + + redisRuntime := harness.StartRedisContainer(t) + redisClient := redis.NewClient(&redis.Options{ + Addr: redisRuntime.Addr, + Protocol: 2, + DisableIdentity: true, + }) + t.Cleanup(func() { + require.NoError(t, redisClient.Close()) + }) + + responseSignerPath, responseSignerPublicKey := harness.WriteResponseSignerPEM(t, t.Name()) + userServiceAddr := harness.FreeTCPAddress(t) + mailInternalAddr := harness.FreeTCPAddress(t) + authsessionPublicAddr := harness.FreeTCPAddress(t) + authsessionInternalAddr := harness.FreeTCPAddress(t) + gatewayPublicAddr := harness.FreeTCPAddress(t) + gatewayGRPCAddr := harness.FreeTCPAddress(t) + + userServiceBinary := harness.BuildBinary(t, "userservice", "./user/cmd/userservice") + mailBinary := harness.BuildBinary(t, "mail", 
"./mail/cmd/mail") + authsessionBinary := harness.BuildBinary(t, "authsession", "./authsession/cmd/authsession") + gatewayBinary := harness.BuildBinary(t, "gateway", "./gateway/cmd/gateway") + + userServiceProcess := harness.StartProcess(t, "userservice", userServiceBinary, map[string]string{ + "USERSERVICE_LOG_LEVEL": "info", + "USERSERVICE_INTERNAL_HTTP_ADDR": userServiceAddr, + "USERSERVICE_REDIS_ADDR": redisRuntime.Addr, + "OTEL_TRACES_EXPORTER": "none", + "OTEL_METRICS_EXPORTER": "none", + }) + waitForUserServiceReady(t, userServiceProcess, "http://"+userServiceAddr) + + mailProcess := harness.StartProcess(t, "mail", mailBinary, map[string]string{ + "MAIL_LOG_LEVEL": "info", + "MAIL_INTERNAL_HTTP_ADDR": mailInternalAddr, + "MAIL_REDIS_ADDR": redisRuntime.Addr, + "MAIL_TEMPLATE_DIR": moduleTemplateDir(t), + "MAIL_SMTP_MODE": "stub", + "MAIL_STREAM_BLOCK_TIMEOUT": "100ms", + "MAIL_OPERATOR_REQUEST_TIMEOUT": time.Second.String(), + "MAIL_SHUTDOWN_TIMEOUT": "2s", + "OTEL_TRACES_EXPORTER": "none", + "OTEL_METRICS_EXPORTER": "none", + }) + waitForMailReady(t, mailProcess, "http://"+mailInternalAddr) + + authsessionProcess := harness.StartProcess(t, "authsession", authsessionBinary, map[string]string{ + "AUTHSESSION_LOG_LEVEL": "info", + "AUTHSESSION_PUBLIC_HTTP_ADDR": authsessionPublicAddr, + "AUTHSESSION_PUBLIC_HTTP_REQUEST_TIMEOUT": time.Second.String(), + "AUTHSESSION_INTERNAL_HTTP_ADDR": authsessionInternalAddr, + "AUTHSESSION_INTERNAL_HTTP_REQUEST_TIMEOUT": time.Second.String(), + "AUTHSESSION_REDIS_ADDR": redisRuntime.Addr, + "AUTHSESSION_USER_SERVICE_MODE": "rest", + "AUTHSESSION_USER_SERVICE_BASE_URL": "http://" + userServiceAddr, + "AUTHSESSION_USER_SERVICE_REQUEST_TIMEOUT": time.Second.String(), + "AUTHSESSION_MAIL_SERVICE_MODE": "rest", + "AUTHSESSION_MAIL_SERVICE_BASE_URL": "http://" + mailInternalAddr, + "AUTHSESSION_MAIL_SERVICE_REQUEST_TIMEOUT": time.Second.String(), + "AUTHSESSION_REDIS_GATEWAY_SESSION_CACHE_KEY_PREFIX": "gateway:session:", + 
"AUTHSESSION_REDIS_GATEWAY_SESSION_EVENTS_STREAM": "gateway:session_events", + "OTEL_TRACES_EXPORTER": "none", + "OTEL_METRICS_EXPORTER": "none", + }) + waitForAuthsessionPublicReady(t, authsessionProcess, "http://"+authsessionPublicAddr) + + gatewayProcess := harness.StartProcess(t, "gateway", gatewayBinary, map[string]string{ + "GATEWAY_LOG_LEVEL": "info", + "GATEWAY_PUBLIC_HTTP_ADDR": gatewayPublicAddr, + "GATEWAY_AUTHENTICATED_GRPC_ADDR": gatewayGRPCAddr, + "GATEWAY_SESSION_CACHE_REDIS_ADDR": redisRuntime.Addr, + "GATEWAY_SESSION_CACHE_REDIS_KEY_PREFIX": "gateway:session:", + "GATEWAY_SESSION_EVENTS_REDIS_STREAM": "gateway:session_events", + "GATEWAY_CLIENT_EVENTS_REDIS_STREAM": "gateway:client_events", + "GATEWAY_REPLAY_REDIS_KEY_PREFIX": "gateway:replay:", + "GATEWAY_RESPONSE_SIGNER_PRIVATE_KEY_PEM_PATH": filepath.Clean(responseSignerPath), + "GATEWAY_AUTH_SERVICE_BASE_URL": "http://" + authsessionPublicAddr, + "GATEWAY_PUBLIC_AUTH_UPSTREAM_TIMEOUT": (500 * time.Millisecond).String(), + "GATEWAY_PUBLIC_HTTP_ANTI_ABUSE_PUBLIC_AUTH_RATE_LIMIT_REQUESTS": "100", + "GATEWAY_PUBLIC_HTTP_ANTI_ABUSE_PUBLIC_AUTH_RATE_LIMIT_WINDOW": "1s", + "GATEWAY_PUBLIC_HTTP_ANTI_ABUSE_PUBLIC_AUTH_RATE_LIMIT_BURST": "100", + "GATEWAY_PUBLIC_HTTP_ANTI_ABUSE_SEND_EMAIL_CODE_IDENTITY_RATE_LIMIT_REQUESTS": "100", + "GATEWAY_PUBLIC_HTTP_ANTI_ABUSE_SEND_EMAIL_CODE_IDENTITY_RATE_LIMIT_WINDOW": "1s", + "GATEWAY_PUBLIC_HTTP_ANTI_ABUSE_SEND_EMAIL_CODE_IDENTITY_RATE_LIMIT_BURST": "100", + "GATEWAY_PUBLIC_HTTP_ANTI_ABUSE_CONFIRM_EMAIL_CODE_IDENTITY_RATE_LIMIT_REQUESTS": "100", + "GATEWAY_PUBLIC_HTTP_ANTI_ABUSE_CONFIRM_EMAIL_CODE_IDENTITY_RATE_LIMIT_WINDOW": "1s", + "GATEWAY_PUBLIC_HTTP_ANTI_ABUSE_CONFIRM_EMAIL_CODE_IDENTITY_RATE_LIMIT_BURST": "100", + "OTEL_TRACES_EXPORTER": "none", + "OTEL_METRICS_EXPORTER": "none", + }) + harness.WaitForHTTPStatus(t, gatewayProcess, "http://"+gatewayPublicAddr+"/healthz", http.StatusOK) + harness.WaitForTCP(t, gatewayProcess, gatewayGRPCAddr) + + return 
&gatewayAuthsessionUserMailHarness{ + redis: redisClient, + userServiceURL: "http://" + userServiceAddr, + gatewayPublicURL: "http://" + gatewayPublicAddr, + gatewayGRPCAddr: gatewayGRPCAddr, + mailInternalURL: "http://" + mailInternalAddr, + responseSignerPublicKey: responseSignerPublicKey, + gatewayProcess: gatewayProcess, + authsessionProcess: authsessionProcess, + userServiceProcess: userServiceProcess, + mailProcess: mailProcess, + } +} + +func (h *gatewayAuthsessionUserMailHarness) sendChallengeWithAcceptLanguage(t *testing.T, email string, acceptLanguage string) string { + t.Helper() + + response := postJSONValueWithHeaders( + t, + h.gatewayPublicURL+gatewaySendEmailCodePath, + map[string]string{"email": email}, + map[string]string{"Accept-Language": acceptLanguage}, + ) + require.Equal(t, http.StatusOK, response.StatusCode, response.Body) + + var body sendEmailCodeResponse + require.NoError(t, decodeStrictJSONPayload([]byte(response.Body), &body)) + require.NotEmpty(t, body.ChallengeID) + return body.ChallengeID +} + +func (h *gatewayAuthsessionUserMailHarness) confirmCode(t *testing.T, challengeID string, code string, clientPrivateKey ed25519.PrivateKey) httpResponse { + t.Helper() + + return postJSONValue(t, h.gatewayPublicURL+gatewayConfirmEmailCodePath, map[string]string{ + "challenge_id": challengeID, + "code": code, + "client_public_key": encodePublicKey(clientPrivateKey.Public().(ed25519.PublicKey)), + "time_zone": testTimeZone, + }) +} + +func (h *gatewayAuthsessionUserMailHarness) eventuallyListDeliveries(t *testing.T, query url.Values) mailDeliveryListResponse { + t.Helper() + + var response mailDeliveryListResponse + require.Eventually(t, func() bool { + response = h.listDeliveries(t, query) + return len(response.Items) > 0 + }, 10*time.Second, 50*time.Millisecond) + + return response +} + +func (h *gatewayAuthsessionUserMailHarness) listDeliveries(t *testing.T, query url.Values) mailDeliveryListResponse { + t.Helper() + + target := 
h.mailInternalURL + mailDeliveriesPath + if encoded := query.Encode(); encoded != "" { + target += "?" + encoded + } + + request, err := http.NewRequest(http.MethodGet, target, nil) + require.NoError(t, err) + + return doJSONRequest[mailDeliveryListResponse](t, request, http.StatusOK) +} + +func (h *gatewayAuthsessionUserMailHarness) getDelivery(t *testing.T, deliveryID string) mailDeliveryDetailResponse { + t.Helper() + + request, err := http.NewRequest(http.MethodGet, h.mailInternalURL+mailDeliveriesPath+"/"+url.PathEscape(deliveryID), nil) + require.NoError(t, err) + + return doJSONRequest[mailDeliveryDetailResponse](t, request, http.StatusOK) +} + +func (h *gatewayAuthsessionUserMailHarness) lookupUserByEmail(t *testing.T, email string) userLookupResponse { + t.Helper() + + response := postJSONValue(t, h.userServiceURL+"/api/v1/internal/user-lookups/by-email", map[string]string{ + "email": email, + }) + return decodeJSONResponse[userLookupResponse](t, response, http.StatusOK) +} + +func (h *gatewayAuthsessionUserMailHarness) waitForGatewaySession(t *testing.T, deviceSessionID string) gatewaySessionRecord { + t.Helper() + + deadline := time.Now().Add(5 * time.Second) + for time.Now().Before(deadline) { + payload, err := h.redis.Get(context.Background(), "gateway:session:"+deviceSessionID).Bytes() + if err == nil { + var record gatewaySessionRecord + require.NoError(t, decodeStrictJSONPayload(payload, &record)) + return record + } + + time.Sleep(25 * time.Millisecond) + } + + t.Fatalf("gateway session projection for %s was not published in time", deviceSessionID) + return gatewaySessionRecord{} +} + +func (h *gatewayAuthsessionUserMailHarness) dialGateway(t *testing.T) *grpc.ClientConn { + t.Helper() + + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + conn, err := grpc.DialContext( + ctx, + h.gatewayGRPCAddr, + grpc.WithTransportCredentials(insecure.NewCredentials()), + grpc.WithBlock(), + ) + require.NoError(t, err) + 
t.Cleanup(func() { + require.NoError(t, conn.Close()) + }) + + return conn +} + +func postJSONValue(t *testing.T, targetURL string, body any) httpResponse { + t.Helper() + + return postJSONValueWithHeaders(t, targetURL, body, nil) +} + +func postJSONValueWithHeaders(t *testing.T, targetURL string, body any, headers map[string]string) httpResponse { + t.Helper() + + payload, err := json.Marshal(body) + require.NoError(t, err) + + request, err := http.NewRequest(http.MethodPost, targetURL, bytes.NewReader(payload)) + require.NoError(t, err) + request.Header.Set("Content-Type", "application/json") + for key, value := range headers { + if value == "" { + continue + } + request.Header.Set(key, value) + } + + return doRequest(t, request) +} + +func doJSONRequest[T any](t *testing.T, request *http.Request, wantStatus int) T { + t.Helper() + + response := doRequest(t, request) + return decodeJSONResponse[T](t, response, wantStatus) +} + +func decodeJSONResponse[T any](t *testing.T, response httpResponse, wantStatus int) T { + t.Helper() + + require.Equal(t, wantStatus, response.StatusCode, response.Body) + + var decoded T + require.NoError(t, decodeJSONPayload([]byte(response.Body), &decoded), response.Body) + return decoded +} + +func doRequest(t *testing.T, request *http.Request) httpResponse { + t.Helper() + + client := &http.Client{ + Timeout: 5 * time.Second, + Transport: &http.Transport{ + DisableKeepAlives: true, + }, + } + t.Cleanup(client.CloseIdleConnections) + + response, err := client.Do(request) + require.NoError(t, err) + defer response.Body.Close() + + payload, err := io.ReadAll(response.Body) + require.NoError(t, err) + + return httpResponse{ + StatusCode: response.StatusCode, + Body: string(payload), + Header: response.Header.Clone(), + } +} + +func decodeStrictJSONPayload(payload []byte, target any) error { + decoder := json.NewDecoder(bytes.NewReader(payload)) + decoder.DisallowUnknownFields() + + if err := decoder.Decode(target); err != nil { + return 
err + } + if err := decoder.Decode(&struct{}{}); err != io.EOF { + if err == nil { + return errors.New("unexpected trailing JSON input") + } + return err + } + + return nil +} + +func decodeJSONPayload(payload []byte, target any) error { + decoder := json.NewDecoder(bytes.NewReader(payload)) + + if err := decoder.Decode(target); err != nil { + return err + } + if err := decoder.Decode(&struct{}{}); err != io.EOF { + if err == nil { + return errors.New("unexpected trailing JSON input") + } + return err + } + + return nil +} + +func templateVariableString(t *testing.T, variables map[string]any, field string) string { + t.Helper() + + value, ok := variables[field] + require.True(t, ok, "template variable %q is missing", field) + + text, ok := value.(string) + require.True(t, ok, "template variable %q must be a string", field) + require.NotEmpty(t, text) + + return text +} + +func newClientPrivateKey(label string) ed25519.PrivateKey { + seed := sha256.Sum256([]byte("galaxy-integration-gateway-authsession-user-mail-client-" + label)) + return ed25519.NewKeyFromSeed(seed[:]) +} + +func encodePublicKey(publicKey ed25519.PublicKey) string { + return base64.StdEncoding.EncodeToString(publicKey) +} + +func newSubscribeEventsRequest(deviceSessionID string, requestID string, clientPrivateKey ed25519.PrivateKey) *gatewayv1.SubscribeEventsRequest { + payloadHash := contractsgatewayv1.ComputePayloadHash(nil) + + request := &gatewayv1.SubscribeEventsRequest{ + ProtocolVersion: contractsgatewayv1.ProtocolVersionV1, + DeviceSessionId: deviceSessionID, + MessageType: contractsgatewayv1.SubscribeMessageType, + TimestampMs: time.Now().UnixMilli(), + RequestId: requestID, + PayloadHash: payloadHash, + TraceId: "trace-" + requestID, + } + request.Signature = contractsgatewayv1.SignRequest(clientPrivateKey, contractsgatewayv1.RequestSigningFields{ + ProtocolVersion: request.GetProtocolVersion(), + DeviceSessionID: request.GetDeviceSessionId(), + MessageType: request.GetMessageType(), + 
TimestampMS: request.GetTimestampMs(), + RequestID: request.GetRequestId(), + PayloadHash: request.GetPayloadHash(), + }) + + return request +} + +func recvGatewayEvent(t *testing.T, stream grpc.ServerStreamingClient[gatewayv1.GatewayEvent]) *gatewayv1.GatewayEvent { + t.Helper() + + eventCh := make(chan *gatewayv1.GatewayEvent, 1) + errCh := make(chan error, 1) + go func() { + event, err := stream.Recv() + if err != nil { + errCh <- err + return + } + eventCh <- event + }() + + select { + case event := <-eventCh: + return event + case err := <-errCh: + require.NoError(t, err) + case <-time.After(5 * time.Second): + require.FailNow(t, "timed out waiting for gateway event") + } + + return nil +} + +func assertBootstrapEvent(t *testing.T, event *gatewayv1.GatewayEvent, responseSignerPublicKey ed25519.PublicKey, wantRequestID string) { + t.Helper() + + require.Equal(t, contractsgatewayv1.ServerTimeEventType, event.GetEventType()) + require.Equal(t, wantRequestID, event.GetEventId()) + require.Equal(t, wantRequestID, event.GetRequestId()) + require.NoError(t, contractsgatewayv1.VerifyPayloadHash(event.GetPayloadBytes(), event.GetPayloadHash())) + require.NoError(t, contractsgatewayv1.VerifyEventSignature(responseSignerPublicKey, event.GetSignature(), contractsgatewayv1.EventSigningFields{ + EventType: event.GetEventType(), + EventID: event.GetEventId(), + TimestampMS: event.GetTimestampMs(), + RequestID: event.GetRequestId(), + TraceID: event.GetTraceId(), + PayloadHash: event.GetPayloadHash(), + })) +} + +func waitForUserServiceReady(t *testing.T, process *harness.Process, baseURL string) { + t.Helper() + + client := &http.Client{Timeout: 250 * time.Millisecond} + t.Cleanup(client.CloseIdleConnections) + + deadline := time.Now().Add(10 * time.Second) + for time.Now().Before(deadline) { + request, err := http.NewRequest(http.MethodGet, baseURL+"/api/v1/internal/users/user-missing/exists", nil) + require.NoError(t, err) + + response, err := client.Do(request) + if err 
== nil { + _, _ = io.Copy(io.Discard, response.Body) + response.Body.Close() + if response.StatusCode == http.StatusOK { + return + } + } + + time.Sleep(25 * time.Millisecond) + } + + t.Fatalf("wait for userservice readiness: timeout\n%s", process.Logs()) +} + +func waitForMailReady(t *testing.T, process *harness.Process, baseURL string) { + t.Helper() + + client := &http.Client{Timeout: 250 * time.Millisecond} + t.Cleanup(client.CloseIdleConnections) + + deadline := time.Now().Add(10 * time.Second) + for time.Now().Before(deadline) { + request, err := http.NewRequest(http.MethodGet, baseURL+mailDeliveriesPath, nil) + require.NoError(t, err) + + response, err := client.Do(request) + if err == nil { + _, _ = io.Copy(io.Discard, response.Body) + response.Body.Close() + if response.StatusCode == http.StatusOK { + return + } + } + + time.Sleep(25 * time.Millisecond) + } + + t.Fatalf("wait for mail readiness: timeout\n%s", process.Logs()) +} + +func waitForAuthsessionPublicReady(t *testing.T, process *harness.Process, baseURL string) { + t.Helper() + + client := &http.Client{Timeout: 250 * time.Millisecond} + t.Cleanup(client.CloseIdleConnections) + + deadline := time.Now().Add(10 * time.Second) + for time.Now().Before(deadline) { + response, err := postJSONValueMaybe(client, baseURL+gatewaySendEmailCodePath, map[string]string{ + "email": "", + }) + if err == nil && response.StatusCode == http.StatusBadRequest { + return + } + + time.Sleep(25 * time.Millisecond) + } + + t.Fatalf("wait for authsession public readiness: timeout\n%s", process.Logs()) +} + +func postJSONValueMaybe(client *http.Client, targetURL string, body any) (httpResponse, error) { + payload, err := json.Marshal(body) + if err != nil { + return httpResponse{}, err + } + + request, err := http.NewRequest(http.MethodPost, targetURL, bytes.NewReader(payload)) + if err != nil { + return httpResponse{}, err + } + request.Header.Set("Content-Type", "application/json") + + response, err := client.Do(request) + 
if err != nil { + return httpResponse{}, err + } + defer response.Body.Close() + + responseBody, err := io.ReadAll(response.Body) + if err != nil { + return httpResponse{}, err + } + + return httpResponse{ + StatusCode: response.StatusCode, + Body: string(responseBody), + Header: response.Header.Clone(), + }, nil +} + +func moduleTemplateDir(t *testing.T) string { + t.Helper() + + return filepath.Join(repositoryRoot(t), "mail", "templates") +} + +func repositoryRoot(t *testing.T) string { + t.Helper() + + _, file, _, ok := runtime.Caller(0) + if !ok { + t.Fatal("resolve repository root: runtime caller is unavailable") + } + + return filepath.Clean(filepath.Join(filepath.Dir(file), "..", "..")) +} diff --git a/integration/internal/harness/process.go b/integration/internal/harness/process.go index 0d26fd3..e44ea59 100644 --- a/integration/internal/harness/process.go +++ b/integration/internal/harness/process.go @@ -32,8 +32,9 @@ type Process struct { logsMu sync.Mutex logs bytes.Buffer - doneCh chan struct{} - waitErr error + doneCh chan struct{} + waitErr error + allowUnexpectedExit bool } // StartProcess starts binaryPath with envOverrides and registers cleanup that @@ -82,7 +83,7 @@ func (p *Process) Stop(t testing.TB) { select { case <-p.doneCh: err := p.waitErr - if err != nil && !isExpectedProcessExit(err) { + if err != nil && !isExpectedProcessExit(err) && !p.allowUnexpectedExit { t.Errorf("%s exited unexpectedly: %v", p.name, err) } return @@ -96,7 +97,7 @@ func (p *Process) Stop(t testing.TB) { select { case <-p.doneCh: err := p.waitErr - if err != nil && !isExpectedProcessExit(err) { + if err != nil && !isExpectedProcessExit(err) && !p.allowUnexpectedExit { t.Errorf("%s exited unexpectedly: %v", p.name, err) } case <-time.After(defaultStopWait): @@ -105,12 +106,22 @@ func (p *Process) Stop(t testing.TB) { } <-p.doneCh err := p.waitErr - if err != nil && !isExpectedProcessExit(err) { + if err != nil && !isExpectedProcessExit(err) && !p.allowUnexpectedExit { 
t.Errorf("%s exited unexpectedly: %v", p.name, err) } } } +// AllowUnexpectedExit marks a process exit as expected for tests that +// deliberately trigger a fatal runtime dependency failure. +func (p *Process) AllowUnexpectedExit() { + if p == nil { + return + } + + p.allowUnexpectedExit = true +} + // Logs returns the captured combined stdout/stderr output of the process. func (p *Process) Logs() string { if p == nil { diff --git a/integration/notificationgateway/notification_gateway_test.go b/integration/notificationgateway/notification_gateway_test.go new file mode 100644 index 0000000..f78d6fa --- /dev/null +++ b/integration/notificationgateway/notification_gateway_test.go @@ -0,0 +1,526 @@ +package notificationgateway_test + +import ( + "bytes" + "context" + "crypto/ed25519" + "crypto/sha256" + "encoding/base64" + "encoding/json" + "errors" + "io" + "net/http" + "path/filepath" + "testing" + "time" + + gatewayv1 "galaxy/gateway/proto/galaxy/gateway/v1" + contractsgatewayv1 "galaxy/integration/internal/contracts/gatewayv1" + "galaxy/integration/internal/harness" + + "github.com/redis/go-redis/v9" + "github.com/stretchr/testify/require" + "google.golang.org/grpc" + "google.golang.org/grpc/credentials/insecure" +) + +const ( + notificationGatewayClientEventsStream = "gateway:client_events" + notificationGatewayIntentsStream = "notification:intents" +) + +func TestNotificationGatewayFanOutsAllUserPushTypesToAllUserSessions(t *testing.T) { + h := newNotificationGatewayHarness(t) + + recipient := h.ensureUser(t, "pilot@example.com", "fr-FR") + + firstPrivateKey := newClientPrivateKey("first") + secondPrivateKey := newClientPrivateKey("second") + unrelatedPrivateKey := newClientPrivateKey("unrelated") + h.seedGatewaySession(t, "device-session-1", recipient.UserID, firstPrivateKey) + h.seedGatewaySession(t, "device-session-2", recipient.UserID, secondPrivateKey) + h.seedGatewaySession(t, "device-session-3", "user-unrelated", unrelatedPrivateKey) + + conn := 
h.dialGateway(t) + client := gatewayv1.NewEdgeGatewayClient(conn) + + firstCtx, cancelFirst := context.WithCancel(context.Background()) + defer cancelFirst() + firstStream, err := client.SubscribeEvents(firstCtx, newSubscribeEventsRequest("device-session-1", "request-1", firstPrivateKey)) + require.NoError(t, err) + assertBootstrapEvent(t, recvGatewayEvent(t, firstStream), h.responseSignerPublicKey, "request-1") + + secondCtx, cancelSecond := context.WithCancel(context.Background()) + defer cancelSecond() + secondStream, err := client.SubscribeEvents(secondCtx, newSubscribeEventsRequest("device-session-2", "request-2", secondPrivateKey)) + require.NoError(t, err) + assertBootstrapEvent(t, recvGatewayEvent(t, secondStream), h.responseSignerPublicKey, "request-2") + + unrelatedCtx, cancelUnrelated := context.WithCancel(context.Background()) + defer cancelUnrelated() + unrelatedStream, err := client.SubscribeEvents(unrelatedCtx, newSubscribeEventsRequest("device-session-3", "request-3", unrelatedPrivateKey)) + require.NoError(t, err) + assertBootstrapEvent(t, recvGatewayEvent(t, unrelatedStream), h.responseSignerPublicKey, "request-3") + + cases := []pushIntentCase{ + { + notificationType: "game.turn.ready", + producer: "game_master", + payloadJSON: `{"game_id":"game-123","game_name":"Nebula Clash","turn_number":54}`, + }, + { + notificationType: "game.finished", + producer: "game_master", + payloadJSON: `{"game_id":"game-123","game_name":"Nebula Clash","final_turn_number":55}`, + }, + { + notificationType: "lobby.application.submitted", + producer: "game_lobby", + payloadJSON: `{"game_id":"game-123","game_name":"Nebula Clash","applicant_user_id":"applicant-1","applicant_name":"Nova Pilot"}`, + }, + { + notificationType: "lobby.membership.approved", + producer: "game_lobby", + payloadJSON: `{"game_id":"game-123","game_name":"Nebula Clash"}`, + }, + { + notificationType: "lobby.membership.rejected", + producer: "game_lobby", + payloadJSON: 
`{"game_id":"game-123","game_name":"Nebula Clash"}`, + }, + { + notificationType: "lobby.invite.created", + producer: "game_lobby", + payloadJSON: `{"game_id":"game-123","game_name":"Nebula Clash","inviter_user_id":"owner-1","inviter_name":"Owner Pilot"}`, + }, + { + notificationType: "lobby.invite.redeemed", + producer: "game_lobby", + payloadJSON: `{"game_id":"game-123","game_name":"Nebula Clash","invitee_user_id":"invitee-1","invitee_name":"Nova Pilot"}`, + }, + } + + for index, tc := range cases { + messageID := h.publishPushIntent(t, tc, recipient.UserID, index) + + firstEvent := recvGatewayEvent(t, firstStream) + assertNotificationPushEvent(t, firstEvent, h.responseSignerPublicKey, tc.notificationType, messageID, recipient.UserID, index) + secondEvent := recvGatewayEvent(t, secondStream) + assertNotificationPushEvent(t, secondEvent, h.responseSignerPublicKey, tc.notificationType, messageID, recipient.UserID, index) + } + assertNoGatewayEvent(t, unrelatedStream, cancelUnrelated) + + messages, err := h.redis.XRange(context.Background(), notificationGatewayClientEventsStream, "-", "+").Result() + require.NoError(t, err) + require.Len(t, messages, len(cases)) + for index, message := range messages { + require.Equal(t, recipient.UserID, message.Values["user_id"]) + require.Equal(t, cases[index].notificationType, message.Values["event_type"]) + require.NotContains(t, message.Values, "device_session_id") + } +} + +type notificationGatewayHarness struct { + redis *redis.Client + + userServiceURL string + + gatewayGRPCAddr string + responseSignerPublicKey ed25519.PublicKey + + notificationProcess *harness.Process + gatewayProcess *harness.Process + userServiceProcess *harness.Process +} + +type pushIntentCase struct { + notificationType string + producer string + payloadJSON string +} + +type ensureByEmailResponse struct { + Outcome string `json:"outcome"` + UserID string `json:"user_id"` +} + +func newNotificationGatewayHarness(t *testing.T) 
*notificationGatewayHarness { + t.Helper() + + redisRuntime := harness.StartRedisContainer(t) + redisClient := redis.NewClient(&redis.Options{ + Addr: redisRuntime.Addr, + Protocol: 2, + DisableIdentity: true, + }) + t.Cleanup(func() { + require.NoError(t, redisClient.Close()) + }) + + responseSignerPath, responseSignerPublicKey := harness.WriteResponseSignerPEM(t, t.Name()) + userServiceAddr := harness.FreeTCPAddress(t) + notificationInternalAddr := harness.FreeTCPAddress(t) + gatewayPublicAddr := harness.FreeTCPAddress(t) + gatewayGRPCAddr := harness.FreeTCPAddress(t) + + userServiceBinary := harness.BuildBinary(t, "userservice", "./user/cmd/userservice") + notificationBinary := harness.BuildBinary(t, "notification", "./notification/cmd/notification") + gatewayBinary := harness.BuildBinary(t, "gateway", "./gateway/cmd/gateway") + + userServiceProcess := harness.StartProcess(t, "userservice", userServiceBinary, map[string]string{ + "USERSERVICE_LOG_LEVEL": "info", + "USERSERVICE_INTERNAL_HTTP_ADDR": userServiceAddr, + "USERSERVICE_REDIS_ADDR": redisRuntime.Addr, + "OTEL_TRACES_EXPORTER": "none", + "OTEL_METRICS_EXPORTER": "none", + }) + waitForUserServiceReady(t, userServiceProcess, "http://"+userServiceAddr) + + notificationProcess := harness.StartProcess(t, "notification", notificationBinary, map[string]string{ + "NOTIFICATION_LOG_LEVEL": "info", + "NOTIFICATION_INTERNAL_HTTP_ADDR": notificationInternalAddr, + "NOTIFICATION_REDIS_ADDR": redisRuntime.Addr, + "NOTIFICATION_USER_SERVICE_BASE_URL": "http://" + userServiceAddr, + "NOTIFICATION_USER_SERVICE_TIMEOUT": time.Second.String(), + "NOTIFICATION_INTENTS_READ_BLOCK_TIMEOUT": "100ms", + "NOTIFICATION_ROUTE_BACKOFF_MIN": "100ms", + "NOTIFICATION_ROUTE_BACKOFF_MAX": "100ms", + "NOTIFICATION_GATEWAY_CLIENT_EVENTS_STREAM": notificationGatewayClientEventsStream, + "OTEL_TRACES_EXPORTER": "none", + "OTEL_METRICS_EXPORTER": "none", + }) + harness.WaitForHTTPStatus(t, notificationProcess, 
"http://"+notificationInternalAddr+"/readyz", http.StatusOK) + + gatewayProcess := harness.StartProcess(t, "gateway", gatewayBinary, map[string]string{ + "GATEWAY_LOG_LEVEL": "info", + "GATEWAY_PUBLIC_HTTP_ADDR": gatewayPublicAddr, + "GATEWAY_AUTHENTICATED_GRPC_ADDR": gatewayGRPCAddr, + "GATEWAY_SESSION_CACHE_REDIS_ADDR": redisRuntime.Addr, + "GATEWAY_SESSION_CACHE_REDIS_KEY_PREFIX": "gateway:session:", + "GATEWAY_SESSION_EVENTS_REDIS_STREAM": "gateway:session_events", + "GATEWAY_CLIENT_EVENTS_REDIS_STREAM": notificationGatewayClientEventsStream, + "GATEWAY_CLIENT_EVENTS_REDIS_READ_BLOCK_TIMEOUT": "100ms", + "GATEWAY_REPLAY_REDIS_KEY_PREFIX": "gateway:replay:", + "GATEWAY_RESPONSE_SIGNER_PRIVATE_KEY_PEM_PATH": filepath.Clean(responseSignerPath), + "OTEL_TRACES_EXPORTER": "none", + "OTEL_METRICS_EXPORTER": "none", + }) + harness.WaitForHTTPStatus(t, gatewayProcess, "http://"+gatewayPublicAddr+"/healthz", http.StatusOK) + harness.WaitForTCP(t, gatewayProcess, gatewayGRPCAddr) + + return ¬ificationGatewayHarness{ + redis: redisClient, + userServiceURL: "http://" + userServiceAddr, + gatewayGRPCAddr: gatewayGRPCAddr, + responseSignerPublicKey: responseSignerPublicKey, + notificationProcess: notificationProcess, + gatewayProcess: gatewayProcess, + userServiceProcess: userServiceProcess, + } +} + +func (h *notificationGatewayHarness) ensureUser(t *testing.T, email string, preferredLanguage string) ensureByEmailResponse { + t.Helper() + + response := postJSONValue(t, h.userServiceURL+"/api/v1/internal/users/ensure-by-email", map[string]any{ + "email": email, + "registration_context": map[string]string{ + "preferred_language": preferredLanguage, + "time_zone": "Europe/Kaliningrad", + }, + }) + + var body ensureByEmailResponse + requireJSONStatus(t, response, http.StatusOK, &body) + require.Equal(t, "created", body.Outcome) + require.NotEmpty(t, body.UserID) + return body +} + +func (h *notificationGatewayHarness) dialGateway(t *testing.T) *grpc.ClientConn { + t.Helper() + 
+ ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + conn, err := grpc.DialContext( + ctx, + h.gatewayGRPCAddr, + grpc.WithTransportCredentials(insecure.NewCredentials()), + grpc.WithBlock(), + ) + require.NoError(t, err) + t.Cleanup(func() { + require.NoError(t, conn.Close()) + }) + + return conn +} + +func (h *notificationGatewayHarness) seedGatewaySession(t *testing.T, deviceSessionID string, userID string, clientPrivateKey ed25519.PrivateKey) { + t.Helper() + + record := gatewaySessionRecord{ + DeviceSessionID: deviceSessionID, + UserID: userID, + ClientPublicKey: base64.StdEncoding.EncodeToString(clientPrivateKey.Public().(ed25519.PublicKey)), + Status: "active", + } + payload, err := json.Marshal(record) + require.NoError(t, err) + require.NoError(t, h.redis.Set(context.Background(), "gateway:session:"+deviceSessionID, payload, 0).Err()) +} + +func (h *notificationGatewayHarness) publishPushIntent(t *testing.T, tc pushIntentCase, recipientUserID string, index int) string { + t.Helper() + + messageID, err := h.redis.XAdd(context.Background(), &redis.XAddArgs{ + Stream: notificationGatewayIntentsStream, + Values: map[string]any{ + "notification_type": tc.notificationType, + "producer": tc.producer, + "audience_kind": "user", + "recipient_user_ids_json": `["` + recipientUserID + `"]`, + "idempotency_key": tc.notificationType + ":gateway:" + string(rune('a'+index)), + "occurred_at_ms": "1775121700000", + "request_id": pushRequestID(index), + "trace_id": pushTraceID(index), + "payload_json": tc.payloadJSON, + }, + }).Result() + require.NoError(t, err) + + return messageID +} + +type gatewaySessionRecord struct { + DeviceSessionID string `json:"device_session_id"` + UserID string `json:"user_id"` + ClientPublicKey string `json:"client_public_key"` + Status string `json:"status"` + RevokedAtMS *int64 `json:"revoked_at_ms,omitempty"` +} + +type httpResponse struct { + StatusCode int + Body string + Header http.Header +} + 
+func postJSONValue(t *testing.T, targetURL string, body any) httpResponse { + t.Helper() + + payload, err := json.Marshal(body) + require.NoError(t, err) + + request, err := http.NewRequest(http.MethodPost, targetURL, bytes.NewReader(payload)) + require.NoError(t, err) + request.Header.Set("Content-Type", "application/json") + return doRequest(t, request) +} + +func requireJSONStatus(t *testing.T, response httpResponse, wantStatus int, target any) { + t.Helper() + + require.Equal(t, wantStatus, response.StatusCode, "response body: %s", response.Body) + require.NoError(t, decodeStrictJSONPayload([]byte(response.Body), target)) +} + +func doRequest(t *testing.T, request *http.Request) httpResponse { + t.Helper() + + client := &http.Client{ + Timeout: 5 * time.Second, + Transport: &http.Transport{ + DisableKeepAlives: true, + }, + } + t.Cleanup(client.CloseIdleConnections) + + response, err := client.Do(request) + require.NoError(t, err) + defer response.Body.Close() + + payload, err := io.ReadAll(response.Body) + require.NoError(t, err) + + return httpResponse{ + StatusCode: response.StatusCode, + Body: string(payload), + Header: response.Header.Clone(), + } +} + +func decodeStrictJSONPayload(payload []byte, target any) error { + decoder := json.NewDecoder(bytes.NewReader(payload)) + decoder.DisallowUnknownFields() + + if err := decoder.Decode(target); err != nil { + return err + } + if err := decoder.Decode(&struct{}{}); err != io.EOF { + if err == nil { + return errors.New("unexpected trailing JSON input") + } + return err + } + + return nil +} + +func waitForUserServiceReady(t *testing.T, process *harness.Process, baseURL string) { + t.Helper() + + client := &http.Client{Timeout: 250 * time.Millisecond} + t.Cleanup(client.CloseIdleConnections) + + deadline := time.Now().Add(10 * time.Second) + for time.Now().Before(deadline) { + request, err := http.NewRequest(http.MethodGet, baseURL+"/api/v1/internal/users/user-missing/exists", nil) + require.NoError(t, err) + + 
response, err := client.Do(request) + if err == nil { + _, _ = io.Copy(io.Discard, response.Body) + response.Body.Close() + if response.StatusCode == http.StatusOK { + return + } + } + + time.Sleep(25 * time.Millisecond) + } + + t.Fatalf("wait for userservice readiness: timeout\n%s", process.Logs()) +} + +func newClientPrivateKey(label string) ed25519.PrivateKey { + seed := sha256.Sum256([]byte("galaxy-integration-notification-gateway-client-" + label)) + return ed25519.NewKeyFromSeed(seed[:]) +} + +func newSubscribeEventsRequest(deviceSessionID string, requestID string, clientPrivateKey ed25519.PrivateKey) *gatewayv1.SubscribeEventsRequest { + payloadHash := contractsgatewayv1.ComputePayloadHash(nil) + + request := &gatewayv1.SubscribeEventsRequest{ + ProtocolVersion: contractsgatewayv1.ProtocolVersionV1, + DeviceSessionId: deviceSessionID, + MessageType: contractsgatewayv1.SubscribeMessageType, + TimestampMs: time.Now().UnixMilli(), + RequestId: requestID, + PayloadHash: payloadHash, + TraceId: "trace-" + requestID, + } + request.Signature = contractsgatewayv1.SignRequest(clientPrivateKey, contractsgatewayv1.RequestSigningFields{ + ProtocolVersion: request.GetProtocolVersion(), + DeviceSessionID: request.GetDeviceSessionId(), + MessageType: request.GetMessageType(), + TimestampMS: request.GetTimestampMs(), + RequestID: request.GetRequestId(), + PayloadHash: request.GetPayloadHash(), + }) + + return request +} + +func recvGatewayEvent(t *testing.T, stream grpc.ServerStreamingClient[gatewayv1.GatewayEvent]) *gatewayv1.GatewayEvent { + t.Helper() + + eventCh := make(chan *gatewayv1.GatewayEvent, 1) + errCh := make(chan error, 1) + go func() { + event, err := stream.Recv() + if err != nil { + errCh <- err + return + } + eventCh <- event + }() + + select { + case event := <-eventCh: + return event + case err := <-errCh: + require.NoError(t, err) + case <-time.After(5 * time.Second): + require.FailNow(t, "timed out waiting for gateway event") + } + + return nil +} + 
+func assertBootstrapEvent(t *testing.T, event *gatewayv1.GatewayEvent, responseSignerPublicKey ed25519.PublicKey, wantRequestID string) { + t.Helper() + + require.Equal(t, contractsgatewayv1.ServerTimeEventType, event.GetEventType()) + require.Equal(t, wantRequestID, event.GetEventId()) + require.Equal(t, wantRequestID, event.GetRequestId()) + require.NoError(t, contractsgatewayv1.VerifyPayloadHash(event.GetPayloadBytes(), event.GetPayloadHash())) + require.NoError(t, contractsgatewayv1.VerifyEventSignature(responseSignerPublicKey, event.GetSignature(), contractsgatewayv1.EventSigningFields{ + EventType: event.GetEventType(), + EventID: event.GetEventId(), + TimestampMS: event.GetTimestampMs(), + RequestID: event.GetRequestId(), + TraceID: event.GetTraceId(), + PayloadHash: event.GetPayloadHash(), + })) +} + +func assertNotificationPushEvent( + t *testing.T, + event *gatewayv1.GatewayEvent, + responseSignerPublicKey ed25519.PublicKey, + notificationType string, + notificationID string, + userID string, + index int, +) { + t.Helper() + + require.Equal(t, notificationType, event.GetEventType()) + require.Equal(t, notificationID+"/push:user:"+userID, event.GetEventId()) + require.Equal(t, pushRequestID(index), event.GetRequestId()) + require.Equal(t, pushTraceID(index), event.GetTraceId()) + require.NotEmpty(t, event.GetPayloadBytes()) + require.NoError(t, contractsgatewayv1.VerifyPayloadHash(event.GetPayloadBytes(), event.GetPayloadHash())) + require.NoError(t, contractsgatewayv1.VerifyEventSignature(responseSignerPublicKey, event.GetSignature(), contractsgatewayv1.EventSigningFields{ + EventType: event.GetEventType(), + EventID: event.GetEventId(), + TimestampMS: event.GetTimestampMs(), + RequestID: event.GetRequestId(), + TraceID: event.GetTraceId(), + PayloadHash: event.GetPayloadHash(), + })) +} + +func assertNoGatewayEvent(t *testing.T, stream grpc.ServerStreamingClient[gatewayv1.GatewayEvent], cancel context.CancelFunc) { + t.Helper() + + eventCh := make(chan 
*gatewayv1.GatewayEvent, 1) + errCh := make(chan error, 1) + go func() { + event, err := stream.Recv() + if err != nil { + errCh <- err + return + } + eventCh <- event + }() + + select { + case event := <-eventCh: + require.FailNowf(t, "unexpected gateway event delivered", "%+v", event) + case <-time.After(200 * time.Millisecond): + cancel() + case err := <-errCh: + require.FailNowf(t, "stream closed unexpectedly", "%v", err) + } +} + +func pushRequestID(index int) string { + return "notification-request-" + string(rune('a'+index)) +} + +func pushTraceID(index int) string { + return "notification-trace-" + string(rune('a'+index)) +} diff --git a/integration/notificationmail/notification_mail_test.go b/integration/notificationmail/notification_mail_test.go new file mode 100644 index 0000000..cfa007d --- /dev/null +++ b/integration/notificationmail/notification_mail_test.go @@ -0,0 +1,622 @@ +package notificationmail_test + +import ( + "bytes" + "context" + "encoding/json" + "errors" + "fmt" + "io" + "net/http" + "net/url" + "path/filepath" + "runtime" + "testing" + "time" + + "galaxy/integration/internal/harness" + + "github.com/redis/go-redis/v9" + "github.com/stretchr/testify/require" +) + +const ( + notificationMailDeliveriesPath = "/api/v1/internal/deliveries" + notificationMailIntentsStream = "notification:intents" +) + +func TestNotificationMailPublishesEveryTemplateModeDeliveryToRealMailService(t *testing.T) { + h := newNotificationMailHarness(t) + + recipient := h.ensureUser(t, "pilot@example.com", "fr-FR") + + cases := []mailIntentCase{ + { + name: "geo review recommended admin", + notificationType: "geo.review_recommended", + producer: "geoprofile", + audienceKind: "admin_email", + recipientEmail: "geo-admin@example.com", + routeID: "email:email:geo-admin@example.com", + payload: map[string]any{ + "user_id": "user-geo", + "user_email": "traveler@example.com", + "observed_country": "DE", + "usual_connection_country": "PL", + "review_reason": 
"country_mismatch", + }, + }, + { + name: "game turn ready user", + notificationType: "game.turn.ready", + producer: "game_master", + audienceKind: "user", + recipientEmail: recipient.Email, + payload: map[string]any{ + "game_id": "game-123", + "game_name": "Nebula Clash", + "turn_number": 54, + }, + }, + { + name: "game finished user", + notificationType: "game.finished", + producer: "game_master", + audienceKind: "user", + recipientEmail: recipient.Email, + payload: map[string]any{ + "game_id": "game-123", + "game_name": "Nebula Clash", + "final_turn_number": 55, + }, + }, + { + name: "game generation failed admin", + notificationType: "game.generation_failed", + producer: "game_master", + audienceKind: "admin_email", + recipientEmail: "game-admin@example.com", + routeID: "email:email:game-admin@example.com", + payload: map[string]any{ + "game_id": "game-123", + "game_name": "Nebula Clash", + "failure_reason": "engine_timeout", + }, + }, + { + name: "lobby runtime paused admin", + notificationType: "lobby.runtime_paused_after_start", + producer: "game_lobby", + audienceKind: "admin_email", + recipientEmail: "lobby-ops@example.com", + routeID: "email:email:lobby-ops@example.com", + payload: map[string]any{ + "game_id": "game-123", + "game_name": "Nebula Clash", + }, + }, + { + name: "lobby application submitted user", + notificationType: "lobby.application.submitted", + producer: "game_lobby", + audienceKind: "user", + recipientEmail: recipient.Email, + payload: map[string]any{ + "game_id": "game-123", + "game_name": "Nebula Clash", + "applicant_user_id": "applicant-1", + "applicant_name": "Nova Pilot", + }, + }, + { + name: "lobby application submitted admin", + notificationType: "lobby.application.submitted", + producer: "game_lobby", + audienceKind: "admin_email", + recipientEmail: "lobby-admin@example.com", + routeID: "email:email:lobby-admin@example.com", + payload: map[string]any{ + "game_id": "game-456", + "game_name": "Public Stars", + "applicant_user_id": 
"applicant-2", + "applicant_name": "Public Pilot", + }, + }, + { + name: "lobby membership approved user", + notificationType: "lobby.membership.approved", + producer: "game_lobby", + audienceKind: "user", + recipientEmail: recipient.Email, + payload: map[string]any{ + "game_id": "game-123", + "game_name": "Nebula Clash", + }, + }, + { + name: "lobby membership rejected user", + notificationType: "lobby.membership.rejected", + producer: "game_lobby", + audienceKind: "user", + recipientEmail: recipient.Email, + payload: map[string]any{ + "game_id": "game-123", + "game_name": "Nebula Clash", + }, + }, + { + name: "lobby invite created user", + notificationType: "lobby.invite.created", + producer: "game_lobby", + audienceKind: "user", + recipientEmail: recipient.Email, + payload: map[string]any{ + "game_id": "game-123", + "game_name": "Nebula Clash", + "inviter_user_id": "owner-1", + "inviter_name": "Owner Pilot", + }, + }, + { + name: "lobby invite redeemed user", + notificationType: "lobby.invite.redeemed", + producer: "game_lobby", + audienceKind: "user", + recipientEmail: recipient.Email, + payload: map[string]any{ + "game_id": "game-123", + "game_name": "Nebula Clash", + "invitee_user_id": "invitee-1", + "invitee_name": "Nova Pilot", + }, + }, + { + name: "lobby invite expired user", + notificationType: "lobby.invite.expired", + producer: "game_lobby", + audienceKind: "user", + recipientEmail: recipient.Email, + payload: map[string]any{ + "game_id": "game-123", + "game_name": "Nebula Clash", + "invitee_user_id": "invitee-1", + "invitee_name": "Nova Pilot", + }, + }, + } + + for index, tc := range cases { + tc := tc + t.Run(tc.name, func(t *testing.T) { + messageID := h.publishMailIntent(t, tc, recipient.UserID, index) + routeID := tc.routeID + if routeID == "" { + routeID = "email:user:" + recipient.UserID + } + + idempotencyKey := "notification:" + messageID + "/" + routeID + list := h.eventuallyListDeliveries(t, url.Values{ + "source": []string{"notification"}, 
+ "status": []string{"sent"}, + "recipient": []string{tc.recipientEmail}, + "template_id": []string{tc.notificationType}, + "idempotency_key": []string{idempotencyKey}, + }) + require.Len(t, list.Items, 1) + require.Equal(t, "notification", list.Items[0].Source) + require.Equal(t, "sent", list.Items[0].Status) + require.Equal(t, "template", list.Items[0].PayloadMode) + require.Equal(t, tc.notificationType, list.Items[0].TemplateID) + require.Equal(t, "en", list.Items[0].Locale) + require.Equal(t, []string{tc.recipientEmail}, list.Items[0].To) + + detail := h.getDelivery(t, list.Items[0].DeliveryID) + require.Equal(t, "notification", detail.Source) + require.Equal(t, "template", detail.PayloadMode) + require.Equal(t, tc.notificationType, detail.TemplateID) + require.Equal(t, "en", detail.Locale) + require.False(t, detail.LocaleFallbackUsed) + require.Equal(t, idempotencyKey, detail.IdempotencyKey) + require.Equal(t, []string{tc.recipientEmail}, detail.To) + require.Empty(t, detail.Cc) + require.Empty(t, detail.Bcc) + require.Empty(t, detail.ReplyTo) + require.Empty(t, detail.Attachments) + assertTemplateVariables(t, tc.payload, detail.TemplateVariables) + }) + } +} + +type notificationMailHarness struct { + redis *redis.Client + + userServiceURL string + mailBaseURL string + + notificationProcess *harness.Process + mailProcess *harness.Process + userServiceProcess *harness.Process +} + +type mailIntentCase struct { + name string + notificationType string + producer string + audienceKind string + recipientEmail string + routeID string + payload map[string]any +} + +type ensureByEmailResponse struct { + Outcome string `json:"outcome"` + UserID string `json:"user_id"` + Email string +} + +type mailDeliveryListResponse struct { + Items []mailDeliverySummary `json:"items"` +} + +type mailDeliverySummary struct { + DeliveryID string `json:"delivery_id"` + Source string `json:"source"` + PayloadMode string `json:"payload_mode"` + TemplateID string `json:"template_id"` + 
Locale string `json:"locale"` + LocaleFallbackUsed bool `json:"locale_fallback_used"` + To []string `json:"to"` + Cc []string `json:"cc"` + Bcc []string `json:"bcc"` + ReplyTo []string `json:"reply_to"` + IdempotencyKey string `json:"idempotency_key"` + Status string `json:"status"` + AttemptCount int `json:"attempt_count"` + LastAttemptStatus string `json:"last_attempt_status,omitempty"` + ProviderSummary string `json:"provider_summary,omitempty"` + CreatedAtMS int64 `json:"created_at_ms"` + UpdatedAtMS int64 `json:"updated_at_ms"` + SentAtMS int64 `json:"sent_at_ms,omitempty"` +} + +type mailDeliveryDetailResponse struct { + DeliveryID string `json:"delivery_id"` + Source string `json:"source"` + PayloadMode string `json:"payload_mode"` + TemplateID string `json:"template_id"` + Locale string `json:"locale"` + LocaleFallbackUsed bool `json:"locale_fallback_used"` + To []string `json:"to"` + Cc []string `json:"cc"` + Bcc []string `json:"bcc"` + ReplyTo []string `json:"reply_to"` + Subject string `json:"subject,omitempty"` + TextBody string `json:"text_body,omitempty"` + HTMLBody string `json:"html_body,omitempty"` + Attachments []any `json:"attachments"` + IdempotencyKey string `json:"idempotency_key"` + Status string `json:"status"` + AttemptCount int `json:"attempt_count"` + LastAttemptStatus string `json:"last_attempt_status,omitempty"` + ProviderSummary string `json:"provider_summary,omitempty"` + TemplateVariables map[string]any `json:"template_variables,omitempty"` + CreatedAtMS int64 `json:"created_at_ms"` + UpdatedAtMS int64 `json:"updated_at_ms"` + SentAtMS int64 `json:"sent_at_ms,omitempty"` +} + +type httpResponse struct { + StatusCode int + Body string + Header http.Header +} + +func newNotificationMailHarness(t *testing.T) *notificationMailHarness { + t.Helper() + + redisRuntime := harness.StartRedisContainer(t) + redisClient := redis.NewClient(&redis.Options{ + Addr: redisRuntime.Addr, + Protocol: 2, + DisableIdentity: true, + }) + t.Cleanup(func() { 
+ require.NoError(t, redisClient.Close()) + }) + + userServiceAddr := harness.FreeTCPAddress(t) + mailInternalAddr := harness.FreeTCPAddress(t) + notificationInternalAddr := harness.FreeTCPAddress(t) + + userServiceBinary := harness.BuildBinary(t, "userservice", "./user/cmd/userservice") + mailBinary := harness.BuildBinary(t, "mail", "./mail/cmd/mail") + notificationBinary := harness.BuildBinary(t, "notification", "./notification/cmd/notification") + + userServiceProcess := harness.StartProcess(t, "userservice", userServiceBinary, map[string]string{ + "USERSERVICE_LOG_LEVEL": "info", + "USERSERVICE_INTERNAL_HTTP_ADDR": userServiceAddr, + "USERSERVICE_REDIS_ADDR": redisRuntime.Addr, + "OTEL_TRACES_EXPORTER": "none", + "OTEL_METRICS_EXPORTER": "none", + }) + waitForUserServiceReady(t, userServiceProcess, "http://"+userServiceAddr) + + mailProcess := harness.StartProcess(t, "mail", mailBinary, map[string]string{ + "MAIL_LOG_LEVEL": "info", + "MAIL_INTERNAL_HTTP_ADDR": mailInternalAddr, + "MAIL_REDIS_ADDR": redisRuntime.Addr, + "MAIL_TEMPLATE_DIR": mailTemplateDir(t), + "MAIL_SMTP_MODE": "stub", + "MAIL_STREAM_BLOCK_TIMEOUT": "100ms", + "MAIL_OPERATOR_REQUEST_TIMEOUT": time.Second.String(), + "MAIL_SHUTDOWN_TIMEOUT": "2s", + "OTEL_TRACES_EXPORTER": "none", + "OTEL_METRICS_EXPORTER": "none", + }) + waitForMailReady(t, mailProcess, "http://"+mailInternalAddr) + + notificationProcess := harness.StartProcess(t, "notification", notificationBinary, map[string]string{ + "NOTIFICATION_LOG_LEVEL": "info", + "NOTIFICATION_INTERNAL_HTTP_ADDR": notificationInternalAddr, + "NOTIFICATION_REDIS_ADDR": redisRuntime.Addr, + "NOTIFICATION_USER_SERVICE_BASE_URL": "http://" + userServiceAddr, + "NOTIFICATION_USER_SERVICE_TIMEOUT": time.Second.String(), + "NOTIFICATION_INTENTS_READ_BLOCK_TIMEOUT": "100ms", + "NOTIFICATION_ROUTE_BACKOFF_MIN": "100ms", + "NOTIFICATION_ROUTE_BACKOFF_MAX": "100ms", + "NOTIFICATION_ADMIN_EMAILS_GEO_REVIEW_RECOMMENDED": "geo-admin@example.com", + 
"NOTIFICATION_ADMIN_EMAILS_GAME_GENERATION_FAILED": "game-admin@example.com", + "NOTIFICATION_ADMIN_EMAILS_LOBBY_RUNTIME_PAUSED_AFTER_START": "lobby-ops@example.com", + "NOTIFICATION_ADMIN_EMAILS_LOBBY_APPLICATION_SUBMITTED": "lobby-admin@example.com", + "OTEL_TRACES_EXPORTER": "none", + "OTEL_METRICS_EXPORTER": "none", + }) + harness.WaitForHTTPStatus(t, notificationProcess, "http://"+notificationInternalAddr+"/readyz", http.StatusOK) + + return &notificationMailHarness{ + redis: redisClient, + userServiceURL: "http://" + userServiceAddr, + mailBaseURL: "http://" + mailInternalAddr, + notificationProcess: notificationProcess, + mailProcess: mailProcess, + userServiceProcess: userServiceProcess, + } +} + +func (h *notificationMailHarness) ensureUser(t *testing.T, email string, preferredLanguage string) ensureByEmailResponse { + t.Helper() + + response := postJSONValue(t, h.userServiceURL+"/api/v1/internal/users/ensure-by-email", map[string]any{ + "email": email, + "registration_context": map[string]string{ + "preferred_language": preferredLanguage, + "time_zone": "Europe/Kaliningrad", + }, + }) + + var body ensureByEmailResponse + requireJSONStatus(t, response, http.StatusOK, &body) + require.Equal(t, "created", body.Outcome) + require.NotEmpty(t, body.UserID) + body.Email = email + return body +} + +func (h *notificationMailHarness) publishMailIntent(t *testing.T, tc mailIntentCase, recipientUserID string, index int) string { + t.Helper() + + payload, err := json.Marshal(tc.payload) + require.NoError(t, err) + + values := map[string]any{ + "notification_type": tc.notificationType, + "producer": tc.producer, + "audience_kind": tc.audienceKind, + "idempotency_key": fmt.Sprintf("%s:mail:%02d", tc.notificationType, index), + "occurred_at_ms": "1775121700000", + "payload_json": string(payload), + } + if tc.audienceKind == "user" { + values["recipient_user_ids_json"] = `["` + recipientUserID + `"]` + } + + messageID, err := h.redis.XAdd(context.Background(), 
&redis.XAddArgs{ + Stream: notificationMailIntentsStream, + Values: values, + }).Result() + require.NoError(t, err) + + return messageID +} + +func (h *notificationMailHarness) eventuallyListDeliveries(t *testing.T, query url.Values) mailDeliveryListResponse { + t.Helper() + + var response mailDeliveryListResponse + require.Eventually(t, func() bool { + response = h.listDeliveries(t, query) + return len(response.Items) > 0 + }, 10*time.Second, 50*time.Millisecond) + + return response +} + +func (h *notificationMailHarness) listDeliveries(t *testing.T, query url.Values) mailDeliveryListResponse { + t.Helper() + + target := h.mailBaseURL + notificationMailDeliveriesPath + if encoded := query.Encode(); encoded != "" { + target += "?" + encoded + } + + request, err := http.NewRequest(http.MethodGet, target, nil) + require.NoError(t, err) + return doJSONRequest[mailDeliveryListResponse](t, request, http.StatusOK) +} + +func (h *notificationMailHarness) getDelivery(t *testing.T, deliveryID string) mailDeliveryDetailResponse { + t.Helper() + + request, err := http.NewRequest(http.MethodGet, h.mailBaseURL+notificationMailDeliveriesPath+"/"+url.PathEscape(deliveryID), nil) + require.NoError(t, err) + return doJSONRequest[mailDeliveryDetailResponse](t, request, http.StatusOK) +} + +func waitForUserServiceReady(t *testing.T, process *harness.Process, baseURL string) { + t.Helper() + + client := &http.Client{Timeout: 250 * time.Millisecond} + t.Cleanup(client.CloseIdleConnections) + + deadline := time.Now().Add(10 * time.Second) + for time.Now().Before(deadline) { + request, err := http.NewRequest(http.MethodGet, baseURL+"/api/v1/internal/users/user-missing/exists", nil) + require.NoError(t, err) + + response, err := client.Do(request) + if err == nil { + _, _ = io.Copy(io.Discard, response.Body) + response.Body.Close() + if response.StatusCode == http.StatusOK { + return + } + } + + time.Sleep(25 * time.Millisecond) + } + + t.Fatalf("wait for userservice readiness: 
timeout\n%s", process.Logs()) +} + +func waitForMailReady(t *testing.T, process *harness.Process, baseURL string) { + t.Helper() + + client := &http.Client{Timeout: 250 * time.Millisecond} + t.Cleanup(client.CloseIdleConnections) + + deadline := time.Now().Add(10 * time.Second) + for time.Now().Before(deadline) { + request, err := http.NewRequest(http.MethodGet, baseURL+notificationMailDeliveriesPath, nil) + require.NoError(t, err) + + response, err := client.Do(request) + if err == nil { + _, _ = io.Copy(io.Discard, response.Body) + response.Body.Close() + if response.StatusCode == http.StatusOK { + return + } + } + + time.Sleep(25 * time.Millisecond) + } + + t.Fatalf("wait for mail readiness: timeout\n%s", process.Logs()) +} + +func doJSONRequest[T any](t *testing.T, request *http.Request, wantStatus int) T { + t.Helper() + + response := doRequest(t, request) + require.Equal(t, wantStatus, response.StatusCode, response.Body) + + var decoded T + require.NoError(t, decodeStrictJSONPayload([]byte(response.Body), &decoded), response.Body) + return decoded +} + +func postJSONValue(t *testing.T, targetURL string, body any) httpResponse { + t.Helper() + + payload, err := json.Marshal(body) + require.NoError(t, err) + + request, err := http.NewRequest(http.MethodPost, targetURL, bytes.NewReader(payload)) + require.NoError(t, err) + request.Header.Set("Content-Type", "application/json") + return doRequest(t, request) +} + +func requireJSONStatus(t *testing.T, response httpResponse, wantStatus int, target any) { + t.Helper() + + require.Equal(t, wantStatus, response.StatusCode, "response body: %s", response.Body) + require.NoError(t, decodeStrictJSONPayload([]byte(response.Body), target)) +} + +func doRequest(t *testing.T, request *http.Request) httpResponse { + t.Helper() + + client := &http.Client{ + Timeout: 5 * time.Second, + Transport: &http.Transport{ + DisableKeepAlives: true, + }, + } + t.Cleanup(client.CloseIdleConnections) + + response, err := client.Do(request) 
+ require.NoError(t, err) + defer response.Body.Close() + + payload, err := io.ReadAll(response.Body) + require.NoError(t, err) + + return httpResponse{ + StatusCode: response.StatusCode, + Body: string(payload), + Header: response.Header.Clone(), + } +} + +func decodeStrictJSONPayload(payload []byte, target any) error { + decoder := json.NewDecoder(bytes.NewReader(payload)) + decoder.DisallowUnknownFields() + + if err := decoder.Decode(target); err != nil { + return err + } + if err := decoder.Decode(&struct{}{}); err != io.EOF { + if err == nil { + return errors.New("unexpected trailing JSON input") + } + return err + } + + return nil +} + +func assertTemplateVariables(t *testing.T, want map[string]any, got map[string]any) { + t.Helper() + + require.NotEmpty(t, got) + for key, wantValue := range want { + gotValue, ok := got[key] + require.Truef(t, ok, "template variable %q is missing", key) + switch typedWant := wantValue.(type) { + case string: + require.Equal(t, typedWant, gotValue) + case int: + require.Equal(t, float64(typedWant), gotValue) + default: + require.Equal(t, typedWant, gotValue) + } + } +} + +func mailTemplateDir(t *testing.T) string { + t.Helper() + + return filepath.Join(repositoryRoot(t), "mail", "templates") +} + +func repositoryRoot(t *testing.T) string { + t.Helper() + + _, file, _, ok := runtime.Caller(0) + if !ok { + t.Fatal("resolve repository root: runtime caller is unavailable") + } + + return filepath.Clean(filepath.Join(filepath.Dir(file), "..", "..")) +} diff --git a/integration/notificationuser/notification_user_test.go b/integration/notificationuser/notification_user_test.go new file mode 100644 index 0000000..1a49a21 --- /dev/null +++ b/integration/notificationuser/notification_user_test.go @@ -0,0 +1,391 @@ +package notificationuser_test + +import ( + "bytes" + "context" + "encoding/base64" + "encoding/json" + "errors" + "io" + "net/http" + "testing" + "time" + + "galaxy/integration/internal/harness" + + 
"github.com/redis/go-redis/v9" + "github.com/stretchr/testify/require" +) + +const notificationUserIntentsStream = "notification:intents" + +func TestNotificationUserEnrichmentPersistsResolvedRecipient(t *testing.T) { + h := newNotificationUserHarness(t) + + recipient := h.ensureUser(t, "pilot@example.com", "fr-FR") + messageID := h.publishUserIntent(t, recipient.UserID, "game.turn.ready", "game_master", "enrichment-success", `{"game_id":"game-123","game_name":"Nebula Clash","turn_number":54}`) + + route := h.waitForRoute(t, messageID, "email:user:"+recipient.UserID) + require.Equal(t, messageID, route.NotificationID) + require.Equal(t, "email:user:"+recipient.UserID, route.RouteID) + require.Equal(t, "email", route.Channel) + require.Equal(t, "user:"+recipient.UserID, route.RecipientRef) + require.Equal(t, "pilot@example.com", route.ResolvedEmail) + require.Equal(t, "en", route.ResolvedLocale) + + offset := h.waitForStreamOffset(t) + require.Equal(t, messageID, offset.LastProcessedEntryID) +} + +func TestNotificationUserMissingRecipientIsMalformedAndAdvancesOffset(t *testing.T) { + h := newNotificationUserHarness(t) + + messageID := h.publishUserIntent(t, "user-missing", "game.turn.ready", "game_master", "missing-user", `{"game_id":"game-123","game_name":"Nebula Clash","turn_number":54}`) + + malformed := h.waitForMalformedIntent(t, messageID) + require.Equal(t, messageID, malformed.StreamEntryID) + require.Equal(t, "game.turn.ready", malformed.NotificationType) + require.Equal(t, "game_master", malformed.Producer) + require.Equal(t, "recipient_not_found", malformed.FailureCode) + + offset := h.waitForStreamOffset(t) + require.Equal(t, messageID, offset.LastProcessedEntryID) +} + +func TestNotificationUserTemporaryUnavailabilityDoesNotAdvanceOffset(t *testing.T) { + h := newNotificationUserHarness(t) + + recipient := h.ensureUser(t, "temporary@example.com", "en") + h.notificationProcess.AllowUnexpectedExit() + h.userServiceProcess.Stop(t) + + messageID := 
h.publishUserIntent(t, recipient.UserID, "game.turn.ready", "game_master", "temporary-user-service", `{"game_id":"game-123","game_name":"Nebula Clash","turn_number":54}`) + + require.Never(t, func() bool { + offset, ok := h.loadStreamOffset(t) + return ok && offset.LastProcessedEntryID == messageID + }, time.Second, 50*time.Millisecond) + + exists, err := h.redis.Exists(context.Background(), notificationMalformedIntentKey(messageID)).Result() + require.NoError(t, err) + require.Zero(t, exists) + + exists, err = h.redis.Exists(context.Background(), notificationRouteKey(messageID, "email:user:"+recipient.UserID)).Result() + require.NoError(t, err) + require.Zero(t, exists) +} + +type notificationUserHarness struct { + redis *redis.Client + + userServiceURL string + + notificationProcess *harness.Process + userServiceProcess *harness.Process +} + +type ensureByEmailResponse struct { + Outcome string `json:"outcome"` + UserID string `json:"user_id"` +} + +type notificationRouteRecord struct { + NotificationID string `json:"notification_id"` + RouteID string `json:"route_id"` + Channel string `json:"channel"` + RecipientRef string `json:"recipient_ref"` + Status string `json:"status"` + ResolvedEmail string `json:"resolved_email,omitempty"` + ResolvedLocale string `json:"resolved_locale,omitempty"` +} + +type malformedIntentRecord struct { + StreamEntryID string `json:"stream_entry_id"` + NotificationType string `json:"notification_type,omitempty"` + Producer string `json:"producer,omitempty"` + IdempotencyKey string `json:"idempotency_key,omitempty"` + FailureCode string `json:"failure_code"` + FailureMessage string `json:"failure_message"` + RawFields map[string]any `json:"raw_fields_json"` + RecordedAtMS int64 `json:"recorded_at_ms"` +} + +type streamOffsetRecord struct { + Stream string `json:"stream"` + LastProcessedEntryID string `json:"last_processed_entry_id"` + UpdatedAtMS int64 `json:"updated_at_ms"` +} + +type httpResponse struct { + StatusCode int + Body 
string + Header http.Header +} + +func newNotificationUserHarness(t *testing.T) *notificationUserHarness { + t.Helper() + + redisRuntime := harness.StartRedisContainer(t) + redisClient := redis.NewClient(&redis.Options{ + Addr: redisRuntime.Addr, + Protocol: 2, + DisableIdentity: true, + }) + t.Cleanup(func() { + require.NoError(t, redisClient.Close()) + }) + + userServiceAddr := harness.FreeTCPAddress(t) + notificationInternalAddr := harness.FreeTCPAddress(t) + + userServiceBinary := harness.BuildBinary(t, "userservice", "./user/cmd/userservice") + notificationBinary := harness.BuildBinary(t, "notification", "./notification/cmd/notification") + + userServiceProcess := harness.StartProcess(t, "userservice", userServiceBinary, map[string]string{ + "USERSERVICE_LOG_LEVEL": "info", + "USERSERVICE_INTERNAL_HTTP_ADDR": userServiceAddr, + "USERSERVICE_REDIS_ADDR": redisRuntime.Addr, + "OTEL_TRACES_EXPORTER": "none", + "OTEL_METRICS_EXPORTER": "none", + }) + waitForUserServiceReady(t, userServiceProcess, "http://"+userServiceAddr) + + notificationProcess := harness.StartProcess(t, "notification", notificationBinary, map[string]string{ + "NOTIFICATION_LOG_LEVEL": "info", + "NOTIFICATION_INTERNAL_HTTP_ADDR": notificationInternalAddr, + "NOTIFICATION_REDIS_ADDR": redisRuntime.Addr, + "NOTIFICATION_USER_SERVICE_BASE_URL": "http://" + userServiceAddr, + "NOTIFICATION_USER_SERVICE_TIMEOUT": "250ms", + "NOTIFICATION_INTENTS_READ_BLOCK_TIMEOUT": "100ms", + "NOTIFICATION_ROUTE_BACKOFF_MIN": "100ms", + "NOTIFICATION_ROUTE_BACKOFF_MAX": "100ms", + "OTEL_TRACES_EXPORTER": "none", + "OTEL_METRICS_EXPORTER": "none", + }) + harness.WaitForHTTPStatus(t, notificationProcess, "http://"+notificationInternalAddr+"/readyz", http.StatusOK) + + return &notificationUserHarness{ + redis: redisClient, + userServiceURL: "http://" + userServiceAddr, + notificationProcess: notificationProcess, + userServiceProcess: userServiceProcess, + } +} + +func (h *notificationUserHarness) ensureUser(t *testing.T, 
email string, preferredLanguage string) ensureByEmailResponse { + t.Helper() + + response := postJSONValue(t, h.userServiceURL+"/api/v1/internal/users/ensure-by-email", map[string]any{ + "email": email, + "registration_context": map[string]string{ + "preferred_language": preferredLanguage, + "time_zone": "Europe/Kaliningrad", + }, + }) + + var body ensureByEmailResponse + requireJSONStatus(t, response, http.StatusOK, &body) + require.Equal(t, "created", body.Outcome) + require.NotEmpty(t, body.UserID) + return body +} + +func (h *notificationUserHarness) publishUserIntent(t *testing.T, recipientUserID string, notificationType string, producer string, idempotencyKey string, payloadJSON string) string { + t.Helper() + + messageID, err := h.redis.XAdd(context.Background(), &redis.XAddArgs{ + Stream: notificationUserIntentsStream, + Values: map[string]any{ + "notification_type": notificationType, + "producer": producer, + "audience_kind": "user", + "recipient_user_ids_json": `["` + recipientUserID + `"]`, + "idempotency_key": idempotencyKey, + "occurred_at_ms": "1775121700000", + "payload_json": payloadJSON, + }, + }).Result() + require.NoError(t, err) + + return messageID +} + +func (h *notificationUserHarness) waitForRoute(t *testing.T, notificationID string, routeID string) notificationRouteRecord { + t.Helper() + + key := notificationRouteKey(notificationID, routeID) + var route notificationRouteRecord + require.Eventually(t, func() bool { + payload, err := h.redis.Get(context.Background(), key).Bytes() + if err != nil { + return false + } + require.NoError(t, decodeJSONPayload(payload, &route)) + return true + }, 10*time.Second, 50*time.Millisecond) + + return route +} + +func (h *notificationUserHarness) waitForMalformedIntent(t *testing.T, streamEntryID string) malformedIntentRecord { + t.Helper() + + key := notificationMalformedIntentKey(streamEntryID) + var record malformedIntentRecord + require.Eventually(t, func() bool { + payload, err := 
h.redis.Get(context.Background(), key).Bytes() + if err != nil { + return false + } + require.NoError(t, decodeStrictJSONPayload(payload, &record)) + return true + }, 10*time.Second, 50*time.Millisecond) + + return record +} + +func (h *notificationUserHarness) waitForStreamOffset(t *testing.T) streamOffsetRecord { + t.Helper() + + var offset streamOffsetRecord + require.Eventually(t, func() bool { + var ok bool + offset, ok = h.loadStreamOffset(t) + return ok + }, 10*time.Second, 50*time.Millisecond) + + return offset +} + +func (h *notificationUserHarness) loadStreamOffset(t *testing.T) (streamOffsetRecord, bool) { + t.Helper() + + payload, err := h.redis.Get(context.Background(), notificationStreamOffsetKey()).Bytes() + if errors.Is(err, redis.Nil) { + return streamOffsetRecord{}, false + } + require.NoError(t, err) + + var offset streamOffsetRecord + require.NoError(t, decodeStrictJSONPayload(payload, &offset)) + return offset, true +} + +func waitForUserServiceReady(t *testing.T, process *harness.Process, baseURL string) { + t.Helper() + + client := &http.Client{Timeout: 250 * time.Millisecond} + t.Cleanup(client.CloseIdleConnections) + + deadline := time.Now().Add(10 * time.Second) + for time.Now().Before(deadline) { + request, err := http.NewRequest(http.MethodGet, baseURL+"/api/v1/internal/users/user-missing/exists", nil) + require.NoError(t, err) + + response, err := client.Do(request) + if err == nil { + _, _ = io.Copy(io.Discard, response.Body) + response.Body.Close() + if response.StatusCode == http.StatusOK { + return + } + } + + time.Sleep(25 * time.Millisecond) + } + + t.Fatalf("wait for userservice readiness: timeout\n%s", process.Logs()) +} + +func postJSONValue(t *testing.T, targetURL string, body any) httpResponse { + t.Helper() + + payload, err := json.Marshal(body) + require.NoError(t, err) + + request, err := http.NewRequest(http.MethodPost, targetURL, bytes.NewReader(payload)) + require.NoError(t, err) + request.Header.Set("Content-Type", 
"application/json") + return doRequest(t, request) +} + +func requireJSONStatus(t *testing.T, response httpResponse, wantStatus int, target any) { + t.Helper() + + require.Equal(t, wantStatus, response.StatusCode, "response body: %s", response.Body) + require.NoError(t, decodeStrictJSONPayload([]byte(response.Body), target)) +} + +func doRequest(t *testing.T, request *http.Request) httpResponse { + t.Helper() + + client := &http.Client{ + Timeout: 5 * time.Second, + Transport: &http.Transport{ + DisableKeepAlives: true, + }, + } + t.Cleanup(client.CloseIdleConnections) + + response, err := client.Do(request) + require.NoError(t, err) + defer response.Body.Close() + + payload, err := io.ReadAll(response.Body) + require.NoError(t, err) + + return httpResponse{ + StatusCode: response.StatusCode, + Body: string(payload), + Header: response.Header.Clone(), + } +} + +func decodeStrictJSONPayload(payload []byte, target any) error { + decoder := json.NewDecoder(bytes.NewReader(payload)) + decoder.DisallowUnknownFields() + + if err := decoder.Decode(target); err != nil { + return err + } + if err := decoder.Decode(&struct{}{}); err != io.EOF { + if err == nil { + return errors.New("unexpected trailing JSON input") + } + return err + } + + return nil +} + +func decodeJSONPayload(payload []byte, target any) error { + decoder := json.NewDecoder(bytes.NewReader(payload)) + + if err := decoder.Decode(target); err != nil { + return err + } + if err := decoder.Decode(&struct{}{}); err != io.EOF { + if err == nil { + return errors.New("unexpected trailing JSON input") + } + return err + } + + return nil +} + +func notificationRouteKey(notificationID string, routeID string) string { + return "notification:routes:" + encodeKeyComponent(notificationID) + ":" + encodeKeyComponent(routeID) +} + +func notificationMalformedIntentKey(streamEntryID string) string { + return "notification:malformed_intents:" + encodeKeyComponent(streamEntryID) +} + +func notificationStreamOffsetKey() string 
{ + return "notification:stream_offsets:" + encodeKeyComponent(notificationUserIntentsStream) +} + +func encodeKeyComponent(value string) string { + return base64.RawURLEncoding.EncodeToString([]byte(value)) +} diff --git a/mail/README.md b/mail/README.md index f163888..53afc9c 100644 --- a/mail/README.md +++ b/mail/README.md @@ -42,6 +42,8 @@ Cross-service routing rules: - `Notification Service -> Mail Service` is asynchronous `Redis Streams` - `Geo Profile Service` must route optional admin e-mail through `Notification Service`, not directly to `Mail Service` +- auth-code delivery remains a direct `Auth / Session Service -> Mail Service` + flow and does not pass through `Notification Service` ## Runtime Surface @@ -192,6 +194,7 @@ Stable envelope fields: - `source` - `payload_mode` - `idempotency_key` +- `requested_at_ms` - `request_id` - `trace_id` - `payload_json` @@ -200,6 +203,16 @@ Contract rules: - async `source` is fixed to `notification` - supported `payload_mode` values are `rendered` and `template` +- `Notification Service` uses only `payload_mode=template` for + notification-generated mail, even though the generic async contract keeps + both `rendered` and `template` +- notification-owned `template_id` values are identical to the + `notification_type` vocabulary, for example `game.turn.ready` and + `lobby.membership.approved` +- the real `Notification Service -> Mail Service` integration suite verifies + template-mode handoff for notification-owned mail +- `requested_at_ms` stores the publisher-side original request timestamp in + Unix milliseconds - `request_id` and `trace_id` are observability-only metadata and do not participate in idempotency fingerprinting - malformed commands are metered, logged, and recorded as dedicated @@ -338,6 +351,13 @@ Required auth fallback files: - `auth.login_code/en/subject.tmpl` - `auth.login_code/en/text.tmpl` +Notification-owned English template directories are frozen by 
+[`../notification/README.md`](../notification/README.md) and the service-local +[`Notification Service` docs](../notification/docs/README.md). +`auth.login_code` remains the required auth template family for the direct +`Auth / Session Service -> Mail Service` flow and is not part of the +notification-owned template set. + Rendering rules: - the process loads the full catalog at startup diff --git a/mail/api/delivery-commands-asyncapi.yaml b/mail/api/delivery-commands-asyncapi.yaml index 64764ed..dc42b83 100644 --- a/mail/api/delivery-commands-asyncapi.yaml +++ b/mail/api/delivery-commands-asyncapi.yaml @@ -56,7 +56,7 @@ components: payload_mode: template idempotency_key: notification:mail-124 requested_at_ms: "1775121700001" - payload_json: '{"to":["pilot@example.com"],"cc":[],"bcc":[],"reply_to":[],"template_id":"game.turn_ready","locale":"fr-FR","variables":{"turn_number":54},"attachments":[]}' + payload_json: '{"to":["pilot@example.com"],"cc":[],"bcc":[],"reply_to":[],"template_id":"game.turn.ready","locale":"fr-FR","variables":{"game_id":"game-123","game_name":"Nebula Clash","turn_number":54},"attachments":[]}' schemas: RenderedDeliveryCommandEnvelope: type: object diff --git a/mail/docs/examples.md b/mail/docs/examples.md index 1cd2ee5..d9a489c 100644 --- a/mail/docs/examples.md +++ b/mail/docs/examples.md @@ -91,7 +91,7 @@ redis-cli XADD mail:delivery_commands '*' \ idempotency_key notification:mail-124 \ request_id req-124 \ trace_id trace-124 \ - payload_json '{"to":["pilot@example.com"],"cc":[],"bcc":[],"reply_to":[],"template_id":"game.turn_ready","locale":"fr-FR","variables":{"turn_number":54},"attachments":[]}' + payload_json '{"to":["pilot@example.com"],"cc":[],"bcc":[],"reply_to":[],"template_id":"game.turn.ready","locale":"fr-FR","variables":{"turn_number":54},"attachments":[]}' ``` ## Operator API Examples diff --git a/mail/internal/adapters/templates/catalog_test.go b/mail/internal/adapters/templates/catalog_test.go index 686f39b..5c5b712 100644 
--- a/mail/internal/adapters/templates/catalog_test.go +++ b/mail/internal/adapters/templates/catalog_test.go @@ -17,9 +17,9 @@ func TestNewCatalogBuildsImmutableRegistry(t *testing.T) { rootDir := t.TempDir() writeTemplateFile(t, rootDir, filepath.Join("auth.login_code", "en", "subject.tmpl"), "Your login code") writeTemplateFile(t, rootDir, filepath.Join("auth.login_code", "en", "text.tmpl"), "Code: {{.code}}") - writeTemplateFile(t, rootDir, filepath.Join("game.turn_ready", "fr-fr", "subject.tmpl"), "Tour {{.turn_number}}") - writeTemplateFile(t, rootDir, filepath.Join("game.turn_ready", "fr-fr", "text.tmpl"), "Bonjour {{with .player}}{{.name}}{{end}}") - writeTemplateFile(t, rootDir, filepath.Join("game.turn_ready", "fr-fr", "html.tmpl"), "

{{.player.name}}

") + writeTemplateFile(t, rootDir, filepath.Join("game.turn.ready", "fr-fr", "subject.tmpl"), "Tour {{.turn_number}}") + writeTemplateFile(t, rootDir, filepath.Join("game.turn.ready", "fr-fr", "text.tmpl"), "Bonjour {{with .player}}{{.name}}{{end}}") + writeTemplateFile(t, rootDir, filepath.Join("game.turn.ready", "fr-fr", "html.tmpl"), "

{{.player.name}}

") catalog, err := NewCatalog(rootDir) require.NoError(t, err) @@ -27,7 +27,7 @@ func TestNewCatalogBuildsImmutableRegistry(t *testing.T) { locale, err := common.ParseLocale("fr-FR") require.NoError(t, err) - resolved, err := catalog.Lookup(common.TemplateID("game.turn_ready"), locale) + resolved, err := catalog.Lookup(common.TemplateID("game.turn.ready"), locale) require.NoError(t, err) require.False(t, resolved.LocaleFallbackUsed()) require.Equal(t, common.Locale("fr-FR"), resolved.ResolvedLocale()) @@ -66,15 +66,15 @@ func TestCatalogLookupFallsBackToEnglish(t *testing.T) { rootDir := t.TempDir() writeTemplateFile(t, rootDir, filepath.Join("auth.login_code", "en", "subject.tmpl"), "Your login code") writeTemplateFile(t, rootDir, filepath.Join("auth.login_code", "en", "text.tmpl"), "Code: {{.code}}") - writeTemplateFile(t, rootDir, filepath.Join("game.turn_ready", "en", "subject.tmpl"), "Turn {{.turn_number}}") - writeTemplateFile(t, rootDir, filepath.Join("game.turn_ready", "en", "text.tmpl"), "Hello {{.player.name}}") + writeTemplateFile(t, rootDir, filepath.Join("game.turn.ready", "en", "subject.tmpl"), "Turn {{.turn_number}}") + writeTemplateFile(t, rootDir, filepath.Join("game.turn.ready", "en", "text.tmpl"), "Hello {{.player.name}}") catalog, err := NewCatalog(rootDir) require.NoError(t, err) locale, err := common.ParseLocale("fr-FR") require.NoError(t, err) - resolved, err := catalog.Lookup(common.TemplateID("game.turn_ready"), locale) + resolved, err := catalog.Lookup(common.TemplateID("game.turn.ready"), locale) require.NoError(t, err) require.True(t, resolved.LocaleFallbackUsed()) require.Equal(t, common.Locale("en"), resolved.ResolvedLocale()) @@ -86,15 +86,15 @@ func TestCatalogLookupRejectsMissingEnglishFallback(t *testing.T) { rootDir := t.TempDir() writeTemplateFile(t, rootDir, filepath.Join("auth.login_code", "en", "subject.tmpl"), "Your login code") writeTemplateFile(t, rootDir, filepath.Join("auth.login_code", "en", "text.tmpl"), "Code: 
{{.code}}") - writeTemplateFile(t, rootDir, filepath.Join("game.turn_ready", "fr-FR", "subject.tmpl"), "Tour {{.turn_number}}") - writeTemplateFile(t, rootDir, filepath.Join("game.turn_ready", "fr-FR", "text.tmpl"), "Bonjour {{.player.name}}") + writeTemplateFile(t, rootDir, filepath.Join("game.turn.ready", "fr-FR", "subject.tmpl"), "Tour {{.turn_number}}") + writeTemplateFile(t, rootDir, filepath.Join("game.turn.ready", "fr-FR", "text.tmpl"), "Bonjour {{.player.name}}") catalog, err := NewCatalog(rootDir) require.NoError(t, err) locale, err := common.ParseLocale("de-DE") require.NoError(t, err) - _, err = catalog.Lookup(common.TemplateID("game.turn_ready"), locale) + _, err = catalog.Lookup(common.TemplateID("game.turn.ready"), locale) require.Error(t, err) require.True(t, errors.Is(err, ErrFallbackMissing)) } @@ -111,7 +111,7 @@ func TestCatalogLookupRejectsUnknownTemplateFamily(t *testing.T) { locale, err := common.ParseLocale("en") require.NoError(t, err) - _, err = catalog.Lookup(common.TemplateID("game.turn_ready"), locale) + _, err = catalog.Lookup(common.TemplateID("game.turn.ready"), locale) require.Error(t, err) require.True(t, errors.Is(err, ErrTemplateNotFound)) } @@ -143,8 +143,8 @@ func TestCatalogVersionIsDeterministic(t *testing.T) { rootDir := t.TempDir() writeTemplateFile(t, rootDir, filepath.Join("auth.login_code", "en", "subject.tmpl"), "Your login code") writeTemplateFile(t, rootDir, filepath.Join("auth.login_code", "en", "text.tmpl"), "Code: {{.code}}") - writeTemplateFile(t, rootDir, filepath.Join("game.turn_ready", "en", "subject.tmpl"), "Turn {{.turn_number}}") - writeTemplateFile(t, rootDir, filepath.Join("game.turn_ready", "en", "text.tmpl"), "Hello {{.player.name}}") + writeTemplateFile(t, rootDir, filepath.Join("game.turn.ready", "en", "subject.tmpl"), "Turn {{.turn_number}}") + writeTemplateFile(t, rootDir, filepath.Join("game.turn.ready", "en", "text.tmpl"), "Hello {{.player.name}}") firstCatalog, err := NewCatalog(rootDir) 
require.NoError(t, err) @@ -153,9 +153,9 @@ func TestCatalogVersionIsDeterministic(t *testing.T) { locale, err := common.ParseLocale("en") require.NoError(t, err) - firstResolved, err := firstCatalog.Lookup(common.TemplateID("game.turn_ready"), locale) + firstResolved, err := firstCatalog.Lookup(common.TemplateID("game.turn.ready"), locale) require.NoError(t, err) - secondResolved, err := secondCatalog.Lookup(common.TemplateID("game.turn_ready"), locale) + secondResolved, err := secondCatalog.Lookup(common.TemplateID("game.turn.ready"), locale) require.NoError(t, err) require.Equal(t, firstResolved.Template().Version, secondResolved.Template().Version) @@ -173,8 +173,8 @@ func TestNewCatalogRejectsMissingRequiredStartupTemplate(t *testing.T) { t.Parallel() rootDir := t.TempDir() - writeTemplateFile(t, rootDir, filepath.Join("game.turn_ready", "en", "subject.tmpl"), "Turn {{.turn_number}}") - writeTemplateFile(t, rootDir, filepath.Join("game.turn_ready", "en", "text.tmpl"), "Hello {{.player.name}}") + writeTemplateFile(t, rootDir, filepath.Join("game.turn.ready", "en", "subject.tmpl"), "Turn {{.turn_number}}") + writeTemplateFile(t, rootDir, filepath.Join("game.turn.ready", "en", "text.tmpl"), "Hello {{.player.name}}") _, err := NewCatalog(rootDir) require.Error(t, err) @@ -187,8 +187,8 @@ func TestNewCatalogRejectsBrokenTemplateParse(t *testing.T) { rootDir := t.TempDir() writeTemplateFile(t, rootDir, filepath.Join("auth.login_code", "en", "subject.tmpl"), "Your login code") writeTemplateFile(t, rootDir, filepath.Join("auth.login_code", "en", "text.tmpl"), "Code: {{.code}}") - writeTemplateFile(t, rootDir, filepath.Join("game.turn_ready", "en", "subject.tmpl"), "{{if .turn_number}") - writeTemplateFile(t, rootDir, filepath.Join("game.turn_ready", "en", "text.tmpl"), "Hello {{.player.name}}") + writeTemplateFile(t, rootDir, filepath.Join("game.turn.ready", "en", "subject.tmpl"), "{{if .turn_number}") + writeTemplateFile(t, rootDir, filepath.Join("game.turn.ready", 
"en", "text.tmpl"), "Hello {{.player.name}}") _, err := NewCatalog(rootDir) require.Error(t, err) diff --git a/mail/internal/adapters/templates/checked_in_assets_test.go b/mail/internal/adapters/templates/checked_in_assets_test.go new file mode 100644 index 0000000..fbe4070 --- /dev/null +++ b/mail/internal/adapters/templates/checked_in_assets_test.go @@ -0,0 +1,58 @@ +package templates + +import ( + "path/filepath" + "runtime" + "testing" + + "galaxy/mail/internal/domain/common" + + "github.com/stretchr/testify/require" +) + +var expectedNotificationTemplateIDs = []common.TemplateID{ + "geo.review_recommended", + "game.turn.ready", + "game.finished", + "game.generation_failed", + "lobby.runtime_paused_after_start", + "lobby.application.submitted", + "lobby.membership.approved", + "lobby.membership.rejected", + "lobby.invite.created", + "lobby.invite.redeemed", + "lobby.invite.expired", +} + +func TestCheckedInTemplateCatalogIncludesNotificationEnglishAssets(t *testing.T) { + t.Parallel() + + catalog, err := NewCatalog(checkedInTemplateRoot(t)) + require.NoError(t, err) + + locale, err := common.ParseLocale("en") + require.NoError(t, err) + + authTemplate, err := catalog.Lookup(common.TemplateID("auth.login_code"), locale) + require.NoError(t, err) + require.Equal(t, common.Locale("en"), authTemplate.ResolvedLocale()) + require.False(t, authTemplate.LocaleFallbackUsed()) + + for _, templateID := range expectedNotificationTemplateIDs { + resolved, err := catalog.Lookup(templateID, locale) + require.NoErrorf(t, err, "lookup checked-in template %s", templateID) + require.Equalf(t, common.Locale("en"), resolved.ResolvedLocale(), "template %s must resolve to en", templateID) + require.Falsef(t, resolved.LocaleFallbackUsed(), "template %s must not use fallback for en", templateID) + } +} + +func checkedInTemplateRoot(t *testing.T) string { + t.Helper() + + _, thisFile, _, ok := runtime.Caller(0) + if !ok { + require.FailNow(t, "runtime.Caller failed") + } + + return 
filepath.Clean(filepath.Join(filepath.Dir(thisFile), "..", "..", "..", "templates")) +} diff --git a/mail/internal/api/streamcommand/contract_test.go b/mail/internal/api/streamcommand/contract_test.go index 83bfccf..1c09efe 100644 --- a/mail/internal/api/streamcommand/contract_test.go +++ b/mail/internal/api/streamcommand/contract_test.go @@ -53,7 +53,7 @@ func TestDecodeCommandSuccessTemplate(t *testing.T) { command, err := DecodeCommand(validTemplateFields(t)) require.NoError(t, err) - require.Equal(t, common.TemplateID("game.turn_ready"), command.TemplateID) + require.Equal(t, common.TemplateID("game.turn.ready"), command.TemplateID) require.Equal(t, common.Locale("fr-FR"), command.Locale) require.Equal(t, map[string]any{ "turn_number": float64(54), @@ -171,7 +171,7 @@ func TestDecodeCommandRejectsInvalidPayload(t *testing.T) { "subject": "Turn ready", "text_body": "Turn 54 is ready.", "attachments": []map[string]any{}, - "template_id": "game.turn_ready", + "template_id": "game.turn.ready", }) return fields }(t), @@ -212,7 +212,7 @@ func TestDecodeCommandRejectsInvalidPayload(t *testing.T) { "cc": []string{}, "bcc": []string{}, "reply_to": []string{}, - "template_id": "game.turn_ready", + "template_id": "game.turn.ready", "locale": "english", "variables": map[string]any{}, "attachments": []map[string]any{}, @@ -230,7 +230,7 @@ func TestDecodeCommandRejectsInvalidPayload(t *testing.T) { "cc": []string{}, "bcc": []string{}, "reply_to": []string{}, - "template_id": "game.turn_ready", + "template_id": "game.turn.ready", "locale": "fr-FR", "variables": []string{"not", "object"}, "attachments": []map[string]any{}, @@ -428,7 +428,7 @@ func validTemplatePayloadJSON(t *testing.T) string { "cc": []string{}, "bcc": []string{}, "reply_to": []string{}, - "template_id": "game.turn_ready", + "template_id": "game.turn.ready", "locale": "fr-FR", "variables": map[string]any{ "turn_number": 54, diff --git a/mail/internal/app/runtime_test.go b/mail/internal/app/runtime_test.go 
index 07333f7..66bd60a 100644 --- a/mail/internal/app/runtime_test.go +++ b/mail/internal/app/runtime_test.go @@ -104,9 +104,9 @@ func TestNewRuntimeRejectsBrokenTemplateCatalog(t *testing.T) { require.NoError(t, os.MkdirAll(filepath.Join(rootDir, "auth.login_code", "en"), 0o755)) require.NoError(t, os.WriteFile(filepath.Join(rootDir, "auth.login_code", "en", "subject.tmpl"), []byte("Your login code"), 0o644)) require.NoError(t, os.WriteFile(filepath.Join(rootDir, "auth.login_code", "en", "text.tmpl"), []byte("Code: {{.code}}"), 0o644)) - require.NoError(t, os.MkdirAll(filepath.Join(rootDir, "game.turn_ready", "en"), 0o755)) - require.NoError(t, os.WriteFile(filepath.Join(rootDir, "game.turn_ready", "en", "subject.tmpl"), []byte("{{if .turn_number}"), 0o644)) - require.NoError(t, os.WriteFile(filepath.Join(rootDir, "game.turn_ready", "en", "text.tmpl"), []byte("Turn ready"), 0o644)) + require.NoError(t, os.MkdirAll(filepath.Join(rootDir, "game.turn.ready", "en"), 0o755)) + require.NoError(t, os.WriteFile(filepath.Join(rootDir, "game.turn.ready", "en", "subject.tmpl"), []byte("{{if .turn_number}"), 0o644)) + require.NoError(t, os.WriteFile(filepath.Join(rootDir, "game.turn.ready", "en", "text.tmpl"), []byte("Turn ready"), 0o644)) cfg := config.DefaultConfig() cfg.Redis.Addr = redisServer.Addr() diff --git a/mail/internal/domain/delivery/model_test.go b/mail/internal/domain/delivery/model_test.go index 685f369..0e99a8c 100644 --- a/mail/internal/domain/delivery/model_test.go +++ b/mail/internal/domain/delivery/model_test.go @@ -264,7 +264,7 @@ func validTemplateQueuedDelivery(t *testing.T) Delivery { DeliveryID: common.DeliveryID("delivery-124"), Source: SourceNotification, PayloadMode: PayloadModeTemplate, - TemplateID: common.TemplateID("game.turn_ready"), + TemplateID: common.TemplateID("game.turn.ready"), Envelope: validEnvelope(), Locale: locale, TemplateVariables: map[string]any{ diff --git a/mail/internal/service/acceptgenericdelivery/service_test.go 
b/mail/internal/service/acceptgenericdelivery/service_test.go index df66a67..44e716d 100644 --- a/mail/internal/service/acceptgenericdelivery/service_test.go +++ b/mail/internal/service/acceptgenericdelivery/service_test.go @@ -62,7 +62,7 @@ func TestServiceExecuteAcceptsTemplateDelivery(t *testing.T) { require.Equal(t, Result{Outcome: OutcomeAccepted}, result) require.Len(t, store.createInputs, 1) require.Nil(t, store.createInputs[0].DeliveryPayload) - require.Equal(t, common.TemplateID("game.turn_ready"), store.createInputs[0].Delivery.TemplateID) + require.Equal(t, common.TemplateID("game.turn.ready"), store.createInputs[0].Delivery.TemplateID) require.Equal(t, map[string]any{ "turn_number": float64(54), "player": map[string]any{ @@ -201,7 +201,7 @@ func TestServiceExecuteLogsAcceptedDeliveryAndCreatesSpan(t *testing.T) { require.NoError(t, err) require.Contains(t, loggerBuffer.String(), "\"delivery_id\":\"mail-124\"") require.Contains(t, loggerBuffer.String(), "\"source\":\"notification\"") - require.Contains(t, loggerBuffer.String(), "\"template_id\":\"game.turn_ready\"") + require.Contains(t, loggerBuffer.String(), "\"template_id\":\"game.turn.ready\"") require.Contains(t, loggerBuffer.String(), "\"trace_id\":\"trace-123\"") require.Contains(t, loggerBuffer.String(), "\"otel_trace_id\":") require.True(t, hasSpanNamed(recorder.Ended(), "mail.accept_generic_delivery")) @@ -295,7 +295,7 @@ func validTemplateCommand(t *testing.T) streamcommand.Command { "payload_mode": "template", "idempotency_key": "notification:mail-124", "requested_at_ms": "1775121700001", - "payload_json": `{"to":["pilot@example.com"],"cc":[],"bcc":[],"reply_to":[],"template_id":"game.turn_ready","locale":"fr-FR","variables":{"turn_number":54,"player":{"name":"Pilot"}},"attachments":[]}`, + "payload_json": `{"to":["pilot@example.com"],"cc":[],"bcc":[],"reply_to":[],"template_id":"game.turn.ready","locale":"fr-FR","variables":{"turn_number":54,"player":{"name":"Pilot"}},"attachments":[]}`, }) 
require.NoError(t, err) diff --git a/mail/internal/service/executeattempt/service_test.go b/mail/internal/service/executeattempt/service_test.go index b16e9c7..f8b8572 100644 --- a/mail/internal/service/executeattempt/service_test.go +++ b/mail/internal/service/executeattempt/service_test.go @@ -308,7 +308,7 @@ func TestServiceExecuteRecordsMetricsAndLogsProviderResult(t *testing.T) { require.Equal(t, []string{"smtp:accepted"}, telemetry.providerDurations) require.Contains(t, loggerBuffer.String(), "\"delivery_id\":\"delivery-template-sending\"") require.Contains(t, loggerBuffer.String(), "\"source\":\"notification\"") - require.Contains(t, loggerBuffer.String(), "\"template_id\":\"game.turn_ready\"") + require.Contains(t, loggerBuffer.String(), "\"template_id\":\"game.turn.ready\"") require.Contains(t, loggerBuffer.String(), "\"attempt_no\":1") require.Contains(t, loggerBuffer.String(), "\"otel_trace_id\":") require.True(t, hasExecuteSpanNamed(recorder.Ended(), "mail.provider_send")) @@ -431,7 +431,7 @@ func queuedTemplateWorkItem(t *testing.T) WorkItem { DeliveryID: common.DeliveryID("delivery-template"), Source: deliverydomain.SourceNotification, PayloadMode: deliverydomain.PayloadModeTemplate, - TemplateID: common.TemplateID("game.turn_ready"), + TemplateID: common.TemplateID("game.turn.ready"), Envelope: deliverydomain.Envelope{ To: []common.Email{common.Email("pilot@example.com")}, }, @@ -512,7 +512,7 @@ func sendingTemplateWorkItem(t *testing.T, attemptNo int) WorkItem { DeliveryID: common.DeliveryID("delivery-template-sending"), Source: deliverydomain.SourceNotification, PayloadMode: deliverydomain.PayloadModeTemplate, - TemplateID: common.TemplateID("game.turn_ready"), + TemplateID: common.TemplateID("game.turn.ready"), Envelope: deliverydomain.Envelope{ To: []common.Email{common.Email("pilot@example.com")}, }, diff --git a/mail/internal/service/renderdelivery/service_test.go b/mail/internal/service/renderdelivery/service_test.go index 730bec2..bb2f569 
100644 --- a/mail/internal/service/renderdelivery/service_test.go +++ b/mail/internal/service/renderdelivery/service_test.go @@ -26,9 +26,9 @@ func TestServiceExecuteRendersExactLocale(t *testing.T) { catalog := newTestCatalog(t, map[string]string{ filepath.Join("auth.login_code", "en", "subject.tmpl"): "Your login code", filepath.Join("auth.login_code", "en", "text.tmpl"): "Code: {{.code}}", - filepath.Join("game.turn_ready", "fr-fr", "subject.tmpl"): "Tour {{.turn_number}}", - filepath.Join("game.turn_ready", "fr-fr", "text.tmpl"): "Bonjour {{with .player}}{{.name}}{{end}}", - filepath.Join("game.turn_ready", "fr-fr", "html.tmpl"): "

{{.player.name}}

", + filepath.Join("game.turn.ready", "fr-fr", "subject.tmpl"): "Tour {{.turn_number}}", + filepath.Join("game.turn.ready", "fr-fr", "text.tmpl"): "Bonjour {{with .player}}{{.name}}{{end}}", + filepath.Join("game.turn.ready", "fr-fr", "html.tmpl"): "

{{.player.name}}

", }) store := &stubStore{} @@ -61,8 +61,8 @@ func TestServiceExecuteFallsBackToEnglish(t *testing.T) { catalog := newTestCatalog(t, map[string]string{ filepath.Join("auth.login_code", "en", "subject.tmpl"): "Your login code", filepath.Join("auth.login_code", "en", "text.tmpl"): "Code: {{.code}}", - filepath.Join("game.turn_ready", "en", "subject.tmpl"): "Turn {{.turn_number}}", - filepath.Join("game.turn_ready", "en", "text.tmpl"): "Hello {{.player.name}}", + filepath.Join("game.turn.ready", "en", "subject.tmpl"): "Turn {{.turn_number}}", + filepath.Join("game.turn.ready", "en", "text.tmpl"): "Hello {{.player.name}}", }) store := &stubStore{} @@ -86,8 +86,8 @@ func TestServiceExecuteRecordsLocaleFallbackAndLogsFields(t *testing.T) { catalog := newTestCatalog(t, map[string]string{ filepath.Join("auth.login_code", "en", "subject.tmpl"): "Your login code", filepath.Join("auth.login_code", "en", "text.tmpl"): "Code: {{.code}}", - filepath.Join("game.turn_ready", "en", "subject.tmpl"): "Turn {{.turn_number}}", - filepath.Join("game.turn_ready", "en", "text.tmpl"): "Hello {{.player.name}}", + filepath.Join("game.turn.ready", "en", "subject.tmpl"): "Turn {{.turn_number}}", + filepath.Join("game.turn.ready", "en", "text.tmpl"): "Hello {{.player.name}}", }) telemetry := &stubTelemetry{} @@ -107,10 +107,10 @@ func TestServiceExecuteRecordsLocaleFallbackAndLogsFields(t *testing.T) { _, err := service.Execute(context.Background(), validInput(t, "fr-FR")) require.NoError(t, err) require.Equal(t, []string{"notification:rendered"}, telemetry.statuses) - require.Equal(t, []string{"game.turn_ready:fr-FR:en"}, telemetry.fallbacks) + require.Equal(t, []string{"game.turn.ready:fr-FR:en"}, telemetry.fallbacks) require.Contains(t, loggerBuffer.String(), "\"delivery_id\":\"delivery-123\"") require.Contains(t, loggerBuffer.String(), "\"source\":\"notification\"") - require.Contains(t, loggerBuffer.String(), "\"template_id\":\"game.turn_ready\"") + require.Contains(t, 
loggerBuffer.String(), "\"template_id\":\"game.turn.ready\"") require.Contains(t, loggerBuffer.String(), "\"attempt_no\":1") require.Contains(t, loggerBuffer.String(), "\"otel_trace_id\":") require.True(t, hasRenderSpanNamed(recorder.Ended(), "mail.render_delivery")) @@ -122,8 +122,8 @@ func TestServiceExecuteFailsOnMissingRequiredVariable(t *testing.T) { catalog := newTestCatalog(t, map[string]string{ filepath.Join("auth.login_code", "en", "subject.tmpl"): "Your login code", filepath.Join("auth.login_code", "en", "text.tmpl"): "Code: {{.code}}", - filepath.Join("game.turn_ready", "en", "subject.tmpl"): "Turn {{.turn_number}}", - filepath.Join("game.turn_ready", "en", "text.tmpl"): "Hello {{.player.name}}", + filepath.Join("game.turn.ready", "en", "subject.tmpl"): "Turn {{.turn_number}}", + filepath.Join("game.turn.ready", "en", "text.tmpl"): "Hello {{.player.name}}", }) store := &stubStore{} @@ -153,8 +153,8 @@ func TestServiceExecuteFailsOnTemplateExecutionError(t *testing.T) { catalog := newTestCatalog(t, map[string]string{ filepath.Join("auth.login_code", "en", "subject.tmpl"): "Your login code", filepath.Join("auth.login_code", "en", "text.tmpl"): "Code: {{.code}}", - filepath.Join("game.turn_ready", "en", "subject.tmpl"): "{{call .callable}}", - filepath.Join("game.turn_ready", "en", "text.tmpl"): "Hello {{.player.name}}", + filepath.Join("game.turn.ready", "en", "subject.tmpl"): "{{call .callable}}", + filepath.Join("game.turn.ready", "en", "text.tmpl"): "Hello {{.player.name}}", }) store := &stubStore{} @@ -231,8 +231,8 @@ func TestServiceExecuteReturnsServiceUnavailableOnStoreFailure(t *testing.T) { catalog := newTestCatalog(t, map[string]string{ filepath.Join("auth.login_code", "en", "subject.tmpl"): "Your login code", filepath.Join("auth.login_code", "en", "text.tmpl"): "Code: {{.code}}", - filepath.Join("game.turn_ready", "en", "subject.tmpl"): "Turn {{.turn_number}}", - filepath.Join("game.turn_ready", "en", "text.tmpl"): "Hello {{.player.name}}", + 
filepath.Join("game.turn.ready", "en", "subject.tmpl"): "Turn {{.turn_number}}", + filepath.Join("game.turn.ready", "en", "text.tmpl"): "Hello {{.player.name}}", }) service := newTestService(t, Config{ @@ -346,7 +346,7 @@ func validInput(t *testing.T, localeValue string) Input { DeliveryID: common.DeliveryID("delivery-123"), Source: deliverydomain.SourceNotification, PayloadMode: deliverydomain.PayloadModeTemplate, - TemplateID: common.TemplateID("game.turn_ready"), + TemplateID: common.TemplateID("game.turn.ready"), Envelope: deliverydomain.Envelope{ To: []common.Email{common.Email("pilot@example.com")}, }, diff --git a/mail/internal/service/resenddelivery/service_test.go b/mail/internal/service/resenddelivery/service_test.go index 61adf58..3ea122f 100644 --- a/mail/internal/service/resenddelivery/service_test.go +++ b/mail/internal/service/resenddelivery/service_test.go @@ -121,7 +121,7 @@ func TestServiceExecuteLogsCloneCreationAndCreatesSpan(t *testing.T) { require.Equal(t, []string{"operator_resend:queued"}, telemetry.statuses) require.Contains(t, loggerBuffer.String(), "\"delivery_id\":\"clone-456\"") require.Contains(t, loggerBuffer.String(), "\"source\":\"operator_resend\"") - require.Contains(t, loggerBuffer.String(), "\"template_id\":\"game.turn_ready\"") + require.Contains(t, loggerBuffer.String(), "\"template_id\":\"game.turn.ready\"") require.Contains(t, loggerBuffer.String(), "\"otel_trace_id\":") require.True(t, hasResendSpanNamed(recorder.Ended(), "mail.resend_delivery")) } @@ -205,7 +205,7 @@ func validOriginalDelivery() deliverydomain.Delivery { DeliveryID: common.DeliveryID("delivery-original"), Source: deliverydomain.SourceNotification, PayloadMode: deliverydomain.PayloadModeTemplate, - TemplateID: common.TemplateID("game.turn_ready"), + TemplateID: common.TemplateID("game.turn.ready"), Envelope: deliverydomain.Envelope{ To: []common.Email{common.Email("pilot@example.com")}, Cc: []common.Email{common.Email("copilot@example.com")}, diff --git 
a/mail/internal/worker/command_consumer_test.go b/mail/internal/worker/command_consumer_test.go index d807447..5d8692b 100644 --- a/mail/internal/worker/command_consumer_test.go +++ b/mail/internal/worker/command_consumer_test.go @@ -59,7 +59,7 @@ func TestCommandConsumerAcceptsTemplateCommand(t *testing.T) { return false } entryID, found, err := fixture.offsetStore.Load(context.Background(), fixture.stream) - return err == nil && found && entryID == messageID && delivery.TemplateID == "game.turn_ready" + return err == nil && found && entryID == messageID && delivery.TemplateID == "game.turn.ready" }, 5*time.Second, 20*time.Millisecond) cancel() @@ -324,7 +324,7 @@ func addTemplateCommand(t *testing.T, client *redis.Client, deliveryID string, i "payload_mode": "template", "idempotency_key": idempotencyKey, "requested_at_ms": "1775121700001", - "payload_json": `{"to":["pilot@example.com"],"cc":[],"bcc":[],"reply_to":[],"template_id":"game.turn_ready","locale":"fr-FR","variables":{"turn_number":54},"attachments":[]}`, + "payload_json": `{"to":["pilot@example.com"],"cc":[],"bcc":[],"reply_to":[],"template_id":"game.turn.ready","locale":"fr-FR","variables":{"turn_number":54},"attachments":[]}`, }, }).Result() require.NoError(t, err) diff --git a/mail/templates/game.finished/en/subject.tmpl b/mail/templates/game.finished/en/subject.tmpl new file mode 100644 index 0000000..157844a --- /dev/null +++ b/mail/templates/game.finished/en/subject.tmpl @@ -0,0 +1 @@ +Game finished: {{.game_name}} diff --git a/mail/templates/game.finished/en/text.tmpl b/mail/templates/game.finished/en/text.tmpl new file mode 100644 index 0000000..9db3a6b --- /dev/null +++ b/mail/templates/game.finished/en/text.tmpl @@ -0,0 +1,4 @@ +{{.game_name}} has finished. 
+ +Game ID: {{.game_id}} +Final turn: {{.final_turn_number}} diff --git a/mail/templates/game.generation_failed/en/subject.tmpl b/mail/templates/game.generation_failed/en/subject.tmpl new file mode 100644 index 0000000..0d5f6e6 --- /dev/null +++ b/mail/templates/game.generation_failed/en/subject.tmpl @@ -0,0 +1 @@ +Turn generation failed in {{.game_name}} diff --git a/mail/templates/game.generation_failed/en/text.tmpl b/mail/templates/game.generation_failed/en/text.tmpl new file mode 100644 index 0000000..0865b94 --- /dev/null +++ b/mail/templates/game.generation_failed/en/text.tmpl @@ -0,0 +1,4 @@ +Turn generation failed for {{.game_name}}. + +Game ID: {{.game_id}} +Failure reason: {{.failure_reason}} diff --git a/mail/templates/game.turn.ready/en/subject.tmpl b/mail/templates/game.turn.ready/en/subject.tmpl new file mode 100644 index 0000000..3eac296 --- /dev/null +++ b/mail/templates/game.turn.ready/en/subject.tmpl @@ -0,0 +1 @@ +Turn {{.turn_number}} is ready in {{.game_name}} diff --git a/mail/templates/game.turn.ready/en/text.tmpl b/mail/templates/game.turn.ready/en/text.tmpl new file mode 100644 index 0000000..ff4afd1 --- /dev/null +++ b/mail/templates/game.turn.ready/en/text.tmpl @@ -0,0 +1,4 @@ +A new turn is ready in {{.game_name}}. + +Game ID: {{.game_id}} +Turn: {{.turn_number}} diff --git a/mail/templates/geo.review_recommended/en/subject.tmpl b/mail/templates/geo.review_recommended/en/subject.tmpl new file mode 100644 index 0000000..8d30eed --- /dev/null +++ b/mail/templates/geo.review_recommended/en/subject.tmpl @@ -0,0 +1 @@ +Geo review recommended for {{.user_email}} diff --git a/mail/templates/geo.review_recommended/en/text.tmpl b/mail/templates/geo.review_recommended/en/text.tmpl new file mode 100644 index 0000000..8888659 --- /dev/null +++ b/mail/templates/geo.review_recommended/en/text.tmpl @@ -0,0 +1,5 @@ +User {{.user_email}} ({{.user_id}}) entered the geo review queue. 
+ +Observed country: {{.observed_country}} +Usual connection country: {{.usual_connection_country}} +Reason: {{.review_reason}} diff --git a/mail/templates/lobby.application.submitted/en/subject.tmpl b/mail/templates/lobby.application.submitted/en/subject.tmpl new file mode 100644 index 0000000..5a417c8 --- /dev/null +++ b/mail/templates/lobby.application.submitted/en/subject.tmpl @@ -0,0 +1 @@ +New application for {{.game_name}} diff --git a/mail/templates/lobby.application.submitted/en/text.tmpl b/mail/templates/lobby.application.submitted/en/text.tmpl new file mode 100644 index 0000000..a1ea863 --- /dev/null +++ b/mail/templates/lobby.application.submitted/en/text.tmpl @@ -0,0 +1,4 @@ +{{.applicant_name}} submitted an application for {{.game_name}}. + +Game ID: {{.game_id}} +Applicant user ID: {{.applicant_user_id}} diff --git a/mail/templates/lobby.invite.created/en/subject.tmpl b/mail/templates/lobby.invite.created/en/subject.tmpl new file mode 100644 index 0000000..3833c7a --- /dev/null +++ b/mail/templates/lobby.invite.created/en/subject.tmpl @@ -0,0 +1 @@ +You were invited to {{.game_name}} diff --git a/mail/templates/lobby.invite.created/en/text.tmpl b/mail/templates/lobby.invite.created/en/text.tmpl new file mode 100644 index 0000000..5270d3b --- /dev/null +++ b/mail/templates/lobby.invite.created/en/text.tmpl @@ -0,0 +1,4 @@ +{{.inviter_name}} invited you to join {{.game_name}}. 
+ +Game ID: {{.game_id}} +Inviter user ID: {{.inviter_user_id}} diff --git a/mail/templates/lobby.invite.expired/en/subject.tmpl b/mail/templates/lobby.invite.expired/en/subject.tmpl new file mode 100644 index 0000000..7531105 --- /dev/null +++ b/mail/templates/lobby.invite.expired/en/subject.tmpl @@ -0,0 +1 @@ +Invite expired for {{.game_name}} diff --git a/mail/templates/lobby.invite.expired/en/text.tmpl b/mail/templates/lobby.invite.expired/en/text.tmpl new file mode 100644 index 0000000..0f9394b --- /dev/null +++ b/mail/templates/lobby.invite.expired/en/text.tmpl @@ -0,0 +1,4 @@ +An invite for {{.game_name}} expired before redemption. + +Game ID: {{.game_id}} +Invitee user ID: {{.invitee_user_id}} diff --git a/mail/templates/lobby.invite.redeemed/en/subject.tmpl b/mail/templates/lobby.invite.redeemed/en/subject.tmpl new file mode 100644 index 0000000..472db5c --- /dev/null +++ b/mail/templates/lobby.invite.redeemed/en/subject.tmpl @@ -0,0 +1 @@ +Invite redeemed for {{.game_name}} diff --git a/mail/templates/lobby.invite.redeemed/en/text.tmpl b/mail/templates/lobby.invite.redeemed/en/text.tmpl new file mode 100644 index 0000000..8cb8d10 --- /dev/null +++ b/mail/templates/lobby.invite.redeemed/en/text.tmpl @@ -0,0 +1,4 @@ +{{.invitee_name}} redeemed an invite for {{.game_name}}. + +Game ID: {{.game_id}} +Invitee user ID: {{.invitee_user_id}} diff --git a/mail/templates/lobby.membership.approved/en/subject.tmpl b/mail/templates/lobby.membership.approved/en/subject.tmpl new file mode 100644 index 0000000..9c265ab --- /dev/null +++ b/mail/templates/lobby.membership.approved/en/subject.tmpl @@ -0,0 +1 @@ +Application approved for {{.game_name}} diff --git a/mail/templates/lobby.membership.approved/en/text.tmpl b/mail/templates/lobby.membership.approved/en/text.tmpl new file mode 100644 index 0000000..219e961 --- /dev/null +++ b/mail/templates/lobby.membership.approved/en/text.tmpl @@ -0,0 +1,3 @@ +Your application for {{.game_name}} was approved. 
+ +Game ID: {{.game_id}} diff --git a/mail/templates/lobby.membership.rejected/en/subject.tmpl b/mail/templates/lobby.membership.rejected/en/subject.tmpl new file mode 100644 index 0000000..13475bf --- /dev/null +++ b/mail/templates/lobby.membership.rejected/en/subject.tmpl @@ -0,0 +1 @@ +Application rejected for {{.game_name}} diff --git a/mail/templates/lobby.membership.rejected/en/text.tmpl b/mail/templates/lobby.membership.rejected/en/text.tmpl new file mode 100644 index 0000000..162c29a --- /dev/null +++ b/mail/templates/lobby.membership.rejected/en/text.tmpl @@ -0,0 +1,3 @@ +Your application for {{.game_name}} was rejected. + +Game ID: {{.game_id}} diff --git a/mail/templates/lobby.runtime_paused_after_start/en/subject.tmpl b/mail/templates/lobby.runtime_paused_after_start/en/subject.tmpl new file mode 100644 index 0000000..b6f7b04 --- /dev/null +++ b/mail/templates/lobby.runtime_paused_after_start/en/subject.tmpl @@ -0,0 +1 @@ +Game paused after start: {{.game_name}} diff --git a/mail/templates/lobby.runtime_paused_after_start/en/text.tmpl b/mail/templates/lobby.runtime_paused_after_start/en/text.tmpl new file mode 100644 index 0000000..ef27e7b --- /dev/null +++ b/mail/templates/lobby.runtime_paused_after_start/en/text.tmpl @@ -0,0 +1,3 @@ +{{.game_name}} entered paused state after runtime startup. + +Game ID: {{.game_id}} diff --git a/notification/PLAN.md b/notification/PLAN.md new file mode 100644 index 0000000..09ad071 --- /dev/null +++ b/notification/PLAN.md @@ -0,0 +1,365 @@ +# Notification Service Implementation Plan + +This plan has already been implemented and stays here for historical reasons. + +It should NOT be treated as a source of truth for service functionality. + +## Summary + +This plan builds `Notification Service` as a durable asynchronous orchestration +service between domain producers, `Gateway`, `Mail Service`, and `User Service`.
+The implementation must keep business-audience resolution in the producer, +contact enrichment in `Notification Service`, client push delivery in +`Gateway`, and email execution in `Mail Service`. + +## Global Rules + +- Keep `Notification Service` orchestration-only. +- Preserve direct auth-code email flow from `Auth / Session Service` to `Mail Service`. +- Use one dedicated Redis Stream for normalized notification intents. +- Keep route retries independent per channel. +- Do not make notification delivery a correctness dependency for gameplay or + geo review state. +- Keep user-facing push payloads lightweight. + +## ~~Stage 01.~~ Freeze Vocabulary And Cross-Service Ownership + +Status: implemented. + +Note: + +- Later-stage artifacts may already exist in the repository as draft or + pre-staged documentation. +- Their presence does not mark the corresponding later stages as implemented. + +Goal: + +- remove ambiguity before runtime work starts + +Tasks: + +- Freeze `notification:intents` as the dedicated ingress stream. +- Freeze that producers publish concrete `recipient_user_id` values for + user-targeted intents. +- Freeze that `Notification Service` resolves user email and locale from + `User Service`. +- Freeze that admin-only notifications use type-specific configured email + lists. +- Freeze that `template_id == notification_type`. +- Freeze that private-game invites in v1 are user-bound by internal `user_id`. + +Exit criteria: + +- `ARCHITECTURE.md`, `TESTING.md`, and service READMEs no longer contradict the + agreed notification model + +## ~~Stage 02.~~ Define The Intent Contract + +Status: implemented. + +Goal: + +- publish one stable producer-to-notification contract + +Tasks: + +- Add `notification/api/intents-asyncapi.yaml`. 
+- Freeze envelope fields: + - `notification_type` + - `producer` + - `audience_kind` + - `recipient_user_ids_json` + - `idempotency_key` + - `occurred_at_ms` + - `request_id` + - `trace_id` + - `payload_json` +- Freeze duplicate and conflict rules on `(producer, idempotency_key)`. +- Freeze `audience_kind=user|admin_email`. + +Exit criteria: + +- every producer can publish normalized intents without service-specific side + agreements + +## ~~Stage 03.~~ Freeze The Notification Catalog + +Status: implemented. + +Goal: + +- turn product decisions into one exact type catalog + +Tasks: + +- Freeze v1 types and channel matrix. +- Freeze which types are user-targeted versus admin-only. +- Freeze that `lobby.application.submitted` is user-targeted for private games + and admin-email-only for public games. +- Freeze that `lobby.invite.revoked` produces no notification. +- Freeze payload requirements per type. + +Exit criteria: + +- no notification type remains partially specified + +## ~~Stage 04.~~ Define Push Payload Schemas + +Status: implemented. + +Goal: + +- freeze lightweight client-facing payloads + +Tasks: + +- Add `pkg/schema/fbs/notification.fbs`. +- Define one table per user-facing push type. +- Generate Go bindings under `pkg/schema/fbs/notification`. +- Document the mapping from `notification_type` to FlatBuffers table. + +Exit criteria: + +- `Gateway` and future client code have one stable schema file for + user-facing notification payloads + +## ~~Stage 05.~~ Freeze Mail Template Contracts + +Status: implemented. + +Goal: + +- make email handoff deterministic + +Tasks: + +- Freeze `payload_mode=template` for notification-generated email. +- Add initial `en` templates for all supported email types in + `mail/templates/<notification_type>/en`. +- Update `mail` documentation so notification template IDs align with + `notification_type`. +- Keep `Auth / Session Service` auth-code mail unchanged.
+ +Exit criteria: + +- every supported email notification type has a documented template directory + +## ~~Stage 06.~~ Define Redis State And Retry Model + +Status: implemented. + +Goal: + +- freeze durable service-local storage before runtime code + +Tasks: + +- Define `notification_record`, `notification_route`, + `notification_idempotency_record`, `notification_dead_letter_entry`, and + malformed-intent storage. +- Freeze Redis keys and schedule structures. +- Freeze route status vocabulary: + - `pending` + - `published` + - `failed` + - `dead_letter` + - `skipped` +- Freeze retry budgets: + - `push=3` + - `email=7` + +Exit criteria: + +- the runtime can restart without losing accepted-or-retryable work + +## ~~Stage 07.~~ Build The Runnable Service Skeleton + +Status: implemented. + +Goal: + +- create the initial process shape + +Tasks: + +- Add `cmd/notification`. +- Add `internal/app`, `internal/config`, `internal/api`, `internal/service`, + and `internal/adapters` packages. +- Wire Redis startup checks, graceful shutdown, logger setup, and telemetry. +- Do not add an operator REST API in v1. + +Exit criteria: + +- the process boots with Redis and configuration validation only + +## ~~Stage 08.~~ Implement Intent Acceptance And Idempotency + +Status: implemented. + +Goal: + +- durably accept valid intents and reject invalid or conflicting duplicates + +Tasks: + +- Consume `notification:intents`. +- Validate the envelope and normalized payload. +- Persist idempotency records and accepted notification records. +- Record malformed intents separately. +- Materialize channel routes according to the type catalog and `audience_kind`. + +Exit criteria: + +- valid intents are durable and replay-safe before downstream publication begins + +## ~~Stage 09.~~ Implement User Enrichment And Locale Resolution + +Status: implemented. + +Goal: + +- make user-targeted routes self-sufficient for later publication + +Tasks: + +- Read users by `user_id` from `User Service`. 
+- Extract `email` and `preferred_language`. +- Apply `en` fallback when locale is missing or unsupported. +- Keep admin-email routes independent from `User Service`. + +Exit criteria: + +- every user-targeted route can be published without additional producer input + +## ~~Stage 10.~~ Implement Push Publication + +Status: implemented. + +Goal: + +- hand off user-facing notification events to `Gateway` + +Tasks: + +- Encode the correct FlatBuffers table per `notification_type`. +- Publish client events into the configured `Gateway` stream with `user_id` + targeting only. +- Apply independent `push` retry policy and route-level dead-letter handling. + +Exit criteria: + +- user-targeted push notifications survive temporary `Gateway` stream failures + +## ~~Stage 11.~~ Implement Mail Publication + +Status: implemented. + +Goal: + +- hand off non-auth email notifications to `Mail Service` + +Tasks: + +- Build template-mode generic mail commands. +- Set `template_id == notification_type`. +- Pass through normalized template variables from `payload_json`. +- Apply independent `email` retry policy and route-level dead-letter handling. + +Exit criteria: + +- user and admin email notifications are durably handed off to `Mail Service` + +## ~~Stage 12.~~ Integrate Producers + +Status: implemented. + +Note: + +- Implemented as the shared Go producer contract module + `galaxy/notificationintent` because `Game Lobby` and `Geo Profile Service` + code modules are not present in this repository yet. 
+ +Goal: + +- move upstream services onto the new notification contract + +Tasks: + +- `Game Master` publishes: + - `game.turn.ready` + - `game.finished` + - `game.generation_failed` +- `Game Lobby` publishes: + - `lobby.runtime_paused_after_start` + - `lobby.application.submitted` + - `lobby.membership.approved` + - `lobby.membership.rejected` + - `lobby.invite.created` + - `lobby.invite.redeemed` + - `lobby.invite.expired` +- `Geo Profile Service` publishes: + - `geo.review_recommended` +- Update `Game Lobby` architecture and later implementation plan to use + user-bound private invites by `user_id`. + +Exit criteria: + +- producers no longer rely on ad hoc notification-side audience inference + +## ~~Stage 13.~~ Add Observability And Recovery Coverage + +Status: implemented. + +Goal: + +- make the async runtime supportable in operations + +Tasks: + +- Add metrics for intake, duplicates, enrichment, publish attempts, retries, + dead letters, and lag. +- Add structured logging fields shared across intake and route publishers. +- Document manual recovery steps for dead-letter inspection and replay. + +Exit criteria: + +- the runtime exposes enough signals to detect stuck, noisy, or broken delivery + +## ~~Stage 14.~~ Complete Test Coverage And Documentation Alignment + +Status: implemented. + +Goal: + +- close the loop across service tests, boundary tests, and docs + +Tasks: + +- Add service tests for malformed intents, duplicates, locale fallback, retry + budgets, and route isolation. +- Add inter-service tests with `Gateway`, `Mail Service`, `Game Master`, + `Game Lobby`, and `Geo Profile Service`. +- Update `TESTING.md`. +- Update `ARCHITECTURE.md`, `mail/README.md`, `geoprofile/README.md`, and + gateway examples. +- Verify docs still state that auth-code mail bypasses `Notification Service`. 
+ +Exit criteria: + +- the implementation and the cross-service documentation describe the same + contracts + +## Final Acceptance Criteria + +The implementation is complete only when all of the following hold: + +- valid intents are consumed from `notification:intents` +- duplicates are idempotent and conflicting duplicates are rejected +- user enrichment resolves email and locale from `User Service` +- `push` and `email` routes are persisted and retried independently +- route dead letters are isolated per channel and per recipient +- `Gateway` fan-out remains user-wide, not session-specific +- `Mail Service` receives template-mode commands whose template IDs match + notification types +- admin notifications remain `email`-only +- auth-code email still bypasses `Notification Service` diff --git a/notification/README.md b/notification/README.md new file mode 100644 index 0000000..37959e7 --- /dev/null +++ b/notification/README.md @@ -0,0 +1,665 @@ +# Notification Service + +Canonical references: + +- [Service-local docs](docs/README.md) +- [Intent AsyncAPI contract](api/intents-asyncapi.yaml) +- [Probe OpenAPI contract](openapi.yaml) +- [Gateway push model](../gateway/README.md) +- [Mail async command contract](../mail/api/delivery-commands-asyncapi.yaml) +- [Notification FlatBuffers payloads](../pkg/schema/fbs/notification.fbs) +- [System architecture](../ARCHITECTURE.md) + +## Purpose + +`Notification Service` is the internal asynchronous orchestration layer for +platform notifications. + +It accepts normalized notification intents from upstream services, materializes +per-recipient routes, enriches user-targeted routes through `User Service`, +publishes client-facing push events toward `Gateway`, publishes non-auth email +commands toward `Mail Service`, and isolates transient downstream failures with +independent retry budgets per channel. 
+ +The service is intentionally not a source of truth for: + +- game state +- lobby membership +- invite ownership +- review flags +- notification preferences +- email delivery attempts + +## Responsibility Boundaries + +`Notification Service` is responsible for: + +- consuming normalized notification intents from a dedicated Redis Stream +- validating intent envelopes and rejecting malformed or conflicting duplicates +- persisting durable notification and route state +- resolving user contact data from `User Service` by `user_id` +- selecting locale from `User Service.preferred_language` with `en` fallback +- shaping lightweight push payloads for user-facing events +- publishing template-mode email commands to `Mail Service` +- retrying route publication independently for `push` and `email` +- persisting dead-letter entries for exhausted routes + +`Notification Service` is not responsible for: + +- computing business audiences from `game_id` or other domain identifiers +- owning administrator identity or administrator user records +- sending auth-code email +- storing per-user notification preferences in v1 +- exposing an operator REST API in v1 + +The key design rule is that upstream producers must publish the concrete +`recipient_user_id` values for user-targeted notification intents. For +administrator-only notification types, recipient email addresses are resolved +from `Notification Service` configuration by `notification_type`. Private-game +invite notifications in v1 remain user-bound by internal `user_id` values and +must not target recipients by raw email address. 
+ +## Runtime Surface + +The implemented process contains: + +- one private internal HTTP probe listener +- process-wide structured logging +- process-wide OpenTelemetry runtime +- one shared `galaxy/notificationintent` producer contract module +- one shared Redis client with startup connectivity check +- one trusted `User Service` HTTP enrichment client +- one plain-`XREAD` notification-intent consumer +- one long-lived `push` route publisher +- one long-lived `email` route publisher +- durable accepted-intent, route, idempotency, malformed-intent, and + stream-offset storage in Redis +- user-targeted route enrichment during intent acceptance before durable write +- client-facing `push` publication toward `Gateway` +- template-mode `email` publication toward `Mail Service` +- durable `push` and `email` retry, dead-letter, and temporary lease + coordination in Redis +- OpenTelemetry counters and observable gauges for intent intake, user + enrichment, route publication, route schedule depth, and intent stream lag +- graceful shutdown on process cancellation + +Probe contract: + +- `GET /healthz` returns `{"status":"ok"}` +- `GET /readyz` returns `{"status":"ready"}` +- `readyz` is process-local after successful startup and does not perform a + live Redis ping per request +- there is no `/metrics` route + +Runtime behavior: + +- the intent consumer reads `notification:intents` with plain `XREAD` +- when no stored stream offset exists, the consumer starts from `0-0` +- the persisted offset advances only after durable acceptance or durable + malformed-intent recording +- user-targeted routes are enriched through `GET /api/v1/internal/users/{user_id}` + before durable route write +- `404 subject_not_found` from `User Service` is recorded under + malformed-intent storage with `failure_code=recipient_not_found` +- temporary `User Service` lookup failures stop the consumer before + stream-offset advance +- due `push` routes are published toward `Gateway` from the shared + 
`notification:route_schedule` +- due `email` routes are published toward `Mail Service` from the shared + `notification:route_schedule` +- the `push` publisher claims only routes whose `route_id` starts with `push:` +- the `email` publisher claims only routes whose `route_id` starts with `email:` +- replicas coordinate through temporary Redis lease + `notification:route_leases:<notification_id>:<route_id>` +- `Gateway` publication uses `XADD MAXLEN ~` with + `NOTIFICATION_GATEWAY_CLIENT_EVENTS_STREAM_MAX_LEN` +- `event_id` equals `<notification_id>/<route_id>` +- `Mail Service` publication uses plain `XADD` with no stream trimming +- `delivery_id` equals `<notification_id>/<route_id>` +- `idempotency_key` equals `notification:<notification_id>/<route_id>` +- `requested_at_ms` equals `accepted_at_ms` +- `request_id` and `trace_id` are forwarded when present +- `device_session_id` is intentionally omitted so `Gateway` fans the event out + to every active stream of that user +- Go producers use `galaxy/notificationintent` to construct and publish + compatible intents into `notification:intents` +- producer publication uses plain `XADD` without stream trimming or hidden + helper retries +- a producer-side notification publication failure is notification degradation + and must not roll back the source business state +- metric export uses the configured OpenTelemetry exporters only +- there is still no `/metrics` route +- `notification.route_schedule.depth` and + `notification.route_schedule.oldest_age_ms` are derived from + `notification:route_schedule` +- `notification.intent_stream.oldest_unprocessed_age_ms` is derived from the + persisted intent stream offset and the configured ingress stream +- manual dead-letter replay is performed by publishing a new compatible intent + with a new `idempotency_key`; existing dead-letter records remain audit + history until TTL expiry + +The target process shape is one internal-only process with: + +- one notification-intent consumer +- one `push` route publisher for `Gateway` +- one `email` route publisher for `Mail Service` + 
+Intentional runtime omissions in v1: + +- no public ingress +- no dedicated operator REST API +- no direct client delivery +- no direct SMTP integration + +## Configuration + +Required: + +- `NOTIFICATION_REDIS_ADDR` +- `NOTIFICATION_USER_SERVICE_BASE_URL` + +Primary configuration groups: + +- process and logging: + - `NOTIFICATION_SHUTDOWN_TIMEOUT` + - `NOTIFICATION_LOG_LEVEL` +- internal probe HTTP: + - `NOTIFICATION_INTERNAL_HTTP_ADDR` with default `:8092` + - `NOTIFICATION_INTERNAL_HTTP_READ_HEADER_TIMEOUT` with default `2s` + - `NOTIFICATION_INTERNAL_HTTP_READ_TIMEOUT` with default `10s` + - `NOTIFICATION_INTERNAL_HTTP_IDLE_TIMEOUT` with default `1m` +- Redis connectivity: + - `NOTIFICATION_REDIS_USERNAME` + - `NOTIFICATION_REDIS_PASSWORD` + - `NOTIFICATION_REDIS_DB` + - `NOTIFICATION_REDIS_TLS_ENABLED` + - `NOTIFICATION_REDIS_OPERATION_TIMEOUT` +- stream names: + - `NOTIFICATION_INTENTS_STREAM` with default `notification:intents` + - `NOTIFICATION_INTENTS_READ_BLOCK_TIMEOUT` with default `2s` + - `NOTIFICATION_GATEWAY_CLIENT_EVENTS_STREAM` with default `gateway:client-events` + - `NOTIFICATION_GATEWAY_CLIENT_EVENTS_STREAM_MAX_LEN` with default `1024` + - `NOTIFICATION_MAIL_DELIVERY_COMMANDS_STREAM` with default `mail:delivery_commands` +- retry and dead-letter: + - `NOTIFICATION_PUSH_RETRY_MAX_ATTEMPTS` with default `3` + - `NOTIFICATION_EMAIL_RETRY_MAX_ATTEMPTS` with default `7` + - `NOTIFICATION_ROUTE_BACKOFF_MIN` with default `1s` + - `NOTIFICATION_ROUTE_BACKOFF_MAX` with default `5m` + - `NOTIFICATION_ROUTE_LEASE_TTL` with default `5s` + - `NOTIFICATION_DEAD_LETTER_TTL` with default `720h` + - `NOTIFICATION_RECORD_TTL` with default `720h` + - `NOTIFICATION_IDEMPOTENCY_TTL` with default `168h` +- `User Service` enrichment: + - `NOTIFICATION_USER_SERVICE_TIMEOUT` with default `1s` +- administrator routing: + - `NOTIFICATION_ADMIN_EMAILS_GEO_REVIEW_RECOMMENDED` + - `NOTIFICATION_ADMIN_EMAILS_GAME_GENERATION_FAILED` + - 
`NOTIFICATION_ADMIN_EMAILS_LOBBY_RUNTIME_PAUSED_AFTER_START` + - `NOTIFICATION_ADMIN_EMAILS_LOBBY_APPLICATION_SUBMITTED` +- OpenTelemetry: + - standard `OTEL_*` variables + - `NOTIFICATION_OTEL_STDOUT_TRACES_ENABLED` + - `NOTIFICATION_OTEL_STDOUT_METRICS_ENABLED` + +Each administrator configuration variable stores a comma-separated list of +email addresses for exactly one `notification_type`. v1 does not use one global +admin-recipient list shared across all administrative events. + +## Stable Input Contract + +The service accepts intents from one dedicated Redis Stream: + +- `notification:intents` + +The canonical envelope is defined in +[api/intents-asyncapi.yaml](api/intents-asyncapi.yaml). +Go producers should use the shared `galaxy/notificationintent` module to build +and append compatible stream entries instead of duplicating field names, +payload structs, or validation rules locally. + +Required envelope fields: + +- `notification_type` +- `producer` +- `audience_kind` +- `idempotency_key` +- `occurred_at_ms` +- `payload_json` + +Optional envelope fields: + +- `recipient_user_ids_json` +- `request_id` +- `trace_id` + +Rules: + +- `audience_kind=user` requires `recipient_user_ids_json` with one or more + unique stable `user_id` values +- `audience_kind=admin_email` forbids `recipient_user_ids_json` +- `recipient_user_ids_json` is normalized as an unordered recipient set, so + duplicate `user_id` values are invalid and element order does not affect + idempotency +- `request_id` and `trace_id` are observability-only metadata and do not + participate in the idempotency fingerprint +- `payload_json` is type-specific, must remain backward-compatible for each + `notification_type`, and is normalized structurally for duplicate detection: + insignificant whitespace and object key order are ignored while array order + remains significant +- a replay with the same `(producer, idempotency_key)` and the same normalized + payload is treated as a successful duplicate +- a 
replay with the same `(producer, idempotency_key)` but different normalized + content is recorded as a conflicting duplicate under malformed-intent storage + with `failure_code=idempotency_conflict` and must not create new routes +- during user enrichment, a missing `user_id` in `User Service` is recorded + under malformed-intent storage with `failure_code=recipient_not_found` + +Malformed stream entries do not create durable notification records. They are +logged, metered, and recorded separately for operator inspection. +Accepted intents use the original Redis Stream `stream_entry_id` as +`notification_id`. + +## Notification Catalog + +`payload_json` fields are normalized by the producer before publication. + +| `notification_type` | Producer | Audience | Channels | Required `payload_json` fields | +| --- | --- | --- | --- | --- | +| `geo.review_recommended` | `Geo Profile Service` (`geoprofile`) | configured admin email list (`audience_kind=admin_email`) | `email` | `user_id`, `user_email`, `observed_country`, `usual_connection_country`, `review_reason` | +| `game.turn.ready` | `Game Master` (`game_master`) | active accepted participants (`audience_kind=user`) | `push+email` | `game_id`, `game_name`, `turn_number` | +| `game.finished` | `Game Master` (`game_master`) | active accepted participants (`audience_kind=user`) | `push+email` | `game_id`, `game_name`, `final_turn_number` | +| `game.generation_failed` | `Game Master` (`game_master`) | configured admin email list (`audience_kind=admin_email`) | `email` | `game_id`, `game_name`, `failure_reason` | +| `lobby.runtime_paused_after_start` | `Game Lobby` (`game_lobby`) | configured admin email list (`audience_kind=admin_email`) | `email` | `game_id`, `game_name` | +| `lobby.application.submitted` | `Game Lobby` (`game_lobby`) | private owner (`audience_kind=user`) or public admins (`audience_kind=admin_email`) | private: `push+email`, public: `email` | `game_id`, `game_name`, `applicant_user_id`, 
`applicant_name` | +| `lobby.membership.approved` | `Game Lobby` (`game_lobby`) | applicant user (`audience_kind=user`) | `push+email` | `game_id`, `game_name` | +| `lobby.membership.rejected` | `Game Lobby` (`game_lobby`) | applicant user (`audience_kind=user`) | `push+email` | `game_id`, `game_name` | +| `lobby.invite.created` | `Game Lobby` (`game_lobby`) | invited user (`audience_kind=user`) | `push+email` | `game_id`, `game_name`, `inviter_user_id`, `inviter_name` | +| `lobby.invite.redeemed` | `Game Lobby` (`game_lobby`) | private-game owner (`audience_kind=user`) | `push+email` | `game_id`, `game_name`, `invitee_user_id`, `invitee_name` | +| `lobby.invite.expired` | `Game Lobby` (`game_lobby`) | private-game owner (`audience_kind=user`) | `email` | `game_id`, `game_name`, `invitee_user_id`, `invitee_name` | + +Rules: + +- v1 supports exactly the eleven `notification_type` values listed above +- `lobby.application.submitted` keeps one stable `notification_type` and one + stable `payload_json` shape; private games publish `audience_kind=user` + while public games publish `audience_kind=admin_email` +- `lobby.invite.revoked` deliberately produces no notification in v1 and + remains outside the supported catalog +- private-game invite notifications remain user-bound by internal `user_id` + +## Recipient Enrichment And Locale Policy + +For `audience_kind=user`, `Notification Service` resolves user records through +the trusted `User Service` lookup endpoint: + +- `GET /api/v1/internal/users/{user_id}` + +The response supplies: + +- `email` +- `preferred_language` + +Locale rules: + +- current implemented support is exactly one locale: `en` +- exact `preferred_language` is used when supported by `Mail Service` +- unsupported, empty, or invalid language values fall back to `en` +- no intermediate locale reduction is used in v1 +- the same resolved locale drives both `push` payload localization decisions + and `Mail Service` template selection +- enrichment runs 
during intent acceptance before durable route write +- `404 subject_not_found` from `User Service` is treated as permanent producer + input error and becomes malformed-intent `recipient_not_found` +- temporary `User Service` failures stop the consumer before stream-offset + advance so the same stream entry is retried after restart + +For `audience_kind=admin_email`, `Notification Service` does not consult +`User Service` and instead resolves recipients from type-specific config. + +## Push Contract Toward Gateway + +Push events are published into the existing `Gateway` client-events stream. + +Stable routing rules: + +- `event_type` equals `notification_type` +- `event_id` equals `<notification_id>/<route_id>` +- `user_id` is derived from `recipient_ref=user:<user_id>` for user-targeted + routes +- `request_id` and `trace_id` are forwarded when present +- `device_session_id` is intentionally omitted so `Gateway` fans the event out + to every active stream of that user + +`Notification Service` appends `Gateway` events with `XADD MAXLEN ~` using +`NOTIFICATION_GATEWAY_CLIENT_EVENTS_STREAM_MAX_LEN`. + +User-facing push payloads use +[pkg/schema/fbs/notification.fbs](../pkg/schema/fbs/notification.fbs). 
+ +| `notification_type` | FlatBuffers table | Payload fields | +| --- | --- | --- | +| `game.turn.ready` | `notification.GameTurnReadyEvent` | `game_id`, `turn_number` | +| `game.finished` | `notification.GameFinishedEvent` | `game_id`, `final_turn_number` | +| `lobby.application.submitted` | `notification.LobbyApplicationSubmittedEvent` | `game_id`, `applicant_user_id` | +| `lobby.membership.approved` | `notification.LobbyMembershipApprovedEvent` | `game_id` | +| `lobby.membership.rejected` | `notification.LobbyMembershipRejectedEvent` | `game_id` | +| `lobby.invite.created` | `notification.LobbyInviteCreatedEvent` | `game_id`, `inviter_user_id` | +| `lobby.invite.redeemed` | `notification.LobbyInviteRedeemedEvent` | `game_id`, `invitee_user_id` | + +Only the seven user-facing push notification types above are represented in +`notification.fbs`. +`geo.review_recommended`, `game.generation_failed`, +`lobby.runtime_paused_after_start`, and `lobby.invite.expired` remain outside +this schema because they are email-only in v1. + +Checked-in generated Go bindings for this schema live under +[`../pkg/schema/fbs/notification`](../pkg/schema/fbs/notification). + +`notification_type` alone determines the concrete FlatBuffers table. +No extra envelope or FlatBuffers `union` is added in v1. + +The push payload must stay lightweight and must not attempt to mirror full game, +lobby, or profile state. +`game_name`, human-readable user names, and other full business-state fields +stay out of the push schema. +Clients react to the notification and then fetch fresh business state through +normal service APIs. + +## Email Contract Toward Mail Service + +Email routes are published to `Mail Service` through +`mail:delivery_commands` using the existing generic async command contract. 
 + +Rules: + +- `delivery_id` equals `<notification_id>/<route_id>` +- `source` is always `notification` +- `payload_mode` is always `template` +- `idempotency_key` equals `notification:<notification_id>/<route_id>` +- `requested_at_ms` equals `accepted_at_ms` +- `request_id` and `trace_id` are forwarded when present +- `payload_json.to` contains exactly one resolved recipient email +- `payload_json.cc`, `payload_json.bcc`, `payload_json.reply_to`, and + `payload_json.attachments` are empty arrays in v1 +- `template_id` equals `notification_type` +- `locale` is the resolved language from the enrichment step or `en` +- template variables are passed through from normalized `payload_json` + +`Notification Service` appends `Mail Service` commands with plain `XADD` and +does not manage retention or trimming of `mail:delivery_commands`. + +Auth-code email remains a direct `Auth / Session Service -> Mail Service` flow +and does not pass through `Notification Service`. + +Initial notification-owned template assets: + +| `notification_type` | `template_id` | Required assets | +| --- | --- | --- | +| `geo.review_recommended` | `geo.review_recommended` | `en/subject.tmpl`, `en/text.tmpl` | +| `game.turn.ready` | `game.turn.ready` | `en/subject.tmpl`, `en/text.tmpl` | +| `game.finished` | `game.finished` | `en/subject.tmpl`, `en/text.tmpl` | +| `game.generation_failed` | `game.generation_failed` | `en/subject.tmpl`, `en/text.tmpl` | +| `lobby.runtime_paused_after_start` | `lobby.runtime_paused_after_start` | `en/subject.tmpl`, `en/text.tmpl` | +| `lobby.application.submitted` | `lobby.application.submitted` | `en/subject.tmpl`, `en/text.tmpl` | +| `lobby.membership.approved` | `lobby.membership.approved` | `en/subject.tmpl`, `en/text.tmpl` | +| `lobby.membership.rejected` | `lobby.membership.rejected` | `en/subject.tmpl`, `en/text.tmpl` | +| `lobby.invite.created` | `lobby.invite.created` | `en/subject.tmpl`, `en/text.tmpl` | +| `lobby.invite.redeemed` | `lobby.invite.redeemed` | `en/subject.tmpl`, `en/text.tmpl` | +| 
`lobby.invite.expired` | `lobby.invite.expired` | `en/subject.tmpl`, `en/text.tmpl` | + +`auth.login_code` does not belong to the notification-owned template set. + +## Route Model + +One accepted intent materializes: + +- one `notification_record` +- zero or more `notification_route` entries + +Each route represents exactly one `(channel, recipient_ref)` pair. + +Stable route statuses: + +- `pending` +- `published` +- `failed` +- `dead_letter` +- `skipped` + +Rules: + +- `pending` means the route is ready for first publish or retry +- `published` means the route was durably handed off to its downstream channel +- `failed` means the last publish attempt failed and a later retry is scheduled +- `dead_letter` means the route exhausted its retry budget +- `skipped` means the route slot was durably materialized but intentionally not + emitted + +Materialization rules: + +- every derived `recipient_ref` receives one `push` route slot and one `email` + route slot, except that an empty administrator email list materializes one + synthetic `config:<notification_type>` recipient slot with only a skipped + `email` route +- a route slot whose channel is outside the notification type channel matrix is + materialized as `skipped` +- `recipient_ref` is `user:<user_id>` for user-targeted routes +- `recipient_ref` is `email:<admin_email>` for configured administrator + email routes +- when an administrator email list is empty, the service materializes one + synthetic recipient slot `config:<notification_type>` with one skipped + `email` route so the configuration gap remains durable and operator-visible +- `route_id` is mandatory and equals `<channel>:<recipient_ref>` + +The service-local aggregate notification status is derived from routes and is +not a separate durable source of truth. 
 + +## Redis Logical Model + +Storage rules: + +- durable records are stored as strict JSON blobs +- timestamps are stored in Unix milliseconds +- dynamic Redis key segments are base64url-encoded +- `notification:route_schedule` is one shared sorted set for both `push` and + `email` + +| Logical artifact | Redis key | +| --- | --- | +| `notification_record` | `notification:records:<notification_id>` | +| `notification_route` | `notification:routes:<notification_id>:<route_id>` | +| temporary route lease | `notification:route_leases:<notification_id>:<route_id>` | +| `notification_idempotency_record` | `notification:idempotency:<producer>:<idempotency_key>` | +| `notification_dead_letter_entry` | `notification:dead_letters:<notification_id>:<route_id>` | +| malformed intent record | `notification:malformed_intents:<stream_entry_id>` | +| stream offset record | `notification:stream_offsets:<stream>` | +| ingress stream | `notification:intents` | +| route schedule sorted set | `notification:route_schedule` | + +| Record | Frozen fields | +| --- | --- | +| `notification_record` | `notification_id`, `notification_type`, `producer`, `audience_kind`, normalized `recipient_user_ids`, normalized `payload_json`, `idempotency_key`, `request_fingerprint`, optional `request_id`, optional `trace_id`, `occurred_at_ms`, `accepted_at_ms`, `updated_at_ms` | +| `notification_route` | `notification_id`, `route_id`, `channel`, `recipient_ref`, `status`, `attempt_count`, `max_attempts`, `next_attempt_at_ms`, optional `resolved_email`, optional `resolved_locale`, optional `last_error_classification`, optional `last_error_message`, optional `last_error_at_ms`, `created_at_ms`, `updated_at_ms`, optional `published_at_ms`, optional `dead_lettered_at_ms`, optional `skipped_at_ms` | +| `notification_idempotency_record` | `producer`, `idempotency_key`, `notification_id`, `request_fingerprint`, `created_at_ms`, `expires_at_ms` | +| `notification_dead_letter_entry` | `notification_id`, `route_id`, `channel`, `recipient_ref`, `final_attempt_count`, `max_attempts`, `failure_classification`, `failure_message`, `created_at_ms`, optional 
`recovery_hint` | +| malformed intent record | `stream_entry_id`, optional `notification_type`, optional `producer`, optional `idempotency_key`, `failure_code`, `failure_message`, `raw_fields_json`, `recorded_at_ms` | +| stream offset record | `stream`, `last_processed_entry_id`, `updated_at_ms` | + +`notification_record.recipient_user_ids` stores a normalized array of unique +`user_id` values and is omitted for `audience_kind=admin_email`. +`notification_record.payload_json` stores the canonical normalized JSON string +used for idempotency fingerprinting. +Temporary route lease keys store one opaque worker token and use +`NOTIFICATION_ROUTE_LEASE_TTL`; they are service-local coordination state +rather than durable records. +`notification:route_schedule` stores one member per scheduled route where score += `next_attempt_at_ms` and member = full Redis route key with encoded dynamic +segments. +Newly accepted publishable routes enter the schedule immediately with +`status=pending` and `next_attempt_at_ms = accepted_at_ms`. +`failed` routes remain scheduled for retry. +`published`, `dead_letter`, and `skipped` are absent from the schedule. +Only the current lease holder may finalize one due publication attempt. 
 + +## Retry And Dead-Letter Policy + +Retry budgets are channel-specific: + +- `push` publication to `Gateway`: `3` attempts total +- `email` publication to `Mail Service`: `7` attempts total + +Rules: + +- the first publication attempt happens immediately at `accepted_at_ms` +- after failed attempt `N`, the next delay is `clamp(NOTIFICATION_ROUTE_BACKOFF_MIN * 2^(N-1), NOTIFICATION_ROUTE_BACKOFF_MIN, NOTIFICATION_ROUTE_BACKOFF_MAX)` +- no jitter is added to the retry delay +- `push` and `email` routes are retried independently +- the shared schedule is filtered by route prefix so `push` publishers claim + only `push:` routes and `email` publishers claim only `email:` routes +- `push` and `email` replicas coordinate through + `notification:route_leases:<notification_id>:<route_id>` with + `NOTIFICATION_ROUTE_LEASE_TTL` +- `push` publication failures are classified minimally as + `payload_encoding_failed` and `gateway_stream_publish_failed` +- `email` publication failures are classified minimally as + `payload_encoding_failed` and `mail_stream_publish_failed` +- when a route exhausts its retry budget, it transitions to `dead_letter`, + creates `notification_dead_letter_entry`, and is removed from + `notification:route_schedule` +- one exhausted route entering `dead_letter` must not roll back or invalidate a + sibling route that already reached `published` +- service restarts resume from durable route state and persisted stream offsets + +Retention rules: + +- `notification_record` and `notification_route` use + `NOTIFICATION_RECORD_TTL` +- `notification_idempotency_record` uses `NOTIFICATION_IDEMPOTENCY_TTL` +- `notification_dead_letter_entry` and malformed intent records use + `NOTIFICATION_DEAD_LETTER_TTL` +- stream offset records do not use TTL + +## Observability + +The service instruments: + +- internal probe HTTP requests +- internal probe HTTP listener startup and shutdown events +- structured logs for accepted, duplicate, and rejected notification intents +- structured logs for `push` 
and `email` route publication, retry, and + dead-letter transitions +- accepted and duplicate intent outcomes +- malformed intents, including idempotency conflicts and unresolved recipients +- user-enrichment lookup outcomes +- route publish attempts, retries, and dead-letter transitions +- current route-schedule depth and oldest scheduled route age +- oldest unprocessed intent stream entry age + +Metric names: + +- `notification.intent.outcomes` +- `notification.intent.malformed` +- `notification.user_enrichment.attempts` +- `notification.route.publish_attempts` +- `notification.route.retries` +- `notification.route.dead_letters` +- `notification.route_schedule.depth` +- `notification.route_schedule.oldest_age_ms` +- `notification.intent_stream.oldest_unprocessed_age_ms` + +Metrics intentionally avoid high-cardinality attributes such as `user_id`, +email address, `notification_id`, `route_id`, and `idempotency_key`. + +Metric attributes may include `notification_type`, `producer`, +`audience_kind`, `channel`, `result`, `outcome`, `failure_code`, and +`failure_classification`. + +Structured logs for intent intake, duplicate resolution, malformed-intent +recording, route publication, retry scheduling, and dead-letter transitions use +the same field names where the value exists: + +- `notification_id` +- `notification_type` +- `producer` +- `audience_kind` +- `idempotency_key` +- `route_id` +- `channel` +- `request_id` +- `trace_id` + +OpenTelemetry trace context is logged as `otel_trace_id` and `otel_span_id` +when the active context carries a valid span. + +## Recovery + +The supported manual replay path for a dead-lettered notification route is to +publish a new compatible intent to `notification:intents`. 
+ +Recovery rules: + +- inspect the `notification_dead_letter_entry`, `notification_route`, and + owning `notification_record` +- confirm the downstream dependency or payload problem has been corrected +- publish a new intent with the same semantic `payload_json` and audience + fields, but with a new producer-owned `idempotency_key` +- keep the old `notification_dead_letter_entry` untouched as audit history + until its configured TTL expires + +Manual Redis mutation of an existing route record or +`notification:route_schedule` is not a supported replay workflow. + +## Verification + +Focused service-local coverage verifies: + +- configuration loading and validation +- `GET /healthz` +- `GET /readyz` +- absence of `/metrics` +- Redis startup fast-fail behavior +- graceful shutdown of the private probe listener +- valid intent acceptance +- malformed intent rejection +- duplicate and conflicting duplicate handling +- user-targeted route enrichment from `User Service` +- `recipient_not_found` malformed-intent recording for unresolved `user_id` +- temporary `User Service` failure handling without stream-offset advance +- FlatBuffers payload encoding for all seven user-facing `push` + `notification_type` values +- template-mode `Mail Service` command encoding for user and administrator + `email` routes +- due-route loading, lease acquisition, route publication, retry reschedule, + and dead-letter persistence in Redis +- `push` worker success, retry, and duplicate-prevention behavior across + concurrent replicas +- `email` worker success, retry, and duplicate-prevention behavior across + concurrent replicas +- OpenTelemetry metric recording for intent outcomes, malformed intents, user + enrichment, route publication attempts, retries, dead letters, route-schedule + gauges, and intent-stream lag +- Redis-backed route-schedule and intent-stream lag snapshots +- structured log field helper coverage through intake and publisher tests +- intent-consumer restart from `0-0` and 
from persisted stream offsets +- runtime wiring of the intent consumer and both route publishers +- shared `galaxy/notificationintent` producer constructors, validation, and + Redis Stream publication compatibility + +Cross-service coverage verifies: + +- `Notification Service -> User Service` enrichment compatibility and failure + handling +- `Notification Service -> Gateway` push compatibility for every user-facing + `notification_type` +- `Notification Service -> Mail Service` template-mode handoff for every + supported email type +- producer compatibility for `Game Master`, `Game Lobby`, and + `Geo Profile Service` through `galaxy/notificationintent` +- explicit regression coverage that auth-code email still bypasses `Notification Service` +- real black-box `Notification Service -> Gateway` push fan-out coverage +- real black-box `Notification Service -> Mail Service` template-mode handoff + coverage + +Real producer-boundary suites for `Game Master`, `Game Lobby`, and +`Geo Profile Service` should be added only when those service boundaries exist +in code. diff --git a/notification/api/intents-asyncapi.yaml b/notification/api/intents-asyncapi.yaml new file mode 100644 index 0000000..1483ce7 --- /dev/null +++ b/notification/api/intents-asyncapi.yaml @@ -0,0 +1,556 @@ +asyncapi: 3.1.0 +info: + title: Notification Service Intent Contract + version: 1.0.0 + description: | + Stable Redis Streams contract for normalized notification intents + published by upstream services toward Notification Service. +channels: + intents: + address: notification:intents + messages: + notificationIntent: + $ref: '#/components/messages/NotificationIntent' +operations: + publishNotificationIntent: + action: send + summary: Publish one normalized notification intent.
+ channel: + $ref: '#/channels/intents' + messages: + - $ref: '#/channels/intents/messages/notificationIntent' +components: + messages: + NotificationIntent: + name: NotificationIntent + title: Notification intent + summary: One normalized notification request published into Notification Service. + payload: + $ref: '#/components/schemas/NotificationIntentEnvelope' + examples: + - name: gameTurnReady + summary: User-targeted game-turn notification. + payload: + notification_type: game.turn.ready + producer: game_master + audience_kind: user + recipient_user_ids_json: '["user-1","user-2"]' + idempotency_key: game-master:game-123:turn-54 + occurred_at_ms: "1775121700000" + request_id: request-123 + trace_id: trace-123 + payload_json: '{"game_id":"game-123","game_name":"Nebula Clash","turn_number":54}' + - name: geoReviewRecommended + summary: Administrator email notification. + payload: + notification_type: geo.review_recommended + producer: geoprofile + audience_kind: admin_email + idempotency_key: geoprofile:user-123:review-true:1775121700001 + occurred_at_ms: "1775121700001" + payload_json: '{"user_id":"user-123","user_email":"pilot@example.com","observed_country":"DE","usual_connection_country":"PL","review_reason":"country_mismatch"}' + - name: lobbyApplicationSubmittedPublic + summary: Public-game application notification sent to configured admins. + payload: + notification_type: lobby.application.submitted + producer: game_lobby + audience_kind: admin_email + idempotency_key: game-lobby:game-456:application-submitted:user-42 + occurred_at_ms: "1775121700002" + payload_json: '{"game_id":"game-456","game_name":"Orion Front","applicant_user_id":"user-42","applicant_name":"Nova Pilot"}' + schemas: + NotificationIntentEnvelope: + type: object + additionalProperties: false + description: | + Stable producer-to-notification envelope for one normalized + notification intent. + + Duplicate handling is scoped by `(producer, idempotency_key)`. 
+ A replay with the same normalized content is a successful duplicate. + A replay with different normalized content is a conflict. + + `request_id` and `trace_id` are observability-only metadata and do not + participate in idempotency fingerprinting. + required: + - notification_type + - producer + - audience_kind + - idempotency_key + - occurred_at_ms + - payload_json + properties: + notification_type: + type: string + enum: + - geo.review_recommended + - game.turn.ready + - game.finished + - game.generation_failed + - lobby.runtime_paused_after_start + - lobby.application.submitted + - lobby.membership.approved + - lobby.membership.rejected + - lobby.invite.created + - lobby.invite.redeemed + - lobby.invite.expired + description: | + Exact v1 notification type catalog. `lobby.invite.revoked` + deliberately remains outside the supported catalog because it + produces no notification. + producer: + type: string + enum: + - geoprofile + - game_master + - game_lobby + description: | + Stable producer identifier. The exact producer value is frozen per + `notification_type` by the v1 catalog. + audience_kind: + type: string + enum: + - user + - admin_email + description: | + Delivery audience selector. + `user` targets concrete `user_id` values from the producer. + `admin_email` targets configured administrator email lists. + recipient_user_ids_json: + type: string + description: | + JSON-encoded array of unique stable `user_id` values. + + Required for `audience_kind=user`. Forbidden for + `audience_kind=admin_email`. + + `Notification Service` treats the recipient set as unordered for + idempotency purposes: duplicate `user_id` values are invalid and + element order does not change normalized content. 
+ contentMediaType: application/json + contentSchema: + type: array + minItems: 1 + uniqueItems: true + items: + type: string + minLength: 1 + idempotency_key: + type: string + minLength: 1 + description: | + Producer-owned idempotency key scoped together with `producer`. + occurred_at_ms: + type: string + pattern: '^[0-9]+$' + description: Milliseconds since Unix epoch as a base-10 string. + request_id: + type: string + description: Optional observability request identifier. + trace_id: + type: string + description: Optional observability trace identifier. + payload_json: + type: string + description: | + JSON-encoded type-specific payload. Payload normalization ignores + insignificant whitespace and object key order, while array order + remains significant. Required payload fields are frozen per + `notification_type`. + contentMediaType: application/json + contentSchema: + type: object + additionalProperties: true + allOf: + - if: + properties: + audience_kind: + const: user + required: + - audience_kind + then: + required: + - recipient_user_ids_json + - if: + properties: + audience_kind: + const: admin_email + required: + - audience_kind + then: + not: + required: + - recipient_user_ids_json + - if: + properties: + notification_type: + const: geo.review_recommended + required: + - notification_type + then: + properties: + producer: + const: geoprofile + audience_kind: + const: admin_email + payload_json: + contentSchema: + $ref: '#/components/schemas/GeoReviewRecommendedPayload' + - if: + properties: + notification_type: + const: game.turn.ready + required: + - notification_type + then: + properties: + producer: + const: game_master + audience_kind: + const: user + payload_json: + contentSchema: + $ref: '#/components/schemas/GameTurnReadyPayload' + - if: + properties: + notification_type: + const: game.finished + required: + - notification_type + then: + properties: + producer: + const: game_master + audience_kind: + const: user + payload_json: + contentSchema: 
+ $ref: '#/components/schemas/GameFinishedPayload' + - if: + properties: + notification_type: + const: game.generation_failed + required: + - notification_type + then: + properties: + producer: + const: game_master + audience_kind: + const: admin_email + payload_json: + contentSchema: + $ref: '#/components/schemas/GameGenerationFailedPayload' + - if: + properties: + notification_type: + const: lobby.runtime_paused_after_start + required: + - notification_type + then: + properties: + producer: + const: game_lobby + audience_kind: + const: admin_email + payload_json: + contentSchema: + $ref: '#/components/schemas/LobbyRuntimePausedAfterStartPayload' + - if: + properties: + notification_type: + const: lobby.application.submitted + required: + - notification_type + then: + properties: + producer: + const: game_lobby + payload_json: + contentSchema: + $ref: '#/components/schemas/LobbyApplicationSubmittedPayload' + oneOf: + - properties: + audience_kind: + const: user + required: + - audience_kind + - properties: + audience_kind: + const: admin_email + required: + - audience_kind + - if: + properties: + notification_type: + const: lobby.membership.approved + required: + - notification_type + then: + properties: + producer: + const: game_lobby + audience_kind: + const: user + payload_json: + contentSchema: + $ref: '#/components/schemas/LobbyMembershipApprovedPayload' + - if: + properties: + notification_type: + const: lobby.membership.rejected + required: + - notification_type + then: + properties: + producer: + const: game_lobby + audience_kind: + const: user + payload_json: + contentSchema: + $ref: '#/components/schemas/LobbyMembershipRejectedPayload' + - if: + properties: + notification_type: + const: lobby.invite.created + required: + - notification_type + then: + properties: + producer: + const: game_lobby + audience_kind: + const: user + payload_json: + contentSchema: + $ref: '#/components/schemas/LobbyInviteCreatedPayload' + - if: + properties: + notification_type: 
+ const: lobby.invite.redeemed + required: + - notification_type + then: + properties: + producer: + const: game_lobby + audience_kind: + const: user + payload_json: + contentSchema: + $ref: '#/components/schemas/LobbyInviteRedeemedPayload' + - if: + properties: + notification_type: + const: lobby.invite.expired + required: + - notification_type + then: + properties: + producer: + const: game_lobby + audience_kind: + const: user + payload_json: + contentSchema: + $ref: '#/components/schemas/LobbyInviteExpiredPayload' + GeoReviewRecommendedPayload: + type: object + additionalProperties: true + required: + - user_id + - user_email + - observed_country + - usual_connection_country + - review_reason + properties: + user_id: + type: string + minLength: 1 + user_email: + type: string + minLength: 1 + observed_country: + type: string + minLength: 1 + usual_connection_country: + type: string + minLength: 1 + review_reason: + type: string + minLength: 1 + GameTurnReadyPayload: + type: object + additionalProperties: true + required: + - game_id + - game_name + - turn_number + properties: + game_id: + type: string + minLength: 1 + game_name: + type: string + minLength: 1 + turn_number: + type: integer + minimum: 1 + GameFinishedPayload: + type: object + additionalProperties: true + required: + - game_id + - game_name + - final_turn_number + properties: + game_id: + type: string + minLength: 1 + game_name: + type: string + minLength: 1 + final_turn_number: + type: integer + minimum: 1 + GameGenerationFailedPayload: + type: object + additionalProperties: true + required: + - game_id + - game_name + - failure_reason + properties: + game_id: + type: string + minLength: 1 + game_name: + type: string + minLength: 1 + failure_reason: + type: string + minLength: 1 + LobbyRuntimePausedAfterStartPayload: + type: object + additionalProperties: true + required: + - game_id + - game_name + properties: + game_id: + type: string + minLength: 1 + game_name: + type: string + minLength: 1 + 
LobbyApplicationSubmittedPayload: + type: object + additionalProperties: true + required: + - game_id + - game_name + - applicant_user_id + - applicant_name + properties: + game_id: + type: string + minLength: 1 + game_name: + type: string + minLength: 1 + applicant_user_id: + type: string + minLength: 1 + applicant_name: + type: string + minLength: 1 + LobbyMembershipApprovedPayload: + type: object + additionalProperties: true + required: + - game_id + - game_name + properties: + game_id: + type: string + minLength: 1 + game_name: + type: string + minLength: 1 + LobbyMembershipRejectedPayload: + type: object + additionalProperties: true + required: + - game_id + - game_name + properties: + game_id: + type: string + minLength: 1 + game_name: + type: string + minLength: 1 + LobbyInviteCreatedPayload: + type: object + additionalProperties: true + required: + - game_id + - game_name + - inviter_user_id + - inviter_name + properties: + game_id: + type: string + minLength: 1 + game_name: + type: string + minLength: 1 + inviter_user_id: + type: string + minLength: 1 + inviter_name: + type: string + minLength: 1 + LobbyInviteRedeemedPayload: + type: object + additionalProperties: true + required: + - game_id + - game_name + - invitee_user_id + - invitee_name + properties: + game_id: + type: string + minLength: 1 + game_name: + type: string + minLength: 1 + invitee_user_id: + type: string + minLength: 1 + invitee_name: + type: string + minLength: 1 + LobbyInviteExpiredPayload: + type: object + additionalProperties: true + required: + - game_id + - game_name + - invitee_user_id + - invitee_name + properties: + game_id: + type: string + minLength: 1 + game_name: + type: string + minLength: 1 + invitee_user_id: + type: string + minLength: 1 + invitee_name: + type: string + minLength: 1 diff --git a/notification/cmd/notification/main.go b/notification/cmd/notification/main.go new file mode 100644 index 0000000..19300d4 --- /dev/null +++ b/notification/cmd/notification/main.go 
@@ -0,0 +1,45 @@ +package main + +import ( + "context" + "fmt" + "os" + "os/signal" + "syscall" + + "galaxy/notification/internal/app" + "galaxy/notification/internal/config" + "galaxy/notification/internal/logging" +) + +func main() { + if err := run(); err != nil { + _, _ = fmt.Fprintf(os.Stderr, "notification: %v\n", err) + os.Exit(1) + } +} + +func run() error { + cfg, err := config.LoadFromEnv() + if err != nil { + return err + } + + logger, err := logging.New(cfg.Logging.Level) + if err != nil { + return err + } + + rootCtx, stop := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM) + defer stop() + + runtime, err := app.NewRuntime(rootCtx, cfg, logger) + if err != nil { + return err + } + defer func() { + _ = runtime.Close() + }() + + return runtime.Run(rootCtx) +} diff --git a/notification/contract_asyncapi_test.go b/notification/contract_asyncapi_test.go new file mode 100644 index 0000000..d231a7e --- /dev/null +++ b/notification/contract_asyncapi_test.go @@ -0,0 +1,591 @@ +package notification + +import ( + "os" + "path/filepath" + "runtime" + "strings" + "testing" + + "github.com/stretchr/testify/require" + "gopkg.in/yaml.v3" +) + +type notificationCatalogExpectation struct { + producer string + audienceKind string + allowedAudienceKinds []string + payloadSchema string + requiredFields []string +} + +var expectedNotificationTypeCatalog = []string{ + "geo.review_recommended", + "game.turn.ready", + "game.finished", + "game.generation_failed", + "lobby.runtime_paused_after_start", + "lobby.application.submitted", + "lobby.membership.approved", + "lobby.membership.rejected", + "lobby.invite.created", + "lobby.invite.redeemed", + "lobby.invite.expired", +} + +var expectedNotificationCatalog = map[string]notificationCatalogExpectation{ + "geo.review_recommended": { + producer: "geoprofile", + audienceKind: "admin_email", + payloadSchema: "GeoReviewRecommendedPayload", + requiredFields: []string{"user_id", "user_email", 
"observed_country", "usual_connection_country", "review_reason"}, + }, + "game.turn.ready": { + producer: "game_master", + audienceKind: "user", + payloadSchema: "GameTurnReadyPayload", + requiredFields: []string{"game_id", "game_name", "turn_number"}, + }, + "game.finished": { + producer: "game_master", + audienceKind: "user", + payloadSchema: "GameFinishedPayload", + requiredFields: []string{"game_id", "game_name", "final_turn_number"}, + }, + "game.generation_failed": { + producer: "game_master", + audienceKind: "admin_email", + payloadSchema: "GameGenerationFailedPayload", + requiredFields: []string{"game_id", "game_name", "failure_reason"}, + }, + "lobby.runtime_paused_after_start": { + producer: "game_lobby", + audienceKind: "admin_email", + payloadSchema: "LobbyRuntimePausedAfterStartPayload", + requiredFields: []string{"game_id", "game_name"}, + }, + "lobby.application.submitted": { + producer: "game_lobby", + allowedAudienceKinds: []string{"user", "admin_email"}, + payloadSchema: "LobbyApplicationSubmittedPayload", + requiredFields: []string{"game_id", "game_name", "applicant_user_id", "applicant_name"}, + }, + "lobby.membership.approved": { + producer: "game_lobby", + audienceKind: "user", + payloadSchema: "LobbyMembershipApprovedPayload", + requiredFields: []string{"game_id", "game_name"}, + }, + "lobby.membership.rejected": { + producer: "game_lobby", + audienceKind: "user", + payloadSchema: "LobbyMembershipRejectedPayload", + requiredFields: []string{"game_id", "game_name"}, + }, + "lobby.invite.created": { + producer: "game_lobby", + audienceKind: "user", + payloadSchema: "LobbyInviteCreatedPayload", + requiredFields: []string{"game_id", "game_name", "inviter_user_id", "inviter_name"}, + }, + "lobby.invite.redeemed": { + producer: "game_lobby", + audienceKind: "user", + payloadSchema: "LobbyInviteRedeemedPayload", + requiredFields: []string{"game_id", "game_name", "invitee_user_id", "invitee_name"}, + }, + "lobby.invite.expired": { + producer: 
"game_lobby", + audienceKind: "user", + payloadSchema: "LobbyInviteExpiredPayload", + requiredFields: []string{"game_id", "game_name", "invitee_user_id", "invitee_name"}, + }, +} + +const expectedNotificationCatalogTable = `| ` + "`notification_type`" + ` | Producer | Audience | Channels | Required ` + "`payload_json`" + ` fields | +| --- | --- | --- | --- | --- | +| ` + "`geo.review_recommended`" + ` | ` + "`Geo Profile Service`" + ` (` + "`geoprofile`" + `) | configured admin email list (` + "`audience_kind=admin_email`" + `) | ` + "`email`" + ` | ` + "`user_id`" + `, ` + "`user_email`" + `, ` + "`observed_country`" + `, ` + "`usual_connection_country`" + `, ` + "`review_reason`" + ` | +| ` + "`game.turn.ready`" + ` | ` + "`Game Master`" + ` (` + "`game_master`" + `) | active accepted participants (` + "`audience_kind=user`" + `) | ` + "`push+email`" + ` | ` + "`game_id`" + `, ` + "`game_name`" + `, ` + "`turn_number`" + ` | +| ` + "`game.finished`" + ` | ` + "`Game Master`" + ` (` + "`game_master`" + `) | active accepted participants (` + "`audience_kind=user`" + `) | ` + "`push+email`" + ` | ` + "`game_id`" + `, ` + "`game_name`" + `, ` + "`final_turn_number`" + ` | +| ` + "`game.generation_failed`" + ` | ` + "`Game Master`" + ` (` + "`game_master`" + `) | configured admin email list (` + "`audience_kind=admin_email`" + `) | ` + "`email`" + ` | ` + "`game_id`" + `, ` + "`game_name`" + `, ` + "`failure_reason`" + ` | +| ` + "`lobby.runtime_paused_after_start`" + ` | ` + "`Game Lobby`" + ` (` + "`game_lobby`" + `) | configured admin email list (` + "`audience_kind=admin_email`" + `) | ` + "`email`" + ` | ` + "`game_id`" + `, ` + "`game_name`" + ` | +| ` + "`lobby.application.submitted`" + ` | ` + "`Game Lobby`" + ` (` + "`game_lobby`" + `) | private owner (` + "`audience_kind=user`" + `) or public admins (` + "`audience_kind=admin_email`" + `) | private: ` + "`push+email`" + `, public: ` + "`email`" + ` | ` + "`game_id`" + `, ` + "`game_name`" + `, ` + 
"`applicant_user_id`" + `, ` + "`applicant_name`" + ` | +| ` + "`lobby.membership.approved`" + ` | ` + "`Game Lobby`" + ` (` + "`game_lobby`" + `) | applicant user (` + "`audience_kind=user`" + `) | ` + "`push+email`" + ` | ` + "`game_id`" + `, ` + "`game_name`" + ` | +| ` + "`lobby.membership.rejected`" + ` | ` + "`Game Lobby`" + ` (` + "`game_lobby`" + `) | applicant user (` + "`audience_kind=user`" + `) | ` + "`push+email`" + ` | ` + "`game_id`" + `, ` + "`game_name`" + ` | +| ` + "`lobby.invite.created`" + ` | ` + "`Game Lobby`" + ` (` + "`game_lobby`" + `) | invited user (` + "`audience_kind=user`" + `) | ` + "`push+email`" + ` | ` + "`game_id`" + `, ` + "`game_name`" + `, ` + "`inviter_user_id`" + `, ` + "`inviter_name`" + ` | +| ` + "`lobby.invite.redeemed`" + ` | ` + "`Game Lobby`" + ` (` + "`game_lobby`" + `) | private-game owner (` + "`audience_kind=user`" + `) | ` + "`push+email`" + ` | ` + "`game_id`" + `, ` + "`game_name`" + `, ` + "`invitee_user_id`" + `, ` + "`invitee_name`" + ` | +| ` + "`lobby.invite.expired`" + ` | ` + "`Game Lobby`" + ` (` + "`game_lobby`" + `) | private-game owner (` + "`audience_kind=user`" + `) | ` + "`email`" + ` | ` + "`game_id`" + `, ` + "`game_name`" + `, ` + "`invitee_user_id`" + `, ` + "`invitee_name`" + ` |` + +var expectedSharedDocumentationSnippets = []string{ + "`lobby.application.submitted` keeps one stable `notification_type` and one stable `payload_json` shape", + "`lobby.invite.revoked` deliberately produces no notification in v1", + "private-game invite notifications remain user-bound by internal `user_id`", +} + +func TestIntentAsyncAPISpecLoads(t *testing.T) { + t.Parallel() + + doc := loadAsyncAPISpec(t) + require.Equal(t, "3.1.0", getStringValue(t, doc, "asyncapi")) +} + +func TestIntentAsyncAPISpecFreezesChannelAndOperation(t *testing.T) { + t.Parallel() + + doc := loadAsyncAPISpec(t) + + channel := getMapValue(t, doc, "channels", "intents") + require.Equal(t, "notification:intents", getStringValue(t, 
channel, "address")) + + channelMessages := getMapValue(t, channel, "messages") + require.Equal( + t, + "#/components/messages/NotificationIntent", + getStringValue(t, getMapValue(t, channelMessages, "notificationIntent"), "$ref"), + ) + + operation := getMapValue(t, doc, "operations", "publishNotificationIntent") + require.Equal(t, "send", getStringValue(t, operation, "action")) + require.Equal(t, "#/channels/intents", getStringValue(t, getMapValue(t, operation, "channel"), "$ref")) + + messageRefs := getSliceValue(t, operation, "messages") + require.Len(t, messageRefs, 1) + require.Equal( + t, + "#/channels/intents/messages/notificationIntent", + getStringValue(t, messageRefs[0].(map[string]any), "$ref"), + ) +} + +func TestIntentAsyncAPISpecFreezesEnvelopeSchema(t *testing.T) { + t.Parallel() + + doc := loadAsyncAPISpec(t) + schemas := getMapValue(t, getMapValue(t, doc, "components"), "schemas") + envelope := getMapValue(t, schemas, "NotificationIntentEnvelope") + + require.ElementsMatch( + t, + []any{ + "notification_type", + "producer", + "audience_kind", + "idempotency_key", + "occurred_at_ms", + "payload_json", + }, + getSliceValue(t, envelope, "required"), + ) + + properties := getMapValue(t, envelope, "properties") + require.ElementsMatch( + t, + []string{ + "notification_type", + "producer", + "audience_kind", + "recipient_user_ids_json", + "idempotency_key", + "occurred_at_ms", + "request_id", + "trace_id", + "payload_json", + }, + mapKeys(properties), + ) + + notificationType := getMapValue(t, properties, "notification_type") + require.Equal(t, "string", getStringValue(t, notificationType, "type")) + require.Equal(t, expectedNotificationTypeCatalog, getStringSlice(t, notificationType, "enum")) + require.Contains(t, getStringValue(t, notificationType, "description"), "Exact v1 notification type catalog") + require.Contains(t, getStringValue(t, notificationType, "description"), "`lobby.invite.revoked`") + + producer := getMapValue(t, properties, 
"producer") + require.Equal(t, "string", getStringValue(t, producer, "type")) + require.Equal(t, []string{"geoprofile", "game_master", "game_lobby"}, getStringSlice(t, producer, "enum")) + + occurredAt := getMapValue(t, properties, "occurred_at_ms") + require.Equal(t, "string", getStringValue(t, occurredAt, "type")) + require.Equal(t, "^[0-9]+$", getStringValue(t, occurredAt, "pattern")) + + payloadJSON := getMapValue(t, properties, "payload_json") + require.Equal(t, "string", getStringValue(t, payloadJSON, "type")) + require.Equal(t, "application/json", getStringValue(t, payloadJSON, "contentMediaType")) + require.Contains(t, getStringValue(t, payloadJSON, "description"), "Required payload fields are frozen") + contentSchema := getMapValue(t, payloadJSON, "contentSchema") + require.Equal(t, "object", getStringValue(t, contentSchema, "type")) + require.Equal(t, true, getScalarValue(t, contentSchema, "additionalProperties")) +} + +func TestIntentAsyncAPISpecFreezesAudienceRulesAndRecipientNormalization(t *testing.T) { + t.Parallel() + + doc := loadAsyncAPISpec(t) + schemas := getMapValue(t, getMapValue(t, doc, "components"), "schemas") + envelope := getMapValue(t, schemas, "NotificationIntentEnvelope") + properties := getMapValue(t, envelope, "properties") + + audienceKind := getMapValue(t, properties, "audience_kind") + require.Equal(t, []string{"user", "admin_email"}, getStringSlice(t, audienceKind, "enum")) + + recipients := getMapValue(t, properties, "recipient_user_ids_json") + require.Equal(t, "string", getStringValue(t, recipients, "type")) + require.Equal(t, "application/json", getStringValue(t, recipients, "contentMediaType")) + + recipientSchema := getMapValue(t, recipients, "contentSchema") + require.Equal(t, "array", getStringValue(t, recipientSchema, "type")) + require.EqualValues(t, 1, getScalarValue(t, recipientSchema, "minItems")) + require.Equal(t, true, getScalarValue(t, recipientSchema, "uniqueItems")) + + recipientItems := getMapValue(t, 
recipientSchema, "items") + require.Equal(t, "string", getStringValue(t, recipientItems, "type")) + require.EqualValues(t, 1, getScalarValue(t, recipientItems, "minLength")) + + allOf := getSliceValue(t, envelope, "allOf") + userRule := findConditionalRuleByIfConst(t, allOf, "audience_kind", "user") + require.ElementsMatch( + t, + []any{"recipient_user_ids_json"}, + getSliceValue(t, getMapValue(t, userRule, "then"), "required"), + ) + + adminRule := findConditionalRuleByIfConst(t, allOf, "audience_kind", "admin_email") + require.ElementsMatch( + t, + []any{"recipient_user_ids_json"}, + getSliceValue(t, getMapValue(t, getMapValue(t, adminRule, "then"), "not"), "required"), + ) + + require.Contains(t, getStringValue(t, recipients, "description"), "unordered") + require.Contains(t, getStringValue(t, recipients, "description"), "element order does not change normalized content") +} + +func TestIntentAsyncAPISpecFreezesNotificationCatalogBranches(t *testing.T) { + t.Parallel() + + doc := loadAsyncAPISpec(t) + components := getMapValue(t, doc, "components") + schemas := getMapValue(t, components, "schemas") + envelope := getMapValue(t, schemas, "NotificationIntentEnvelope") + allOf := getSliceValue(t, envelope, "allOf") + + for _, notificationType := range expectedNotificationTypeCatalog { + expectation := expectedNotificationCatalog[notificationType] + rule := findConditionalRuleByIfConst(t, allOf, "notification_type", notificationType) + thenSchema := getMapValue(t, rule, "then") + thenProperties := getMapValue(t, thenSchema, "properties") + + require.Equal( + t, + expectation.producer, + getScalarValue(t, getMapValue(t, thenProperties, "producer"), "const"), + ) + require.Equal( + t, + "#/components/schemas/"+expectation.payloadSchema, + getStringValue(t, getMapValue(t, getMapValue(t, thenProperties, "payload_json"), "contentSchema"), "$ref"), + ) + + if len(expectation.allowedAudienceKinds) > 0 { + oneOf := getSliceValue(t, thenSchema, "oneOf") + require.Len(t, 
oneOf, len(expectation.allowedAudienceKinds)) + + actualAudienceKinds := make([]string, 0, len(oneOf)) + for _, rawBranch := range oneOf { + branch := rawBranch.(map[string]any) + actualAudienceKinds = append( + actualAudienceKinds, + getScalarValue(t, getMapValue(t, getMapValue(t, branch, "properties"), "audience_kind"), "const").(string), + ) + } + require.ElementsMatch(t, expectation.allowedAudienceKinds, actualAudienceKinds) + } else { + require.Equal( + t, + expectation.audienceKind, + getScalarValue(t, getMapValue(t, thenProperties, "audience_kind"), "const"), + ) + } + + payloadSchema := getMapValue(t, schemas, expectation.payloadSchema) + require.Equal(t, "object", getStringValue(t, payloadSchema, "type")) + require.Equal(t, true, getScalarValue(t, payloadSchema, "additionalProperties")) + require.ElementsMatch(t, toAnySlice(expectation.requiredFields), getSliceValue(t, payloadSchema, "required")) + } + + notificationType := getMapValue(t, getMapValue(t, envelope, "properties"), "notification_type") + require.NotContains(t, getStringSlice(t, notificationType, "enum"), "lobby.invite.revoked") +} + +func TestIntentAsyncAPISpecFreezesExamplesAndIdempotencyRules(t *testing.T) { + t.Parallel() + + doc := loadAsyncAPISpec(t) + components := getMapValue(t, doc, "components") + messages := getMapValue(t, components, "messages") + schemas := getMapValue(t, components, "schemas") + + examples := getSliceValue(t, getMapValue(t, messages, "NotificationIntent"), "examples") + require.GreaterOrEqual(t, len(examples), 3) + + userExamplePayload := getMapValue(t, findNamedExample(t, examples, "gameTurnReady"), "payload") + require.Equal(t, "game.turn.ready", getStringValue(t, userExamplePayload, "notification_type")) + require.Equal(t, "game_master", getStringValue(t, userExamplePayload, "producer")) + require.Equal(t, "user", getStringValue(t, userExamplePayload, "audience_kind")) + require.NotEmpty(t, getStringValue(t, userExamplePayload, "recipient_user_ids_json")) + + 
adminExamplePayload := getMapValue(t, findNamedExample(t, examples, "geoReviewRecommended"), "payload") + require.Equal(t, "geo.review_recommended", getStringValue(t, adminExamplePayload, "notification_type")) + require.Equal(t, "geoprofile", getStringValue(t, adminExamplePayload, "producer")) + require.Equal(t, "admin_email", getStringValue(t, adminExamplePayload, "audience_kind")) + _, hasRecipients := adminExamplePayload["recipient_user_ids_json"] + require.False(t, hasRecipients) + + publicApplicationPayload := getMapValue(t, findNamedExample(t, examples, "lobbyApplicationSubmittedPublic"), "payload") + require.Equal(t, "lobby.application.submitted", getStringValue(t, publicApplicationPayload, "notification_type")) + require.Equal(t, "game_lobby", getStringValue(t, publicApplicationPayload, "producer")) + require.Equal(t, "admin_email", getStringValue(t, publicApplicationPayload, "audience_kind")) + _, hasApplicationRecipients := publicApplicationPayload["recipient_user_ids_json"] + require.False(t, hasApplicationRecipients) + + envelope := getMapValue(t, schemas, "NotificationIntentEnvelope") + description := getStringValue(t, envelope, "description") + require.Contains(t, description, "(producer, idempotency_key)") + require.Contains(t, description, "same normalized content is a successful duplicate") + require.Contains(t, description, "different normalized content is a conflict") + require.Contains(t, description, "`request_id` and `trace_id` are observability-only metadata") + + payloadJSON := getMapValue(t, getMapValue(t, envelope, "properties"), "payload_json") + require.Contains(t, getStringValue(t, payloadJSON, "description"), "object key order") + require.Contains(t, getStringValue(t, payloadJSON, "description"), "array order") + require.Contains(t, getStringValue(t, payloadJSON, "description"), "remains significant") +} + +func TestNotificationCatalogDocsStayInSync(t *testing.T) { + t.Parallel() + + readme := loadTextFile(t, "README.md") + flowsDoc := 
loadTextFile(t, filepath.Join("docs", "flows.md")) + docsIndex := loadTextFile(t, filepath.Join("docs", "README.md")) + normalizedReadme := normalizeWhitespace(readme) + normalizedFlowsDoc := normalizeWhitespace(flowsDoc) + + require.Contains(t, readme, expectedNotificationCatalogTable) + require.Contains(t, docsIndex, "- [Main flows](flows.md)") + + for _, snippet := range expectedSharedDocumentationSnippets { + normalizedSnippet := normalizeWhitespace(snippet) + require.Contains(t, normalizedReadme, normalizedSnippet) + } + + require.Contains(t, normalizedFlowsDoc, normalizeWhitespace("Producer -> Notification")) + require.Contains(t, normalizedFlowsDoc, normalizeWhitespace("XADD normalized intent")) +} + +func loadAsyncAPISpec(t *testing.T) map[string]any { + t.Helper() + + payload := loadTextFile(t, filepath.Join("api", "intents-asyncapi.yaml")) + + var doc map[string]any + if err := yaml.Unmarshal([]byte(payload), &doc); err != nil { + require.Failf(t, "test failed", "decode spec: %v", err) + } + + return doc +} + +func loadTextFile(t *testing.T, relativePath string) string { + t.Helper() + + path := filepath.Join(moduleRoot(t), relativePath) + payload, err := os.ReadFile(path) + if err != nil { + require.Failf(t, "test failed", "read file %s: %v", path, err) + } + + return string(payload) +} + +func moduleRoot(t *testing.T) string { + t.Helper() + + _, thisFile, _, ok := runtime.Caller(0) + if !ok { + require.FailNow(t, "runtime.Caller failed") + } + + return filepath.Dir(thisFile) +} + +func findConditionalRuleByIfConst(t *testing.T, rules []any, property, constValue string) map[string]any { + t.Helper() + + for _, rawRule := range rules { + rule, ok := rawRule.(map[string]any) + if !ok { + continue + } + + ifSchema, ok := rule["if"].(map[string]any) + if !ok { + continue + } + properties, ok := ifSchema["properties"].(map[string]any) + if !ok { + continue + } + propertySchema, ok := properties[property].(map[string]any) + if !ok { + continue + } + + if 
actual, ok := propertySchema["const"].(string); ok && actual == constValue { + return rule + } + } + + require.FailNowf(t, "test failed", "conditional rule for %s=%s not found", property, constValue) + return nil +} + +func findNamedExample(t *testing.T, examples []any, name string) map[string]any { + t.Helper() + + for _, rawExample := range examples { + example, ok := rawExample.(map[string]any) + if !ok { + continue + } + if getStringValue(t, example, "name") == name { + return example + } + } + + require.FailNowf(t, "test failed", "example %s not found", name) + return nil +} + +func getMapValue(t *testing.T, value map[string]any, path ...string) map[string]any { + t.Helper() + + current := value + for _, segment := range path { + raw, ok := current[segment] + if !ok { + require.Failf(t, "test failed", "missing map key %s", segment) + } + next, ok := raw.(map[string]any) + if !ok { + require.Failf(t, "test failed", "value at %s is not a map", segment) + } + current = next + } + + return current +} + +func getStringValue(t *testing.T, value map[string]any, key string) string { + t.Helper() + + raw, ok := value[key] + if !ok { + require.Failf(t, "test failed", "missing key %s", key) + } + result, ok := raw.(string) + if !ok { + require.Failf(t, "test failed", "value at %s is not a string", key) + } + + return result +} + +func getStringSlice(t *testing.T, value map[string]any, key string) []string { + t.Helper() + + raw := getSliceValue(t, value, key) + result := make([]string, 0, len(raw)) + for _, item := range raw { + text, ok := item.(string) + if !ok { + require.Failf(t, "test failed", "value at %s is not a string slice", key) + } + result = append(result, text) + } + + return result +} + +func getScalarValue(t *testing.T, value map[string]any, key string) any { + t.Helper() + + raw, ok := value[key] + if !ok { + require.Failf(t, "test failed", "missing key %s", key) + } + + return raw +} + +func getSliceValue(t *testing.T, value map[string]any, key string) 
[]any { + t.Helper() + + raw, ok := value[key] + if !ok { + require.Failf(t, "test failed", "missing key %s", key) + } + result, ok := raw.([]any) + if !ok { + require.Failf(t, "test failed", "value at %s is not a slice", key) + } + + return result +} + +func mapKeys(value map[string]any) []string { + keys := make([]string, 0, len(value)) + for key := range value { + keys = append(keys, key) + } + + return keys +} + +func toAnySlice(values []string) []any { + result := make([]any, 0, len(values)) + for _, value := range values { + result = append(result, value) + } + + return result +} + +func normalizeWhitespace(value string) string { + return strings.ToLower(strings.Join(strings.Fields(value), " ")) +} + +func TestGatewayREADMEFreezesExactPushVocabulary(t *testing.T) { + t.Parallel() + + gatewayReadme := loadTextFile(t, filepath.Join("..", "gateway", "README.md")) + + require.Contains(t, gatewayReadme, "The initial notification event vocabulary\nin v1 is exactly:") + require.Contains( + t, + gatewayReadme, + strings.Join([]string{ + "- `game.turn.ready`", + "- `game.finished`", + "- `lobby.application.submitted`", + "- `lobby.membership.approved`", + "- `lobby.membership.rejected`", + "- `lobby.invite.created`", + "- `lobby.invite.redeemed`", + }, "\n"), + ) + require.Contains( + t, + gatewayReadme, + "`lobby.application.submitted` is published toward `Gateway` only for the\nprivate-game owner flow. The public-game variant is email-only.", + ) +} diff --git a/notification/docs/README.md b/notification/docs/README.md new file mode 100644 index 0000000..e2b0b13 --- /dev/null +++ b/notification/docs/README.md @@ -0,0 +1,25 @@ +# Notification Service Docs + +This directory keeps service-local documentation that is more operational or +more example-heavy than [`../README.md`](../README.md). 
+ +Sections: + +- [Runtime and components](runtime.md) +- [Main flows](flows.md) +- [Operator runbook](runbook.md) +- [Configuration and contract examples](examples.md) + +Primary references: + +- [`../README.md`](../README.md) for stable service scope, contracts, data + model, Redis layout, and retry policy +- [`../api/intents-asyncapi.yaml`](../api/intents-asyncapi.yaml) for the + producer-to-notification Redis Stream contract +- [`../openapi.yaml`](../openapi.yaml) for the private probe HTTP contract +- [`../../gateway/README.md`](../../gateway/README.md) for client-event fan-out +- [`../../mail/api/delivery-commands-asyncapi.yaml`](../../mail/api/delivery-commands-asyncapi.yaml) + for the trusted async generic mail command contract +- [`../../ARCHITECTURE.md`](../../ARCHITECTURE.md) for system-level service + boundaries and transport rules +- [`../../TESTING.md`](../../TESTING.md) for the cross-service testing matrix diff --git a/notification/docs/examples.md b/notification/docs/examples.md new file mode 100644 index 0000000..84d86ee --- /dev/null +++ b/notification/docs/examples.md @@ -0,0 +1,145 @@ +# Configuration and Contract Examples + +The examples below are illustrative. IDs, timestamps, and stream keys are +placeholders unless explicitly stated otherwise. 
+ +## Example Environment + +Minimal local runtime: + +```dotenv +NOTIFICATION_REDIS_ADDR=127.0.0.1:6379 +NOTIFICATION_INTERNAL_HTTP_ADDR=:8092 +NOTIFICATION_USER_SERVICE_BASE_URL=http://127.0.0.1:8091 + +NOTIFICATION_GATEWAY_CLIENT_EVENTS_STREAM=gateway:client-events +NOTIFICATION_MAIL_DELIVERY_COMMANDS_STREAM=mail:delivery_commands + +NOTIFICATION_ADMIN_EMAILS_GEO_REVIEW_RECOMMENDED=geo-admin@example.com +NOTIFICATION_ADMIN_EMAILS_GAME_GENERATION_FAILED=ops@example.com +NOTIFICATION_ADMIN_EMAILS_LOBBY_RUNTIME_PAUSED_AFTER_START=ops@example.com +NOTIFICATION_ADMIN_EMAILS_LOBBY_APPLICATION_SUBMITTED=admins@example.com + +OTEL_TRACES_EXPORTER=none +OTEL_METRICS_EXPORTER=none +``` + +## Probe HTTP Examples + +Liveness: + +```bash +curl http://127.0.0.1:8092/healthz +``` + +```json +{ + "status": "ok" +} +``` + +Readiness: + +```bash +curl http://127.0.0.1:8092/readyz +``` + +```json +{ + "status": "ready" +} +``` + +## User-Targeted Intent Example + +```bash +redis-cli XADD notification:intents '*' \ + notification_type game.turn.ready \ + producer game_master \ + audience_kind user \ + recipient_user_ids_json '["user-1","user-2"]' \ + idempotency_key game-master:game-123:turn-54 \ + occurred_at_ms 1775121700000 \ + request_id request-123 \ + trace_id trace-123 \ + payload_json '{"game_id":"game-123","game_name":"Nebula Clash","turn_number":54}' +``` + +Expected effects: + +- `Notification Service` resolves both users through `User Service` +- one `push` route and one `email` route are materialized per user +- `Gateway` receives user-wide client events without `device_session_id` +- `Mail Service` receives template-mode commands with + `template_id=game.turn.ready` + +## Administrator Intent Example + +```bash +redis-cli XADD notification:intents '*' \ + notification_type geo.review_recommended \ + producer geoprofile \ + audience_kind admin_email \ + idempotency_key geoprofile:user-123:review-true:1775121700001 \ + occurred_at_ms 1775121700001 \ + payload_json 
'{"user_id":"user-123","user_email":"pilot@example.com","observed_country":"DE","usual_connection_country":"PL","review_reason":"country_mismatch"}' +``` + +Expected effects: + +- `Notification Service` does not call `User Service` +- recipients are read from `NOTIFICATION_ADMIN_EMAILS_GEO_REVIEW_RECOMMENDED` +- only email routes are publishable; push route slots are skipped + +## Gateway Client Event Shape + +Example stream entry appended by `Notification Service`: + +```bash +redis-cli XADD gateway:client-events MAXLEN '~' 1024 '*' \ + user_id user-1 \ + event_type game.turn.ready \ + event_id '1775121700000-0/push:user:user-1' \ + payload_bytes '' \ + request_id request-123 \ + trace_id trace-123 +``` + +`Gateway` derives `timestamp_ms`, computes `payload_hash`, signs the outgoing +event, and delivers it to every active stream for `user-1`. + +## Mail Command Shape + +Example stream entry appended by `Notification Service`: + +```bash +redis-cli XADD mail:delivery_commands '*' \ + delivery_id '1775121700000-0/email:user:user-1' \ + source notification \ + payload_mode template \ + idempotency_key 'notification:1775121700000-0/email:user:user-1' \ + requested_at_ms 1775121700000 \ + request_id request-123 \ + trace_id trace-123 \ + payload_json '{"to":["pilot@example.com"],"cc":[],"bcc":[],"reply_to":[],"template_id":"game.turn.ready","locale":"en","variables":{"game_id":"game-123","game_name":"Nebula Clash","turn_number":54},"attachments":[]}' +``` + +## Dead-Letter Replay + +Replay a dead-lettered route by publishing a new compatible intent with a new +producer-owned `idempotency_key`. 
+ +```bash +redis-cli XADD notification:intents '*' \ + notification_type game.turn.ready \ + producer game_master \ + audience_kind user \ + recipient_user_ids_json '["user-1"]' \ + idempotency_key game-master:game-123:turn-54:manual-replay-1 \ + occurred_at_ms 1775121700000 \ + payload_json '{"game_id":"game-123","game_name":"Nebula Clash","turn_number":54}' +``` + +Do not mutate existing `notification_route`, +`notification_dead_letter_entry`, or `notification:route_schedule` records as a +replay workflow. diff --git a/notification/docs/flows.md b/notification/docs/flows.md new file mode 100644 index 0000000..161bb96 --- /dev/null +++ b/notification/docs/flows.md @@ -0,0 +1,130 @@ +# Main Flows + +## Producer -> Notification + +```mermaid +sequenceDiagram + participant Producer + participant Stream as Redis Stream notification:intents + participant Consumer as Intent consumer + participant Notify as Notification Service + participant Redis + + Producer->>Stream: XADD normalized intent + Consumer->>Stream: XREAD from stored offset + Consumer->>Notify: decode and validate envelope + alt malformed intent + Notify->>Redis: record malformed-intent entry + Consumer->>Redis: save stream offset + else duplicate with same normalized content + Notify->>Redis: load accepted notification + Consumer->>Redis: save stream offset + else idempotency conflict + Notify->>Redis: record malformed-intent entry + Consumer->>Redis: save stream offset + else new valid intent + Notify->>Redis: store notification, routes, and idempotency record + Consumer->>Redis: save stream offset + end +``` + +Duplicate handling is scoped by `(producer, idempotency_key)`. `request_id` and +`trace_id` are observability-only metadata and do not participate in the +idempotency fingerprint. 
+ +## User-Targeted Enrichment + +```mermaid +sequenceDiagram + participant Consumer as Intent consumer + participant Notify as Notification Service + participant User as User Service + participant Redis + + Consumer->>Notify: accepted user-targeted intent + loop each recipient_user_id + Notify->>User: GET /api/v1/internal/users/{user_id} + alt user exists + User-->>Notify: email + preferred_language + else subject_not_found + Notify->>Redis: record malformed intent recipient_not_found + Consumer->>Redis: save stream offset + else temporary failure + Notify-->>Consumer: service unavailable + Consumer-->>Consumer: stop before stream-offset advance + end + end + Notify->>Redis: persist enriched routes +``` + +User-targeted routes are enriched before durable route write. The currently +supported resolved locale is exactly `en`; unsupported or empty values fall +back to `en`. + +## Notification -> Gateway + +```mermaid +sequenceDiagram + participant Push as Push publisher + participant Redis + participant Gateway as Edge Gateway + participant Client + + Push->>Redis: load due push route + Push->>Redis: acquire temporary route lease + Push->>Push: encode FlatBuffers notification payload + Push->>Redis: XADD MAXLEN ~ gateway client-event stream + Push->>Redis: mark route published and remove from schedule + Gateway->>Redis: XREAD client-event stream + Gateway->>Gateway: sign outgoing GatewayEvent + Gateway-->>Client: fan out to all active user streams +``` + +`Notification Service` publishes `user_id`, `event_type`, `event_id`, +`payload_bytes`, and optional `request_id` / `trace_id`. It intentionally omits +`device_session_id`. 
+ +## Notification -> Mail + +```mermaid +sequenceDiagram + participant Email as Email publisher + participant Redis + participant Mail as Mail Service + + Email->>Redis: load due email route + Email->>Redis: acquire temporary route lease + Email->>Email: encode template-mode command + Email->>Redis: XADD mail:delivery_commands + Email->>Redis: mark route published and remove from schedule + Mail->>Redis: XREAD mail:delivery_commands + Mail->>Mail: accept template delivery command +``` + +Notification-generated mail always uses `source=notification`, +`payload_mode=template`, and `template_id == notification_type`. +Auth-code mail is not part of this flow and remains a direct +`Auth / Session Service -> Mail Service` request. + +## Retry and Dead Letter + +```mermaid +sequenceDiagram + participant Publisher + participant Redis + participant Downstream as Gateway or Mail Service + + Publisher->>Redis: load due route + Publisher->>Redis: acquire temporary route lease + Publisher->>Downstream: append downstream stream entry + alt publication succeeds + Publisher->>Redis: mark published and remove schedule member + else retry budget remains + Publisher->>Redis: mark failed and schedule next attempt + else retry budget exhausted + Publisher->>Redis: mark dead_letter and write dead-letter entry + end +``` + +`push` and `email` retry independently. A dead-lettered route never rolls back +or invalidates a sibling route that already reached `published`. diff --git a/notification/docs/runbook.md b/notification/docs/runbook.md new file mode 100644 index 0000000..2b91355 --- /dev/null +++ b/notification/docs/runbook.md @@ -0,0 +1,167 @@ +# Operator Runbook + +This runbook covers startup, steady-state verification, shutdown, and common +`Notification Service` incidents. 
+ +## Startup Checks + +Before starting the process, confirm: + +- `NOTIFICATION_REDIS_ADDR` points to the Redis deployment that stores + notification records, routes, idempotency reservations, malformed intents, + dead letters, stream offsets, and route schedules +- Redis ACL, DB, TLS, and timeout settings match the target environment +- `NOTIFICATION_USER_SERVICE_BASE_URL` points to the trusted internal + `User Service` +- `NOTIFICATION_GATEWAY_CLIENT_EVENTS_STREAM` matches the stream consumed by + `Gateway` +- `NOTIFICATION_MAIL_DELIVERY_COMMANDS_STREAM` matches the stream consumed by + `Mail Service` +- administrator email variables are populated for notification types that + should notify administrators +- OpenTelemetry exporter settings point at the intended collector when traces + or metrics are expected outside the process + +At startup the process performs a bounded Redis `PING`. Startup fails fast if +configuration validation or Redis connectivity fails. + +Known startup caveats: + +- there is no operator API +- there is no `/metrics` route +- traces and metrics are exported only through configured OpenTelemetry + exporters +- readiness is process-local after successful startup + +## Steady-State Verification + +Practical readiness verification: + +1. confirm startup logs for the internal HTTP listener, intent consumer, push + publisher, and email publisher +2. request `GET /readyz` on `NOTIFICATION_INTERNAL_HTTP_ADDR` +3. verify Redis connectivity and OpenTelemetry exporter health out of band +4. 
publish a low-risk compatible test intent in a non-production environment + and verify route publication in the downstream stream + +Expected steady-state signals: + +- `notification.route_schedule.depth` remains bounded +- `notification.route_schedule.oldest_age_ms` stays near the active retry + ladder +- `notification.intent_stream.oldest_unprocessed_age_ms` remains near zero + when producers are healthy +- `notification.route.dead_letters` changes rarely +- malformed-intent logs appear only for bad producer input +- logs include `notification_type`, `producer`, `audience_kind`, and + correlation identifiers where present + +## Shutdown + +The process handles `SIGINT` and `SIGTERM`. + +Shutdown behavior: + +- coordinated shutdown is bounded by `NOTIFICATION_SHUTDOWN_TIMEOUT` +- the private probe listener is stopped before process resources are closed +- route publishers and the intent consumer stop through context cancellation +- Redis clients are closed after the app stops +- OpenTelemetry providers are flushed during runtime cleanup + +During a planned restart: + +1. send `SIGTERM` +2. wait for listener and worker shutdown logs +3. restart the process with the same Redis, stream, and downstream settings +4. repeat steady-state verification + +## Incident Triage + +### Intent Stream Lag Grows + +Symptoms: + +- `notification.intent_stream.oldest_unprocessed_age_ms` increases +- no matching route records appear for new stream entries +- consumer logs stop after a specific stream entry + +Checks: + +1. inspect the next unprocessed `notification:intents` entry +2. confirm `User Service` is reachable from `Notification Service` +3. if the entry is user-targeted, verify every `recipient_user_id` exists +4. 
inspect malformed-intent records for nearby stream IDs + +Expected behavior: + +- malformed input is recorded and the offset advances +- temporary `User Service` failure stops progress before offset advancement + +### Route Schedule Backlog Grows + +Symptoms: + +- `notification.route_schedule.depth` rises steadily +- `notification.route_schedule.oldest_age_ms` increases +- routes remain in `pending` or `failed` + +Checks: + +1. confirm push and email publisher startup logs are present +2. confirm Redis latency and connectivity +3. verify route IDs match the expected `push:` or `email:` prefixes +4. confirm the downstream stream names match `Gateway` and `Mail Service` +5. inspect route `last_error_classification` + +### Dead-Letter Spikes + +Symptoms: + +- `notification.route.dead_letters` increases rapidly +- route records show repeated `payload_encoding_failed`, + `gateway_stream_publish_failed`, or `mail_stream_publish_failed` + +Checks: + +1. inspect the dead-letter entry and owning route +2. verify payload fields still match the notification catalog +3. confirm downstream Redis stream writes are accepted +4. compare failures across channels to isolate Gateway-specific or + Mail-specific issues + +Recovery: + +1. correct the downstream dependency or payload problem +2. publish a new compatible intent with a new producer-owned + `idempotency_key` +3. keep the old dead-letter record untouched as audit history + +### Missing Administrator Mail + +Symptoms: + +- administrator notification type is accepted +- no email command reaches `mail:delivery_commands` +- route is `skipped` with recipient `config:` + +Checks: + +1. inspect the type-specific administrator email environment variable +2. confirm addresses are normalized single email addresses without display + names +3. 
restart the process after configuration changes + +Expected behavior: + +- empty administrator lists materialize one skipped synthetic route so the + configuration gap remains durable and visible + +### Auth-Code Mail Appears Missing + +Auth-code mail is intentionally outside `Notification Service`. + +Checks: + +1. inspect `Auth / Session Service -> Mail Service` logs and delivery records +2. confirm `notification:intents` remains unused for auth-code delivery +3. do not replay auth-code mail through `Notification Service` diff --git a/notification/docs/runtime.md b/notification/docs/runtime.md new file mode 100644 index 0000000..489c949 --- /dev/null +++ b/notification/docs/runtime.md @@ -0,0 +1,206 @@ +# Runtime and Components + +The diagram below focuses on the deployed `galaxy/notification` process and +its runtime dependencies. + +```mermaid +flowchart LR + subgraph Producers + GM["Game Master"] + Lobby["Game Lobby"] + Geo["Geo Profile Service"] + end + + subgraph Notify["Notification Service process"] + Probe["Private probe HTTP listener\n/healthz /readyz"] + Consumer["Notification intent consumer"] + Accept["Intent acceptance service"] + Push["Push route publisher"] + Email["Email route publisher"] + Telemetry["Logs, traces, metrics"] + end + + User["User Service"] + Gateway["Edge Gateway\nclient-event stream consumer"] + Mail["Mail Service\ncommand stream consumer"] + Redis["Redis\nstate + streams + schedules"] + + GM --> Redis + Lobby --> Redis + Geo --> Redis + Consumer --> Redis + Consumer --> Accept + Accept --> User + Accept --> Redis + Push --> Redis + Email --> Redis + Push --> Gateway + Email --> Mail + Probe --> Telemetry + Consumer --> Telemetry + Push --> Telemetry + Email --> Telemetry +``` + +## Listener + +`notification` exposes exactly one HTTP listener: + +| Listener | Default addr | Purpose | +| --- | --- | --- | +| Internal probe HTTP | `:8092` | Private liveness and readiness probes | + +Shared listener defaults: + +- read-header 
timeout: `2s` +- read timeout: `10s` +- idle timeout: `1m` + +Probe routes: + +- `GET /healthz` returns `{"status":"ok"}` +- `GET /readyz` returns `{"status":"ready"}` +- `readyz` is process-local after successful startup and does not perform a + live Redis ping per request + +Intentional omissions: + +- no public listener +- no operator API +- there is no `/metrics` route + +## Startup Wiring + +`cmd/notification` loads config, constructs logging, and builds the runtime +through `internal/app.NewRuntime`. + +The runtime wires: + +- Redis client with startup connectivity check +- `User Service` HTTP client for recipient enrichment +- private probe HTTP server +- plain `XREAD` intent consumer +- `push` route publisher for `Gateway` +- `email` route publisher for `Mail Service` +- Redis-backed accepted-intent, route, idempotency, malformed-intent, + dead-letter, stream-offset, and schedule stores +- OpenTelemetry traces and metrics exporters + +Startup fails fast on invalid configuration or unavailable Redis. 
+ +## Background Components + +### Intent consumer + +- reads one plain `XREAD` stream, default `notification:intents` +- starts from stored offset or `0-0` +- advances offset only after durable acceptance or durable malformed-intent + recording +- stops without offset advancement when `User Service` enrichment has a + temporary failure + +### Acceptance service + +- validates the normalized intent envelope +- applies idempotency rules for `(producer, idempotency_key)` +- enriches user-targeted recipients before durable route write +- materializes route slots for `push` and `email` +- stores malformed-intent records for invalid payloads, idempotency conflicts, + and unresolved users + +### Push publisher + +- scans `notification:route_schedule` +- processes only scheduled route IDs beginning with `push:` +- coordinates replicas with temporary route leases +- publishes Gateway client events with `XADD MAXLEN ~` +- omits `device_session_id` so Gateway fans out to all active streams for the + target user + +### Email publisher + +- scans `notification:route_schedule` +- processes only scheduled route IDs beginning with `email:` +- coordinates replicas with temporary route leases +- publishes Mail Service generic commands with plain `XADD` +- always uses `payload_mode=template` + +## Configuration Groups + +Required: + +- `NOTIFICATION_REDIS_ADDR` +- `NOTIFICATION_USER_SERVICE_BASE_URL` + +Core process config: + +- `NOTIFICATION_SHUTDOWN_TIMEOUT` +- `NOTIFICATION_LOG_LEVEL` + +Internal HTTP config: + +- `NOTIFICATION_INTERNAL_HTTP_ADDR` with default `:8092` +- `NOTIFICATION_INTERNAL_HTTP_READ_HEADER_TIMEOUT` with default `2s` +- `NOTIFICATION_INTERNAL_HTTP_READ_TIMEOUT` with default `10s` +- `NOTIFICATION_INTERNAL_HTTP_IDLE_TIMEOUT` with default `1m` + +Redis connectivity: + +- `NOTIFICATION_REDIS_USERNAME` +- `NOTIFICATION_REDIS_PASSWORD` +- `NOTIFICATION_REDIS_DB` +- `NOTIFICATION_REDIS_TLS_ENABLED` +- `NOTIFICATION_REDIS_OPERATION_TIMEOUT` +- 
`NOTIFICATION_INTENTS_STREAM` +- `NOTIFICATION_INTENTS_READ_BLOCK_TIMEOUT` +- `NOTIFICATION_GATEWAY_CLIENT_EVENTS_STREAM` +- `NOTIFICATION_GATEWAY_CLIENT_EVENTS_STREAM_MAX_LEN` +- `NOTIFICATION_MAIL_DELIVERY_COMMANDS_STREAM` + +Retry and retention: + +- `NOTIFICATION_PUSH_RETRY_MAX_ATTEMPTS` +- `NOTIFICATION_EMAIL_RETRY_MAX_ATTEMPTS` +- `NOTIFICATION_ROUTE_BACKOFF_MIN` +- `NOTIFICATION_ROUTE_BACKOFF_MAX` +- `NOTIFICATION_ROUTE_LEASE_TTL` +- `NOTIFICATION_DEAD_LETTER_TTL` +- `NOTIFICATION_RECORD_TTL` +- `NOTIFICATION_IDEMPOTENCY_TTL` + +User enrichment: + +- `NOTIFICATION_USER_SERVICE_TIMEOUT` with default `1s` + +Administrator routing: + +- `NOTIFICATION_ADMIN_EMAILS_GEO_REVIEW_RECOMMENDED` +- `NOTIFICATION_ADMIN_EMAILS_GAME_GENERATION_FAILED` +- `NOTIFICATION_ADMIN_EMAILS_LOBBY_RUNTIME_PAUSED_AFTER_START` +- `NOTIFICATION_ADMIN_EMAILS_LOBBY_APPLICATION_SUBMITTED` + +Telemetry: + +- `OTEL_SERVICE_NAME` +- `OTEL_TRACES_EXPORTER` +- `OTEL_METRICS_EXPORTER` +- `OTEL_EXPORTER_OTLP_PROTOCOL` +- `OTEL_EXPORTER_OTLP_TRACES_PROTOCOL` +- `OTEL_EXPORTER_OTLP_METRICS_PROTOCOL` +- `NOTIFICATION_OTEL_STDOUT_TRACES_ENABLED` +- `NOTIFICATION_OTEL_STDOUT_METRICS_ENABLED` + +## Runtime Notes + +- `Notification Service` does not create or own notification audiences; it + trusts producers to publish concrete user recipients. +- Administrator recipients are type-specific configuration, not a global list. +- A missing user is treated as a producer input defect. +- A temporary `User Service` outage pauses stream progress for the affected + entry and allows replay after restart. +- Go producers use `galaxy/notificationintent` to build compatible intents. +- Producers append intents with plain `XADD`; producer-side publish failure is + notification degradation and must not roll back already committed source + business state. +- Dead-letter replay is performed by publishing a new compatible intent with a + new `idempotency_key`. 
diff --git a/notification/documentation_contract_test.go b/notification/documentation_contract_test.go new file mode 100644 index 0000000..4e7f5a3 --- /dev/null +++ b/notification/documentation_contract_test.go @@ -0,0 +1,57 @@ +package notification + +import ( + "path/filepath" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestNotificationDocumentationStaysPlanIndependent(t *testing.T) { + t.Parallel() + + currentDocs := map[string]string{ + "README.md": loadTextFile(t, "README.md"), + "docs/README.md": loadTextFile(t, filepath.Join("docs", "README.md")), + "docs/runtime.md": loadTextFile(t, filepath.Join("docs", "runtime.md")), + "docs/flows.md": loadTextFile(t, filepath.Join("docs", "flows.md")), + "docs/runbook.md": loadTextFile(t, filepath.Join("docs", "runbook.md")), + "docs/examples.md": loadTextFile(t, filepath.Join("docs", "examples.md")), + "openapi.yaml": loadTextFile(t, "openapi.yaml"), + } + + forbiddenPlan := "PLAN" + ".md" + historicalSlug := "sta" + "ge" + "-" + forbiddenHistoricalDocLink := "docs/" + historicalSlug + forbiddenHistoricalSlug := historicalSlug + forbiddenHistoricalWord := "Sta" + "ge " + + for path, content := range currentDocs { + require.NotContains(t, content, forbiddenPlan, path) + require.NotContains(t, content, forbiddenHistoricalDocLink, path) + require.NotContains(t, content, forbiddenHistoricalSlug, path) + require.NotContains(t, content, forbiddenHistoricalWord, path) + } +} + +func TestNotificationCrossServiceDocumentationStaysInSync(t *testing.T) { + t.Parallel() + + readme := loadTextFile(t, "README.md") + testingDoc := loadTextFile(t, filepath.Join("..", "TESTING.md")) + architecture := loadTextFile(t, filepath.Join("..", "ARCHITECTURE.md")) + mailReadme := loadTextFile(t, filepath.Join("..", "mail", "README.md")) + geoProfileReadme := loadTextFile(t, filepath.Join("..", "geoprofile", "README.md")) + gatewayReadme := loadTextFile(t, filepath.Join("..", "gateway", "README.md")) + + for _, content := 
range []string{readme, testingDoc, architecture, mailReadme, geoProfileReadme, gatewayReadme} { + normalizedContent := normalizeWhitespace(content) + require.Contains(t, normalizedContent, normalizeWhitespace("auth-code")) + require.Contains(t, normalizedContent, normalizeWhitespace("Notification Service")) + } + + require.Contains(t, normalizeWhitespace(readme), normalizeWhitespace("Real producer-boundary suites for `Game Master`, `Game Lobby`, and `Geo Profile Service` should be added only when those service boundaries exist in code.")) + require.Contains(t, normalizeWhitespace(testingDoc), normalizeWhitespace("`notificationgateway`")) + require.Contains(t, normalizeWhitespace(testingDoc), normalizeWhitespace("`notificationmail`")) + require.Contains(t, normalizeWhitespace(readme), normalizeWhitespace("real black-box `Notification Service -> Gateway` push fan-out coverage")) +} diff --git a/notification/go.mod b/notification/go.mod new file mode 100644 index 0000000..b5201be --- /dev/null +++ b/notification/go.mod @@ -0,0 +1,90 @@ +module galaxy/notification + +go 1.26.1 + +require ( + galaxy/notificationintent v0.0.0 + galaxy/transcoder v0.0.0 + github.com/alicebob/miniredis/v2 v2.37.0 + github.com/redis/go-redis/extra/redisotel/v9 v9.18.0 + github.com/redis/go-redis/v9 v9.18.0 + github.com/stretchr/testify v1.11.1 + github.com/testcontainers/testcontainers-go v0.42.0 + github.com/testcontainers/testcontainers-go/modules/redis v0.42.0 + go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.68.0 + go.opentelemetry.io/otel v1.43.0 + go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.43.0 + go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.43.0 + go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.43.0 + go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.43.0 + go.opentelemetry.io/otel/exporters/stdout/stdoutmetric v1.43.0 + go.opentelemetry.io/otel/exporters/stdout/stdouttrace v1.43.0 + 
go.opentelemetry.io/otel/metric v1.43.0 + go.opentelemetry.io/otel/sdk v1.43.0 + go.opentelemetry.io/otel/sdk/metric v1.43.0 + go.opentelemetry.io/otel/trace v1.43.0 + gopkg.in/yaml.v3 v3.0.1 +) + +require ( + dario.cat/mergo v1.0.2 // indirect + github.com/Azure/go-ansiterm v0.0.0-20250102033503-faa5f7b0171c // indirect + github.com/Microsoft/go-winio v0.6.2 // indirect + github.com/cenkalti/backoff/v4 v4.3.0 // indirect + github.com/cenkalti/backoff/v5 v5.0.3 // indirect + github.com/cespare/xxhash/v2 v2.3.0 // indirect + github.com/containerd/errdefs v1.0.0 // indirect + github.com/containerd/errdefs/pkg v0.3.0 // indirect + github.com/containerd/log v0.1.0 // indirect + github.com/containerd/platforms v0.2.1 // indirect + github.com/cpuguy83/dockercfg v0.3.2 // indirect + github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect + github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect + github.com/distribution/reference v0.6.0 // indirect + github.com/docker/go-connections v0.6.0 // indirect + github.com/docker/go-units v0.5.0 // indirect + github.com/ebitengine/purego v0.10.0 // indirect + github.com/felixge/httpsnoop v1.0.4 // indirect + github.com/go-logr/logr v1.4.3 // indirect + github.com/go-logr/stdr v1.2.2 // indirect + github.com/go-ole/go-ole v1.2.6 // indirect + github.com/google/uuid v1.6.0 // indirect + github.com/grpc-ecosystem/grpc-gateway/v2 v2.28.0 // indirect + github.com/klauspost/compress v1.18.5 // indirect + github.com/klauspost/cpuid/v2 v2.3.0 // indirect + github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 // indirect + github.com/magiconair/properties v1.8.10 // indirect + github.com/mdelapenya/tlscert v0.2.0 // indirect + github.com/moby/docker-image-spec v1.3.1 // indirect + github.com/moby/go-archive v0.2.0 // indirect + github.com/moby/moby/api v1.54.1 // indirect + github.com/moby/moby/client v0.4.0 // indirect + github.com/moby/patternmatcher v0.6.1 // indirect + 
github.com/moby/sys/sequential v0.6.0 // indirect + github.com/moby/sys/user v0.4.0 // indirect + github.com/moby/sys/userns v0.1.0 // indirect + github.com/moby/term v0.5.2 // indirect + github.com/opencontainers/go-digest v1.0.0 // indirect + github.com/opencontainers/image-spec v1.1.1 // indirect + github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect + github.com/power-devops/perfstat v0.0.0-20240221224432-82ca36839d55 // indirect + github.com/redis/go-redis/extra/rediscmd/v9 v9.18.0 // indirect + github.com/shirou/gopsutil/v4 v4.26.3 // indirect + github.com/sirupsen/logrus v1.9.4 // indirect + github.com/tklauser/go-sysconf v0.3.16 // indirect + github.com/tklauser/numcpus v0.11.0 // indirect + github.com/yuin/gopher-lua v1.1.1 // indirect + github.com/yusufpapurcu/wmi v1.2.4 // indirect + go.opentelemetry.io/auto/sdk v1.2.1 // indirect + go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.43.0 // indirect + go.opentelemetry.io/proto/otlp v1.10.0 // indirect + go.uber.org/atomic v1.11.0 // indirect + golang.org/x/crypto v0.49.0 // indirect + golang.org/x/net v0.52.0 // indirect + golang.org/x/sys v0.42.0 // indirect + golang.org/x/text v0.36.0 // indirect + google.golang.org/genproto/googleapis/api v0.0.0-20260401024825-9d38bb4040a9 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20260401024825-9d38bb4040a9 // indirect + google.golang.org/grpc v1.80.0 // indirect + google.golang.org/protobuf v1.36.11 // indirect +) diff --git a/notification/go.sum b/notification/go.sum new file mode 100644 index 0000000..2e2fbdf --- /dev/null +++ b/notification/go.sum @@ -0,0 +1,195 @@ +dario.cat/mergo v1.0.2 h1:85+piFYR1tMbRrLcDwR18y4UKJ3aH1Tbzi24VRW1TK8= +dario.cat/mergo v1.0.2/go.mod h1:E/hbnu0NxMFBjpMIE34DRGLWqDy0g5FuKDhCb31ngxA= +github.com/AdaLogics/go-fuzz-headers v0.0.0-20240806141605-e8a1dd7889d6 h1:He8afgbRMd7mFxO99hRNu+6tazq8nFF9lIwo9JFroBk= +github.com/AdaLogics/go-fuzz-headers v0.0.0-20240806141605-e8a1dd7889d6/go.mod 
h1:8o94RPi1/7XTJvwPpRSzSUedZrtlirdB3r9Z20bi2f8= +github.com/Azure/go-ansiterm v0.0.0-20250102033503-faa5f7b0171c h1:udKWzYgxTojEKWjV8V+WSxDXJ4NFATAsZjh8iIbsQIg= +github.com/Azure/go-ansiterm v0.0.0-20250102033503-faa5f7b0171c/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E= +github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERoyfY= +github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU= +github.com/alicebob/miniredis/v2 v2.37.0 h1:RheObYW32G1aiJIj81XVt78ZHJpHonHLHW7OLIshq68= +github.com/alicebob/miniredis/v2 v2.37.0/go.mod h1:TcL7YfarKPGDAthEtl5NBeHZfeUQj6OXMm/+iu5cLMM= +github.com/bsm/ginkgo/v2 v2.12.0 h1:Ny8MWAHyOepLGlLKYmXG4IEkioBysk6GpaRTLC8zwWs= +github.com/bsm/ginkgo/v2 v2.12.0/go.mod h1:SwYbGRRDovPVboqFv0tPTcG1sN61LM1Z4ARdbAV9g4c= +github.com/bsm/gomega v1.27.10 h1:yeMWxP2pV2fG3FgAODIY8EiRE3dy0aeFYt4l7wh6yKA= +github.com/bsm/gomega v1.27.10/go.mod h1:JyEr/xRbxbtgWNi8tIEVPUYZ5Dzef52k01W3YH0H+O0= +github.com/cenkalti/backoff/v4 v4.3.0 h1:MyRJ/UdXutAwSAT+s3wNd7MfTIcy71VQueUuFK343L8= +github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE= +github.com/cenkalti/backoff/v5 v5.0.3 h1:ZN+IMa753KfX5hd8vVaMixjnqRZ3y8CuJKRKj1xcsSM= +github.com/cenkalti/backoff/v5 v5.0.3/go.mod h1:rkhZdG3JZukswDf7f0cwqPNk4K0sa+F97BxZthm/crw= +github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= +github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/containerd/errdefs v1.0.0 h1:tg5yIfIlQIrxYtu9ajqY42W3lpS19XqdxRQeEwYG8PI= +github.com/containerd/errdefs v1.0.0/go.mod h1:+YBYIdtsnF4Iw6nWZhJcqGSg/dwvV7tyJ/kCkyJ2k+M= +github.com/containerd/errdefs/pkg v0.3.0 h1:9IKJ06FvyNlexW690DXuQNx2KA2cUJXx151Xdx3ZPPE= +github.com/containerd/errdefs/pkg v0.3.0/go.mod h1:NJw6s9HwNuRhnjJhM7pylWwMyAkmCQvQ4GpJHEqRLVk= +github.com/containerd/log v0.1.0 h1:TCJt7ioM2cr/tfR8GPbGf9/VRAX8D2B4PjzCpfX540I= +github.com/containerd/log 
v0.1.0/go.mod h1:VRRf09a7mHDIRezVKTRCrOq78v577GXq3bSa3EhrzVo= +github.com/containerd/platforms v0.2.1 h1:zvwtM3rz2YHPQsF2CHYM8+KtB5dvhISiXh5ZpSBQv6A= +github.com/containerd/platforms v0.2.1/go.mod h1:XHCb+2/hzowdiut9rkudds9bE5yJ7npe7dG/wG+uFPw= +github.com/cpuguy83/dockercfg v0.3.2 h1:DlJTyZGBDlXqUZ2Dk2Q3xHs/FtnooJJVaad2S9GKorA= +github.com/cpuguy83/dockercfg v0.3.2/go.mod h1:sugsbF4//dDlL/i+S+rtpIWp+5h0BHJHfjj5/jFyUJc= +github.com/creack/pty v1.1.24 h1:bJrF4RRfyJnbTJqzRLHzcGaZK1NeM5kTC9jGgovnR1s= +github.com/creack/pty v1.1.24/go.mod h1:08sCNb52WyoAwi2QDyzUCTgcvVFhUzewun7wtTfvcwE= +github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= +github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f h1:lO4WD4F/rVNCu3HqELle0jiPLLBs70cWOduZpkS1E78= +github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f/go.mod h1:cuUVRXasLTGF7a8hSLbxyZXjz+1KgoB3wDUb6vlszIc= +github.com/distribution/reference v0.6.0 h1:0IXCQ5g4/QMHHkarYzh5l+u8T3t73zM5QvfrDyIgxBk= +github.com/distribution/reference v0.6.0/go.mod h1:BbU0aIcezP1/5jX/8MP0YiH4SdvB5Y4f/wlDRiLyi3E= +github.com/docker/go-connections v0.6.0 h1:LlMG9azAe1TqfR7sO+NJttz1gy6KO7VJBh+pMmjSD94= +github.com/docker/go-connections v0.6.0/go.mod h1:AahvXYshr6JgfUJGdDCs2b5EZG/vmaMAntpSFH5BFKE= +github.com/docker/go-units v0.5.0 h1:69rxXcBk27SvSaaxTtLh/8llcHD8vYHT7WSdRZ/jvr4= +github.com/docker/go-units v0.5.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk= +github.com/ebitengine/purego v0.10.0 h1:QIw4xfpWT6GWTzaW5XEKy3HXoqrJGx1ijYHzTF0/ISU= +github.com/ebitengine/purego v0.10.0/go.mod h1:iIjxzd6CiRiOG0UyXP+V1+jWqUXVjPKLAI0mRfJZTmQ= +github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg= +github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= +github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= +github.com/go-logr/logr v1.4.3 
h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI= +github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= +github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= +github.com/go-ole/go-ole v1.2.6 h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY= +github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0= +github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= +github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= +github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= +github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.28.0 h1:HWRh5R2+9EifMyIHV7ZV+MIZqgz+PMpZ14Jynv3O2Zs= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.28.0/go.mod h1:JfhWUomR1baixubs02l85lZYYOm7LV6om4ceouMv45c= +github.com/klauspost/compress v1.18.5 h1:/h1gH5Ce+VWNLSWqPzOVn6XBO+vJbCNGvjoaGBFW2IE= +github.com/klauspost/compress v1.18.5/go.mod h1:cwPg85FWrGar70rWktvGQj8/hthj3wpl0PGDogxkrSQ= +github.com/klauspost/cpuid/v2 v2.3.0 h1:S4CRMLnYUhGeDFDqkGriYKdfoFlDnMtqTiI/sFzhA9Y= +github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= +github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 h1:6E+4a0GO5zZEnZ81pIr0yLvtUWk2if982qA3F3QD6H4= +github.com/lufia/plan9stats 
v0.0.0-20211012122336-39d0f177ccd0/go.mod h1:zJYVVT2jmtg6P3p1VtQj7WsuWi/y4VnjVBn7F8KPB3I= +github.com/magiconair/properties v1.8.10 h1:s31yESBquKXCV9a/ScB3ESkOjUYYv+X0rg8SYxI99mE= +github.com/magiconair/properties v1.8.10/go.mod h1:Dhd985XPs7jluiymwWYZ0G4Z61jb3vdS329zhj2hYo0= +github.com/mdelapenya/tlscert v0.2.0 h1:7H81W6Z/4weDvZBNOfQte5GpIMo0lGYEeWbkGp5LJHI= +github.com/mdelapenya/tlscert v0.2.0/go.mod h1:O4njj3ELLnJjGdkN7M/vIVCpZ+Cf0L6muqOG4tLSl8o= +github.com/moby/docker-image-spec v1.3.1 h1:jMKff3w6PgbfSa69GfNg+zN/XLhfXJGnEx3Nl2EsFP0= +github.com/moby/docker-image-spec v1.3.1/go.mod h1:eKmb5VW8vQEh/BAr2yvVNvuiJuY6UIocYsFu/DxxRpo= +github.com/moby/go-archive v0.2.0 h1:zg5QDUM2mi0JIM9fdQZWC7U8+2ZfixfTYoHL7rWUcP8= +github.com/moby/go-archive v0.2.0/go.mod h1:mNeivT14o8xU+5q1YnNrkQVpK+dnNe/K6fHqnTg4qPU= +github.com/moby/moby/api v1.54.1 h1:TqVzuJkOLsgLDDwNLmYqACUuTehOHRGKiPhvH8V3Nn4= +github.com/moby/moby/api v1.54.1/go.mod h1:+RQ6wluLwtYaTd1WnPLykIDPekkuyD/ROWQClE83pzs= +github.com/moby/moby/client v0.4.0 h1:S+2XegzHQrrvTCvF6s5HFzcrywWQmuVnhOXe2kiWjIw= +github.com/moby/moby/client v0.4.0/go.mod h1:QWPbvWchQbxBNdaLSpoKpCdf5E+WxFAgNHogCWDoa7g= +github.com/moby/patternmatcher v0.6.1 h1:qlhtafmr6kgMIJjKJMDmMWq7WLkKIo23hsrpR3x084U= +github.com/moby/patternmatcher v0.6.1/go.mod h1:hDPoyOpDY7OrrMDLaYoY3hf52gNCR/YOUYxkhApJIxc= +github.com/moby/sys/sequential v0.6.0 h1:qrx7XFUd/5DxtqcoH1h438hF5TmOvzC/lspjy7zgvCU= +github.com/moby/sys/sequential v0.6.0/go.mod h1:uyv8EUTrca5PnDsdMGXhZe6CCe8U/UiTWd+lL+7b/Ko= +github.com/moby/sys/user v0.4.0 h1:jhcMKit7SA80hivmFJcbB1vqmw//wU61Zdui2eQXuMs= +github.com/moby/sys/user v0.4.0/go.mod h1:bG+tYYYJgaMtRKgEmuueC0hJEAZWwtIbZTB+85uoHjs= +github.com/moby/sys/userns v0.1.0 h1:tVLXkFOxVu9A64/yh59slHVv9ahO9UIev4JZusOLG/g= +github.com/moby/sys/userns v0.1.0/go.mod h1:IHUYgu/kao6N8YZlp9Cf444ySSvCmDlmzUcYfDHOl28= +github.com/moby/term v0.5.2 h1:6qk3FJAFDs6i/q3W/pQ97SX192qKfZgGjCQqfCJkgzQ= +github.com/moby/term v0.5.2/go.mod 
h1:d3djjFCrjnB+fl8NJux+EJzu0msscUP+f8it8hPkFLc= +github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U= +github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM= +github.com/opencontainers/image-spec v1.1.1 h1:y0fUlFfIZhPF1W537XOLg0/fcx6zcHCJwooC2xJA040= +github.com/opencontainers/image-spec v1.1.1/go.mod h1:qpqAh3Dmcf36wStyyWU+kCeDgrGnAve2nCC8+7h8Q0M= +github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U= +github.com/power-devops/perfstat v0.0.0-20240221224432-82ca36839d55 h1:o4JXh1EVt9k/+g42oCprj/FisM4qX9L3sZB3upGN2ZU= +github.com/power-devops/perfstat v0.0.0-20240221224432-82ca36839d55/go.mod h1:OmDBASR4679mdNQnz2pUhc2G8CO2JrUAVFDRBDP/hJE= +github.com/redis/go-redis/extra/rediscmd/v9 v9.18.0 h1:QY4nmPHLFAJjtT5O4OMUEOxP8WVaRNOFpcbmxT2NLZU= +github.com/redis/go-redis/extra/rediscmd/v9 v9.18.0/go.mod h1:WH8cY/0fT41Bsf341qzo8v4nx0GCE8FykAA23IVbVmo= +github.com/redis/go-redis/extra/redisotel/v9 v9.18.0 h1:2dKdoEYBJ0CZCLPiCdvvc7luz3DPwY6hKdzjL6m1eHE= +github.com/redis/go-redis/extra/redisotel/v9 v9.18.0/go.mod h1:WzkrVG9ro9BwCQD0eJOWn6AGL4Z1CleGflM45w1hu10= +github.com/redis/go-redis/v9 v9.18.0 h1:pMkxYPkEbMPwRdenAzUNyFNrDgHx9U+DrBabWNfSRQs= +github.com/redis/go-redis/v9 v9.18.0/go.mod h1:k3ufPphLU5YXwNTUcCRXGxUoF1fqxnhFQmscfkCoDA0= +github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ= +github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc= +github.com/shirou/gopsutil/v4 v4.26.3 h1:2ESdQt90yU3oXF/CdOlRCJxrP+Am1aBYubTMTfxJ1qc= +github.com/shirou/gopsutil/v4 v4.26.3/go.mod h1:LZ6ewCSkBqUpvSOf+LsTGnRinC6iaNUNMGBtDkJBaLQ= +github.com/sirupsen/logrus v1.9.4 h1:TsZE7l11zFCLZnZ+teH4Umoq5BhEIfIzfRDZ1Uzql2w= +github.com/sirupsen/logrus v1.9.4/go.mod h1:ftWc9WdOfJ0a92nsE2jF5u5ZwH8Bv2zdeOC42RjbV2g= +github.com/stretchr/objx v0.5.3 
h1:jmXUvGomnU1o3W/V5h2VEradbpJDwGrzugQQvL0POH4= +github.com/stretchr/objx v0.5.3/go.mod h1:rDQraq+vQZU7Fde9LOZLr8Tax6zZvy4kuNKF+QYS+U0= +github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= +github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= +github.com/testcontainers/testcontainers-go v0.42.0 h1:He3IhTzTZOygSXLJPMX7n44XtK+qhjat1nI9cneBbUY= +github.com/testcontainers/testcontainers-go v0.42.0/go.mod h1:vZjdY1YmUA1qEForxOIOazfsrdyORJAbhi0bp8plN30= +github.com/testcontainers/testcontainers-go/modules/redis v0.42.0 h1:id/6LH8ZeDrtAUVSuNvZUAJ1kVpb82y1pr9yweAWsRg= +github.com/testcontainers/testcontainers-go/modules/redis v0.42.0/go.mod h1:uF0jI8FITagQpBNOgweGBmPf6rP4K0SeL1XFPbsZSSY= +github.com/tklauser/go-sysconf v0.3.16 h1:frioLaCQSsF5Cy1jgRBrzr6t502KIIwQ0MArYICU0nA= +github.com/tklauser/go-sysconf v0.3.16/go.mod h1:/qNL9xxDhc7tx3HSRsLWNnuzbVfh3e7gh/BmM179nYI= +github.com/tklauser/numcpus v0.11.0 h1:nSTwhKH5e1dMNsCdVBukSZrURJRoHbSEQjdEbY+9RXw= +github.com/tklauser/numcpus v0.11.0/go.mod h1:z+LwcLq54uWZTX0u/bGobaV34u6V7KNlTZejzM6/3MQ= +github.com/yuin/gopher-lua v1.1.1 h1:kYKnWBjvbNP4XLT3+bPEwAXJx262OhaHDWDVOPjL46M= +github.com/yuin/gopher-lua v1.1.1/go.mod h1:GBR0iDaNXjAgGg9zfCvksxSRnQx76gclCIb7kdAd1Pw= +github.com/yusufpapurcu/wmi v1.2.4 h1:zFUKzehAFReQwLys1b/iSMl+JQGSCSjtVqQn9bBrPo0= +github.com/yusufpapurcu/wmi v1.2.4/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0= +github.com/zeebo/xxh3 v1.0.2 h1:xZmwmqxHZA8AI603jOQ0tMqmBr9lPeFwGg6d+xy9DC0= +github.com/zeebo/xxh3 v1.0.2/go.mod h1:5NWz9Sef7zIDm2JHfFlcQvNekmcEl9ekUZQQKCYaDcA= +go.opentelemetry.io/auto/sdk v1.2.1 h1:jXsnJ4Lmnqd11kwkBV2LgLoFMZKizbCi5fNZ/ipaZ64= +go.opentelemetry.io/auto/sdk v1.2.1/go.mod h1:KRTj+aOaElaLi+wW1kO/DZRXwkF4C5xPbEe3ZiIhN7Y= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.68.0 h1:CqXxU8VOmDefoh0+ztfGaymYbhdB/tT3zs79QaZTNGY= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp 
v0.68.0/go.mod h1:BuhAPThV8PBHBvg8ZzZ/Ok3idOdhWIodywz2xEcRbJo= +go.opentelemetry.io/otel v1.43.0 h1:mYIM03dnh5zfN7HautFE4ieIig9amkNANT+xcVxAj9I= +go.opentelemetry.io/otel v1.43.0/go.mod h1:JuG+u74mvjvcm8vj8pI5XiHy1zDeoCS2LB1spIq7Ay0= +go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.43.0 h1:8UQVDcZxOJLtX6gxtDt3vY2WTgvZqMQRzjsqiIHQdkc= +go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.43.0/go.mod h1:2lmweYCiHYpEjQ/lSJBYhj9jP1zvCvQW4BqL9dnT7FQ= +go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.43.0 h1:w1K+pCJoPpQifuVpsKamUdn9U0zM3xUziVOqsGksUrY= +go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.43.0/go.mod h1:HBy4BjzgVE8139ieRI75oXm3EcDN+6GhD88JT1Kjvxg= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.43.0 h1:88Y4s2C8oTui1LGM6bTWkw0ICGcOLCAI5l6zsD1j20k= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.43.0/go.mod h1:Vl1/iaggsuRlrHf/hfPJPvVag77kKyvrLeD10kpMl+A= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.43.0 h1:RAE+JPfvEmvy+0LzyUA25/SGawPwIUbZ6u0Wug54sLc= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.43.0/go.mod h1:AGmbycVGEsRx9mXMZ75CsOyhSP6MFIcj/6dnG+vhVjk= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.43.0 h1:3iZJKlCZufyRzPzlQhUIWVmfltrXuGyfjREgGP3UUjc= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.43.0/go.mod h1:/G+nUPfhq2e+qiXMGxMwumDrP5jtzU+mWN7/sjT2rak= +go.opentelemetry.io/otel/exporters/stdout/stdoutmetric v1.43.0 h1:TC+BewnDpeiAmcscXbGMfxkO+mwYUwE/VySwvw88PfA= +go.opentelemetry.io/otel/exporters/stdout/stdoutmetric v1.43.0/go.mod h1:J/ZyF4vfPwsSr9xJSPyQ4LqtcTPULFR64KwTikGLe+A= +go.opentelemetry.io/otel/exporters/stdout/stdouttrace v1.43.0 h1:mS47AX77OtFfKG4vtp+84kuGSFZHTyxtXIN269vChY0= +go.opentelemetry.io/otel/exporters/stdout/stdouttrace v1.43.0/go.mod h1:PJnsC41lAGncJlPUniSwM81gc80GkgWJWr3cu2nKEtU= +go.opentelemetry.io/otel/metric v1.43.0 
h1:d7638QeInOnuwOONPp4JAOGfbCEpYb+K6DVWvdxGzgM= +go.opentelemetry.io/otel/metric v1.43.0/go.mod h1:RDnPtIxvqlgO8GRW18W6Z/4P462ldprJtfxHxyKd2PY= +go.opentelemetry.io/otel/sdk v1.43.0 h1:pi5mE86i5rTeLXqoF/hhiBtUNcrAGHLKQdhg4h4V9Dg= +go.opentelemetry.io/otel/sdk v1.43.0/go.mod h1:P+IkVU3iWukmiit/Yf9AWvpyRDlUeBaRg6Y+C58QHzg= +go.opentelemetry.io/otel/sdk/metric v1.43.0 h1:S88dyqXjJkuBNLeMcVPRFXpRw2fuwdvfCGLEo89fDkw= +go.opentelemetry.io/otel/sdk/metric v1.43.0/go.mod h1:C/RJtwSEJ5hzTiUz5pXF1kILHStzb9zFlIEe85bhj6A= +go.opentelemetry.io/otel/trace v1.43.0 h1:BkNrHpup+4k4w+ZZ86CZoHHEkohws8AY+WTX09nk+3A= +go.opentelemetry.io/otel/trace v1.43.0/go.mod h1:/QJhyVBUUswCphDVxq+8mld+AvhXZLhe+8WVFxiFff0= +go.opentelemetry.io/proto/otlp v1.10.0 h1:IQRWgT5srOCYfiWnpqUYz9CVmbO8bFmKcwYxpuCSL2g= +go.opentelemetry.io/proto/otlp v1.10.0/go.mod h1:/CV4QoCR/S9yaPj8utp3lvQPoqMtxXdzn7ozvvozVqk= +go.uber.org/atomic v1.11.0 h1:ZvwS0R+56ePWxUNi+Atn9dWONBPp/AUETXlHW0DxSjE= +go.uber.org/atomic v1.11.0/go.mod h1:LUxbIzbOniOlMKjJjyPfpl4v+PKK2cNJn91OQbhoJI0= +go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= +go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= +golang.org/x/crypto v0.49.0 h1:+Ng2ULVvLHnJ/ZFEq4KdcDd/cfjrrjjNSXNzxg0Y4U4= +golang.org/x/crypto v0.49.0/go.mod h1:ErX4dUh2UM+CFYiXZRTcMpEcN8b/1gxEuv3nODoYtCA= +golang.org/x/net v0.52.0 h1:He/TN1l0e4mmR3QqHMT2Xab3Aj3L9qjbhRm78/6jrW0= +golang.org/x/net v0.52.0/go.mod h1:R1MAz7uMZxVMualyPXb+VaqGSa3LIaUqk0eEt3w36Sw= +golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20201204225414-ed752295db88/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210616094352-59db8d763f22/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.42.0 h1:omrd2nAlyT5ESRdCLYdm3+fMfNFE/+Rf4bDIQImRJeo= +golang.org/x/sys v0.42.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw= 
+golang.org/x/term v0.41.0 h1:QCgPso/Q3RTJx2Th4bDLqML4W6iJiaXFq2/ftQF13YU= +golang.org/x/term v0.41.0/go.mod h1:3pfBgksrReYfZ5lvYM0kSO0LIkAl4Yl2bXOkKP7Ec2A= +golang.org/x/text v0.36.0 h1:JfKh3XmcRPqZPKevfXVpI1wXPTqbkE5f7JA92a55Yxg= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +gonum.org/v1/gonum v0.17.0 h1:VbpOemQlsSMrYmn7T2OUvQ4dqxQXU+ouZFQsZOx50z4= +gonum.org/v1/gonum v0.17.0/go.mod h1:El3tOrEuMpv2UdMrbNlKEh9vd86bmQ6vqIcDwxEOc1E= +google.golang.org/genproto/googleapis/api v0.0.0-20260401024825-9d38bb4040a9 h1:VPWxll4HlMw1Vs/qXtN7BvhZqsS9cdAittCNvVENElA= +google.golang.org/genproto/googleapis/api v0.0.0-20260401024825-9d38bb4040a9/go.mod h1:7QBABkRtR8z+TEnmXTqIqwJLlzrZKVfAUm7tY3yGv0M= +google.golang.org/genproto/googleapis/rpc v0.0.0-20260401024825-9d38bb4040a9 h1:m8qni9SQFH0tJc1X0vmnpw/0t+AImlSvp30sEupozUg= +google.golang.org/genproto/googleapis/rpc v0.0.0-20260401024825-9d38bb4040a9/go.mod h1:4Hqkh8ycfw05ld/3BWL7rJOSfebL2Q+DVDeRgYgxUU8= +google.golang.org/grpc v1.80.0 h1:Xr6m2WmWZLETvUNvIUmeD5OAagMw3FiKmMlTdViWsHM= +google.golang.org/grpc v1.80.0/go.mod h1:ho/dLnxwi3EDJA4Zghp7k2Ec1+c2jqup0bFkw07bwF4= +google.golang.org/protobuf v1.36.11 h1:fV6ZwhNocDyBLK0dj+fg8ektcVegBBuEolpbTQyBNVE= +google.golang.org/protobuf v1.36.11/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gotest.tools/v3 v3.5.2 h1:7koQfIKdy+I8UTetycgUqXWSDwpgv193Ka+qRsmBY8Q= +gotest.tools/v3 v3.5.2/go.mod h1:LtdLGcnqToBH83WByAAi/wiwSFCArdFIUV/xxN4pcjA= 
+pgregory.net/rapid v1.2.0 h1:keKAYRcjm+e1F0oAuU5F5+YPAWcyxNNRK2wud503Gnk= +pgregory.net/rapid v1.2.0/go.mod h1:PY5XlDGj0+V1FCq0o192FdRhpKHGTRIWBgqjDBTrq04= diff --git a/notification/intent_acceptance_contract_test.go b/notification/intent_acceptance_contract_test.go new file mode 100644 index 0000000..2a00801 --- /dev/null +++ b/notification/intent_acceptance_contract_test.go @@ -0,0 +1,41 @@ +package notification + +import ( + "path/filepath" + "testing" + + "github.com/stretchr/testify/require" +) + +var expectedNotificationIntentAcceptanceDocumentationSnippets = []string{ + "`NOTIFICATION_INTENTS_READ_BLOCK_TIMEOUT` with default `2s`", + "`NOTIFICATION_ADMIN_EMAILS_LOBBY_APPLICATION_SUBMITTED`", + "when no stored stream offset exists, the consumer starts from `0-0`", + "the persisted offset advances only after durable acceptance or durable malformed-intent recording", + "`failure_code=idempotency_conflict`", + "Accepted intents use the original Redis Stream `stream_entry_id` as `notification_id`.", +} + +func TestNotificationIntentAcceptanceDocsStayInSync(t *testing.T) { + t.Parallel() + + readme := loadTextFile(t, "README.md") + flowsDoc := loadTextFile(t, filepath.Join("docs", "flows.md")) + runtimeDoc := loadTextFile(t, filepath.Join("docs", "runtime.md")) + docsIndex := loadTextFile(t, filepath.Join("docs", "README.md")) + normalizedReadme := normalizeWhitespace(readme) + normalizedFlowsDoc := normalizeWhitespace(flowsDoc) + normalizedRuntimeDoc := normalizeWhitespace(runtimeDoc) + + require.Contains(t, docsIndex, "- [Main flows](flows.md)") + + for _, snippet := range expectedNotificationIntentAcceptanceDocumentationSnippets { + normalizedSnippet := normalizeWhitespace(snippet) + require.Contains(t, normalizedReadme, normalizedSnippet) + } + + require.Contains(t, normalizedRuntimeDoc, normalizeWhitespace("starts from stored offset or `0-0`")) + require.Contains(t, normalizedFlowsDoc, normalizeWhitespace("Duplicate handling is scoped by `(producer, 
idempotency_key)`")) + require.Contains(t, normalizedFlowsDoc, normalizeWhitespace("same normalized content")) + require.Contains(t, normalizedFlowsDoc, normalizeWhitespace("idempotency conflict")) +} diff --git a/notification/internal/adapters/doc.go b/notification/internal/adapters/doc.go new file mode 100644 index 0000000..9dc6d59 --- /dev/null +++ b/notification/internal/adapters/doc.go @@ -0,0 +1,2 @@ +// Package adapters reserves the adapter namespace of Notification Service. +package adapters diff --git a/notification/internal/adapters/redis/client.go b/notification/internal/adapters/redis/client.go new file mode 100644 index 0000000..ba9a9f9 --- /dev/null +++ b/notification/internal/adapters/redis/client.go @@ -0,0 +1,72 @@ +// Package redisadapter provides the Redis client helpers used by Notification +// Service runtime wiring. +package redisadapter + +import ( + "context" + "fmt" + + "galaxy/notification/internal/config" + "galaxy/notification/internal/telemetry" + + "github.com/redis/go-redis/extra/redisotel/v9" + "github.com/redis/go-redis/v9" +) + +// NewClient constructs one Redis client from cfg. +func NewClient(cfg config.RedisConfig) *redis.Client { + return redis.NewClient(&redis.Options{ + Addr: cfg.Addr, + Username: cfg.Username, + Password: cfg.Password, + DB: cfg.DB, + TLSConfig: cfg.TLSConfig(), + DialTimeout: cfg.OperationTimeout, + ReadTimeout: cfg.OperationTimeout, + WriteTimeout: cfg.OperationTimeout, + }) +} + +// InstrumentClient attaches Redis tracing and metrics exporters to client when +// telemetryRuntime is available. 
+func InstrumentClient(client *redis.Client, telemetryRuntime *telemetry.Runtime) error { + if client == nil { + return fmt.Errorf("instrument redis client: nil client") + } + if telemetryRuntime == nil { + return nil + } + + if err := redisotel.InstrumentTracing( + client, + redisotel.WithTracerProvider(telemetryRuntime.TracerProvider()), + redisotel.WithDBStatement(false), + ); err != nil { + return fmt.Errorf("instrument redis client tracing: %w", err) + } + if err := redisotel.InstrumentMetrics( + client, + redisotel.WithMeterProvider(telemetryRuntime.MeterProvider()), + ); err != nil { + return fmt.Errorf("instrument redis client metrics: %w", err) + } + + return nil +} + +// Ping performs the startup Redis connectivity check bounded by +// cfg.OperationTimeout. +func Ping(ctx context.Context, cfg config.RedisConfig, client *redis.Client) error { + if client == nil { + return fmt.Errorf("ping redis: nil client") + } + + pingCtx, cancel := context.WithTimeout(ctx, cfg.OperationTimeout) + defer cancel() + + if err := client.Ping(pingCtx).Err(); err != nil { + return fmt.Errorf("ping redis: %w", err) + } + + return nil +} diff --git a/notification/internal/adapters/redisstate/acceptance_store.go b/notification/internal/adapters/redisstate/acceptance_store.go new file mode 100644 index 0000000..4a75625 --- /dev/null +++ b/notification/internal/adapters/redisstate/acceptance_store.go @@ -0,0 +1,140 @@ +package redisstate + +import ( + "context" + "errors" + "fmt" + + "galaxy/notification/internal/api/intentstream" + "galaxy/notification/internal/service/acceptintent" + + "github.com/redis/go-redis/v9" +) + +// AcceptanceStore provides the Redis-backed durable storage used by the +// intent-acceptance use case. +type AcceptanceStore struct { + client *redis.Client + writer *AtomicWriter + keys Keyspace + cfg AcceptanceConfig +} + +// NewAcceptanceStore constructs one Redis-backed acceptance store. 
+func NewAcceptanceStore(client *redis.Client, cfg AcceptanceConfig) (*AcceptanceStore, error) { + if client == nil { + return nil, errors.New("new notification acceptance store: nil redis client") + } + + writer, err := NewAtomicWriter(client, cfg) + if err != nil { + return nil, fmt.Errorf("new notification acceptance store: %w", err) + } + + return &AcceptanceStore{ + client: client, + writer: writer, + keys: Keyspace{}, + cfg: cfg, + }, nil +} + +// CreateAcceptance stores one complete accepted notification write set in +// Redis. +func (store *AcceptanceStore) CreateAcceptance(ctx context.Context, input acceptintent.CreateAcceptanceInput) error { + if store == nil || store.client == nil || store.writer == nil { + return errors.New("create notification acceptance: nil store") + } + if ctx == nil { + return errors.New("create notification acceptance: nil context") + } + if err := input.Validate(); err != nil { + return fmt.Errorf("create notification acceptance: %w", err) + } + + err := store.writer.CreateAcceptance(ctx, input) + if errors.Is(err, ErrConflict) { + return fmt.Errorf("create notification acceptance: %w", acceptintent.ErrConflict) + } + if err != nil { + return fmt.Errorf("create notification acceptance: %w", err) + } + + return nil +} + +// GetIdempotency loads one accepted idempotency scope from Redis. 
+func (store *AcceptanceStore) GetIdempotency(ctx context.Context, producer intentstream.Producer, idempotencyKey string) (acceptintent.IdempotencyRecord, bool, error) { + if store == nil || store.client == nil { + return acceptintent.IdempotencyRecord{}, false, errors.New("get notification idempotency: nil store") + } + if ctx == nil { + return acceptintent.IdempotencyRecord{}, false, errors.New("get notification idempotency: nil context") + } + + payload, err := store.client.Get(ctx, store.keys.Idempotency(producer, idempotencyKey)).Bytes() + switch { + case errors.Is(err, redis.Nil): + return acceptintent.IdempotencyRecord{}, false, nil + case err != nil: + return acceptintent.IdempotencyRecord{}, false, fmt.Errorf("get notification idempotency: %w", err) + } + + record, err := UnmarshalIdempotency(payload) + if err != nil { + return acceptintent.IdempotencyRecord{}, false, fmt.Errorf("get notification idempotency: %w", err) + } + + return record, true, nil +} + +// GetNotification loads one accepted notification record from Redis. 
+func (store *AcceptanceStore) GetNotification(ctx context.Context, notificationID string) (acceptintent.NotificationRecord, bool, error) { + if store == nil || store.client == nil { + return acceptintent.NotificationRecord{}, false, errors.New("get notification record: nil store") + } + if ctx == nil { + return acceptintent.NotificationRecord{}, false, errors.New("get notification record: nil context") + } + + payload, err := store.client.Get(ctx, store.keys.Notification(notificationID)).Bytes() + switch { + case errors.Is(err, redis.Nil): + return acceptintent.NotificationRecord{}, false, nil + case err != nil: + return acceptintent.NotificationRecord{}, false, fmt.Errorf("get notification record: %w", err) + } + + record, err := UnmarshalNotification(payload) + if err != nil { + return acceptintent.NotificationRecord{}, false, fmt.Errorf("get notification record: %w", err) + } + + return record, true, nil +} + +// GetRoute loads one accepted notification route by NotificationID and +// RouteID. 
+func (store *AcceptanceStore) GetRoute(ctx context.Context, notificationID string, routeID string) (acceptintent.NotificationRoute, bool, error) { + if store == nil || store.client == nil { + return acceptintent.NotificationRoute{}, false, errors.New("get notification route: nil store") + } + if ctx == nil { + return acceptintent.NotificationRoute{}, false, errors.New("get notification route: nil context") + } + + payload, err := store.client.Get(ctx, store.keys.Route(notificationID, routeID)).Bytes() + switch { + case errors.Is(err, redis.Nil): + return acceptintent.NotificationRoute{}, false, nil + case err != nil: + return acceptintent.NotificationRoute{}, false, fmt.Errorf("get notification route: %w", err) + } + + record, err := UnmarshalRoute(payload) + if err != nil { + return acceptintent.NotificationRoute{}, false, fmt.Errorf("get notification route: %w", err) + } + + return record, true, nil +} diff --git a/notification/internal/adapters/redisstate/acceptance_store_test.go b/notification/internal/adapters/redisstate/acceptance_store_test.go new file mode 100644 index 0000000..d4e8793 --- /dev/null +++ b/notification/internal/adapters/redisstate/acceptance_store_test.go @@ -0,0 +1,311 @@ +package redisstate + +import ( + "context" + "io" + "log/slog" + "testing" + "time" + + "galaxy/notification/internal/api/intentstream" + "galaxy/notification/internal/config" + "galaxy/notification/internal/service/acceptintent" + "galaxy/notification/internal/service/malformedintent" + + "github.com/alicebob/miniredis/v2" + "github.com/redis/go-redis/v9" + "github.com/stretchr/testify/require" +) + +func TestAcceptanceStoreCreateAcceptancePersistsNotificationRoutesAndSchedule(t *testing.T) { + t.Parallel() + + server := miniredis.RunT(t) + client := newTestRedisClient(t, server) + + store, err := NewAcceptanceStore(client, AcceptanceConfig{ + RecordTTL: 24 * time.Hour, + DeadLetterTTL: 72 * time.Hour, + IdempotencyTTL: 7 * 24 * time.Hour, + }) + require.NoError(t, err) 
+ + now := time.UnixMilli(1775121700000).UTC() + input := validAdminAcceptanceInput(now) + + require.NoError(t, store.CreateAcceptance(context.Background(), input)) + + notificationRecord, found, err := store.GetNotification(context.Background(), input.Notification.NotificationID) + require.NoError(t, err) + require.True(t, found) + require.Equal(t, input.Notification.NotificationID, notificationRecord.NotificationID) + + idempotencyRecord, found, err := store.GetIdempotency(context.Background(), input.Idempotency.Producer, input.Idempotency.IdempotencyKey) + require.NoError(t, err) + require.True(t, found) + require.Equal(t, input.Idempotency.RequestFingerprint, idempotencyRecord.RequestFingerprint) + + pushRoutePayload, err := client.Get(context.Background(), Keyspace{}.Route(input.Notification.NotificationID, "push:email:owner@example.com")).Bytes() + require.NoError(t, err) + pushRoute, err := UnmarshalRoute(pushRoutePayload) + require.NoError(t, err) + require.Equal(t, acceptintent.RouteStatusSkipped, pushRoute.Status) + + emailRouteKey := Keyspace{}.Route(input.Notification.NotificationID, "email:email:owner@example.com") + emailRoutePayload, err := client.Get(context.Background(), emailRouteKey).Bytes() + require.NoError(t, err) + emailRoute, err := UnmarshalRoute(emailRoutePayload) + require.NoError(t, err) + require.Equal(t, acceptintent.RouteStatusPending, emailRoute.Status) + + scheduled, err := client.ZRangeWithScores(context.Background(), Keyspace{}.RouteSchedule(), 0, -1).Result() + require.NoError(t, err) + require.Len(t, scheduled, 1) + require.Equal(t, emailRouteKey, scheduled[0].Member) + require.Equal(t, float64(now.UnixMilli()), scheduled[0].Score) + + notificationTTL, err := client.PTTL(context.Background(), Keyspace{}.Notification(input.Notification.NotificationID)).Result() + require.NoError(t, err) + require.Greater(t, notificationTTL, 23*time.Hour) + require.LessOrEqual(t, notificationTTL, 24*time.Hour) + + routeTTL, err := 
client.PTTL(context.Background(), emailRouteKey).Result() + require.NoError(t, err) + require.Greater(t, routeTTL, 23*time.Hour) + require.LessOrEqual(t, routeTTL, 24*time.Hour) + + idempotencyTTL, err := client.PTTL(context.Background(), Keyspace{}.Idempotency(input.Idempotency.Producer, input.Idempotency.IdempotencyKey)).Result() + require.NoError(t, err) + require.Greater(t, idempotencyTTL, 6*24*time.Hour) + require.LessOrEqual(t, idempotencyTTL, 7*24*time.Hour) +} + +func TestMalformedIntentStoreRecordPersistsEntry(t *testing.T) { + t.Parallel() + + server := miniredis.RunT(t) + client := newTestRedisClient(t, server) + + store, err := NewMalformedIntentStore(client, 72*time.Hour) + require.NoError(t, err) + + entry := malformedintent.Entry{ + StreamEntryID: "1775121700000-0", + NotificationType: "game.turn.ready", + Producer: "game_master", + IdempotencyKey: "game-123:turn-54", + FailureCode: malformedintent.FailureCodeInvalidPayload, + FailureMessage: "payload_json.turn_number is required", + RawFields: map[string]any{ + "notification_type": "game.turn.ready", + }, + RecordedAt: time.UnixMilli(1775121700000).UTC(), + } + + require.NoError(t, store.Record(context.Background(), entry)) + + payload, err := client.Get(context.Background(), Keyspace{}.MalformedIntent(entry.StreamEntryID)).Bytes() + require.NoError(t, err) + recordedEntry, err := UnmarshalMalformedIntent(payload) + require.NoError(t, err) + require.Equal(t, entry.StreamEntryID, recordedEntry.StreamEntryID) + require.Equal(t, entry.FailureCode, recordedEntry.FailureCode) + + ttl, err := client.PTTL(context.Background(), Keyspace{}.MalformedIntent(entry.StreamEntryID)).Result() + require.NoError(t, err) + require.Greater(t, ttl, 71*time.Hour) + require.LessOrEqual(t, ttl, 72*time.Hour) +} + +func TestStreamOffsetStoreLoadAndSave(t *testing.T) { + t.Parallel() + + server := miniredis.RunT(t) + client := newTestRedisClient(t, server) + + store, err := NewStreamOffsetStore(client) + require.NoError(t, 
err) + + _, found, err := store.Load(context.Background(), "notification:intents") + require.NoError(t, err) + require.False(t, found) + + require.NoError(t, store.Save(context.Background(), "notification:intents", "1775121700000-0")) + + entryID, found, err := store.Load(context.Background(), "notification:intents") + require.NoError(t, err) + require.True(t, found) + require.Equal(t, "1775121700000-0", entryID) +} + +func TestIntentStreamLagReaderReadsOldestUnprocessedEntry(t *testing.T) { + t.Parallel() + + server := miniredis.RunT(t) + client := newTestRedisClient(t, server) + + store, err := NewStreamOffsetStore(client) + require.NoError(t, err) + reader, err := NewIntentStreamLagReader(store, "notification:intents") + require.NoError(t, err) + + firstID, err := client.XAdd(context.Background(), &redis.XAddArgs{ + Stream: "notification:intents", + ID: "1775121700000-0", + Values: map[string]any{"payload": "first"}, + }).Result() + require.NoError(t, err) + secondID, err := client.XAdd(context.Background(), &redis.XAddArgs{ + Stream: "notification:intents", + ID: "1775121701000-0", + Values: map[string]any{"payload": "second"}, + }).Result() + require.NoError(t, err) + + snapshot, err := reader.ReadIntentStreamLagSnapshot(context.Background()) + require.NoError(t, err) + require.NotNil(t, snapshot.OldestUnprocessedAt) + require.Equal(t, time.UnixMilli(1775121700000).UTC(), *snapshot.OldestUnprocessedAt) + + require.NoError(t, store.Save(context.Background(), "notification:intents", firstID)) + snapshot, err = reader.ReadIntentStreamLagSnapshot(context.Background()) + require.NoError(t, err) + require.NotNil(t, snapshot.OldestUnprocessedAt) + require.Equal(t, time.UnixMilli(1775121701000).UTC(), *snapshot.OldestUnprocessedAt) + + require.NoError(t, store.Save(context.Background(), "notification:intents", secondID)) + snapshot, err = reader.ReadIntentStreamLagSnapshot(context.Background()) + require.NoError(t, err) + require.Nil(t, snapshot.OldestUnprocessedAt) 
+} + +func TestAcceptanceStoreWorksWithAcceptIntentService(t *testing.T) { + t.Parallel() + + server := miniredis.RunT(t) + client := newTestRedisClient(t, server) + + store, err := NewAcceptanceStore(client, AcceptanceConfig{ + RecordTTL: 24 * time.Hour, + DeadLetterTTL: 72 * time.Hour, + IdempotencyTTL: 7 * 24 * time.Hour, + }) + require.NoError(t, err) + + service, err := acceptintent.New(acceptintent.Config{ + Store: store, + UserDirectory: staticUserDirectory{}, + Clock: fixedClock{now: time.UnixMilli(1775121700000).UTC()}, + Logger: slog.New(slog.NewTextHandler(io.Discard, nil)), + PushMaxAttempts: 3, + EmailMaxAttempts: 7, + IdempotencyTTL: 7 * 24 * time.Hour, + AdminRouting: config.AdminRoutingConfig{ + LobbyApplicationSubmitted: []string{"owner@example.com"}, + }, + }) + require.NoError(t, err) + + result, err := service.Execute(context.Background(), acceptintent.AcceptInput{ + NotificationID: "1775121700000-0", + Intent: intentstream.Intent{ + NotificationType: intentstream.NotificationTypeLobbyApplicationSubmitted, + Producer: intentstream.ProducerGameLobby, + AudienceKind: intentstream.AudienceKindAdminEmail, + IdempotencyKey: "game-456:application-submitted:user-42", + OccurredAt: time.UnixMilli(1775121700002).UTC(), + PayloadJSON: `{"applicant_name":"Nova Pilot","applicant_user_id":"user-42","game_id":"game-456","game_name":"Orion Front"}`, + }, + }) + require.NoError(t, err) + require.Equal(t, acceptintent.OutcomeAccepted, result.Outcome) + + record, found, err := store.GetNotification(context.Background(), "1775121700000-0") + require.NoError(t, err) + require.True(t, found) + require.Equal(t, "1775121700000-0", record.NotificationID) +} + +type fixedClock struct { + now time.Time +} + +func (clock fixedClock) Now() time.Time { + return clock.now +} + +func validAdminAcceptanceInput(now time.Time) acceptintent.CreateAcceptanceInput { + return acceptintent.CreateAcceptanceInput{ + Notification: acceptintent.NotificationRecord{ + NotificationID: 
"1775121700000-0", + NotificationType: intentstream.NotificationTypeLobbyApplicationSubmitted, + Producer: intentstream.ProducerGameLobby, + AudienceKind: intentstream.AudienceKindAdminEmail, + PayloadJSON: `{"applicant_name":"Nova Pilot","applicant_user_id":"user-42","game_id":"game-456","game_name":"Orion Front"}`, + IdempotencyKey: "game-456:application-submitted:user-42", + RequestFingerprint: "sha256:deadbeef", + OccurredAt: now, + AcceptedAt: now, + UpdatedAt: now, + }, + Routes: []acceptintent.NotificationRoute{ + { + NotificationID: "1775121700000-0", + RouteID: "push:email:owner@example.com", + Channel: intentstream.ChannelPush, + RecipientRef: "email:owner@example.com", + Status: acceptintent.RouteStatusSkipped, + AttemptCount: 0, + MaxAttempts: 3, + ResolvedEmail: "owner@example.com", + ResolvedLocale: "en", + CreatedAt: now, + UpdatedAt: now, + SkippedAt: now, + }, + { + NotificationID: "1775121700000-0", + RouteID: "email:email:owner@example.com", + Channel: intentstream.ChannelEmail, + RecipientRef: "email:owner@example.com", + Status: acceptintent.RouteStatusPending, + AttemptCount: 0, + MaxAttempts: 7, + NextAttemptAt: now, + ResolvedEmail: "owner@example.com", + ResolvedLocale: "en", + CreatedAt: now, + UpdatedAt: now, + }, + }, + Idempotency: acceptintent.IdempotencyRecord{ + Producer: intentstream.ProducerGameLobby, + IdempotencyKey: "game-456:application-submitted:user-42", + NotificationID: "1775121700000-0", + RequestFingerprint: "sha256:deadbeef", + CreatedAt: now, + ExpiresAt: now.Add(7 * 24 * time.Hour), + }, + } +} + +func newTestRedisClient(t *testing.T, server *miniredis.Miniredis) *redis.Client { + t.Helper() + + client := redis.NewClient(&redis.Options{ + Addr: server.Addr(), + Protocol: 2, + DisableIdentity: true, + }) + t.Cleanup(func() { + require.NoError(t, client.Close()) + }) + + return client +} + +type staticUserDirectory struct{} + +func (staticUserDirectory) GetUserByID(context.Context, string) (acceptintent.UserRecord, 
error) { + return acceptintent.UserRecord{}, acceptintent.ErrRecipientNotFound +} diff --git a/notification/internal/adapters/redisstate/atomic_writer.go b/notification/internal/adapters/redisstate/atomic_writer.go new file mode 100644 index 0000000..6e6f660 --- /dev/null +++ b/notification/internal/adapters/redisstate/atomic_writer.go @@ -0,0 +1,157 @@ +package redisstate + +import ( + "context" + "errors" + "fmt" + "time" + + "galaxy/notification/internal/service/acceptintent" + + "github.com/redis/go-redis/v9" +) + +// AcceptanceConfig stores the retention settings applied to accepted durable +// notification state. +type AcceptanceConfig struct { + // RecordTTL stores the retention period applied to notification and route + // records. + RecordTTL time.Duration + + // DeadLetterTTL stores the retention period applied to route dead-letter + // entries. + DeadLetterTTL time.Duration + + // IdempotencyTTL stores the retention period applied to idempotency + // reservations. + IdempotencyTTL time.Duration +} + +// Validate reports whether cfg contains usable retention settings. +func (cfg AcceptanceConfig) Validate() error { + switch { + case cfg.RecordTTL <= 0: + return fmt.Errorf("record ttl must be positive") + case cfg.DeadLetterTTL <= 0: + return fmt.Errorf("dead-letter ttl must be positive") + case cfg.IdempotencyTTL <= 0: + return fmt.Errorf("idempotency ttl must be positive") + default: + return nil + } +} + +// AtomicWriter performs the minimal multi-key Redis mutations required by +// notification intent acceptance. +type AtomicWriter struct { + client *redis.Client + keys Keyspace + cfg AcceptanceConfig +} + +// NewAtomicWriter constructs a low-level Redis mutation helper. 
+func NewAtomicWriter(client *redis.Client, cfg AcceptanceConfig) (*AtomicWriter, error) { + if client == nil { + return nil, errors.New("new notification redis atomic writer: nil client") + } + if err := cfg.Validate(); err != nil { + return nil, fmt.Errorf("new notification redis atomic writer: %w", err) + } + + return &AtomicWriter{ + client: client, + keys: Keyspace{}, + cfg: cfg, + }, nil +} + +// CreateAcceptance stores one notification record, all derived routes, and +// the matching idempotency reservation in one optimistic Redis transaction. +func (writer *AtomicWriter) CreateAcceptance(ctx context.Context, input acceptintent.CreateAcceptanceInput) error { + if writer == nil || writer.client == nil { + return errors.New("create notification acceptance in redis: nil writer") + } + if ctx == nil { + return errors.New("create notification acceptance in redis: nil context") + } + if err := input.Validate(); err != nil { + return fmt.Errorf("create notification acceptance in redis: %w", err) + } + + notificationPayload, err := MarshalNotification(input.Notification) + if err != nil { + return fmt.Errorf("create notification acceptance in redis: %w", err) + } + idempotencyPayload, err := MarshalIdempotency(input.Idempotency) + if err != nil { + return fmt.Errorf("create notification acceptance in redis: %w", err) + } + + routePayloads := make([][]byte, len(input.Routes)) + routeKeys := make([]string, len(input.Routes)) + scheduledRouteKeys := make([]string, 0, len(input.Routes)) + scheduledRouteScores := make([]float64, 0, len(input.Routes)) + for index, route := range input.Routes { + payload, err := MarshalRoute(route) + if err != nil { + return fmt.Errorf("create notification acceptance in redis: route %d: %w", index, err) + } + routePayloads[index] = payload + routeKeys[index] = writer.keys.Route(route.NotificationID, route.RouteID) + if route.Status == acceptintent.RouteStatusPending { + scheduledRouteKeys = append(scheduledRouteKeys, routeKeys[index]) + 
scheduledRouteScores = append(scheduledRouteScores, float64(route.NextAttemptAt.UTC().UnixMilli())) + } + } + + notificationKey := writer.keys.Notification(input.Notification.NotificationID) + idempotencyKey := writer.keys.Idempotency(input.Idempotency.Producer, input.Idempotency.IdempotencyKey) + watchKeys := append([]string{notificationKey, idempotencyKey}, routeKeys...) + + watchErr := writer.client.Watch(ctx, func(tx *redis.Tx) error { + for _, key := range watchKeys { + if err := ensureKeyAbsent(ctx, tx, key); err != nil { + return err + } + } + + _, err := tx.TxPipelined(ctx, func(pipe redis.Pipeliner) error { + pipe.Set(ctx, notificationKey, notificationPayload, writer.cfg.RecordTTL) + pipe.Set(ctx, idempotencyKey, idempotencyPayload, writer.cfg.IdempotencyTTL) + for index, routeKey := range routeKeys { + pipe.Set(ctx, routeKey, routePayloads[index], writer.cfg.RecordTTL) + } + for index, routeKey := range scheduledRouteKeys { + pipe.ZAdd(ctx, writer.keys.RouteSchedule(), redis.Z{ + Score: scheduledRouteScores[index], + Member: routeKey, + }) + } + + return nil + }) + + return err + }, watchKeys...) 
+ + switch { + case errors.Is(watchErr, ErrConflict), errors.Is(watchErr, redis.TxFailedErr): + return ErrConflict + case watchErr != nil: + return fmt.Errorf("create notification acceptance in redis: %w", watchErr) + default: + return nil + } +} + +func ensureKeyAbsent(ctx context.Context, tx *redis.Tx, key string) error { + exists, err := tx.Exists(ctx, key).Result() + if err != nil { + return err + } + if exists > 0 { + return ErrConflict + } + + return nil +} diff --git a/notification/internal/adapters/redisstate/codecs.go b/notification/internal/adapters/redisstate/codecs.go new file mode 100644 index 0000000..65979a6 --- /dev/null +++ b/notification/internal/adapters/redisstate/codecs.go @@ -0,0 +1,547 @@ +package redisstate + +import ( + "bytes" + "encoding/json" + "fmt" + "io" + "time" + + "galaxy/notification/internal/api/intentstream" + "galaxy/notification/internal/service/acceptintent" + "galaxy/notification/internal/service/malformedintent" +) + +// StreamOffset stores the persisted progress of the plain-XREAD intent +// consumer. +type StreamOffset struct { + // Stream stores the Redis Stream name. + Stream string + + // LastProcessedEntryID stores the last durably processed Redis Stream entry + // identifier. + LastProcessedEntryID string + + // UpdatedAt stores when the offset record was last updated. + UpdatedAt time.Time +} + +// DeadLetterEntry stores one terminal route-publication failure recorded for +// later operator inspection. +type DeadLetterEntry struct { + // NotificationID stores the owning notification identifier. + NotificationID string + + // RouteID stores the exhausted route identifier. + RouteID string + + // Channel stores the failed route channel. + Channel intentstream.Channel + + // RecipientRef stores the stable failed recipient slot identifier. + RecipientRef string + + // FinalAttemptCount stores how many publication attempts were consumed. 
+ FinalAttemptCount int + + // MaxAttempts stores the configured retry budget for Channel. + MaxAttempts int + + // FailureClassification stores the stable classified failure reason. + FailureClassification string + + // FailureMessage stores the last failure detail. + FailureMessage string + + // CreatedAt stores when the route moved to dead_letter. + CreatedAt time.Time + + // RecoveryHint stores the optional operator-facing recovery hint. + RecoveryHint string +} + +type notificationRecordJSON struct { + NotificationID string `json:"notification_id"` + NotificationType intentstream.NotificationType `json:"notification_type"` + Producer intentstream.Producer `json:"producer"` + AudienceKind intentstream.AudienceKind `json:"audience_kind"` + RecipientUserIDs []string `json:"recipient_user_ids,omitempty"` + PayloadJSON string `json:"payload_json"` + IdempotencyKey string `json:"idempotency_key"` + RequestFingerprint string `json:"request_fingerprint"` + RequestID string `json:"request_id,omitempty"` + TraceID string `json:"trace_id,omitempty"` + OccurredAtMS int64 `json:"occurred_at_ms"` + AcceptedAtMS int64 `json:"accepted_at_ms"` + UpdatedAtMS int64 `json:"updated_at_ms"` +} + +type notificationRouteJSON struct { + NotificationID string `json:"notification_id"` + RouteID string `json:"route_id"` + Channel intentstream.Channel `json:"channel"` + RecipientRef string `json:"recipient_ref"` + Status acceptintent.RouteStatus `json:"status"` + AttemptCount int `json:"attempt_count"` + MaxAttempts int `json:"max_attempts"` + NextAttemptAtMS *int64 `json:"next_attempt_at_ms,omitempty"` + ResolvedEmail string `json:"resolved_email,omitempty"` + ResolvedLocale string `json:"resolved_locale,omitempty"` + LastErrorClassification string `json:"last_error_classification,omitempty"` + LastErrorMessage string `json:"last_error_message,omitempty"` + LastErrorAtMS *int64 `json:"last_error_at_ms,omitempty"` + CreatedAtMS int64 `json:"created_at_ms"` + UpdatedAtMS int64 
`json:"updated_at_ms"` + PublishedAtMS *int64 `json:"published_at_ms,omitempty"` + DeadLetteredAtMS *int64 `json:"dead_lettered_at_ms,omitempty"` + SkippedAtMS *int64 `json:"skipped_at_ms,omitempty"` +} + +type idempotencyRecordJSON struct { + Producer intentstream.Producer `json:"producer"` + IdempotencyKey string `json:"idempotency_key"` + NotificationID string `json:"notification_id"` + RequestFingerprint string `json:"request_fingerprint"` + CreatedAtMS int64 `json:"created_at_ms"` + ExpiresAtMS int64 `json:"expires_at_ms"` +} + +type malformedIntentJSON struct { + StreamEntryID string `json:"stream_entry_id"` + NotificationType string `json:"notification_type,omitempty"` + Producer string `json:"producer,omitempty"` + IdempotencyKey string `json:"idempotency_key,omitempty"` + FailureCode malformedintent.FailureCode `json:"failure_code"` + FailureMessage string `json:"failure_message"` + RawFields map[string]any `json:"raw_fields_json"` + RecordedAtMS int64 `json:"recorded_at_ms"` +} + +type streamOffsetJSON struct { + Stream string `json:"stream"` + LastProcessedEntryID string `json:"last_processed_entry_id"` + UpdatedAtMS int64 `json:"updated_at_ms"` +} + +type deadLetterEntryJSON struct { + NotificationID string `json:"notification_id"` + RouteID string `json:"route_id"` + Channel intentstream.Channel `json:"channel"` + RecipientRef string `json:"recipient_ref"` + FinalAttemptCount int `json:"final_attempt_count"` + MaxAttempts int `json:"max_attempts"` + FailureClassification string `json:"failure_classification"` + FailureMessage string `json:"failure_message"` + CreatedAtMS int64 `json:"created_at_ms"` + RecoveryHint string `json:"recovery_hint,omitempty"` +} + +// MarshalNotification marshals one notification record into the strict JSON +// representation owned by Notification Service. 
+func MarshalNotification(record acceptintent.NotificationRecord) ([]byte, error) { + if err := record.Validate(); err != nil { + return nil, fmt.Errorf("marshal notification record: %w", err) + } + + return marshalStrictJSON(notificationRecordJSON{ + NotificationID: record.NotificationID, + NotificationType: record.NotificationType, + Producer: record.Producer, + AudienceKind: record.AudienceKind, + RecipientUserIDs: append([]string(nil), record.RecipientUserIDs...), + PayloadJSON: record.PayloadJSON, + IdempotencyKey: record.IdempotencyKey, + RequestFingerprint: record.RequestFingerprint, + RequestID: record.RequestID, + TraceID: record.TraceID, + OccurredAtMS: unixMilli(record.OccurredAt), + AcceptedAtMS: unixMilli(record.AcceptedAt), + UpdatedAtMS: unixMilli(record.UpdatedAt), + }) +} + +// UnmarshalNotification unmarshals one strict JSON notification record. +func UnmarshalNotification(payload []byte) (acceptintent.NotificationRecord, error) { + var wire notificationRecordJSON + if err := unmarshalStrictJSON(payload, &wire); err != nil { + return acceptintent.NotificationRecord{}, fmt.Errorf("unmarshal notification record: %w", err) + } + + record := acceptintent.NotificationRecord{ + NotificationID: wire.NotificationID, + NotificationType: wire.NotificationType, + Producer: wire.Producer, + AudienceKind: wire.AudienceKind, + RecipientUserIDs: append([]string(nil), wire.RecipientUserIDs...), + PayloadJSON: wire.PayloadJSON, + IdempotencyKey: wire.IdempotencyKey, + RequestFingerprint: wire.RequestFingerprint, + RequestID: wire.RequestID, + TraceID: wire.TraceID, + OccurredAt: time.UnixMilli(wire.OccurredAtMS).UTC(), + AcceptedAt: time.UnixMilli(wire.AcceptedAtMS).UTC(), + UpdatedAt: time.UnixMilli(wire.UpdatedAtMS).UTC(), + } + if err := record.Validate(); err != nil { + return acceptintent.NotificationRecord{}, fmt.Errorf("unmarshal notification record: %w", err) + } + + return record, nil +} + +// MarshalRoute marshals one notification route into the strict 
JSON +// representation owned by Notification Service. +func MarshalRoute(route acceptintent.NotificationRoute) ([]byte, error) { + if err := route.Validate(); err != nil { + return nil, fmt.Errorf("marshal notification route: %w", err) + } + + return marshalStrictJSON(notificationRouteJSON{ + NotificationID: route.NotificationID, + RouteID: route.RouteID, + Channel: route.Channel, + RecipientRef: route.RecipientRef, + Status: route.Status, + AttemptCount: route.AttemptCount, + MaxAttempts: route.MaxAttempts, + NextAttemptAtMS: optionalUnixMilli(route.NextAttemptAt), + ResolvedEmail: route.ResolvedEmail, + ResolvedLocale: route.ResolvedLocale, + LastErrorClassification: route.LastErrorClassification, + LastErrorMessage: route.LastErrorMessage, + LastErrorAtMS: optionalUnixMilli(route.LastErrorAt), + CreatedAtMS: unixMilli(route.CreatedAt), + UpdatedAtMS: unixMilli(route.UpdatedAt), + PublishedAtMS: optionalUnixMilli(route.PublishedAt), + DeadLetteredAtMS: optionalUnixMilli(route.DeadLetteredAt), + SkippedAtMS: optionalUnixMilli(route.SkippedAt), + }) +} + +// UnmarshalRoute unmarshals one strict JSON notification route. 
+func UnmarshalRoute(payload []byte) (acceptintent.NotificationRoute, error) { + var wire notificationRouteJSON + if err := unmarshalStrictJSON(payload, &wire); err != nil { + return acceptintent.NotificationRoute{}, fmt.Errorf("unmarshal notification route: %w", err) + } + + route := acceptintent.NotificationRoute{ + NotificationID: wire.NotificationID, + RouteID: wire.RouteID, + Channel: wire.Channel, + RecipientRef: wire.RecipientRef, + Status: wire.Status, + AttemptCount: wire.AttemptCount, + MaxAttempts: wire.MaxAttempts, + ResolvedEmail: wire.ResolvedEmail, + ResolvedLocale: wire.ResolvedLocale, + LastErrorClassification: wire.LastErrorClassification, + LastErrorMessage: wire.LastErrorMessage, + CreatedAt: time.UnixMilli(wire.CreatedAtMS).UTC(), + UpdatedAt: time.UnixMilli(wire.UpdatedAtMS).UTC(), + } + if wire.NextAttemptAtMS != nil { + route.NextAttemptAt = time.UnixMilli(*wire.NextAttemptAtMS).UTC() + } + if wire.LastErrorAtMS != nil { + route.LastErrorAt = time.UnixMilli(*wire.LastErrorAtMS).UTC() + } + if wire.PublishedAtMS != nil { + route.PublishedAt = time.UnixMilli(*wire.PublishedAtMS).UTC() + } + if wire.DeadLetteredAtMS != nil { + route.DeadLetteredAt = time.UnixMilli(*wire.DeadLetteredAtMS).UTC() + } + if wire.SkippedAtMS != nil { + route.SkippedAt = time.UnixMilli(*wire.SkippedAtMS).UTC() + } + if err := route.Validate(); err != nil { + return acceptintent.NotificationRoute{}, fmt.Errorf("unmarshal notification route: %w", err) + } + + return route, nil +} + +// MarshalIdempotency marshals one idempotency record into the strict JSON +// representation owned by Notification Service. 
+func MarshalIdempotency(record acceptintent.IdempotencyRecord) ([]byte, error) { + if err := record.Validate(); err != nil { + return nil, fmt.Errorf("marshal notification idempotency record: %w", err) + } + + return marshalStrictJSON(idempotencyRecordJSON{ + Producer: record.Producer, + IdempotencyKey: record.IdempotencyKey, + NotificationID: record.NotificationID, + RequestFingerprint: record.RequestFingerprint, + CreatedAtMS: unixMilli(record.CreatedAt), + ExpiresAtMS: unixMilli(record.ExpiresAt), + }) +} + +// UnmarshalIdempotency unmarshals one strict JSON idempotency record. +func UnmarshalIdempotency(payload []byte) (acceptintent.IdempotencyRecord, error) { + var wire idempotencyRecordJSON + if err := unmarshalStrictJSON(payload, &wire); err != nil { + return acceptintent.IdempotencyRecord{}, fmt.Errorf("unmarshal notification idempotency record: %w", err) + } + + record := acceptintent.IdempotencyRecord{ + Producer: wire.Producer, + IdempotencyKey: wire.IdempotencyKey, + NotificationID: wire.NotificationID, + RequestFingerprint: wire.RequestFingerprint, + CreatedAt: time.UnixMilli(wire.CreatedAtMS).UTC(), + ExpiresAt: time.UnixMilli(wire.ExpiresAtMS).UTC(), + } + if err := record.Validate(); err != nil { + return acceptintent.IdempotencyRecord{}, fmt.Errorf("unmarshal notification idempotency record: %w", err) + } + + return record, nil +} + +// MarshalDeadLetter marshals one dead-letter entry into the strict JSON +// representation owned by Notification Service. 
+func MarshalDeadLetter(entry DeadLetterEntry) ([]byte, error) { + if err := entry.Validate(); err != nil { + return nil, fmt.Errorf("marshal dead letter entry: %w", err) + } + + return marshalStrictJSON(deadLetterEntryJSON{ + NotificationID: entry.NotificationID, + RouteID: entry.RouteID, + Channel: entry.Channel, + RecipientRef: entry.RecipientRef, + FinalAttemptCount: entry.FinalAttemptCount, + MaxAttempts: entry.MaxAttempts, + FailureClassification: entry.FailureClassification, + FailureMessage: entry.FailureMessage, + CreatedAtMS: unixMilli(entry.CreatedAt), + RecoveryHint: entry.RecoveryHint, + }) +} + +// UnmarshalDeadLetter unmarshals one strict JSON dead-letter entry. +func UnmarshalDeadLetter(payload []byte) (DeadLetterEntry, error) { + var wire deadLetterEntryJSON + if err := unmarshalStrictJSON(payload, &wire); err != nil { + return DeadLetterEntry{}, fmt.Errorf("unmarshal dead letter entry: %w", err) + } + + entry := DeadLetterEntry{ + NotificationID: wire.NotificationID, + RouteID: wire.RouteID, + Channel: wire.Channel, + RecipientRef: wire.RecipientRef, + FinalAttemptCount: wire.FinalAttemptCount, + MaxAttempts: wire.MaxAttempts, + FailureClassification: wire.FailureClassification, + FailureMessage: wire.FailureMessage, + CreatedAt: time.UnixMilli(wire.CreatedAtMS).UTC(), + RecoveryHint: wire.RecoveryHint, + } + if err := entry.Validate(); err != nil { + return DeadLetterEntry{}, fmt.Errorf("unmarshal dead letter entry: %w", err) + } + + return entry, nil +} + +// MarshalMalformedIntent marshals one malformed-intent entry into the strict +// JSON representation owned by Notification Service. 
+func MarshalMalformedIntent(entry malformedintent.Entry) ([]byte, error) { + if err := entry.Validate(); err != nil { + return nil, fmt.Errorf("marshal malformed intent: %w", err) + } + + return marshalStrictJSON(malformedIntentJSON{ + StreamEntryID: entry.StreamEntryID, + NotificationType: entry.NotificationType, + Producer: entry.Producer, + IdempotencyKey: entry.IdempotencyKey, + FailureCode: entry.FailureCode, + FailureMessage: entry.FailureMessage, + RawFields: cloneJSONObject(entry.RawFields), + RecordedAtMS: unixMilli(entry.RecordedAt), + }) +} + +// UnmarshalMalformedIntent unmarshals one strict JSON malformed-intent entry. +func UnmarshalMalformedIntent(payload []byte) (malformedintent.Entry, error) { + var wire malformedIntentJSON + if err := unmarshalStrictJSON(payload, &wire); err != nil { + return malformedintent.Entry{}, fmt.Errorf("unmarshal malformed intent: %w", err) + } + + entry := malformedintent.Entry{ + StreamEntryID: wire.StreamEntryID, + NotificationType: wire.NotificationType, + Producer: wire.Producer, + IdempotencyKey: wire.IdempotencyKey, + FailureCode: wire.FailureCode, + FailureMessage: wire.FailureMessage, + RawFields: cloneJSONObject(wire.RawFields), + RecordedAt: time.UnixMilli(wire.RecordedAtMS).UTC(), + } + if err := entry.Validate(); err != nil { + return malformedintent.Entry{}, fmt.Errorf("unmarshal malformed intent: %w", err) + } + + return entry, nil +} + +// MarshalStreamOffset marshals one stream-offset record into the strict JSON +// representation owned by Notification Service. +func MarshalStreamOffset(offset StreamOffset) ([]byte, error) { + if err := offset.Validate(); err != nil { + return nil, fmt.Errorf("marshal stream offset: %w", err) + } + + return marshalStrictJSON(streamOffsetJSON{ + Stream: offset.Stream, + LastProcessedEntryID: offset.LastProcessedEntryID, + UpdatedAtMS: unixMilli(offset.UpdatedAt), + }) +} + +// UnmarshalStreamOffset unmarshals one strict JSON stream-offset record. 
+func UnmarshalStreamOffset(payload []byte) (StreamOffset, error) { + var wire streamOffsetJSON + if err := unmarshalStrictJSON(payload, &wire); err != nil { + return StreamOffset{}, fmt.Errorf("unmarshal stream offset: %w", err) + } + + offset := StreamOffset{ + Stream: wire.Stream, + LastProcessedEntryID: wire.LastProcessedEntryID, + UpdatedAt: time.UnixMilli(wire.UpdatedAtMS).UTC(), + } + if err := offset.Validate(); err != nil { + return StreamOffset{}, fmt.Errorf("unmarshal stream offset: %w", err) + } + + return offset, nil +} + +// Validate reports whether offset contains a complete persisted consumer +// progress record. +func (offset StreamOffset) Validate() error { + if offset.Stream == "" { + return fmt.Errorf("stream offset stream must not be empty") + } + if offset.LastProcessedEntryID == "" { + return fmt.Errorf("stream offset last processed entry id must not be empty") + } + if offset.UpdatedAt.IsZero() { + return fmt.Errorf("stream offset updated at must not be zero") + } + if !offset.UpdatedAt.Equal(offset.UpdatedAt.UTC()) { + return fmt.Errorf("stream offset updated at must be UTC") + } + if !offset.UpdatedAt.Equal(offset.UpdatedAt.Truncate(time.Millisecond)) { + return fmt.Errorf("stream offset updated at must use millisecond precision") + } + + return nil +} + +// Validate reports whether entry contains a complete dead-letter record. 
+func (entry DeadLetterEntry) Validate() error { + if entry.NotificationID == "" { + return fmt.Errorf("dead letter entry notification id must not be empty") + } + if entry.RouteID == "" { + return fmt.Errorf("dead letter entry route id must not be empty") + } + if !entry.Channel.IsKnown() { + return fmt.Errorf("dead letter entry channel %q is unsupported", entry.Channel) + } + if entry.RecipientRef == "" { + return fmt.Errorf("dead letter entry recipient ref must not be empty") + } + if entry.FinalAttemptCount <= 0 { + return fmt.Errorf("dead letter entry final attempt count must be positive") + } + if entry.MaxAttempts <= 0 { + return fmt.Errorf("dead letter entry max attempts must be positive") + } + if entry.FailureClassification == "" { + return fmt.Errorf("dead letter entry failure classification must not be empty") + } + if entry.FailureMessage == "" { + return fmt.Errorf("dead letter entry failure message must not be empty") + } + if entry.CreatedAt.IsZero() { + return fmt.Errorf("dead letter entry created at must not be zero") + } + if !entry.CreatedAt.Equal(entry.CreatedAt.UTC()) { + return fmt.Errorf("dead letter entry created at must be UTC") + } + if !entry.CreatedAt.Equal(entry.CreatedAt.Truncate(time.Millisecond)) { + return fmt.Errorf("dead letter entry created at must use millisecond precision") + } + + return nil +} + +func marshalStrictJSON(value any) ([]byte, error) { + return json.Marshal(value) +} + +func unmarshalStrictJSON(payload []byte, target any) error { + decoder := json.NewDecoder(bytes.NewBuffer(payload)) + decoder.DisallowUnknownFields() + + if err := decoder.Decode(target); err != nil { + return err + } + if err := decoder.Decode(&struct{}{}); err != io.EOF { + if err == nil { + return fmt.Errorf("unexpected trailing JSON input") + } + return err + } + + return nil +} + +func unixMilli(value time.Time) int64 { + return value.UTC().UnixMilli() +} + +func optionalUnixMilli(value time.Time) *int64 { + if value.IsZero() { + return nil + 
} + millis := unixMilli(value) + return &millis +} + +func cloneJSONObject(value map[string]any) map[string]any { + if value == nil { + return map[string]any{} + } + + cloned := make(map[string]any, len(value)) + for key, raw := range value { + cloned[key] = cloneJSONValue(raw) + } + + return cloned +} + +func cloneJSONValue(value any) any { + switch typed := value.(type) { + case map[string]any: + return cloneJSONObject(typed) + case []any: + cloned := make([]any, len(typed)) + for index, item := range typed { + cloned[index] = cloneJSONValue(item) + } + return cloned + default: + return typed + } +} diff --git a/notification/internal/adapters/redisstate/doc.go b/notification/internal/adapters/redisstate/doc.go new file mode 100644 index 0000000..001d00f --- /dev/null +++ b/notification/internal/adapters/redisstate/doc.go @@ -0,0 +1,3 @@ +// Package redisstate defines the frozen Redis keyspace, strict JSON records, +// and low-level mutation helpers used by Notification Service durable state. +package redisstate diff --git a/notification/internal/adapters/redisstate/errors.go b/notification/internal/adapters/redisstate/errors.go new file mode 100644 index 0000000..0ebd17f --- /dev/null +++ b/notification/internal/adapters/redisstate/errors.go @@ -0,0 +1,10 @@ +package redisstate + +import "errors" + +var ( + // ErrConflict reports that a Redis mutation could not be applied because + // one of the watched or newly created keys already existed or changed + // concurrently. 
+ ErrConflict = errors.New("redis state conflict") +) diff --git a/notification/internal/adapters/redisstate/keyspace.go b/notification/internal/adapters/redisstate/keyspace.go new file mode 100644 index 0000000..9ec7f81 --- /dev/null +++ b/notification/internal/adapters/redisstate/keyspace.go @@ -0,0 +1,105 @@ +package redisstate + +import ( + "encoding/base64" + "fmt" + "strings" + + "galaxy/notification/internal/api/intentstream" +) + +const defaultPrefix = "notification:" + +// Keyspace builds the frozen Notification Service Redis keys. All dynamic key +// segments are encoded with base64url so raw key structure does not depend on +// caller-provided characters. +type Keyspace struct{} + +// Notification returns the primary Redis key for one notification_record. +func (Keyspace) Notification(notificationID string) string { + return defaultPrefix + "records:" + encodeKeyComponent(notificationID) +} + +// Route returns the primary Redis key for one notification_route. +func (Keyspace) Route(notificationID string, routeID string) string { + return defaultPrefix + "routes:" + encodeKeyComponent(notificationID) + ":" + encodeKeyComponent(routeID) +} + +// ParseRoute returns the notification identifier and route identifier encoded +// inside routeKey. 
+func (Keyspace) ParseRoute(routeKey string) (string, string, error) { + trimmedPrefix := defaultPrefix + "routes:" + if !strings.HasPrefix(routeKey, trimmedPrefix) { + return "", "", fmt.Errorf("parse route key: %q does not use %q prefix", routeKey, trimmedPrefix) + } + + encoded := strings.TrimPrefix(routeKey, trimmedPrefix) + parts := strings.Split(encoded, ":") + if len(parts) != 2 { + return "", "", fmt.Errorf("parse route key: %q must contain exactly two encoded segments", routeKey) + } + + notificationID, err := decodeKeyComponent(parts[0]) + if err != nil { + return "", "", fmt.Errorf("parse route key: notification id: %w", err) + } + routeID, err := decodeKeyComponent(parts[1]) + if err != nil { + return "", "", fmt.Errorf("parse route key: route id: %w", err) + } + + return notificationID, routeID, nil +} + +// Idempotency returns the primary Redis key for one +// notification_idempotency_record. +func (Keyspace) Idempotency(producer intentstream.Producer, idempotencyKey string) string { + return defaultPrefix + "idempotency:" + encodeKeyComponent(string(producer)) + ":" + encodeKeyComponent(idempotencyKey) +} + +// DeadLetter returns the primary Redis key for one +// notification_dead_letter_entry. +func (Keyspace) DeadLetter(notificationID string, routeID string) string { + return defaultPrefix + "dead_letters:" + encodeKeyComponent(notificationID) + ":" + encodeKeyComponent(routeID) +} + +// RouteLease returns the temporary Redis key used to coordinate exclusive +// publication of one notification_route across replicas. +func (Keyspace) RouteLease(notificationID string, routeID string) string { + return defaultPrefix + "route_leases:" + encodeKeyComponent(notificationID) + ":" + encodeKeyComponent(routeID) +} + +// MalformedIntent returns the primary Redis key for one malformed-intent +// record. 
+func (Keyspace) MalformedIntent(streamEntryID string) string { + return defaultPrefix + "malformed_intents:" + encodeKeyComponent(streamEntryID) +} + +// StreamOffset returns the primary Redis key for one persisted intent-consumer +// offset. +func (Keyspace) StreamOffset(stream string) string { + return defaultPrefix + "stream_offsets:" + encodeKeyComponent(stream) +} + +// Intents returns the frozen ingress Redis Stream key. +func (Keyspace) Intents() string { + return defaultPrefix + "intents" +} + +// RouteSchedule returns the frozen route schedule sorted-set key. +func (Keyspace) RouteSchedule() string { + return defaultPrefix + "route_schedule" +} + +func encodeKeyComponent(value string) string { + return base64.RawURLEncoding.EncodeToString([]byte(value)) +} + +func decodeKeyComponent(value string) (string, error) { + decoded, err := base64.RawURLEncoding.DecodeString(value) + if err != nil { + return "", err + } + + return string(decoded), nil +} diff --git a/notification/internal/adapters/redisstate/malformed_intent_store.go b/notification/internal/adapters/redisstate/malformed_intent_store.go new file mode 100644 index 0000000..50fc52e --- /dev/null +++ b/notification/internal/adapters/redisstate/malformed_intent_store.go @@ -0,0 +1,59 @@ +package redisstate + +import ( + "context" + "errors" + "fmt" + "time" + + "galaxy/notification/internal/service/malformedintent" + + "github.com/redis/go-redis/v9" +) + +// MalformedIntentStore provides the Redis-backed storage used for +// operator-visible malformed-intent records. +type MalformedIntentStore struct { + client *redis.Client + keys Keyspace + ttl time.Duration +} + +// NewMalformedIntentStore constructs one Redis-backed malformed-intent store. 
+func NewMalformedIntentStore(client *redis.Client, ttl time.Duration) (*MalformedIntentStore, error) { + if client == nil { + return nil, errors.New("new malformed intent store: nil redis client") + } + if ttl <= 0 { + return nil, errors.New("new malformed intent store: non-positive ttl") + } + + return &MalformedIntentStore{ + client: client, + keys: Keyspace{}, + ttl: ttl, + }, nil +} + +// Record stores entry idempotently by its Redis Stream entry identifier. +func (store *MalformedIntentStore) Record(ctx context.Context, entry malformedintent.Entry) error { + if store == nil || store.client == nil { + return errors.New("record malformed intent: nil store") + } + if ctx == nil { + return errors.New("record malformed intent: nil context") + } + if err := entry.Validate(); err != nil { + return fmt.Errorf("record malformed intent: %w", err) + } + + payload, err := MarshalMalformedIntent(entry) + if err != nil { + return fmt.Errorf("record malformed intent: %w", err) + } + if err := store.client.Set(ctx, store.keys.MalformedIntent(entry.StreamEntryID), payload, store.ttl).Err(); err != nil { + return fmt.Errorf("record malformed intent: %w", err) + } + + return nil +} diff --git a/notification/internal/adapters/redisstate/route_state_store.go b/notification/internal/adapters/redisstate/route_state_store.go new file mode 100644 index 0000000..f70a8d9 --- /dev/null +++ b/notification/internal/adapters/redisstate/route_state_store.go @@ -0,0 +1,657 @@ +package redisstate + +import ( + "bytes" + "context" + "errors" + "fmt" + "sort" + "strconv" + "time" + + "galaxy/notification/internal/service/acceptintent" + "galaxy/notification/internal/telemetry" + + "github.com/redis/go-redis/v9" +) + +var releaseRouteLeaseScript = redis.NewScript(` +if redis.call("GET", KEYS[1]) == ARGV[1] then + return redis.call("DEL", KEYS[1]) +end +return 0 +`) + +var completePublishedRouteScript = redis.NewScript(` +if redis.call("GET", KEYS[1]) ~= ARGV[1] then + return 0 +end +if 
redis.call("GET", KEYS[2]) ~= ARGV[2] then + return 0 +end +local field_count = tonumber(ARGV[6]) +local values = {} +local index = 7 +for _ = 1, field_count do + table.insert(values, ARGV[index]) + table.insert(values, ARGV[index + 1]) + index = index + 2 +end +if tonumber(ARGV[4]) > 0 then + redis.call("XADD", ARGV[3], "MAXLEN", "~", ARGV[4], "*", unpack(values)) +else + redis.call("XADD", ARGV[3], "*", unpack(values)) +end +redis.call("SET", KEYS[1], ARGV[5], "KEEPTTL") +redis.call("ZREM", KEYS[3], KEYS[1]) +redis.call("DEL", KEYS[2]) +return 1 +`) + +// ScheduledRoute stores one due route reference loaded from +// `notification:route_schedule`. +type ScheduledRoute struct { + // RouteKey stores the full Redis route key scheduled for processing. + RouteKey string + + // NotificationID stores the owning notification identifier. + NotificationID string + + // RouteID stores the scheduled route identifier. + RouteID string +} + +// CompleteRoutePublishedInput stores the data required to mark one route as +// published while atomically appending one outbound stream entry. +type CompleteRoutePublishedInput struct { + // ExpectedRoute stores the current route state previously loaded by the + // caller. + ExpectedRoute acceptintent.NotificationRoute + + // LeaseToken stores the route-lease owner token that must still be held. + LeaseToken string + + // PublishedAt stores when the publication attempt succeeded. + PublishedAt time.Time + + // Stream stores the outbound Redis Stream name. + Stream string + + // StreamMaxLen bounds Stream with approximate trimming when positive. Zero + // disables trimming. + StreamMaxLen int64 + + // StreamValues stores the exact Redis Stream fields appended to Stream. + StreamValues map[string]any +} + +// CompleteRouteFailedInput stores the data required to record one retryable +// publication failure. +type CompleteRouteFailedInput struct { + // ExpectedRoute stores the current route state previously loaded by the + // caller. 
+ ExpectedRoute acceptintent.NotificationRoute + + // LeaseToken stores the route-lease owner token that must still be held. + LeaseToken string + + // FailedAt stores when the publication attempt failed. + FailedAt time.Time + + // NextAttemptAt stores the next scheduled retry time. + NextAttemptAt time.Time + + // FailureClassification stores the classified publication failure kind. + FailureClassification string + + // FailureMessage stores the detailed publication failure text. + FailureMessage string +} + +// CompleteRouteDeadLetterInput stores the data required to record one +// exhausted publication failure. +type CompleteRouteDeadLetterInput struct { + // ExpectedRoute stores the current route state previously loaded by the + // caller. + ExpectedRoute acceptintent.NotificationRoute + + // LeaseToken stores the route-lease owner token that must still be held. + LeaseToken string + + // DeadLetteredAt stores when the route exhausted its retry budget. + DeadLetteredAt time.Time + + // FailureClassification stores the classified terminal failure kind. + FailureClassification string + + // FailureMessage stores the detailed terminal failure text. + FailureMessage string + + // RecoveryHint stores the optional operator-facing recovery guidance. + RecoveryHint string +} + +// ListDueRoutes loads up to limit scheduled routes whose next-attempt score is +// due at or before now. 
+func (store *AcceptanceStore) ListDueRoutes(ctx context.Context, now time.Time, limit int64) ([]ScheduledRoute, error) { + if store == nil || store.client == nil { + return nil, errors.New("list due routes: nil store") + } + if ctx == nil { + return nil, errors.New("list due routes: nil context") + } + if err := validateRouteStateTimestamp("list due routes now", now); err != nil { + return nil, err + } + if limit <= 0 { + return nil, errors.New("list due routes: limit must be positive") + } + + members, err := store.client.ZRangeByScore(ctx, store.keys.RouteSchedule(), &redis.ZRangeBy{ + Min: "-inf", + Max: strconv.FormatInt(now.UnixMilli(), 10), + Count: limit, + }).Result() + if err != nil { + return nil, fmt.Errorf("list due routes: %w", err) + } + + routes := make([]ScheduledRoute, 0, len(members)) + for _, member := range members { + notificationID, routeID, err := store.keys.ParseRoute(member) + if err != nil { + return nil, fmt.Errorf("list due routes: %w", err) + } + routes = append(routes, ScheduledRoute{ + RouteKey: member, + NotificationID: notificationID, + RouteID: routeID, + }) + } + + return routes, nil +} + +// ReadRouteScheduleSnapshot returns the current depth of the durable route +// schedule together with its oldest scheduled timestamp when one exists. 
+func (store *AcceptanceStore) ReadRouteScheduleSnapshot(ctx context.Context) (telemetry.RouteScheduleSnapshot, error) { + if store == nil || store.client == nil { + return telemetry.RouteScheduleSnapshot{}, errors.New("read route schedule snapshot: nil store") + } + if ctx == nil { + return telemetry.RouteScheduleSnapshot{}, errors.New("read route schedule snapshot: nil context") + } + + depth, err := store.client.ZCard(ctx, store.keys.RouteSchedule()).Result() + if err != nil { + return telemetry.RouteScheduleSnapshot{}, fmt.Errorf("read route schedule snapshot: depth: %w", err) + } + + snapshot := telemetry.RouteScheduleSnapshot{ + Depth: depth, + } + if depth == 0 { + return snapshot, nil + } + + values, err := store.client.ZRangeWithScores(ctx, store.keys.RouteSchedule(), 0, 0).Result() + if err != nil { + return telemetry.RouteScheduleSnapshot{}, fmt.Errorf("read route schedule snapshot: oldest scheduled entry: %w", err) + } + if len(values) == 0 { + return snapshot, nil + } + + oldestScheduledFor := time.UnixMilli(int64(values[0].Score)).UTC() + snapshot.OldestScheduledFor = &oldestScheduledFor + return snapshot, nil +} + +// TryAcquireRouteLease attempts to acquire one temporary route lease owned by +// token for ttl. 
+func (store *AcceptanceStore) TryAcquireRouteLease(ctx context.Context, notificationID string, routeID string, token string, ttl time.Duration) (bool, error) { + if store == nil || store.client == nil { + return false, errors.New("try acquire route lease: nil store") + } + if ctx == nil { + return false, errors.New("try acquire route lease: nil context") + } + if notificationID == "" { + return false, errors.New("try acquire route lease: notification id must not be empty") + } + if routeID == "" { + return false, errors.New("try acquire route lease: route id must not be empty") + } + if token == "" { + return false, errors.New("try acquire route lease: token must not be empty") + } + if ttl <= 0 { + return false, errors.New("try acquire route lease: ttl must be positive") + } + + acquired, err := store.client.SetNX(ctx, store.keys.RouteLease(notificationID, routeID), token, ttl).Result() + if err != nil { + return false, fmt.Errorf("try acquire route lease: %w", err) + } + + return acquired, nil +} + +// ReleaseRouteLease releases one temporary route lease only when token still +// matches the stored owner value. 
+func (store *AcceptanceStore) ReleaseRouteLease(ctx context.Context, notificationID string, routeID string, token string) error { + if store == nil || store.client == nil { + return errors.New("release route lease: nil store") + } + if ctx == nil { + return errors.New("release route lease: nil context") + } + if notificationID == "" { + return errors.New("release route lease: notification id must not be empty") + } + if routeID == "" { + return errors.New("release route lease: route id must not be empty") + } + if token == "" { + return errors.New("release route lease: token must not be empty") + } + + if err := releaseRouteLeaseScript.Run( + ctx, + store.client, + []string{store.keys.RouteLease(notificationID, routeID)}, + token, + ).Err(); err != nil { + return fmt.Errorf("release route lease: %w", err) + } + + return nil +} + +// CompleteRoutePublished atomically appends one outbound stream entry and +// marks the corresponding route as published. +func (store *AcceptanceStore) CompleteRoutePublished(ctx context.Context, input CompleteRoutePublishedInput) error { + if store == nil || store.client == nil { + return errors.New("complete route published: nil store") + } + if ctx == nil { + return errors.New("complete route published: nil context") + } + if err := input.Validate(); err != nil { + return fmt.Errorf("complete route published: %w", err) + } + + updatedRoute := input.ExpectedRoute + updatedRoute.Status = acceptintent.RouteStatusPublished + updatedRoute.AttemptCount++ + updatedRoute.NextAttemptAt = time.Time{} + updatedRoute.LastErrorClassification = "" + updatedRoute.LastErrorMessage = "" + updatedRoute.LastErrorAt = time.Time{} + updatedRoute.UpdatedAt = input.PublishedAt + updatedRoute.PublishedAt = input.PublishedAt + updatedRoute.DeadLetteredAt = time.Time{} + payload, err := MarshalRoute(updatedRoute) + if err != nil { + return fmt.Errorf("complete route published: %w", err) + } + expectedPayload, err := MarshalRoute(input.ExpectedRoute) + if err 
!= nil { + return fmt.Errorf("complete route published: %w", err) + } + streamArgs, err := flattenStreamValues(input.StreamValues) + if err != nil { + return fmt.Errorf("complete route published: %w", err) + } + + result, err := completePublishedRouteScript.Run( + ctx, + store.client, + []string{ + store.keys.Route(updatedRoute.NotificationID, updatedRoute.RouteID), + store.keys.RouteLease(updatedRoute.NotificationID, updatedRoute.RouteID), + store.keys.RouteSchedule(), + }, + append([]any{ + string(expectedPayload), + input.LeaseToken, + input.Stream, + input.StreamMaxLen, + string(payload), + len(streamArgs) / 2, + }, streamArgs...)..., + ).Int() + switch { + case errors.Is(err, redis.Nil): + return ErrConflict + case err != nil: + return err + case result != 1: + return ErrConflict + default: + return nil + } +} + +// CompleteRouteFailed atomically records one retryable publication failure and +// reschedules the route. +func (store *AcceptanceStore) CompleteRouteFailed(ctx context.Context, input CompleteRouteFailedInput) error { + if store == nil || store.client == nil { + return errors.New("complete route failed: nil store") + } + if ctx == nil { + return errors.New("complete route failed: nil context") + } + if err := input.Validate(); err != nil { + return fmt.Errorf("complete route failed: %w", err) + } + + updatedRoute := input.ExpectedRoute + updatedRoute.Status = acceptintent.RouteStatusFailed + updatedRoute.AttemptCount++ + updatedRoute.NextAttemptAt = input.NextAttemptAt + updatedRoute.LastErrorClassification = input.FailureClassification + updatedRoute.LastErrorMessage = input.FailureMessage + updatedRoute.LastErrorAt = input.FailedAt + updatedRoute.UpdatedAt = input.FailedAt + payload, err := MarshalRoute(updatedRoute) + if err != nil { + return fmt.Errorf("complete route failed: %w", err) + } + + return store.completeRouteMutation(ctx, input.ExpectedRoute, input.LeaseToken, func(pipe redis.Pipeliner) error { + pipe.SetArgs(ctx, 
store.keys.Route(updatedRoute.NotificationID, updatedRoute.RouteID), payload, redis.SetArgs{KeepTTL: true}) + pipe.ZAdd(ctx, store.keys.RouteSchedule(), redis.Z{ + Score: float64(input.NextAttemptAt.UnixMilli()), + Member: store.keys.Route(updatedRoute.NotificationID, updatedRoute.RouteID), + }) + pipe.Del(ctx, store.keys.RouteLease(updatedRoute.NotificationID, updatedRoute.RouteID)) + return nil + }) +} + +// CompleteRouteDeadLetter atomically records one exhausted publication +// failure, stores the dead-letter entry, and removes the route from the +// retry schedule. +func (store *AcceptanceStore) CompleteRouteDeadLetter(ctx context.Context, input CompleteRouteDeadLetterInput) error { + if store == nil || store.client == nil { + return errors.New("complete route dead letter: nil store") + } + if ctx == nil { + return errors.New("complete route dead letter: nil context") + } + if err := input.Validate(); err != nil { + return fmt.Errorf("complete route dead letter: %w", err) + } + + updatedRoute := input.ExpectedRoute + updatedRoute.Status = acceptintent.RouteStatusDeadLetter + updatedRoute.AttemptCount++ + updatedRoute.NextAttemptAt = time.Time{} + updatedRoute.LastErrorClassification = input.FailureClassification + updatedRoute.LastErrorMessage = input.FailureMessage + updatedRoute.LastErrorAt = input.DeadLetteredAt + updatedRoute.UpdatedAt = input.DeadLetteredAt + updatedRoute.DeadLetteredAt = input.DeadLetteredAt + if updatedRoute.AttemptCount < updatedRoute.MaxAttempts { + return fmt.Errorf( + "complete route dead letter: final attempt count %d is below max attempts %d", + updatedRoute.AttemptCount, + updatedRoute.MaxAttempts, + ) + } + + routePayload, err := MarshalRoute(updatedRoute) + if err != nil { + return fmt.Errorf("complete route dead letter: %w", err) + } + deadLetterPayload, err := MarshalDeadLetter(DeadLetterEntry{ + NotificationID: updatedRoute.NotificationID, + RouteID: updatedRoute.RouteID, + Channel: updatedRoute.Channel, + RecipientRef: 
updatedRoute.RecipientRef, + FinalAttemptCount: updatedRoute.AttemptCount, + MaxAttempts: updatedRoute.MaxAttempts, + FailureClassification: input.FailureClassification, + FailureMessage: input.FailureMessage, + CreatedAt: input.DeadLetteredAt, + RecoveryHint: input.RecoveryHint, + }) + if err != nil { + return fmt.Errorf("complete route dead letter: %w", err) + } + + return store.completeRouteMutation(ctx, input.ExpectedRoute, input.LeaseToken, func(pipe redis.Pipeliner) error { + pipe.SetArgs(ctx, store.keys.Route(updatedRoute.NotificationID, updatedRoute.RouteID), routePayload, redis.SetArgs{KeepTTL: true}) + pipe.Set(ctx, store.keys.DeadLetter(updatedRoute.NotificationID, updatedRoute.RouteID), deadLetterPayload, store.cfg.DeadLetterTTL) + pipe.ZRem(ctx, store.keys.RouteSchedule(), store.keys.Route(updatedRoute.NotificationID, updatedRoute.RouteID)) + pipe.Del(ctx, store.keys.RouteLease(updatedRoute.NotificationID, updatedRoute.RouteID)) + return nil + }) +} + +func (store *AcceptanceStore) completeRouteMutation( + ctx context.Context, + expectedRoute acceptintent.NotificationRoute, + leaseToken string, + mutate func(redis.Pipeliner) error, +) error { + routeKey := store.keys.Route(expectedRoute.NotificationID, expectedRoute.RouteID) + leaseKey := store.keys.RouteLease(expectedRoute.NotificationID, expectedRoute.RouteID) + + watchErr := store.client.Watch(ctx, func(tx *redis.Tx) error { + currentRoute, err := loadWatchedRoute(ctx, tx, routeKey) + switch { + case errors.Is(err, redis.Nil): + return ErrConflict + case err != nil: + return err + } + if err := ensureRoutesEqual(expectedRoute, currentRoute); err != nil { + return err + } + + leaseValue, err := tx.Get(ctx, leaseKey).Result() + switch { + case errors.Is(err, redis.Nil): + return ErrConflict + case err != nil: + return err + case leaseValue != leaseToken: + return ErrConflict + } + + _, err = tx.TxPipelined(ctx, func(pipe redis.Pipeliner) error { + return mutate(pipe) + }) + + return err + }, routeKey, 
leaseKey) + + switch { + case errors.Is(watchErr, ErrConflict), errors.Is(watchErr, redis.TxFailedErr): + return ErrConflict + case watchErr != nil: + return watchErr + default: + return nil + } +} + +func loadWatchedRoute(ctx context.Context, tx *redis.Tx, routeKey string) (acceptintent.NotificationRoute, error) { + payload, err := tx.Get(ctx, routeKey).Bytes() + if err != nil { + return acceptintent.NotificationRoute{}, err + } + + return UnmarshalRoute(payload) +} + +func ensureRoutesEqual(expected acceptintent.NotificationRoute, actual acceptintent.NotificationRoute) error { + expectedPayload, err := MarshalRoute(expected) + if err != nil { + return fmt.Errorf("marshal expected route: %w", err) + } + actualPayload, err := MarshalRoute(actual) + if err != nil { + return fmt.Errorf("marshal current route: %w", err) + } + if !bytes.Equal(expectedPayload, actualPayload) { + return ErrConflict + } + + return nil +} + +func validateCompletionRoute(route acceptintent.NotificationRoute) error { + if err := route.Validate(); err != nil { + return err + } + switch route.Status { + case acceptintent.RouteStatusPending, acceptintent.RouteStatusFailed: + return nil + default: + return fmt.Errorf("route status %q is not completable", route.Status) + } +} + +func validateStreamValues(values map[string]any) error { + if len(values) == 0 { + return fmt.Errorf("stream values must not be empty") + } + + for key, raw := range values { + if key == "" { + return fmt.Errorf("stream values key must not be empty") + } + switch typed := raw.(type) { + case string: + if typed == "" { + return fmt.Errorf("stream values %q must not be empty", key) + } + case []byte: + if len(typed) == 0 { + return fmt.Errorf("stream values %q must not be empty", key) + } + default: + return fmt.Errorf("stream values %q must be string or []byte", key) + } + } + + return nil +} + +func flattenStreamValues(values map[string]any) ([]any, error) { + keys := make([]string, 0, len(values)) + for key := range 
values { + keys = append(keys, key) + } + sort.Strings(keys) + + args := make([]any, 0, len(values)*2) + for _, key := range keys { + args = append(args, key, values[key]) + } + + return args, nil +} + +func validateRouteStateTimestamp(name string, value time.Time) error { + if value.IsZero() { + return fmt.Errorf("%s must not be zero", name) + } + if !value.Equal(value.UTC()) { + return fmt.Errorf("%s must be UTC", name) + } + if !value.Equal(value.Truncate(time.Millisecond)) { + return fmt.Errorf("%s must use millisecond precision", name) + } + + return nil +} + +// Validate reports whether route contains a complete due-route reference. +func (route ScheduledRoute) Validate() error { + if route.RouteKey == "" { + return fmt.Errorf("scheduled route key must not be empty") + } + if route.NotificationID == "" { + return fmt.Errorf("scheduled route notification id must not be empty") + } + if route.RouteID == "" { + return fmt.Errorf("scheduled route route id must not be empty") + } + + return nil +} + +// Validate reports whether input contains a complete published-route +// transition. +func (input CompleteRoutePublishedInput) Validate() error { + if err := validateCompletionRoute(input.ExpectedRoute); err != nil { + return err + } + if input.LeaseToken == "" { + return fmt.Errorf("lease token must not be empty") + } + if err := validateRouteStateTimestamp("published at", input.PublishedAt); err != nil { + return err + } + if input.Stream == "" { + return fmt.Errorf("stream must not be empty") + } + if input.StreamMaxLen < 0 { + return fmt.Errorf("stream max len must not be negative") + } + if err := validateStreamValues(input.StreamValues); err != nil { + return err + } + + return nil +} + +// Validate reports whether input contains a complete retryable failure +// transition. 
+func (input CompleteRouteFailedInput) Validate() error { + if err := validateCompletionRoute(input.ExpectedRoute); err != nil { + return err + } + if input.LeaseToken == "" { + return fmt.Errorf("lease token must not be empty") + } + if err := validateRouteStateTimestamp("failed at", input.FailedAt); err != nil { + return err + } + if err := validateRouteStateTimestamp("next attempt at", input.NextAttemptAt); err != nil { + return err + } + if input.FailureClassification == "" { + return fmt.Errorf("failure classification must not be empty") + } + if input.FailureMessage == "" { + return fmt.Errorf("failure message must not be empty") + } + + return nil +} + +// Validate reports whether input contains a complete dead-letter transition. +func (input CompleteRouteDeadLetterInput) Validate() error { + if err := validateCompletionRoute(input.ExpectedRoute); err != nil { + return err + } + if input.LeaseToken == "" { + return fmt.Errorf("lease token must not be empty") + } + if err := validateRouteStateTimestamp("dead lettered at", input.DeadLetteredAt); err != nil { + return err + } + if input.FailureClassification == "" { + return fmt.Errorf("failure classification must not be empty") + } + if input.FailureMessage == "" { + return fmt.Errorf("failure message must not be empty") + } + + return nil +} diff --git a/notification/internal/adapters/redisstate/route_state_store_test.go b/notification/internal/adapters/redisstate/route_state_store_test.go new file mode 100644 index 0000000..f2a17a9 --- /dev/null +++ b/notification/internal/adapters/redisstate/route_state_store_test.go @@ -0,0 +1,465 @@ +package redisstate + +import ( + "context" + "testing" + "time" + + "galaxy/notification/internal/api/intentstream" + "galaxy/notification/internal/service/acceptintent" + + "github.com/alicebob/miniredis/v2" + "github.com/stretchr/testify/require" +) + +func TestAcceptanceStoreListDueRoutesLoadsScheduledMembers(t *testing.T) { + t.Parallel() + + server := miniredis.RunT(t) + 
client := newTestRedisClient(t, server) + + store, err := NewAcceptanceStore(client, AcceptanceConfig{ + RecordTTL: 24 * time.Hour, + DeadLetterTTL: 72 * time.Hour, + IdempotencyTTL: 7 * 24 * time.Hour, + }) + require.NoError(t, err) + + now := time.UnixMilli(1775121700000).UTC() + require.NoError(t, store.CreateAcceptance(context.Background(), validUserAcceptanceInput(now, 0))) + + routes, err := store.ListDueRoutes(context.Background(), now, 10) + require.NoError(t, err) + require.Len(t, routes, 2) + require.ElementsMatch(t, []string{"push:user:user-1", "email:user:user-1"}, []string{routes[0].RouteID, routes[1].RouteID}) + + for _, route := range routes { + require.NoError(t, route.Validate()) + } +} + +func TestAcceptanceStoreReadRouteScheduleSnapshot(t *testing.T) { + t.Parallel() + + server := miniredis.RunT(t) + client := newTestRedisClient(t, server) + + store, err := NewAcceptanceStore(client, AcceptanceConfig{ + RecordTTL: 24 * time.Hour, + DeadLetterTTL: 72 * time.Hour, + IdempotencyTTL: 7 * 24 * time.Hour, + }) + require.NoError(t, err) + + now := time.UnixMilli(1775121700000).UTC() + require.NoError(t, store.CreateAcceptance(context.Background(), validUserAcceptanceInput(now, 0))) + + snapshot, err := store.ReadRouteScheduleSnapshot(context.Background()) + require.NoError(t, err) + require.Equal(t, int64(2), snapshot.Depth) + require.NotNil(t, snapshot.OldestScheduledFor) + require.Equal(t, now, *snapshot.OldestScheduledFor) +} + +func TestAcceptanceStoreRouteLeaseAcquireReleaseAndExpire(t *testing.T) { + t.Parallel() + + server := miniredis.RunT(t) + client := newTestRedisClient(t, server) + + store, err := NewAcceptanceStore(client, AcceptanceConfig{ + RecordTTL: 24 * time.Hour, + DeadLetterTTL: 72 * time.Hour, + IdempotencyTTL: 7 * 24 * time.Hour, + }) + require.NoError(t, err) + + acquired, err := store.TryAcquireRouteLease(context.Background(), "1775121700000-0", "push:user:user-1", "token-1", 2*time.Second) + require.NoError(t, err) + 
	require.True(t, acquired)

	// A second caller must not steal a live lease.
	acquired, err = store.TryAcquireRouteLease(context.Background(), "1775121700000-0", "push:user:user-1", "token-2", 2*time.Second)
	require.NoError(t, err)
	require.False(t, acquired)

	// Explicit release frees the slot for a new token.
	require.NoError(t, store.ReleaseRouteLease(context.Background(), "1775121700000-0", "push:user:user-1", "token-1"))
	acquired, err = store.TryAcquireRouteLease(context.Background(), "1775121700000-0", "push:user:user-1", "token-3", 2*time.Second)
	require.NoError(t, err)
	require.True(t, acquired)

	// TTL expiry (simulated via miniredis FastForward) also frees the slot.
	server.FastForward(3 * time.Second)
	acquired, err = store.TryAcquireRouteLease(context.Background(), "1775121700000-0", "push:user:user-1", "token-4", 2*time.Second)
	require.NoError(t, err)
	require.True(t, acquired)
}

// Verifies that completing a push route appends one entry to the trimmed
// gateway stream, marks the route published, removes it from the schedule,
// and deletes its lease key.
func TestAcceptanceStoreCompleteRoutePublishedAppendsTrimmedStreamEntryAndMarksRoutePublished(t *testing.T) {
	t.Parallel()

	server := miniredis.RunT(t)
	client := newTestRedisClient(t, server)

	store, err := NewAcceptanceStore(client, AcceptanceConfig{
		RecordTTL:      24 * time.Hour,
		DeadLetterTTL:  72 * time.Hour,
		IdempotencyTTL: 7 * 24 * time.Hour,
	})
	require.NoError(t, err)

	now := time.UnixMilli(1775121700000).UTC()
	input := validUserAcceptanceInput(now, 0)
	require.NoError(t, store.CreateAcceptance(context.Background(), input))

	acquired, err := store.TryAcquireRouteLease(context.Background(), input.Notification.NotificationID, "push:user:user-1", "token-1", 5*time.Second)
	require.NoError(t, err)
	require.True(t, acquired)

	route, found, err := store.GetRoute(context.Background(), input.Notification.NotificationID, "push:user:user-1")
	require.NoError(t, err)
	require.True(t, found)

	publishedAt := now.Add(time.Second).UTC().Truncate(time.Millisecond)
	require.NoError(t, store.CompleteRoutePublished(context.Background(), CompleteRoutePublishedInput{
		ExpectedRoute: route,
		LeaseToken:    "token-1",
		PublishedAt:   publishedAt,
		Stream:        "gateway:client-events",
		StreamMaxLen:  1024,
		StreamValues: map[string]any{
			"user_id":       "user-1",
			"event_type":    "game.turn.ready",
			"event_id":      input.Notification.NotificationID + "/push:user:user-1",
			"payload_bytes": []byte("payload-1"),
			"request_id":    "request-1",
			"trace_id":      "trace-1",
		},
	}))

	// Route state transitions to published with one recorded attempt.
	updatedRoute, found, err := store.GetRoute(context.Background(), input.Notification.NotificationID, "push:user:user-1")
	require.NoError(t, err)
	require.True(t, found)
	require.Equal(t, acceptintent.RouteStatusPublished, updatedRoute.Status)
	require.Equal(t, 1, updatedRoute.AttemptCount)
	require.Equal(t, publishedAt, updatedRoute.PublishedAt)

	// Only the sibling email route is left on the schedule.
	scheduled, err := client.ZRange(context.Background(), Keyspace{}.RouteSchedule(), 0, -1).Result()
	require.NoError(t, err)
	require.Equal(t, []string{Keyspace{}.Route(input.Notification.NotificationID, "email:user:user-1")}, scheduled)

	messages, err := client.XRange(context.Background(), "gateway:client-events", "-", "+").Result()
	require.NoError(t, err)
	require.Len(t, messages, 1)
	require.Equal(t, "user-1", messages[0].Values["user_id"])
	require.Equal(t, "game.turn.ready", messages[0].Values["event_type"])

	// The lease key is deleted as part of completion (GET returns redis.Nil).
	leaseKey := Keyspace{}.RouteLease(input.Notification.NotificationID, "push:user:user-1")
	_, err = client.Get(context.Background(), leaseKey).Result()
	require.Error(t, err)
}

// Verifies that completing an email route appends one mail delivery command
// without stream trimming (StreamMaxLen zero) and marks the route published.
func TestAcceptanceStoreCompleteRoutePublishedAppendsUntrimmedMailCommand(t *testing.T) {
	t.Parallel()

	server := miniredis.RunT(t)
	client := newTestRedisClient(t, server)

	store, err := NewAcceptanceStore(client, AcceptanceConfig{
		RecordTTL:      24 * time.Hour,
		DeadLetterTTL:  72 * time.Hour,
		IdempotencyTTL: 7 * 24 * time.Hour,
	})
	require.NoError(t, err)

	now := time.UnixMilli(1775121700000).UTC()
	input := validUserAcceptanceInput(now, 0)
	require.NoError(t, store.CreateAcceptance(context.Background(), input))

	acquired, err := store.TryAcquireRouteLease(context.Background(), input.Notification.NotificationID, "email:user:user-1", "token-1", 5*time.Second)
	require.NoError(t, err)
	require.True(t, acquired)

	route, found, err := store.GetRoute(context.Background(), input.Notification.NotificationID, "email:user:user-1")
	require.NoError(t, err)
	require.True(t, found)

	publishedAt := now.Add(time.Second).UTC().Truncate(time.Millisecond)
	require.NoError(t, store.CompleteRoutePublished(context.Background(), CompleteRoutePublishedInput{
		ExpectedRoute: route,
		LeaseToken:    "token-1",
		PublishedAt:   publishedAt,
		Stream:        "mail:delivery_commands",
		StreamMaxLen:  0,
		StreamValues: map[string]any{
			"delivery_id":     input.Notification.NotificationID + "/email:user:user-1",
			"source":          "notification",
			"payload_mode":    "template",
			"idempotency_key": "notification:" + input.Notification.NotificationID + "/email:user:user-1",
			"requested_at_ms": "1775121700000",
			"payload_json":    `{"to":["pilot@example.com"],"cc":[],"bcc":[],"reply_to":[],"template_id":"game.turn.ready","locale":"en","variables":{"game_id":"game-123","game_name":"Nebula Clash","turn_number":54},"attachments":[]}`,
		},
	}))

	updatedRoute, found, err := store.GetRoute(context.Background(), input.Notification.NotificationID, "email:user:user-1")
	require.NoError(t, err)
	require.True(t, found)
	require.Equal(t, acceptintent.RouteStatusPublished, updatedRoute.Status)
	require.Equal(t, 1, updatedRoute.AttemptCount)
	require.Equal(t, publishedAt, updatedRoute.PublishedAt)

	messages, err := client.XRange(context.Background(), "mail:delivery_commands", "-", "+").Result()
	require.NoError(t, err)
	require.Len(t, messages, 1)
	require.Equal(t, "notification", messages[0].Values["source"])
	require.Equal(t, "template", messages[0].Values["payload_mode"])
	require.Equal(t, "1775121700000-0/email:user:user-1", messages[0].Values["delivery_id"])
}

// Verifies that a non-terminal failure re-schedules the route for a later
// attempt and records the failure classification.
func TestAcceptanceStoreCompleteRouteFailedReschedulesRoute(t *testing.T) {
	t.Parallel()

	server := miniredis.RunT(t)
	client := newTestRedisClient(t, server)

	store, err := NewAcceptanceStore(client, AcceptanceConfig{
		RecordTTL:      24 * time.Hour,
		DeadLetterTTL:  72 * time.Hour,
		IdempotencyTTL: 7 * 24 * time.Hour,
	})
	require.NoError(t, err)

	now := time.UnixMilli(1775121700000).UTC()
	input := validUserAcceptanceInput(now, 0)
	require.NoError(t, store.CreateAcceptance(context.Background(), input))

	acquired, err := store.TryAcquireRouteLease(context.Background(), input.Notification.NotificationID, "push:user:user-1", "token-1", 5*time.Second)
	require.NoError(t, err)
	require.True(t, acquired)

	route, found, err := store.GetRoute(context.Background(), input.Notification.NotificationID, "push:user:user-1")
	require.NoError(t, err)
	require.True(t, found)

	failedAt := now.Add(time.Second).UTC().Truncate(time.Millisecond)
	nextAttemptAt := failedAt.Add(2 * time.Second).UTC().Truncate(time.Millisecond)
	require.NoError(t, store.CompleteRouteFailed(context.Background(), CompleteRouteFailedInput{
		ExpectedRoute:         route,
		LeaseToken:            "token-1",
		FailedAt:              failedAt,
		NextAttemptAt:         nextAttemptAt,
		FailureClassification: "gateway_stream_publish_failed",
		FailureMessage:        "temporary outage",
	}))

	updatedRoute, found, err := store.GetRoute(context.Background(), input.Notification.NotificationID, "push:user:user-1")
	require.NoError(t, err)
	require.True(t, found)
	require.Equal(t, acceptintent.RouteStatusFailed, updatedRoute.Status)
	require.Equal(t, 1, updatedRoute.AttemptCount)
	require.Equal(t, nextAttemptAt, updatedRoute.NextAttemptAt)
	require.Equal(t, "gateway_stream_publish_failed", updatedRoute.LastErrorClassification)

	// Both routes remain scheduled; the failed push route stays among them.
	scheduled, err := client.ZRangeWithScores(context.Background(), Keyspace{}.RouteSchedule(), 0, -1).Result()
	require.NoError(t, err)
	require.Len(t, scheduled, 2)
	require.Contains(t, []string{
		scheduled[0].Member.(string),
		scheduled[1].Member.(string),
	}, Keyspace{}.Route(input.Notification.NotificationID, "push:user:user-1"))
}

// Verifies that a terminal failure dead-letters the route, bumps the attempt
// count past MaxAttempts, persists a dead-letter payload, and removes the
// route from the schedule.
func TestAcceptanceStoreCompleteRouteDeadLetterStoresTerminalFailure(t *testing.T) {
	t.Parallel()

	server := miniredis.RunT(t)
	client := newTestRedisClient(t, server)

	store, err := NewAcceptanceStore(client, AcceptanceConfig{
		RecordTTL:      24 * time.Hour,
		DeadLetterTTL:  72 * time.Hour,
		IdempotencyTTL: 7 * 24 * time.Hour,
	})
	require.NoError(t, err)

	now := time.UnixMilli(1775121700000).UTC()
	// Start at attempt count 2 so this completion is the third and final one.
	input := validUserAcceptanceInput(now, 2)
	require.NoError(t, store.CreateAcceptance(context.Background(), input))

	acquired, err := store.TryAcquireRouteLease(context.Background(), input.Notification.NotificationID, "push:user:user-1", "token-1", 5*time.Second)
	require.NoError(t, err)
	require.True(t, acquired)

	route, found, err := store.GetRoute(context.Background(), input.Notification.NotificationID, "push:user:user-1")
	require.NoError(t, err)
	require.True(t, found)

	deadLetteredAt := now.Add(time.Second).UTC().Truncate(time.Millisecond)
	require.NoError(t, store.CompleteRouteDeadLetter(context.Background(), CompleteRouteDeadLetterInput{
		ExpectedRoute:         route,
		LeaseToken:            "token-1",
		DeadLetteredAt:        deadLetteredAt,
		FailureClassification: "payload_encoding_failed",
		FailureMessage:        "payload is invalid",
	}))

	updatedRoute, found, err := store.GetRoute(context.Background(), input.Notification.NotificationID, "push:user:user-1")
	require.NoError(t, err)
	require.True(t, found)
	require.Equal(t, acceptintent.RouteStatusDeadLetter, updatedRoute.Status)
	require.Equal(t, 3, updatedRoute.AttemptCount)
	require.Equal(t, deadLetteredAt, updatedRoute.DeadLetteredAt)

	payload, err := client.Get(context.Background(), Keyspace{}.DeadLetter(input.Notification.NotificationID, "push:user:user-1")).Bytes()
	require.NoError(t, err)
	entry, err := UnmarshalDeadLetter(payload)
	require.NoError(t, err)
	require.Equal(t, "payload_encoding_failed", entry.FailureClassification)
	require.Equal(t, 3, entry.FinalAttemptCount)

	scheduled, err := client.ZRange(context.Background(), Keyspace{}.RouteSchedule(), 0, -1).Result()
	require.NoError(t, err)
	require.Equal(t, []string{Keyspace{}.Route(input.Notification.NotificationID, "email:user:user-1")}, scheduled)
}

// Verifies that dead-lettering one (channel, recipient) route leaves every
// sibling route pending and still scheduled.
func TestAcceptanceStoreDeadLetterIsIsolatedByChannelAndRecipient(t *testing.T) {
	t.Parallel()

	server := miniredis.RunT(t)
	client := newTestRedisClient(t, server)

	store, err := NewAcceptanceStore(client, AcceptanceConfig{
		RecordTTL:      24 * time.Hour,
		DeadLetterTTL:  72 * time.Hour,
		IdempotencyTTL: 7 * 24 * time.Hour,
	})
	require.NoError(t, err)

	now := time.UnixMilli(1775121700000).UTC()
	input := validUserAcceptanceInput(now, 2)
	// Widen the fixture to a second recipient with its own push/email routes.
	input.Notification.RecipientUserIDs = []string{"user-1", "user-2"}
	input.Routes = append(input.Routes,
		acceptintent.NotificationRoute{
			NotificationID: input.Notification.NotificationID,
			RouteID:        "push:user:user-2",
			Channel:        intentstream.ChannelPush,
			RecipientRef:   "user:user-2",
			Status:         acceptintent.RouteStatusPending,
			AttemptCount:   0,
			MaxAttempts:    3,
			NextAttemptAt:  now,
			ResolvedEmail:  "second@example.com",
			ResolvedLocale: "en",
			CreatedAt:      now,
			UpdatedAt:      now,
		},
		acceptintent.NotificationRoute{
			NotificationID: input.Notification.NotificationID,
			RouteID:        "email:user:user-2",
			Channel:        intentstream.ChannelEmail,
			RecipientRef:   "user:user-2",
			Status:         acceptintent.RouteStatusPending,
			AttemptCount:   0,
			MaxAttempts:    7,
			NextAttemptAt:  now,
			ResolvedEmail:  "second@example.com",
			ResolvedLocale: "en",
			CreatedAt:      now,
			UpdatedAt:      now,
		},
	)
	require.NoError(t, store.CreateAcceptance(context.Background(), input))

	acquired, err := store.TryAcquireRouteLease(context.Background(), input.Notification.NotificationID, "push:user:user-1", "token-1", 5*time.Second)
	require.NoError(t, err)
	require.True(t, acquired)

	route, found, err := store.GetRoute(context.Background(), input.Notification.NotificationID, "push:user:user-1")
	require.NoError(t, err)
	require.True(t, found)

	deadLetteredAt := now.Add(time.Second).UTC().Truncate(time.Millisecond)
	require.NoError(t, store.CompleteRouteDeadLetter(context.Background(), CompleteRouteDeadLetterInput{
		ExpectedRoute:         route,
		LeaseToken:            "token-1",
		DeadLetteredAt:        deadLetteredAt,
		FailureClassification: "gateway_stream_publish_failed",
		FailureMessage:        "gateway unavailable",
	}))

	deadLetterRoute, found, err := store.GetRoute(context.Background(), input.Notification.NotificationID, "push:user:user-1")
	require.NoError(t, err)
	require.True(t, found)
	require.Equal(t, acceptintent.RouteStatusDeadLetter, deadLetterRoute.Status)

	// Every other route must be untouched by the dead-letter.
	for _, routeID := range []string{"email:user:user-1", "push:user:user-2", "email:user:user-2"} {
		route, found, err := store.GetRoute(context.Background(), input.Notification.NotificationID, routeID)
		require.NoError(t, err)
		require.True(t, found, "route %s should remain stored", routeID)
		require.Equal(t, acceptintent.RouteStatusPending, route.Status, "route %s should remain pending", routeID)
	}

	scheduled, err := client.ZRange(context.Background(), Keyspace{}.RouteSchedule(), 0, -1).Result()
	require.NoError(t, err)
	require.ElementsMatch(t, []string{
		Keyspace{}.Route(input.Notification.NotificationID, "email:user:user-1"),
		Keyspace{}.Route(input.Notification.NotificationID, "push:user:user-2"),
		Keyspace{}.Route(input.Notification.NotificationID, "email:user:user-2"),
	}, scheduled)
}

// validUserAcceptanceInput builds one valid user-targeted acceptance fixture
// with a push route (attempt count configurable) and an email route.
func validUserAcceptanceInput(now time.Time, pushAttemptCount int) acceptintent.CreateAcceptanceInput {
	return acceptintent.CreateAcceptanceInput{
		Notification: acceptintent.NotificationRecord{
			NotificationID:     "1775121700000-0",
			NotificationType:   intentstream.NotificationTypeGameTurnReady,
			Producer:           intentstream.ProducerGameMaster,
			AudienceKind:       intentstream.AudienceKindUser,
			RecipientUserIDs:   []string{"user-1"},
			PayloadJSON:        `{"game_id":"game-123","game_name":"Nebula Clash","turn_number":54}`,
			IdempotencyKey:     "game-123:turn-54",
			RequestFingerprint: "sha256:deadbeef",
			RequestID:          "request-1",
			TraceID:            "trace-1",
			OccurredAt:         now,
			AcceptedAt:         now,
			UpdatedAt:          now,
		},
		Routes: []acceptintent.NotificationRoute{
			{
				NotificationID: "1775121700000-0",
				RouteID:        "push:user:user-1",
				Channel:        intentstream.ChannelPush,
				RecipientRef:   "user:user-1",
				Status:         acceptintent.RouteStatusPending,
				AttemptCount:   pushAttemptCount,
				MaxAttempts:    3,
				NextAttemptAt:  now,
				ResolvedEmail:  "pilot@example.com",
				ResolvedLocale: "en",
				CreatedAt:      now,
				UpdatedAt:      now,
			},
			{
				NotificationID: "1775121700000-0",
				RouteID:        "email:user:user-1",
				Channel:        intentstream.ChannelEmail,
				RecipientRef:   "user:user-1",
				Status:         acceptintent.RouteStatusPending,
				AttemptCount:   0,
				MaxAttempts:    7,
				NextAttemptAt:  now,
				ResolvedEmail:  "pilot@example.com",
				ResolvedLocale: "en",
				CreatedAt:      now,
				UpdatedAt:      now,
			},
		},
		Idempotency: acceptintent.IdempotencyRecord{
			Producer:           intentstream.ProducerGameMaster,
			IdempotencyKey:     "game-123:turn-54",
			NotificationID:     "1775121700000-0",
			RequestFingerprint: "sha256:deadbeef",
			CreatedAt:          now,
			ExpiresAt:          now.Add(7 * 24 * time.Hour),
		},
	}
}

package redisstate

import (
	"context"
	"errors"
	"fmt"
	"strconv"
	"strings"
	"time"

	"galaxy/notification/internal/telemetry"

	"github.com/redis/go-redis/v9"
)

// StreamOffsetStore provides the Redis-backed storage used for persisted
// plain-XREAD consumer progress.
type StreamOffsetStore struct {
	// client is the shared Redis connection pool; never nil after NewStreamOffsetStore.
	client *redis.Client
	// keys derives the Redis key names used by this store.
	keys Keyspace
}

// NewStreamOffsetStore constructs one Redis-backed stream-offset store.
func NewStreamOffsetStore(client *redis.Client) (*StreamOffsetStore, error) {
	if client == nil {
		return nil, errors.New("new notification stream offset store: nil redis client")
	}

	return &StreamOffsetStore{
		client: client,
		keys:   Keyspace{},
	}, nil
}

// Load returns the last processed entry id for stream when one is stored.
// The second return value reports whether an offset exists at all.
func (store *StreamOffsetStore) Load(ctx context.Context, stream string) (string, bool, error) {
	if store == nil || store.client == nil {
		return "", false, errors.New("load notification stream offset: nil store")
	}
	if ctx == nil {
		return "", false, errors.New("load notification stream offset: nil context")
	}

	payload, err := store.client.Get(ctx, store.keys.StreamOffset(stream)).Bytes()
	switch {
	// redis.Nil means no offset was ever saved: "not found", not an error.
	case errors.Is(err, redis.Nil):
		return "", false, nil
	case err != nil:
		return "", false, fmt.Errorf("load notification stream offset: %w", err)
	}

	offset, err := UnmarshalStreamOffset(payload)
	if err != nil {
		return "", false, fmt.Errorf("load notification stream offset: %w", err)
	}

	return offset.LastProcessedEntryID, true, nil
}

// Save stores the last processed entry id for stream.
// The record is written without a TTL, so consumer progress persists until
// overwritten by a later Save.
func (store *StreamOffsetStore) Save(ctx context.Context, stream string, entryID string) error {
	if store == nil || store.client == nil {
		return errors.New("save notification stream offset: nil store")
	}
	if ctx == nil {
		return errors.New("save notification stream offset: nil context")
	}

	offset := StreamOffset{
		Stream:               stream,
		LastProcessedEntryID: entryID,
		// Millisecond truncation keeps the stored timestamp round-trippable.
		UpdatedAt: time.Now().UTC().Truncate(time.Millisecond),
	}
	payload, err := MarshalStreamOffset(offset)
	if err != nil {
		return fmt.Errorf("save notification stream offset: %w", err)
	}
	if err := store.client.Set(ctx, store.keys.StreamOffset(stream), payload, 0).Err(); err != nil {
		return fmt.Errorf("save notification stream offset: %w", err)
	}

	return nil
}

// IntentStreamLagReader provides Redis-backed lag snapshots for one intent
// stream.
type IntentStreamLagReader struct {
	// store supplies both the persisted offset and the Redis client.
	store *StreamOffsetStore
	// stream is the Redis Stream key being observed; non-empty.
	stream string
}

// NewIntentStreamLagReader constructs a lag reader for stream using store.
func NewIntentStreamLagReader(store *StreamOffsetStore, stream string) (*IntentStreamLagReader, error) {
	if store == nil || store.client == nil {
		return nil, errors.New("new notification intent stream lag reader: nil store")
	}
	if strings.TrimSpace(stream) == "" {
		return nil, errors.New("new notification intent stream lag reader: stream must not be empty")
	}

	return &IntentStreamLagReader{
		store:  store,
		stream: stream,
	}, nil
}

// ReadIntentStreamLagSnapshot returns the oldest stream entry that is newer
// than the persisted plain-XREAD consumer offset for the configured stream.
// An empty snapshot (nil OldestUnprocessedAt) means the consumer is caught up.
func (reader *IntentStreamLagReader) ReadIntentStreamLagSnapshot(ctx context.Context) (telemetry.IntentStreamLagSnapshot, error) {
	if reader == nil || reader.store == nil {
		return telemetry.IntentStreamLagSnapshot{}, errors.New("read notification intent stream lag snapshot: nil reader")
	}
	if ctx == nil {
		return telemetry.IntentStreamLagSnapshot{}, errors.New("read notification intent stream lag snapshot: nil context")
	}

	lastID, found, err := reader.store.Load(ctx, reader.stream)
	if err != nil {
		return telemetry.IntentStreamLagSnapshot{}, fmt.Errorf("read notification intent stream lag snapshot: %w", err)
	}

	// "-" scans from the stream start; the "(" prefix makes the stored id an
	// exclusive lower bound so the already-processed entry is skipped.
	minID := "-"
	if found {
		minID = "(" + lastID
	}

	// XRangeN with count 1 fetches only the single oldest unprocessed entry.
	messages, err := reader.store.client.XRangeN(ctx, reader.stream, minID, "+", 1).Result()
	if err != nil {
		return telemetry.IntentStreamLagSnapshot{}, fmt.Errorf("read notification intent stream lag snapshot: oldest entry: %w", err)
	}
	if len(messages) == 0 {
		return telemetry.IntentStreamLagSnapshot{}, nil
	}

	oldestAt, err := streamEntryTime(messages[0].ID)
	if err != nil {
		return telemetry.IntentStreamLagSnapshot{}, fmt.Errorf("read notification intent stream lag snapshot: oldest entry id: %w", err)
	}

	return telemetry.IntentStreamLagSnapshot{
		OldestUnprocessedAt: &oldestAt,
	}, nil
}

// streamEntryTime extracts the UTC wall-clock time encoded in a Redis Stream
// entry id of the form "<unix-milliseconds>-<sequence>".
func streamEntryTime(entryID string) (time.Time, error) {
	timestampText, _, ok := strings.Cut(entryID, "-")
	if !ok || strings.TrimSpace(timestampText) == "" {
		return time.Time{}, fmt.Errorf("entry id %q is not a Redis Stream id", entryID)
	}

	timestampMS, err := strconv.ParseInt(timestampText, 10, 64)
	if err != nil {
		return time.Time{}, err
	}
	if timestampMS < 0 {
		return time.Time{}, fmt.Errorf("entry id %q has negative timestamp", entryID)
	}

	return time.UnixMilli(timestampMS).UTC(), nil
}
0000000..7d68aa5 --- /dev/null +++ b/notification/internal/adapters/userservice/client.go @@ -0,0 +1,243 @@ +// Package userservice provides the trusted internal User Service HTTP client +// used by Notification Service recipient enrichment. +package userservice + +import ( + "bytes" + "context" + "encoding/json" + "errors" + "fmt" + "io" + "net/http" + "net/url" + "strings" + "time" + + "galaxy/notification/internal/service/acceptintent" + + "go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp" +) + +const ( + getUserByIDPathSuffix = "/api/v1/internal/users/%s" + subjectNotFoundErrorCode = "subject_not_found" +) + +// Config configures one HTTP-backed User Service enrichment client. +type Config struct { + // BaseURL stores the absolute base URL of the trusted internal User Service + // HTTP API. + BaseURL string + + // RequestTimeout bounds one outbound lookup request. + RequestTimeout time.Duration +} + +// Client resolves Notification Service recipients through the trusted +// internal User Service HTTP API. +type Client struct { + baseURL string + requestTimeout time.Duration + httpClient *http.Client + closeIdleConnections func() +} + +type getUserByIDResponse struct { + User userView `json:"user"` +} + +type userView struct { + Email string `json:"email"` + PreferredLanguage string `json:"preferred_language"` +} + +type errorEnvelope struct { + Error *errorBody `json:"error"` +} + +type errorBody struct { + Code string `json:"code"` + Message string `json:"message"` +} + +// NewClient constructs a User Service client that uses repository-standard +// HTTP transport instrumentation through otelhttp. 
+func NewClient(cfg Config) (*Client, error) { + transport, ok := http.DefaultTransport.(*http.Transport) + if !ok { + return nil, errors.New("new notification user service client: default transport is not *http.Transport") + } + + baseTransport := transport.Clone() + + return newClient( + cfg, + &http.Client{Transport: otelhttp.NewTransport(baseTransport)}, + baseTransport.CloseIdleConnections, + ) +} + +func newClient(cfg Config, httpClient *http.Client, closeIdleConnections func()) (*Client, error) { + switch { + case strings.TrimSpace(cfg.BaseURL) == "": + return nil, errors.New("new notification user service client: base URL must not be empty") + case cfg.RequestTimeout <= 0: + return nil, errors.New("new notification user service client: request timeout must be positive") + case httpClient == nil: + return nil, errors.New("new notification user service client: http client must not be nil") + } + + parsedBaseURL, err := url.Parse(strings.TrimRight(strings.TrimSpace(cfg.BaseURL), "/")) + if err != nil { + return nil, fmt.Errorf("new notification user service client: parse base URL: %w", err) + } + if parsedBaseURL.Scheme == "" || parsedBaseURL.Host == "" { + return nil, errors.New("new notification user service client: base URL must be absolute") + } + + return &Client{ + baseURL: parsedBaseURL.String(), + requestTimeout: cfg.RequestTimeout, + httpClient: httpClient, + closeIdleConnections: closeIdleConnections, + }, nil +} + +// Close releases idle HTTP connections owned by the client transport. +func (client *Client) Close() error { + if client == nil || client.closeIdleConnections == nil { + return nil + } + + client.closeIdleConnections() + + return nil +} + +// GetUserByID resolves the current user email and preferred language for the +// supplied stable userID. 
+func (client *Client) GetUserByID(ctx context.Context, userID string) (acceptintent.UserRecord, error) { + if client == nil || client.httpClient == nil { + return acceptintent.UserRecord{}, errors.New("lookup user by id: nil client") + } + if ctx == nil { + return acceptintent.UserRecord{}, errors.New("lookup user by id: nil context") + } + if err := ctx.Err(); err != nil { + return acceptintent.UserRecord{}, err + } + if strings.TrimSpace(userID) == "" { + return acceptintent.UserRecord{}, errors.New("lookup user by id: user id must not be empty") + } + + payload, statusCode, err := client.doRequest(ctx, http.MethodGet, fmt.Sprintf(getUserByIDPathSuffix, url.PathEscape(userID))) + if err != nil { + return acceptintent.UserRecord{}, fmt.Errorf("lookup user by id %q: %w", userID, err) + } + + switch statusCode { + case http.StatusOK: + var response getUserByIDResponse + if err := decodeJSONPayload(payload, &response); err != nil { + return acceptintent.UserRecord{}, fmt.Errorf("lookup user by id %q: decode success response: %w", userID, err) + } + + record := acceptintent.UserRecord{ + Email: response.User.Email, + PreferredLanguage: response.User.PreferredLanguage, + } + if err := record.Validate(); err != nil { + return acceptintent.UserRecord{}, fmt.Errorf("lookup user by id %q: invalid success response: %w", userID, err) + } + + return record, nil + case http.StatusNotFound: + errorCode, err := decodeErrorCode(payload) + if err != nil { + return acceptintent.UserRecord{}, fmt.Errorf("lookup user by id %q: decode error response: %w", userID, err) + } + if errorCode == subjectNotFoundErrorCode { + return acceptintent.UserRecord{}, fmt.Errorf("lookup user by id %q: %w", userID, acceptintent.ErrRecipientNotFound) + } + + return acceptintent.UserRecord{}, fmt.Errorf("lookup user by id %q: unexpected error code %q for status %d", userID, errorCode, statusCode) + default: + return acceptintent.UserRecord{}, fmt.Errorf("lookup user by id %q: unexpected HTTP status %d", 
userID, statusCode) + } +} + +func (client *Client) doRequest(ctx context.Context, method string, requestPath string) ([]byte, int, error) { + attemptCtx, cancel := context.WithTimeout(ctx, client.requestTimeout) + defer cancel() + + request, err := http.NewRequestWithContext(attemptCtx, method, client.baseURL+requestPath, nil) + if err != nil { + return nil, 0, fmt.Errorf("build request: %w", err) + } + + response, err := client.httpClient.Do(request) + if err != nil { + return nil, 0, err + } + defer response.Body.Close() + + payload, err := io.ReadAll(response.Body) + if err != nil { + return nil, 0, fmt.Errorf("read response body: %w", err) + } + + return payload, response.StatusCode, nil +} + +func decodeErrorCode(payload []byte) (string, error) { + var envelope errorEnvelope + if err := decodeStrictJSONPayload(payload, &envelope); err != nil { + return "", err + } + if envelope.Error == nil { + return "", errors.New("missing error object") + } + if strings.TrimSpace(envelope.Error.Code) == "" { + return "", errors.New("missing error code") + } + + return envelope.Error.Code, nil +} + +func decodeJSONPayload(payload []byte, target any) error { + decoder := json.NewDecoder(bytes.NewReader(payload)) + + if err := decoder.Decode(target); err != nil { + return err + } + if err := decoder.Decode(&struct{}{}); err != io.EOF { + if err == nil { + return errors.New("unexpected trailing JSON input") + } + + return err + } + + return nil +} + +func decodeStrictJSONPayload(payload []byte, target any) error { + decoder := json.NewDecoder(bytes.NewReader(payload)) + decoder.DisallowUnknownFields() + + if err := decoder.Decode(target); err != nil { + return err + } + if err := decoder.Decode(&struct{}{}); err != io.EOF { + if err == nil { + return errors.New("unexpected trailing JSON input") + } + + return err + } + + return nil +} + +var _ acceptintent.UserDirectory = (*Client)(nil) diff --git a/notification/internal/adapters/userservice/client_test.go 
package userservice

import (
	"context"
	"encoding/json"
	"io"
	"net/http"
	"net/http/httptest"
	"testing"
	"time"

	"galaxy/notification/internal/service/acceptintent"

	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
)

// TestNewClient exercises constructor validation of Config.
func TestNewClient(t *testing.T) {
	t.Parallel()

	tests := []struct {
		name    string
		cfg     Config
		wantErr string
	}{
		{
			name: "valid config",
			cfg: Config{
				BaseURL:        "http://127.0.0.1:8080",
				RequestTimeout: time.Second,
			},
		},
		{
			name: "empty base url",
			cfg: Config{
				RequestTimeout: time.Second,
			},
			wantErr: "base URL must not be empty",
		},
		{
			name: "relative base url",
			cfg: Config{
				BaseURL:        "/relative",
				RequestTimeout: time.Second,
			},
			wantErr: "base URL must be absolute",
		},
		{
			name: "non positive timeout",
			cfg: Config{
				BaseURL: "http://127.0.0.1:8080",
			},
			wantErr: "request timeout must be positive",
		},
	}

	for _, tt := range tests {
		tt := tt // capture for parallel subtests (pre-Go-1.22 loop semantics)

		t.Run(tt.name, func(t *testing.T) {
			t.Parallel()

			client, err := NewClient(tt.cfg)
			if tt.wantErr != "" {
				require.Error(t, err)
				assert.ErrorContains(t, err, tt.wantErr)
				return
			}

			require.NoError(t, err)
			assert.NoError(t, client.Close())
		})
	}
}

// TestClientGetUserByID exercises the lookup contract against an httptest
// server: success, subject-not-found mapping, invalid payload, and timeout.
func TestClientGetUserByID(t *testing.T) {
	t.Parallel()

	t.Run("success", func(t *testing.T) {
		t.Parallel()

		var captured capturedRequest
		server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
			captured = captureRequest(t, r)
			// Extra fields (user_id, time_zone) verify the lenient decoder.
			writeJSON(t, w, http.StatusOK, map[string]any{
				"user": map[string]any{
					"user_id":            "user-123",
					"email":              "pilot@example.com",
					"preferred_language": "en-US",
					"time_zone":          "Europe/Kaliningrad",
				},
			})
		}))
		defer server.Close()

		client := newTestClient(t, server.URL, 250*time.Millisecond)

		record, err := client.GetUserByID(context.Background(), "user-123")
		require.NoError(t, err)
		require.Equal(t, acceptintent.UserRecord{
			Email:             "pilot@example.com",
			PreferredLanguage: "en-US",
		}, record)
		require.Equal(t, capturedRequest{
			Method: http.MethodGet,
			Path:   "/api/v1/internal/users/user-123",
		}, captured)
	})

	t.Run("subject not found", func(t *testing.T) {
		t.Parallel()

		server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
			writeJSON(t, w, http.StatusNotFound, map[string]any{
				"error": map[string]any{
					"code":    "subject_not_found",
					"message": "subject not found",
				},
			})
		}))
		defer server.Close()

		client := newTestClient(t, server.URL, 250*time.Millisecond)

		_, err := client.GetUserByID(context.Background(), "user-missing")
		require.Error(t, err)
		require.ErrorIs(t, err, acceptintent.ErrRecipientNotFound)
	})

	t.Run("invalid email is treated as dependency failure", func(t *testing.T) {
		t.Parallel()

		server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
			writeJSON(t, w, http.StatusOK, map[string]any{
				"user": map[string]any{
					"email":              "bad@@example.com",
					"preferred_language": "en",
				},
			})
		}))
		defer server.Close()

		client := newTestClient(t, server.URL, 250*time.Millisecond)

		_, err := client.GetUserByID(context.Background(), "user-123")
		require.Error(t, err)
		require.NotErrorIs(t, err, acceptintent.ErrRecipientNotFound)
		require.ErrorContains(t, err, "invalid success response")
	})

	t.Run("timeout", func(t *testing.T) {
		t.Parallel()

		// The handler never responds; it waits for the client to cancel.
		server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
			<-r.Context().Done()
		}))
		defer server.Close()

		client := newTestClient(t, server.URL, 10*time.Millisecond)

		_, err := client.GetUserByID(context.Background(), "user-123")
		require.Error(t, err)
		require.NotErrorIs(t, err, acceptintent.ErrRecipientNotFound)
		require.ErrorContains(t, err, "context deadline exceeded")
	})
}

// capturedRequest records the method and path one handler observed.
type capturedRequest struct {
	Method string
	Path   string
}

// newTestClient builds a Client around a plain (uninstrumented) transport
// pointed at baseURL.
func newTestClient(t *testing.T, baseURL string, requestTimeout time.Duration) *Client {
	t.Helper()

	client, err := newClient(
		Config{
			BaseURL:        baseURL,
			RequestTimeout: requestTimeout,
		},
		&http.Client{Transport: http.DefaultTransport.(*http.Transport).Clone()},
		func() {},
	)
	require.NoError(t, err)

	return client
}

// captureRequest drains and closes the request body and returns the request
// method and path for later assertions.
func captureRequest(t *testing.T, request *http.Request) capturedRequest {
	t.Helper()

	_, err := io.ReadAll(request.Body)
	require.NoError(t, err)
	require.NoError(t, request.Body.Close())

	return capturedRequest{
		Method: request.Method,
		Path:   request.URL.Path,
	}
}

// writeJSON marshals payload and writes it with the supplied status code.
func writeJSON(t *testing.T, writer http.ResponseWriter, statusCode int, payload any) {
	t.Helper()

	body, err := json.Marshal(payload)
	require.NoError(t, err)

	writer.Header().Set("Content-Type", "application/json")
	writer.WriteHeader(statusCode)
	_, err = writer.Write(body)
	require.NoError(t, err)
}

// TestClientCloseIsNilSafe pins the nil-receiver guarantee of Close.
func TestClientCloseIsNilSafe(t *testing.T) {
	t.Parallel()

	var nilClient *Client
	require.NoError(t, nilClient.Close())
}

// Package api reserves the transport-layer namespace of Notification Service.
package api

// Package intentstream defines the frozen Redis Stream contract used for
// Notification Service intent intake.
+package intentstream + +import ( + "strings" + + "galaxy/notification/internal/service/malformedintent" + "galaxy/notificationintent" +) + +const ( + fieldNotificationType = "notification_type" + fieldProducer = "producer" + fieldAudienceKind = "audience_kind" + fieldRecipientUserIDs = "recipient_user_ids_json" + fieldIdempotencyKey = "idempotency_key" + fieldOccurredAtMS = "occurred_at_ms" + fieldRequestID = "request_id" + fieldTraceID = "trace_id" + fieldPayloadJSON = "payload_json" + defaultResolvedLocale = "en" +) + +// NotificationType identifies one supported normalized notification type. +type NotificationType = notificationintent.NotificationType + +const ( + // NotificationTypeGeoReviewRecommended identifies the + // `geo.review_recommended` notification. + NotificationTypeGeoReviewRecommended = notificationintent.NotificationTypeGeoReviewRecommended + + // NotificationTypeGameTurnReady identifies the `game.turn.ready` + // notification. + NotificationTypeGameTurnReady = notificationintent.NotificationTypeGameTurnReady + + // NotificationTypeGameFinished identifies the `game.finished` + // notification. + NotificationTypeGameFinished = notificationintent.NotificationTypeGameFinished + + // NotificationTypeGameGenerationFailed identifies the + // `game.generation_failed` notification. + NotificationTypeGameGenerationFailed = notificationintent.NotificationTypeGameGenerationFailed + + // NotificationTypeLobbyRuntimePausedAfterStart identifies the + // `lobby.runtime_paused_after_start` notification. + NotificationTypeLobbyRuntimePausedAfterStart = notificationintent.NotificationTypeLobbyRuntimePausedAfterStart + + // NotificationTypeLobbyApplicationSubmitted identifies the + // `lobby.application.submitted` notification. + NotificationTypeLobbyApplicationSubmitted = notificationintent.NotificationTypeLobbyApplicationSubmitted + + // NotificationTypeLobbyMembershipApproved identifies the + // `lobby.membership.approved` notification. 
+ NotificationTypeLobbyMembershipApproved = notificationintent.NotificationTypeLobbyMembershipApproved + + // NotificationTypeLobbyMembershipRejected identifies the + // `lobby.membership.rejected` notification. + NotificationTypeLobbyMembershipRejected = notificationintent.NotificationTypeLobbyMembershipRejected + + // NotificationTypeLobbyInviteCreated identifies the + // `lobby.invite.created` notification. + NotificationTypeLobbyInviteCreated = notificationintent.NotificationTypeLobbyInviteCreated + + // NotificationTypeLobbyInviteRedeemed identifies the + // `lobby.invite.redeemed` notification. + NotificationTypeLobbyInviteRedeemed = notificationintent.NotificationTypeLobbyInviteRedeemed + + // NotificationTypeLobbyInviteExpired identifies the + // `lobby.invite.expired` notification. + NotificationTypeLobbyInviteExpired = notificationintent.NotificationTypeLobbyInviteExpired +) + +// Producer identifies one supported upstream producer. +type Producer = notificationintent.Producer + +const ( + // ProducerGeoProfile identifies Geo Profile Service. + ProducerGeoProfile = notificationintent.ProducerGeoProfile + + // ProducerGameMaster identifies Game Master. + ProducerGameMaster = notificationintent.ProducerGameMaster + + // ProducerGameLobby identifies Game Lobby. + ProducerGameLobby = notificationintent.ProducerGameLobby +) + +// AudienceKind identifies one supported target-audience kind. +type AudienceKind = notificationintent.AudienceKind + +const ( + // AudienceKindUser identifies user-targeted notifications. + AudienceKindUser = notificationintent.AudienceKindUser + + // AudienceKindAdminEmail identifies administrator-email notifications. + AudienceKindAdminEmail = notificationintent.AudienceKindAdminEmail +) + +// Channel identifies one durable notification-delivery channel slot. +type Channel = notificationintent.Channel + +const ( + // ChannelPush identifies the push-delivery channel. 
+ ChannelPush = notificationintent.ChannelPush + + // ChannelEmail identifies the email-delivery channel. + ChannelEmail = notificationintent.ChannelEmail +) + +// Intent stores one normalized notification intent accepted from the Redis +// Stream ingress contract. +type Intent = notificationintent.Intent + +// DecodeIntent validates one raw Redis Stream entry and returns the normalized +// notification intent frozen by the shared producer contract. +func DecodeIntent(fields map[string]any) (Intent, error) { + return notificationintent.DecodeIntent(fields) +} + +// ClassifyDecodeError maps one intake decoding or validation error to the +// stable malformed-intent failure surface. +func ClassifyDecodeError(err error) malformedintent.FailureCode { + if err == nil { + return malformedintent.FailureCodeInvalidIntent + } + + message := err.Error() + switch { + case strings.Contains(message, "payload_json"), + strings.Contains(message, "turn_number"), + strings.Contains(message, "final_turn_number"), + strings.Contains(message, "failure_reason"), + strings.Contains(message, "applicant_name"), + strings.Contains(message, "inviter_name"), + strings.Contains(message, "invitee_name"), + strings.Contains(message, "review_reason"): + return malformedintent.FailureCodeInvalidPayload + default: + return malformedintent.FailureCodeInvalidIntent + } +} + +// DefaultResolvedLocale returns the frozen fallback locale assigned when the +// current rollout has no supported exact user locale other than English. 
+func DefaultResolvedLocale() string { + return defaultResolvedLocale +} diff --git a/notification/internal/api/intentstream/contract_test.go b/notification/internal/api/intentstream/contract_test.go new file mode 100644 index 0000000..f9d7821 --- /dev/null +++ b/notification/internal/api/intentstream/contract_test.go @@ -0,0 +1,145 @@ +package intentstream + +import ( + "testing" + "time" + + "github.com/stretchr/testify/require" +) + +func TestDecodeIntentNormalizesUserRecipientsAndPayload(t *testing.T) { + t.Parallel() + + fields := map[string]any{ + fieldNotificationType: NotificationTypeGameTurnReady.String(), + fieldProducer: ProducerGameMaster.String(), + fieldAudienceKind: AudienceKindUser.String(), + fieldRecipientUserIDs: `["user-2","user-1"]`, + fieldIdempotencyKey: "game-123:turn-54", + fieldOccurredAtMS: "1775121700000", + fieldPayloadJSON: `{"turn_number":54,"game_name":"Nebula Clash","game_id":"game-123"}`, + fieldRequestID: "request-123", + fieldTraceID: "trace-123", + } + + intent, err := DecodeIntent(fields) + require.NoError(t, err) + require.Equal(t, []string{"user-1", "user-2"}, intent.RecipientUserIDs) + require.Equal(t, `{"game_id":"game-123","game_name":"Nebula Clash","turn_number":54}`, intent.PayloadJSON) + require.Equal(t, time.UnixMilli(1775121700000).UTC(), intent.OccurredAt) +} + +func TestDecodeIntentCanonicalizesEquivalentPayloadJSON(t *testing.T) { + t.Parallel() + + fieldsA := map[string]any{ + fieldNotificationType: NotificationTypeGameFinished.String(), + fieldProducer: ProducerGameMaster.String(), + fieldAudienceKind: AudienceKindUser.String(), + fieldRecipientUserIDs: `["user-1"]`, + fieldIdempotencyKey: "game-123:finished", + fieldOccurredAtMS: "1775121700001", + fieldPayloadJSON: `{"game_id":"game-123","game_name":"Nebula Clash","final_turn_number":54}`, + } + fieldsB := map[string]any{ + fieldNotificationType: NotificationTypeGameFinished.String(), + fieldProducer: ProducerGameMaster.String(), + fieldAudienceKind: 
AudienceKindUser.String(), + fieldRecipientUserIDs: `["user-1"]`, + fieldIdempotencyKey: "game-123:finished", + fieldOccurredAtMS: "1775121709999", + fieldPayloadJSON: `{"final_turn_number":54,"game_name":"Nebula Clash","game_id":"game-123"}`, + } + + intentA, err := DecodeIntent(fieldsA) + require.NoError(t, err) + intentB, err := DecodeIntent(fieldsB) + require.NoError(t, err) + + require.Equal(t, intentA.PayloadJSON, intentB.PayloadJSON) +} + +func TestDecodeIntentRejectsUnsupportedTopLevelField(t *testing.T) { + t.Parallel() + + fields := map[string]any{ + fieldNotificationType: NotificationTypeGameTurnReady.String(), + fieldProducer: ProducerGameMaster.String(), + fieldAudienceKind: AudienceKindUser.String(), + fieldRecipientUserIDs: `["user-1"]`, + fieldIdempotencyKey: "game-123:turn-54", + fieldOccurredAtMS: "1775121700000", + fieldPayloadJSON: `{"game_id":"game-123","game_name":"Nebula Clash","turn_number":54}`, + "unexpected": "boom", + } + + _, err := DecodeIntent(fields) + require.Error(t, err) + require.Contains(t, err.Error(), "unsupported fields") + require.Equal(t, malformedFailureCodeInvalidIntent(), string(ClassifyDecodeError(err))) +} + +func TestDecodeIntentRejectsDuplicateRecipientUserIDs(t *testing.T) { + t.Parallel() + + fields := map[string]any{ + fieldNotificationType: NotificationTypeGameTurnReady.String(), + fieldProducer: ProducerGameMaster.String(), + fieldAudienceKind: AudienceKindUser.String(), + fieldRecipientUserIDs: `["user-1","user-1"]`, + fieldIdempotencyKey: "game-123:turn-54", + fieldOccurredAtMS: "1775121700000", + fieldPayloadJSON: `{"game_id":"game-123","game_name":"Nebula Clash","turn_number":54}`, + } + + _, err := DecodeIntent(fields) + require.Error(t, err) + require.Contains(t, err.Error(), "duplicates user id") + require.Equal(t, malformedFailureCodeInvalidIntent(), string(ClassifyDecodeError(err))) +} + +func TestDecodeIntentRejectsInvalidPayloadJSON(t *testing.T) { + t.Parallel() + + fields := map[string]any{ + 
fieldNotificationType: NotificationTypeLobbyInviteCreated.String(), + fieldProducer: ProducerGameLobby.String(), + fieldAudienceKind: AudienceKindUser.String(), + fieldRecipientUserIDs: `["user-1"]`, + fieldIdempotencyKey: "invite-created:user-1", + fieldOccurredAtMS: "1775121700000", + fieldPayloadJSON: `{"game_id":"game-123","game_name":"Nebula Clash","inviter_user_id":"user-2"}`, + } + + _, err := DecodeIntent(fields) + require.Error(t, err) + require.Contains(t, err.Error(), "payload_json.inviter_name is required") + require.Equal(t, malformedFailureCodeInvalidPayload(), string(ClassifyDecodeError(err))) +} + +func TestDecodeIntentRejectsAdminRecipientsField(t *testing.T) { + t.Parallel() + + fields := map[string]any{ + fieldNotificationType: NotificationTypeGeoReviewRecommended.String(), + fieldProducer: ProducerGeoProfile.String(), + fieldAudienceKind: AudienceKindAdminEmail.String(), + fieldRecipientUserIDs: `["user-1"]`, + fieldIdempotencyKey: "geo:user-1", + fieldOccurredAtMS: "1775121700000", + fieldPayloadJSON: `{"user_id":"user-1","user_email":"pilot@example.com","observed_country":"DE","usual_connection_country":"PL","review_reason":"country_mismatch"}`, + } + + _, err := DecodeIntent(fields) + require.Error(t, err) + require.Contains(t, err.Error(), "must not be present") + require.Equal(t, malformedFailureCodeInvalidIntent(), string(ClassifyDecodeError(err))) +} + +func malformedFailureCodeInvalidIntent() string { + return "invalid_intent" +} + +func malformedFailureCodeInvalidPayload() string { + return "invalid_payload" +} diff --git a/notification/internal/api/internalhttp/server.go b/notification/internal/api/internalhttp/server.go new file mode 100644 index 0000000..a15ac77 --- /dev/null +++ b/notification/internal/api/internalhttp/server.go @@ -0,0 +1,252 @@ +// Package internalhttp provides the private probe HTTP listener used by the +// runnable Notification Service process. 
+package internalhttp + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "log/slog" + "net" + "net/http" + "strconv" + "sync" + "time" + + "galaxy/notification/internal/telemetry" + + "go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp" + "go.opentelemetry.io/otel/attribute" +) + +const jsonContentType = "application/json; charset=utf-8" + +const ( + // HealthzPath is the private liveness probe route. + HealthzPath = "/healthz" + + // ReadyzPath is the private readiness probe route. + ReadyzPath = "/readyz" +) + +// Config describes the private internal HTTP listener owned by Notification +// Service. +type Config struct { + // Addr is the TCP listen address used by the private probe HTTP server. + Addr string + + // ReadHeaderTimeout bounds how long the listener may spend reading request + // headers before the server rejects the connection. + ReadHeaderTimeout time.Duration + + // ReadTimeout bounds how long the listener may spend reading one request. + ReadTimeout time.Duration + + // IdleTimeout bounds how long the listener keeps an idle keep-alive + // connection open. + IdleTimeout time.Duration +} + +// Validate reports whether cfg contains a usable private HTTP listener +// configuration. +func (cfg Config) Validate() error { + switch { + case cfg.Addr == "": + return errors.New("internal HTTP addr must not be empty") + case cfg.ReadHeaderTimeout <= 0: + return errors.New("internal HTTP read header timeout must be positive") + case cfg.ReadTimeout <= 0: + return errors.New("internal HTTP read timeout must be positive") + case cfg.IdleTimeout <= 0: + return errors.New("internal HTTP idle timeout must be positive") + default: + return nil + } +} + +// Dependencies describes the collaborators used by the private probe +// transport layer. +type Dependencies struct { + // Logger writes structured listener lifecycle logs. When nil, slog.Default + // is used. 
+ Logger *slog.Logger + + // Telemetry records low-cardinality probe metrics and lifecycle events. + Telemetry *telemetry.Runtime +} + +// Server owns the private probe HTTP listener exposed by Notification +// Service. +type Server struct { + cfg Config + + handler http.Handler + logger *slog.Logger + metrics *telemetry.Runtime + + stateMu sync.RWMutex + server *http.Server + listener net.Listener +} + +// NewServer constructs one private probe HTTP server for cfg and deps. +func NewServer(cfg Config, deps Dependencies) (*Server, error) { + if err := cfg.Validate(); err != nil { + return nil, fmt.Errorf("new internal HTTP server: %w", err) + } + + logger := deps.Logger + if logger == nil { + logger = slog.Default() + } + + return &Server{ + cfg: cfg, + handler: newHandler(logger, deps.Telemetry), + logger: logger.With("component", "internal_http"), + metrics: deps.Telemetry, + }, nil +} + +// Run binds the configured listener and serves the private probe surface until +// Shutdown closes the server. 
+func (server *Server) Run(ctx context.Context) error { + if ctx == nil { + return errors.New("run internal HTTP server: nil context") + } + if err := ctx.Err(); err != nil { + return err + } + + listener, err := net.Listen("tcp", server.cfg.Addr) + if err != nil { + return fmt.Errorf("run internal HTTP server: listen on %q: %w", server.cfg.Addr, err) + } + + httpServer := &http.Server{ + Handler: server.handler, + ReadHeaderTimeout: server.cfg.ReadHeaderTimeout, + ReadTimeout: server.cfg.ReadTimeout, + IdleTimeout: server.cfg.IdleTimeout, + } + + server.stateMu.Lock() + server.server = httpServer + server.listener = listener + server.stateMu.Unlock() + + server.logger.Info("notification internal HTTP server started", "addr", listener.Addr().String()) + server.metrics.RecordInternalHTTPEvent(context.Background(), "started") + + defer func() { + server.stateMu.Lock() + server.server = nil + server.listener = nil + server.stateMu.Unlock() + }() + + err = httpServer.Serve(listener) + switch { + case err == nil: + return nil + case errors.Is(err, http.ErrServerClosed): + server.logger.Info("notification internal HTTP server stopped") + server.metrics.RecordInternalHTTPEvent(context.Background(), "stopped") + return nil + default: + return fmt.Errorf("run internal HTTP server: serve on %q: %w", server.cfg.Addr, err) + } +} + +// Shutdown gracefully stops the private probe HTTP server within ctx. 
+func (server *Server) Shutdown(ctx context.Context) error { + if ctx == nil { + return errors.New("shutdown internal HTTP server: nil context") + } + + server.stateMu.RLock() + httpServer := server.server + server.stateMu.RUnlock() + + if httpServer == nil { + return nil + } + + if err := httpServer.Shutdown(ctx); err != nil && !errors.Is(err, http.ErrServerClosed) { + return fmt.Errorf("shutdown internal HTTP server: %w", err) + } + + return nil +} + +func newHandler(logger *slog.Logger, metrics *telemetry.Runtime) http.Handler { + mux := http.NewServeMux() + mux.HandleFunc("GET "+HealthzPath, handleHealthz) + mux.HandleFunc("GET "+ReadyzPath, handleReadyz) + + return otelhttp.NewHandler(withObservability(mux, metrics), "notification.internal_http") +} + +func withObservability(next http.Handler, metrics *telemetry.Runtime) http.Handler { + return http.HandlerFunc(func(writer http.ResponseWriter, request *http.Request) { + startedAt := time.Now() + recorder := &statusRecorder{ + ResponseWriter: writer, + statusCode: http.StatusOK, + } + + next.ServeHTTP(recorder, request) + + route := request.Pattern + switch recorder.statusCode { + case http.StatusMethodNotAllowed: + route = "method_not_allowed" + case http.StatusNotFound: + route = "not_found" + case 0: + route = "unmatched" + } + if route == "" { + route = "unmatched" + } + + metrics.RecordInternalHTTPRequest( + request.Context(), + []attribute.KeyValue{ + attribute.String("route", route), + attribute.String("method", request.Method), + attribute.String("status_code", strconv.Itoa(recorder.statusCode)), + }, + time.Since(startedAt), + ) + }) +} + +func handleHealthz(writer http.ResponseWriter, _ *http.Request) { + writeStatusResponse(writer, http.StatusOK, "ok") +} + +func handleReadyz(writer http.ResponseWriter, _ *http.Request) { + writeStatusResponse(writer, http.StatusOK, "ready") +} + +func writeStatusResponse(writer http.ResponseWriter, statusCode int, status string) { + 
writer.Header().Set("Content-Type", jsonContentType) + writer.WriteHeader(statusCode) + _ = json.NewEncoder(writer).Encode(statusResponse{Status: status}) +} + +type statusResponse struct { + Status string `json:"status"` +} + +type statusRecorder struct { + http.ResponseWriter + statusCode int +} + +func (recorder *statusRecorder) WriteHeader(statusCode int) { + recorder.statusCode = statusCode + recorder.ResponseWriter.WriteHeader(statusCode) +} diff --git a/notification/internal/api/internalhttp/server_test.go b/notification/internal/api/internalhttp/server_test.go new file mode 100644 index 0000000..6168d65 --- /dev/null +++ b/notification/internal/api/internalhttp/server_test.go @@ -0,0 +1,272 @@ +package internalhttp + +import ( + "context" + "encoding/json" + "io" + "net" + "net/http" + "strings" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestNewServerRejectsInvalidConfiguration(t *testing.T) { + t.Parallel() + + cfg := Config{ + ReadHeaderTimeout: time.Second, + ReadTimeout: time.Second, + IdleTimeout: time.Second, + } + + _, err := NewServer(cfg, Dependencies{}) + require.Error(t, err) + assert.Contains(t, err.Error(), "addr") +} + +func TestServerRunAndShutdown(t *testing.T) { + t.Parallel() + + cfg := testConfig(t) + server, err := NewServer(cfg, Dependencies{}) + require.NoError(t, err) + + runErr := make(chan error, 1) + go func() { + runErr <- server.Run(context.Background()) + }() + + client := newTestHTTPClient(t) + waitForHealthzReady(t, client, cfg.Addr) + + shutdownCtx, cancel := context.WithTimeout(context.Background(), time.Second) + defer cancel() + require.NoError(t, server.Shutdown(shutdownCtx)) + waitForServerRunResult(t, runErr) +} + +func TestProbeRoutesReturnStableJSON(t *testing.T) { + t.Parallel() + + cfg := testConfig(t) + server, err := NewServer(cfg, Dependencies{}) + require.NoError(t, err) + + runErr := make(chan error, 1) + go func() { + runErr <- 
server.Run(context.Background()) + }() + + client := newTestHTTPClient(t) + waitForHealthzReady(t, client, cfg.Addr) + + tests := []struct { + path string + status string + }{ + {path: HealthzPath, status: "ok"}, + {path: ReadyzPath, status: "ready"}, + } + + for _, tt := range tests { + tt := tt + + t.Run(tt.path, func(t *testing.T) { + request, err := http.NewRequest(http.MethodGet, "http://"+cfg.Addr+tt.path, nil) + require.NoError(t, err) + + response, err := client.Do(request) + require.NoError(t, err) + defer response.Body.Close() + + require.Equal(t, http.StatusOK, response.StatusCode) + require.Equal(t, "application/json; charset=utf-8", response.Header.Get("Content-Type")) + + var payload statusResponse + require.NoError(t, json.NewDecoder(response.Body).Decode(&payload)) + require.Equal(t, tt.status, payload.Status) + }) + } + + shutdownCtx, cancel := context.WithTimeout(context.Background(), time.Second) + defer cancel() + require.NoError(t, server.Shutdown(shutdownCtx)) + waitForServerRunResult(t, runErr) +} + +func TestServerDoesNotExposeMetricsOrUnknownRoutes(t *testing.T) { + t.Parallel() + + cfg := testConfig(t) + server, err := NewServer(cfg, Dependencies{}) + require.NoError(t, err) + + runErr := make(chan error, 1) + go func() { + runErr <- server.Run(context.Background()) + }() + + client := newTestHTTPClient(t) + waitForHealthzReady(t, client, cfg.Addr) + + for _, path := range []string{"/metrics", "/unknown"} { + request, err := http.NewRequest(http.MethodGet, "http://"+cfg.Addr+path, nil) + require.NoError(t, err) + + response, err := client.Do(request) + require.NoError(t, err) + _, _ = io.ReadAll(response.Body) + response.Body.Close() + + assert.Equalf(t, http.StatusNotFound, response.StatusCode, "path %s", path) + } + + shutdownCtx, cancel := context.WithTimeout(context.Background(), time.Second) + defer cancel() + require.NoError(t, server.Shutdown(shutdownCtx)) + waitForServerRunResult(t, runErr) +} + +func 
TestServerPreservesStandardHEADBehavior(t *testing.T) { + t.Parallel() + + cfg := testConfig(t) + server, err := NewServer(cfg, Dependencies{}) + require.NoError(t, err) + + runErr := make(chan error, 1) + go func() { + runErr <- server.Run(context.Background()) + }() + + client := newTestHTTPClient(t) + waitForHealthzReady(t, client, cfg.Addr) + + request, err := http.NewRequest(http.MethodHead, "http://"+cfg.Addr+HealthzPath, nil) + require.NoError(t, err) + + response, err := client.Do(request) + require.NoError(t, err) + defer response.Body.Close() + + body, err := io.ReadAll(response.Body) + require.NoError(t, err) + require.Equal(t, http.StatusOK, response.StatusCode) + require.Empty(t, body) + + shutdownCtx, cancel := context.WithTimeout(context.Background(), time.Second) + defer cancel() + require.NoError(t, server.Shutdown(shutdownCtx)) + waitForServerRunResult(t, runErr) +} + +func TestServerUsesStandardMethodNotAllowedBehavior(t *testing.T) { + t.Parallel() + + cfg := testConfig(t) + server, err := NewServer(cfg, Dependencies{}) + require.NoError(t, err) + + runErr := make(chan error, 1) + go func() { + runErr <- server.Run(context.Background()) + }() + + client := newTestHTTPClient(t) + waitForHealthzReady(t, client, cfg.Addr) + + request, err := http.NewRequest(http.MethodPost, "http://"+cfg.Addr+HealthzPath, nil) + require.NoError(t, err) + + response, err := client.Do(request) + require.NoError(t, err) + defer response.Body.Close() + _, _ = io.ReadAll(response.Body) + + require.Equal(t, http.StatusMethodNotAllowed, response.StatusCode) + require.Contains(t, response.Header.Get("Allow"), http.MethodGet) + require.Contains(t, response.Header.Get("Allow"), http.MethodHead) + + shutdownCtx, cancel := context.WithTimeout(context.Background(), time.Second) + defer cancel() + require.NoError(t, server.Shutdown(shutdownCtx)) + waitForServerRunResult(t, runErr) +} + +func testConfig(t *testing.T) Config { + t.Helper() + + return Config{ + Addr: 
mustFreeAddr(t), + ReadHeaderTimeout: time.Second, + ReadTimeout: 2 * time.Second, + IdleTimeout: time.Minute, + } +} + +func newTestHTTPClient(t *testing.T) *http.Client { + t.Helper() + + transport := &http.Transport{DisableKeepAlives: true} + t.Cleanup(transport.CloseIdleConnections) + + return &http.Client{ + Timeout: 250 * time.Millisecond, + Transport: transport, + } +} + +func waitForHealthzReady(t *testing.T, client *http.Client, addr string) { + t.Helper() + + require.Eventually(t, func() bool { + request, err := http.NewRequest(http.MethodGet, "http://"+addr+HealthzPath, nil) + if err != nil { + return false + } + + response, err := client.Do(request) + if err != nil { + return false + } + defer response.Body.Close() + + payload, err := io.ReadAll(response.Body) + if err != nil { + return false + } + + return response.StatusCode == http.StatusOK && strings.Contains(string(payload), `"status":"ok"`) + }, 5*time.Second, 25*time.Millisecond, "internal HTTP server did not become reachable") +} + +func waitForServerRunResult(t *testing.T, runErr <-chan error) { + t.Helper() + + var err error + require.Eventually(t, func() bool { + select { + case err = <-runErr: + return true + default: + return false + } + }, 5*time.Second, 10*time.Millisecond, "internal HTTP server did not stop") + require.NoError(t, err) +} + +func mustFreeAddr(t *testing.T) string { + t.Helper() + + listener, err := net.Listen("tcp", "127.0.0.1:0") + require.NoError(t, err) + defer func() { + assert.NoError(t, listener.Close()) + }() + + return listener.Addr().String() +} diff --git a/notification/internal/app/app.go b/notification/internal/app/app.go new file mode 100644 index 0000000..9c20c99 --- /dev/null +++ b/notification/internal/app/app.go @@ -0,0 +1,168 @@ +// Package app wires the Notification Service process lifecycle and +// coordinates component startup and graceful shutdown. 
+package app + +import ( + "context" + "errors" + "fmt" + "sync" + + "galaxy/notification/internal/config" +) + +// Component is a long-lived Notification Service subsystem that participates +// in coordinated startup and graceful shutdown. +type Component interface { + // Run starts the component and blocks until it stops. + Run(context.Context) error + + // Shutdown stops the component within the provided timeout-bounded context. + Shutdown(context.Context) error +} + +// App owns the process-level lifecycle of Notification Service and its +// registered components. +type App struct { + cfg config.Config + components []Component +} + +// New constructs App with a defensive copy of the supplied components. +func New(cfg config.Config, components ...Component) *App { + clonedComponents := append([]Component(nil), components...) + + return &App{ + cfg: cfg, + components: clonedComponents, + } +} + +// Run starts all configured components, waits for cancellation or the first +// component failure, and then executes best-effort graceful shutdown. 
+func (app *App) Run(ctx context.Context) error { + if ctx == nil { + return errors.New("run notification app: nil context") + } + if err := app.validate(); err != nil { + return err + } + if len(app.components) == 0 { + <-ctx.Done() + return nil + } + + runCtx, cancel := context.WithCancel(ctx) + defer cancel() + + results := make(chan componentResult, len(app.components)) + var runWaitGroup sync.WaitGroup + + for index, component := range app.components { + runWaitGroup.Add(1) + + go func(componentIndex int, component Component) { + defer runWaitGroup.Done() + results <- componentResult{ + index: componentIndex, + err: component.Run(runCtx), + } + }(index, component) + } + + var runErr error + + select { + case <-ctx.Done(): + case result := <-results: + runErr = classifyComponentResult(ctx, result) + } + + cancel() + + shutdownErr := app.shutdownComponents() + waitErr := app.waitForComponents(&runWaitGroup) + + return errors.Join(runErr, shutdownErr, waitErr) +} + +type componentResult struct { + index int + err error +} + +func (app *App) validate() error { + if app.cfg.ShutdownTimeout <= 0 { + return fmt.Errorf("run notification app: shutdown timeout must be positive, got %s", app.cfg.ShutdownTimeout) + } + + for index, component := range app.components { + if component == nil { + return fmt.Errorf("run notification app: component %d is nil", index) + } + } + + return nil +} + +func classifyComponentResult(parentCtx context.Context, result componentResult) error { + switch { + case result.err == nil: + if parentCtx.Err() != nil { + return nil + } + return fmt.Errorf("run notification app: component %d exited without error before shutdown", result.index) + case errors.Is(result.err, context.Canceled) && parentCtx.Err() != nil: + return nil + default: + return fmt.Errorf("run notification app: component %d: %w", result.index, result.err) + } +} + +func (app *App) shutdownComponents() error { + var shutdownWaitGroup sync.WaitGroup + errs := make(chan error, 
len(app.components)) + + for index, component := range app.components { + shutdownWaitGroup.Add(1) + + go func(componentIndex int, component Component) { + defer shutdownWaitGroup.Done() + + shutdownCtx, cancel := context.WithTimeout(context.Background(), app.cfg.ShutdownTimeout) + defer cancel() + + if err := component.Shutdown(shutdownCtx); err != nil { + errs <- fmt.Errorf("shutdown notification component %d: %w", componentIndex, err) + } + }(index, component) + } + + shutdownWaitGroup.Wait() + close(errs) + + var joined error + for err := range errs { + joined = errors.Join(joined, err) + } + + return joined +} + +func (app *App) waitForComponents(runWaitGroup *sync.WaitGroup) error { + done := make(chan struct{}) + go func() { + runWaitGroup.Wait() + close(done) + }() + + waitCtx, cancel := context.WithTimeout(context.Background(), app.cfg.ShutdownTimeout) + defer cancel() + + select { + case <-done: + return nil + case <-waitCtx.Done(): + return fmt.Errorf("wait for notification components: %w", waitCtx.Err()) + } +} diff --git a/notification/internal/app/runtime.go b/notification/internal/app/runtime.go new file mode 100644 index 0000000..f7ca904 --- /dev/null +++ b/notification/internal/app/runtime.go @@ -0,0 +1,229 @@ +package app + +import ( + "context" + "errors" + "fmt" + "log/slog" + + redisadapter "galaxy/notification/internal/adapters/redis" + "galaxy/notification/internal/adapters/redisstate" + userserviceadapter "galaxy/notification/internal/adapters/userservice" + "galaxy/notification/internal/api/internalhttp" + "galaxy/notification/internal/config" + "galaxy/notification/internal/service/acceptintent" + "galaxy/notification/internal/telemetry" + "galaxy/notification/internal/worker" + + "github.com/redis/go-redis/v9" +) + +// Runtime owns the runnable Notification Service process plus the cleanup +// functions that release runtime resources after shutdown. 
+type Runtime struct { + cfg config.Config + + app *App + + probeServer *internalhttp.Server + telemetry *telemetry.Runtime + intentConsumer *worker.IntentConsumer + pushPublisher *worker.PushPublisher + emailPublisher *worker.EmailPublisher + + cleanupFns []func() error +} + +// NewRuntime constructs the runnable Notification Service process from cfg. +func NewRuntime(ctx context.Context, cfg config.Config, logger *slog.Logger) (*Runtime, error) { + if ctx == nil { + return nil, fmt.Errorf("new notification runtime: nil context") + } + if err := cfg.Validate(); err != nil { + return nil, fmt.Errorf("new notification runtime: %w", err) + } + if logger == nil { + logger = slog.Default() + } + + runtime := &Runtime{ + cfg: cfg, + } + cleanupOnError := func(err error) (*Runtime, error) { + if cleanupErr := runtime.Close(); cleanupErr != nil { + return nil, fmt.Errorf("%w; cleanup: %w", err, cleanupErr) + } + + return nil, err + } + + telemetryRuntime, err := telemetry.NewProcess(ctx, telemetry.ProcessConfig{ + ServiceName: cfg.Telemetry.ServiceName, + TracesExporter: cfg.Telemetry.TracesExporter, + MetricsExporter: cfg.Telemetry.MetricsExporter, + TracesProtocol: cfg.Telemetry.TracesProtocol, + MetricsProtocol: cfg.Telemetry.MetricsProtocol, + StdoutTracesEnabled: cfg.Telemetry.StdoutTracesEnabled, + StdoutMetricsEnabled: cfg.Telemetry.StdoutMetricsEnabled, + }, logger.With("component", "telemetry")) + if err != nil { + return cleanupOnError(fmt.Errorf("new notification runtime: telemetry: %w", err)) + } + runtime.telemetry = telemetryRuntime + runtime.cleanupFns = append(runtime.cleanupFns, func() error { + shutdownCtx, cancel := context.WithTimeout(context.Background(), cfg.ShutdownTimeout) + defer cancel() + return telemetryRuntime.Shutdown(shutdownCtx) + }) + + redisClient := redisadapter.NewClient(cfg.Redis) + if err := redisadapter.InstrumentClient(redisClient, telemetryRuntime); err != nil { + return cleanupOnError(fmt.Errorf("new notification runtime: %w", 
err)) + } + runtime.cleanupFns = append(runtime.cleanupFns, func() error { + err := redisClient.Close() + if errors.Is(err, redis.ErrClosed) { + return nil + } + return err + }) + if err := redisadapter.Ping(ctx, cfg.Redis, redisClient); err != nil { + return cleanupOnError(fmt.Errorf("new notification runtime: %w", err)) + } + + acceptanceStore, err := redisstate.NewAcceptanceStore(redisClient, redisstate.AcceptanceConfig{ + RecordTTL: cfg.Retry.RecordTTL, + DeadLetterTTL: cfg.Retry.DeadLetterTTL, + IdempotencyTTL: cfg.Retry.IdempotencyTTL, + }) + if err != nil { + return cleanupOnError(fmt.Errorf("new notification runtime: acceptance store: %w", err)) + } + malformedIntentStore, err := redisstate.NewMalformedIntentStore(redisClient, cfg.Retry.DeadLetterTTL) + if err != nil { + return cleanupOnError(fmt.Errorf("new notification runtime: malformed intent store: %w", err)) + } + streamOffsetStore, err := redisstate.NewStreamOffsetStore(redisClient) + if err != nil { + return cleanupOnError(fmt.Errorf("new notification runtime: stream offset store: %w", err)) + } + intentStreamLagReader, err := redisstate.NewIntentStreamLagReader(streamOffsetStore, cfg.Streams.Intents) + if err != nil { + return cleanupOnError(fmt.Errorf("new notification runtime: intent stream lag reader: %w", err)) + } + telemetryRuntime.SetRouteScheduleSnapshotReader(acceptanceStore) + telemetryRuntime.SetIntentStreamLagSnapshotReader(intentStreamLagReader) + userDirectory, err := userserviceadapter.NewClient(userserviceadapter.Config{ + BaseURL: cfg.UserService.BaseURL, + RequestTimeout: cfg.UserService.Timeout, + }) + if err != nil { + return cleanupOnError(fmt.Errorf("new notification runtime: user service client: %w", err)) + } + runtime.cleanupFns = append(runtime.cleanupFns, userDirectory.Close) + acceptIntentService, err := acceptintent.New(acceptintent.Config{ + Store: acceptanceStore, + UserDirectory: userDirectory, + Clock: nil, + Logger: logger, + Telemetry: telemetryRuntime, + 
PushMaxAttempts: cfg.Retry.PushMaxAttempts, + EmailMaxAttempts: cfg.Retry.EmailMaxAttempts, + IdempotencyTTL: cfg.Retry.IdempotencyTTL, + AdminRouting: cfg.AdminRouting, + }) + if err != nil { + return cleanupOnError(fmt.Errorf("new notification runtime: accept intent service: %w", err)) + } + intentConsumer, err := worker.NewIntentConsumer(worker.IntentConsumerConfig{ + Client: redisClient, + Stream: cfg.Streams.Intents, + BlockTimeout: cfg.IntentsReadBlockTimeout, + Acceptor: acceptIntentService, + MalformedRecorder: malformedIntentStore, + OffsetStore: streamOffsetStore, + Telemetry: telemetryRuntime, + }, logger) + if err != nil { + return cleanupOnError(fmt.Errorf("new notification runtime: intent consumer: %w", err)) + } + runtime.intentConsumer = intentConsumer + pushPublisher, err := worker.NewPushPublisher(worker.PushPublisherConfig{ + Store: acceptanceStore, + GatewayStream: cfg.Streams.GatewayClientEvents, + GatewayStreamMaxLen: cfg.Streams.GatewayClientEventsStreamMaxLen, + RouteLeaseTTL: cfg.Retry.RouteLeaseTTL, + RouteBackoffMin: cfg.Retry.RouteBackoffMin, + RouteBackoffMax: cfg.Retry.RouteBackoffMax, + Encoder: nil, + Telemetry: telemetryRuntime, + Clock: nil, + }, logger) + if err != nil { + return cleanupOnError(fmt.Errorf("new notification runtime: push publisher: %w", err)) + } + runtime.pushPublisher = pushPublisher + emailPublisher, err := worker.NewEmailPublisher(worker.EmailPublisherConfig{ + Store: acceptanceStore, + MailDeliveryCommandsStream: cfg.Streams.MailDeliveryCommands, + RouteLeaseTTL: cfg.Retry.RouteLeaseTTL, + RouteBackoffMin: cfg.Retry.RouteBackoffMin, + RouteBackoffMax: cfg.Retry.RouteBackoffMax, + Encoder: nil, + Telemetry: telemetryRuntime, + Clock: nil, + }, logger) + if err != nil { + return cleanupOnError(fmt.Errorf("new notification runtime: email publisher: %w", err)) + } + runtime.emailPublisher = emailPublisher + + probeServer, err := internalhttp.NewServer(internalhttp.Config{ + Addr: cfg.InternalHTTP.Addr, + 
ReadHeaderTimeout: cfg.InternalHTTP.ReadHeaderTimeout, + ReadTimeout: cfg.InternalHTTP.ReadTimeout, + IdleTimeout: cfg.InternalHTTP.IdleTimeout, + }, internalhttp.Dependencies{ + Logger: logger, + Telemetry: telemetryRuntime, + }) + if err != nil { + return cleanupOnError(fmt.Errorf("new notification runtime: internal HTTP server: %w", err)) + } + runtime.probeServer = probeServer + runtime.app = New(cfg, probeServer, intentConsumer, pushPublisher, emailPublisher) + + return runtime, nil +} + +// Run serves the private probe HTTP listener until ctx is canceled or one +// component fails. +func (runtime *Runtime) Run(ctx context.Context) error { + if ctx == nil { + return errors.New("run notification runtime: nil context") + } + if runtime == nil { + return errors.New("run notification runtime: nil runtime") + } + if runtime.app == nil { + return errors.New("run notification runtime: nil app") + } + + return runtime.app.Run(ctx) +} + +// Close releases every runtime dependency in reverse construction order. 
func (runtime *Runtime) Close() error {
	if runtime == nil {
		return nil
	}

	// Release dependencies in reverse registration order and collect every
	// cleanup failure instead of stopping at the first one.
	var joined error
	for index := len(runtime.cleanupFns) - 1; index >= 0; index-- {
		if err := runtime.cleanupFns[index](); err != nil {
			joined = errors.Join(joined, err)
		}
	}

	return joined
}
diff --git a/notification/internal/app/runtime_smoke_test.go b/notification/internal/app/runtime_smoke_test.go
new file mode 100644
index 0000000..fb102ee
--- /dev/null
+++ b/notification/internal/app/runtime_smoke_test.go
@@ -0,0 +1,72 @@
package app

import (
	"context"
	"net/http"
	"os"
	"testing"
	"time"

	"galaxy/notification/internal/config"

	"github.com/stretchr/testify/require"
	testcontainers "github.com/testcontainers/testcontainers-go"
	rediscontainer "github.com/testcontainers/testcontainers-go/modules/redis"
)

const (
	// realRuntimeSmokeEnv gates the opt-in smoke suite below.
	realRuntimeSmokeEnv   = "NOTIFICATION_REAL_RUNTIME_SMOKE"
	realRuntimeRedisImage = "redis:7"
)

// TestRealRuntimeCompatibility boots the runtime against a real Redis
// container (testcontainers) and checks the probe endpoints. It only runs
// when NOTIFICATION_REAL_RUNTIME_SMOKE=1 is set.
func TestRealRuntimeCompatibility(t *testing.T) {
	if os.Getenv(realRuntimeSmokeEnv) != "1" {
		t.Skipf("set %s=1 to run the real runtime smoke suite", realRuntimeSmokeEnv)
	}

	ctx := context.Background()

	redisContainer, err := rediscontainer.Run(ctx, realRuntimeRedisImage)
	require.NoError(t, err)
	testcontainers.CleanupContainer(t, redisContainer)

	redisAddr, err := redisContainer.Endpoint(ctx, "")
	require.NoError(t, err)

	cfg := config.DefaultConfig()
	cfg.Redis.Addr = redisAddr
	// No user lookups happen in this test, so the base URL only has to parse.
	cfg.UserService.BaseURL = "http://user-service.internal"
	cfg.InternalHTTP.Addr = mustFreeAddr(t)
	cfg.ShutdownTimeout = 2 * time.Second
	cfg.Telemetry.TracesExporter = "none"
	cfg.Telemetry.MetricsExporter = "none"

	runtime, err := NewRuntime(context.Background(), cfg, testLogger())
	require.NoError(t, err)
	defer func() {
		require.NoError(t, runtime.Close())
	}()

	runCtx, cancel := context.WithCancel(context.Background())
	defer cancel()

	runErrCh := make(chan error, 1)
	go func() {
		runErrCh <- runtime.Run(runCtx)
	}()

	client := &http.Client{
		Timeout: 500 * time.Millisecond,
		Transport: &http.Transport{
			DisableKeepAlives: true,
		},
	}
	t.Cleanup(client.CloseIdleConnections)

	waitForRuntimeReady(t, client, cfg.InternalHTTP.Addr)
	assertHTTPStatus(t, client, "http://"+cfg.InternalHTTP.Addr+"/healthz", http.StatusOK)
	assertHTTPStatus(t, client, "http://"+cfg.InternalHTTP.Addr+"/readyz", http.StatusOK)

	cancel()
	waitForRunResult(t, runErrCh, cfg.ShutdownTimeout+2*time.Second)
}
diff --git a/notification/internal/app/runtime_test.go b/notification/internal/app/runtime_test.go
new file mode 100644
index 0000000..157f962
--- /dev/null
+++ b/notification/internal/app/runtime_test.go
@@ -0,0 +1,581 @@
package app

import (
	"context"
	"encoding/json"
	"io"
	"log/slog"
	"net"
	"net/http"
	"net/http/httptest"
	"strconv"
	"testing"
	"time"

	redisstate "galaxy/notification/internal/adapters/redisstate"
	"galaxy/notification/internal/config"

	"github.com/alicebob/miniredis/v2"
	"github.com/redis/go-redis/v9"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
)

// TestNewRuntimeStartsProbeListenerAndStopsCleanly boots the runtime against
// miniredis and checks the probe endpoints respond, then verifies a clean
// shutdown on context cancellation.
func TestNewRuntimeStartsProbeListenerAndStopsCleanly(t *testing.T) {
	t.Parallel()

	redisServer := miniredis.RunT(t)
	userService := newUserLookupServer(t, func(http.ResponseWriter, *http.Request) {})
	defer userService.Close()

	cfg := config.DefaultConfig()
	cfg.Redis.Addr = redisServer.Addr()
	cfg.UserService.BaseURL = userService.URL
	cfg.InternalHTTP.Addr = mustFreeAddr(t)
	cfg.ShutdownTimeout = 10 * time.Second
	cfg.IntentsReadBlockTimeout = 25 * time.Millisecond
	cfg.Telemetry.TracesExporter = "none"
	cfg.Telemetry.MetricsExporter = "none"

	runtime, err := NewRuntime(context.Background(), cfg, testLogger())
	require.NoError(t, err)
	defer func() {
		require.NoError(t, runtime.Close())
	}()

	runCtx, cancel := context.WithCancel(context.Background())
	defer cancel()

	runErrCh := make(chan error, 1)
	go func() {
		runErrCh <- runtime.Run(runCtx)
	}()

	client := newTestHTTPClient(t)
	waitForRuntimeReady(t, client, cfg.InternalHTTP.Addr)
	assertHTTPStatus(t, client, "http://"+cfg.InternalHTTP.Addr+"/healthz", http.StatusOK)
	assertHTTPStatus(t, client, "http://"+cfg.InternalHTTP.Addr+"/readyz", http.StatusOK)
	// The probe listener intentionally does not expose /metrics.
	assertHTTPStatus(t, client, "http://"+cfg.InternalHTTP.Addr+"/metrics", http.StatusNotFound)

	cancel()
	waitForRunResult(t, runErrCh, cfg.ShutdownTimeout+2*time.Second)
}

// TestNewRuntimeFailsFastWhenRedisPingCheckFails points the runtime at an
// address with no Redis behind it and expects construction to fail.
func TestNewRuntimeFailsFastWhenRedisPingCheckFails(t *testing.T) {
	t.Parallel()

	cfg := config.DefaultConfig()
	// mustFreeAddr returns a closed listener address, so the startup PING fails.
	cfg.Redis.Addr = mustFreeAddr(t)
	cfg.UserService.BaseURL = "http://127.0.0.1:18080"
	cfg.IntentsReadBlockTimeout = 25 * time.Millisecond
	cfg.Telemetry.TracesExporter = "none"
	cfg.Telemetry.MetricsExporter = "none"

	runtime, err := NewRuntime(context.Background(), cfg, testLogger())
	require.Nil(t, runtime)
	require.Error(t, err)
	assert.ErrorContains(t, err, "ping redis")
}

// TestNewRuntimeAcceptsIntentThroughConsumer pushes one intent onto the
// ingress stream and waits for the consumer to resolve the user's email route.
func TestNewRuntimeAcceptsIntentThroughConsumer(t *testing.T) {
	t.Parallel()

	redisServer := miniredis.RunT(t)
	redisClient := redis.NewClient(&redis.Options{
		Addr:            redisServer.Addr(),
		Protocol:        2,
		DisableIdentity: true,
	})
	t.Cleanup(func() {
		assert.NoError(t, redisClient.Close())
	})
	userService := newUserLookupServer(t, func(writer http.ResponseWriter, request *http.Request) {
		writeJSON(t, writer, http.StatusOK, map[string]any{
			"user": map[string]any{
				"email":              "pilot@example.com",
				"preferred_language": "en-US",
			},
		})
	})
	defer userService.Close()

	cfg := config.DefaultConfig()
	cfg.Redis.Addr = redisServer.Addr()
	cfg.UserService.BaseURL = userService.URL
	cfg.InternalHTTP.Addr = mustFreeAddr(t)
	cfg.ShutdownTimeout = 10 * time.Second
	cfg.IntentsReadBlockTimeout = 25 * time.Millisecond
	cfg.Telemetry.TracesExporter = "none"
	cfg.Telemetry.MetricsExporter = "none"

	runtime, err := NewRuntime(context.Background(), cfg, testLogger())
	require.NoError(t, err)
	defer func() {
		require.NoError(t, runtime.Close())
	}()

	runCtx, cancel := context.WithCancel(context.Background())
	defer cancel()

	runErrCh := make(chan error, 1)
	go func() {
		runErrCh <- runtime.Run(runCtx)
	}()

	client := newTestHTTPClient(t)
	waitForRuntimeReady(t, client, cfg.InternalHTTP.Addr)

	messageID, err := redisClient.XAdd(context.Background(), &redis.XAddArgs{
		Stream: cfg.Streams.Intents,
		Values: map[string]any{
			"notification_type":       "game.turn.ready",
			"producer":                "game_master",
			"audience_kind":           "user",
			"recipient_user_ids_json": `["user-1"]`,
			"idempotency_key":         "game-123:turn-ready",
			"occurred_at_ms":          "1775121700000",
			"payload_json":            `{"game_id":"game-123","game_name":"Nebula Clash","turn_number":54}`,
		},
	}).Result()
	require.NoError(t, err)

	// Poll until the consumer has resolved the email route for user-1 with
	// the directory-provided address and normalized locale.
	require.Eventually(t, func() bool {
		payload, err := redisClient.Get(context.Background(), redisstate.Keyspace{}.Route(messageID, "email:user:user-1")).Bytes()
		if err != nil {
			return false
		}
		route, err := redisstate.UnmarshalRoute(payload)
		if err != nil {
			return false
		}
		return route.ResolvedEmail == "pilot@example.com" && route.ResolvedLocale == "en"
	}, time.Second, 10*time.Millisecond)

	cancel()
	waitForRunResult(t, runErrCh, cfg.ShutdownTimeout+2*time.Second)
}

// TestNewRuntimePublishesAcceptedPushAndEmailRoutes drives one user-audience
// intent end to end and verifies both publisher workers mark their routes
// published after one attempt.
func TestNewRuntimePublishesAcceptedPushAndEmailRoutes(t *testing.T) {
	t.Parallel()

	redisServer := miniredis.RunT(t)
	redisClient := redis.NewClient(&redis.Options{
		Addr:            redisServer.Addr(),
		Protocol:        2,
		DisableIdentity: true,
	})
	t.Cleanup(func() {
		assert.NoError(t, redisClient.Close())
	})
	userService := newUserLookupServer(t, func(writer http.ResponseWriter, request *http.Request) {
		writeJSON(t, writer, http.StatusOK, map[string]any{
			"user": map[string]any{
				"email":              "pilot@example.com",
				"preferred_language": "en-US",
			},
		})
	})
	defer userService.Close()

	cfg := config.DefaultConfig()
	cfg.Redis.Addr = redisServer.Addr()
	cfg.UserService.BaseURL = userService.URL
	cfg.InternalHTTP.Addr = mustFreeAddr(t)
	cfg.ShutdownTimeout = 10 * time.Second
	cfg.IntentsReadBlockTimeout = 25 * time.Millisecond
	cfg.Telemetry.TracesExporter = "none"
	cfg.Telemetry.MetricsExporter = "none"

	runtime, err := NewRuntime(context.Background(), cfg, testLogger())
	require.NoError(t, err)
	defer func() {
		require.NoError(t, runtime.Close())
	}()

	runCtx, cancel := context.WithCancel(context.Background())
	defer cancel()

	runErrCh := make(chan error, 1)
	go func() {
		runErrCh <- runtime.Run(runCtx)
	}()

	client := newTestHTTPClient(t)
	waitForRuntimeReady(t, client, cfg.InternalHTTP.Addr)

	messageID, err := redisClient.XAdd(context.Background(), &redis.XAddArgs{
		Stream: cfg.Streams.Intents,
		Values: map[string]any{
			"notification_type":       "game.turn.ready",
			"producer":                "game_master",
			"audience_kind":           "user",
			"recipient_user_ids_json": `["user-1"]`,
			"idempotency_key":         "game-123:turn-ready",
			"occurred_at_ms":          "1775121700000",
			"payload_json":            `{"game_id":"game-123","game_name":"Nebula Clash","turn_number":54}`,
			"request_id":              "request-1",
			"trace_id":                "trace-1",
		},
	}).Result()
	require.NoError(t, err)

	// Wait until both the push and the email route reach published state.
	require.Eventually(t, func() bool {
		pushPayload, err := redisClient.Get(context.Background(), redisstate.Keyspace{}.Route(messageID, "push:user:user-1")).Bytes()
		if err != nil {
			return false
		}
		pushRoute, err := redisstate.UnmarshalRoute(pushPayload)
		if err != nil {
			return false
		}

		emailPayload, err := redisClient.Get(context.Background(), redisstate.Keyspace{}.Route(messageID, "email:user:user-1")).Bytes()
		if err != nil {
			return false
		}
		emailRoute, err := redisstate.UnmarshalRoute(emailPayload)
		if err != nil {
			return false
		}

		return pushRoute.Status == "published" && pushRoute.AttemptCount == 1 &&
			emailRoute.Status == "published" && emailRoute.AttemptCount == 1
	}, 2*time.Second, 10*time.Millisecond)
	pushRoutePayload, err := redisClient.Get(context.Background(), redisstate.Keyspace{}.Route(messageID, "push:user:user-1")).Bytes()
	require.NoError(t, err)
	pushRoute, err := redisstate.UnmarshalRoute(pushRoutePayload)
	require.NoError(t, err)
	require.Equal(t, "published", string(pushRoute.Status))

	notificationPayload, err := redisClient.Get(context.Background(), redisstate.Keyspace{}.Notification(messageID)).Bytes()
	require.NoError(t, err)
	notificationRecord, err := redisstate.UnmarshalNotification(notificationPayload)
	require.NoError(t, err)

	emailRoutePayload, err := redisClient.Get(context.Background(), redisstate.Keyspace{}.Route(messageID, "email:user:user-1")).Bytes()
	require.NoError(t, err)
	emailRoute, err := redisstate.UnmarshalRoute(emailRoutePayload)
	require.NoError(t, err)
	require.Equal(t, "published", string(emailRoute.Status))

	// One gateway client event carries the push route, correlation IDs, and
	// an opaque payload; no device_session_id is attached for broadcasts.
	messages, err := redisClient.XRange(context.Background(), cfg.Streams.GatewayClientEvents, "-", "+").Result()
	require.NoError(t, err)
	require.Len(t, messages, 1)
	require.Equal(t, "user-1", messages[0].Values["user_id"])
	require.Equal(t, "game.turn.ready", messages[0].Values["event_type"])
	require.Equal(t, messageID+"/push:user:user-1", messages[0].Values["event_id"])
	require.Equal(t, "request-1", messages[0].Values["request_id"])
	require.Equal(t, "trace-1", messages[0].Values["trace_id"])
	require.NotContains(t, messages[0].Values, "device_session_id")
	// The Redis client may surface field values as string or []byte.
	switch payload := messages[0].Values["payload_bytes"].(type) {
	case string:
		require.NotEmpty(t, payload)
	case []byte:
		require.NotEmpty(t, payload)
	default:
		require.Failf(t, "unexpected payload type", "payload_bytes has type %T", payload)
	}

	// One mail delivery command carries the email route with a derived,
	// stable idempotency key and the acceptance timestamp.
	mailCommands, err := redisClient.XRange(context.Background(), cfg.Streams.MailDeliveryCommands, "-", "+").Result()
	require.NoError(t, err)
	require.Len(t, mailCommands, 1)
	require.Equal(t, messageID+"/email:user:user-1", mailCommands[0].Values["delivery_id"])
	require.Equal(t, "notification", mailCommands[0].Values["source"])
	require.Equal(t, "template", mailCommands[0].Values["payload_mode"])
	require.Equal(t, "notification:"+messageID+"/email:user:user-1", mailCommands[0].Values["idempotency_key"])
	require.Equal(t, strconv.FormatInt(notificationRecord.AcceptedAt.UnixMilli(), 10), mailCommands[0].Values["requested_at_ms"])
	require.Equal(t, "request-1", mailCommands[0].Values["request_id"])
	require.Equal(t, "trace-1", mailCommands[0].Values["trace_id"])
	require.JSONEq(t,
		`{"to":["pilot@example.com"],"cc":[],"bcc":[],"reply_to":[],"template_id":"game.turn.ready","locale":"en","variables":{"game_id":"game-123","game_name":"Nebula Clash","turn_number":54},"attachments":[]}`,
		mailCommands[0].Values["payload_json"].(string),
	)

	cancel()
	waitForRunResult(t, runErrCh, cfg.ShutdownTimeout+2*time.Second)
}

// TestNewRuntimePublishesAdminEmailRouteOnlyToMailService checks that an
// admin_email-audience intent produces a mail command for the configured
// administrator address while the push channel is skipped.
func TestNewRuntimePublishesAdminEmailRouteOnlyToMailService(t *testing.T) {
	t.Parallel()

	redisServer := miniredis.RunT(t)
	redisClient := redis.NewClient(&redis.Options{
		Addr:            redisServer.Addr(),
		Protocol:        2,
		DisableIdentity: true,
	})
	t.Cleanup(func() {
		assert.NoError(t, redisClient.Close())
	})
	userService := newUserLookupServer(t, func(http.ResponseWriter, *http.Request) {})
	defer userService.Close()

	cfg := config.DefaultConfig()
	cfg.Redis.Addr = redisServer.Addr()
	cfg.UserService.BaseURL = userService.URL
	cfg.AdminRouting.LobbyApplicationSubmitted = []string{"owner@example.com"}
	cfg.InternalHTTP.Addr = mustFreeAddr(t)
	cfg.ShutdownTimeout = 10 * time.Second
	cfg.IntentsReadBlockTimeout = 25 * time.Millisecond
	cfg.Telemetry.TracesExporter = "none"
	cfg.Telemetry.MetricsExporter = "none"

	runtime, err := NewRuntime(context.Background(), cfg, testLogger())
	require.NoError(t, err)
	defer func() {
		require.NoError(t, runtime.Close())
	}()

	runCtx, cancel := context.WithCancel(context.Background())
	defer cancel()

	runErrCh := make(chan error, 1)
	go func() {
		runErrCh <- runtime.Run(runCtx)
	}()

	client := newTestHTTPClient(t)
	waitForRuntimeReady(t, client, cfg.InternalHTTP.Addr)

	messageID, err := redisClient.XAdd(context.Background(), &redis.XAddArgs{
		Stream: cfg.Streams.Intents,
		Values: map[string]any{
			"notification_type": "lobby.application.submitted",
			"producer":          "game_lobby",
			"audience_kind":     "admin_email",
			"idempotency_key":   "game-123:application-submitted:user-42",
			"occurred_at_ms":    "1775121700000",
			"payload_json":      `{"applicant_name":"Nova Pilot","applicant_user_id":"user-42","game_id":"game-123","game_name":"Nebula Clash"}`,
		},
	}).Result()
	require.NoError(t, err)

	// Wait for the email route to the configured admin address to publish.
	require.Eventually(t, func() bool {
		payload, err := redisClient.Get(context.Background(), redisstate.Keyspace{}.Route(messageID, "email:email:owner@example.com")).Bytes()
		if err != nil {
			return false
		}
		route, err := redisstate.UnmarshalRoute(payload)
		if err != nil {
			return false
		}

		return route.Status == "published" && route.AttemptCount == 1
	}, 2*time.Second, 10*time.Millisecond)

	// The push channel for an admin_email audience is recorded as skipped.
	pushRoutePayload, err := redisClient.Get(context.Background(), redisstate.Keyspace{}.Route(messageID, "push:email:owner@example.com")).Bytes()
	require.NoError(t, err)
	pushRoute, err := redisstate.UnmarshalRoute(pushRoutePayload)
	require.NoError(t, err)
	require.Equal(t, "skipped", string(pushRoute.Status))

	mailCommands, err := redisClient.XRange(context.Background(), cfg.Streams.MailDeliveryCommands, "-", "+").Result()
	require.NoError(t, err)
	require.Len(t, mailCommands, 1)
	require.Equal(t, messageID+"/email:email:owner@example.com", mailCommands[0].Values["delivery_id"])
	require.JSONEq(t,
		`{"to":["owner@example.com"],"cc":[],"bcc":[],"reply_to":[],"template_id":"lobby.application.submitted","locale":"en","variables":{"applicant_name":"Nova Pilot","applicant_user_id":"user-42","game_id":"game-123","game_name":"Nebula Clash"},"attachments":[]}`,
		mailCommands[0].Values["payload_json"].(string),
	)

	// Nothing is published toward the gateway for admin-only routing.
	gatewayMessages, err := redisClient.XRange(context.Background(), cfg.Streams.GatewayClientEvents, "-", "+").Result()
	require.NoError(t, err)
	require.Empty(t, gatewayMessages)

	cancel()
	waitForRunResult(t, runErrCh, cfg.ShutdownTimeout+2*time.Second)
}

// TestNewRuntimeUsesConfiguredUserServiceTimeout stalls the user lookup
// endpoint and checks the runtime fails with a deadline error without
// committing a stream offset or a notification record.
func TestNewRuntimeUsesConfiguredUserServiceTimeout(t *testing.T) {
	t.Parallel()

	redisServer := miniredis.RunT(t)
	redisClient := redis.NewClient(&redis.Options{
		Addr:            redisServer.Addr(),
		Protocol:        2,
		DisableIdentity: true,
	})
	t.Cleanup(func() {
		assert.NoError(t, redisClient.Close())
	})
	// The handler blocks until the request context is canceled, simulating a
	// hung User Service.
	userService := newUserLookupServer(t, func(_ http.ResponseWriter, request *http.Request) {
		<-request.Context().Done()
	})
	defer userService.Close()

	cfg := config.DefaultConfig()
	cfg.Redis.Addr = redisServer.Addr()
	cfg.UserService.BaseURL = userService.URL
	cfg.UserService.Timeout = 20 * time.Millisecond
	cfg.InternalHTTP.Addr = mustFreeAddr(t)
	cfg.ShutdownTimeout = 10 * time.Second
	cfg.IntentsReadBlockTimeout = 25 * time.Millisecond
	cfg.Telemetry.TracesExporter = "none"
	cfg.Telemetry.MetricsExporter = "none"

	runtime, err := NewRuntime(context.Background(), cfg, testLogger())
	require.NoError(t, err)
	defer func() {
		require.NoError(t, runtime.Close())
	}()

	runCtx, cancel := context.WithCancel(context.Background())
	defer cancel()

	runErrCh := make(chan error, 1)
	go func() {
		runErrCh <- runtime.Run(runCtx)
	}()

	client := newTestHTTPClient(t)
	waitForRuntimeReady(t, client, cfg.InternalHTTP.Addr)

	messageID, err := redisClient.XAdd(context.Background(), &redis.XAddArgs{
		Stream: cfg.Streams.Intents,
		Values: map[string]any{
			"notification_type":       "game.turn.ready",
			"producer":                "game_master",
			"audience_kind":           "user",
			"recipient_user_ids_json": `["user-1"]`,
			"idempotency_key":         "game-123:turn-ready",
			"occurred_at_ms":          "1775121700000",
			"payload_json":            `{"game_id":"game-123","game_name":"Nebula Clash","turn_number":54}`,
		},
	}).Result()
	require.NoError(t, err)

	var runErr error
	require.Eventually(t, func() bool {
		select {
		case runErr = <-runErrCh:
			return true
		default:
			return false
		}
	}, time.Second, 10*time.Millisecond)

	require.Error(t, runErr)
	require.ErrorContains(t, runErr, "context deadline exceeded")

	// The failed intent must not have advanced the stream offset …
	offsetStore, err := redisstate.NewStreamOffsetStore(redisClient)
	require.NoError(t, err)
	offset, found, err := offsetStore.Load(context.Background(), cfg.Streams.Intents)
	require.NoError(t, err)
	require.False(t, found)
	require.Empty(t, offset)

	// … nor have written a notification record.
	_, err = redisClient.Get(context.Background(), redisstate.Keyspace{}.Notification(messageID)).Bytes()
	require.Error(t, err)
}

// testLogger returns a logger that discards all output.
func testLogger() *slog.Logger {
	return slog.New(slog.NewTextHandler(io.Discard, nil))
}

// newTestHTTPClient returns a short-timeout client whose idle connections are
// closed when the test finishes.
func newTestHTTPClient(t *testing.T) *http.Client {
	t.Helper()

	transport := &http.Transport{DisableKeepAlives: true}
	t.Cleanup(transport.CloseIdleConnections)

	return &http.Client{
		Timeout:   500 * time.Millisecond,
		Transport: transport,
	}
}

// waitForRuntimeReady polls /readyz until it returns 200 OK or times out.
func waitForRuntimeReady(t *testing.T, client *http.Client, addr string) {
	t.Helper()

	require.Eventually(t, func() bool {
		request, err := http.NewRequest(http.MethodGet, "http://"+addr+"/readyz", nil)
		if err != nil {
			return false
		}

		response, err := client.Do(request)
		if err != nil {
			return false
		}
		defer response.Body.Close()
		_, _ = io.Copy(io.Discard, response.Body)

		return response.StatusCode == http.StatusOK
	}, 5*time.Second, 25*time.Millisecond, "notification runtime did not become reachable")
}

// waitForRunResult waits for Run to return and requires a nil error.
func waitForRunResult(t *testing.T, runErrCh <-chan error, waitTimeout time.Duration) {
	t.Helper()

	var err error
	require.Eventually(t, func() bool {
		select {
		case err = <-runErrCh:
			return true
		default:
			return false
		}
	}, waitTimeout, 10*time.Millisecond, "notification runtime did not stop")
require.NoError(t, err) +} + +func assertHTTPStatus(t *testing.T, client *http.Client, target string, want int) { + t.Helper() + + request, err := http.NewRequest(http.MethodGet, target, nil) + require.NoError(t, err) + + response, err := client.Do(request) + require.NoError(t, err) + defer response.Body.Close() + _, _ = io.Copy(io.Discard, response.Body) + + require.Equal(t, want, response.StatusCode) +} + +func mustFreeAddr(t *testing.T) string { + t.Helper() + + listener, err := net.Listen("tcp", "127.0.0.1:0") + require.NoError(t, err) + defer func() { + assert.NoError(t, listener.Close()) + }() + + return listener.Addr().String() +} + +func newUserLookupServer(t *testing.T, handler func(http.ResponseWriter, *http.Request)) *httptest.Server { + t.Helper() + + return httptest.NewServer(http.HandlerFunc(func(writer http.ResponseWriter, request *http.Request) { + if request.Method != http.MethodGet { + http.NotFound(writer, request) + return + } + if request.URL.Path != "/api/v1/internal/users/user-1" { + writeJSON(t, writer, http.StatusNotFound, map[string]any{ + "error": map[string]any{ + "code": "subject_not_found", + "message": "subject not found", + }, + }) + return + } + + handler(writer, request) + })) +} + +func writeJSON(t *testing.T, writer http.ResponseWriter, statusCode int, payload any) { + t.Helper() + + body, err := json.Marshal(payload) + require.NoError(t, err) + + writer.Header().Set("Content-Type", "application/json") + writer.WriteHeader(statusCode) + _, err = writer.Write(body) + require.NoError(t, err) +} diff --git a/notification/internal/config/config.go b/notification/internal/config/config.go new file mode 100644 index 0000000..33292c0 --- /dev/null +++ b/notification/internal/config/config.go @@ -0,0 +1,839 @@ +// Package config loads the Notification Service process configuration from +// environment variables. 
package config

import (
	"crypto/tls"
	"fmt"
	"log/slog"
	"net"
	netmail "net/mail"
	"net/url"
	"os"
	"strconv"
	"strings"
	"time"

	"galaxy/notification/internal/telemetry"
)

const (
	// Process-wide settings.
	shutdownTimeoutEnvVar = "NOTIFICATION_SHUTDOWN_TIMEOUT"
	logLevelEnvVar        = "NOTIFICATION_LOG_LEVEL"

	// Private probe HTTP listener.
	internalHTTPAddrEnvVar              = "NOTIFICATION_INTERNAL_HTTP_ADDR"
	internalHTTPReadHeaderTimeoutEnvVar = "NOTIFICATION_INTERNAL_HTTP_READ_HEADER_TIMEOUT"
	internalHTTPReadTimeoutEnvVar       = "NOTIFICATION_INTERNAL_HTTP_READ_TIMEOUT"
	internalHTTPIdleTimeoutEnvVar       = "NOTIFICATION_INTERNAL_HTTP_IDLE_TIMEOUT"

	// Shared Redis client.
	redisAddrEnvVar             = "NOTIFICATION_REDIS_ADDR"
	redisUsernameEnvVar         = "NOTIFICATION_REDIS_USERNAME"
	redisPasswordEnvVar         = "NOTIFICATION_REDIS_PASSWORD"
	redisDBEnvVar               = "NOTIFICATION_REDIS_DB"
	redisTLSEnabledEnvVar       = "NOTIFICATION_REDIS_TLS_ENABLED"
	redisOperationTimeoutEnvVar = "NOTIFICATION_REDIS_OPERATION_TIMEOUT"

	// Stream names and consumer tuning.
	intentsStreamEnvVar                = "NOTIFICATION_INTENTS_STREAM"
	intentsReadBlockTimeoutEnvVar      = "NOTIFICATION_INTENTS_READ_BLOCK_TIMEOUT"
	gatewayClientEventsStreamEnvVar    = "NOTIFICATION_GATEWAY_CLIENT_EVENTS_STREAM"
	gatewayClientEventsStreamMaxEnvVar = "NOTIFICATION_GATEWAY_CLIENT_EVENTS_STREAM_MAX_LEN"
	mailDeliveryCommandsStreamEnvVar   = "NOTIFICATION_MAIL_DELIVERY_COMMANDS_STREAM"

	// Retry budgets, backoff, and retention.
	pushRetryMaxAttemptsEnvVar  = "NOTIFICATION_PUSH_RETRY_MAX_ATTEMPTS"
	emailRetryMaxAttemptsEnvVar = "NOTIFICATION_EMAIL_RETRY_MAX_ATTEMPTS"
	routeLeaseTTLEnvVar         = "NOTIFICATION_ROUTE_LEASE_TTL"
	routeBackoffMinEnvVar       = "NOTIFICATION_ROUTE_BACKOFF_MIN"
	routeBackoffMaxEnvVar       = "NOTIFICATION_ROUTE_BACKOFF_MAX"
	deadLetterTTLEnvVar         = "NOTIFICATION_DEAD_LETTER_TTL"
	recordTTLEnvVar             = "NOTIFICATION_RECORD_TTL"
	idempotencyTTLEnvVar        = "NOTIFICATION_IDEMPOTENCY_TTL"

	// Trusted User Service dependency.
	userServiceBaseURLEnvVar = "NOTIFICATION_USER_SERVICE_BASE_URL"
	userServiceTimeoutEnvVar = "NOTIFICATION_USER_SERVICE_TIMEOUT"

	// Per-notification-type administrator email lists.
	adminEmailsGeoReviewRecommendedEnvVar      = "NOTIFICATION_ADMIN_EMAILS_GEO_REVIEW_RECOMMENDED"
	adminEmailsGameGenerationFailedEnvVar      = "NOTIFICATION_ADMIN_EMAILS_GAME_GENERATION_FAILED"
	adminEmailsLobbyRuntimePausedAfterEnvVar   = "NOTIFICATION_ADMIN_EMAILS_LOBBY_RUNTIME_PAUSED_AFTER_START"
	adminEmailsLobbyApplicationSubmittedEnvVar = "NOTIFICATION_ADMIN_EMAILS_LOBBY_APPLICATION_SUBMITTED"

	// OpenTelemetry configuration: standard OTEL_* names plus local
	// stdout-exporter toggles.
	otelServiceNameEnvVar                 = "OTEL_SERVICE_NAME"
	otelTracesExporterEnvVar              = "OTEL_TRACES_EXPORTER"
	otelMetricsExporterEnvVar             = "OTEL_METRICS_EXPORTER"
	otelExporterOTLPProtocolEnvVar        = "OTEL_EXPORTER_OTLP_PROTOCOL"
	otelExporterOTLPTracesProtocolEnvVar  = "OTEL_EXPORTER_OTLP_TRACES_PROTOCOL"
	otelExporterOTLPMetricsProtocolEnvVar = "OTEL_EXPORTER_OTLP_METRICS_PROTOCOL"
	otelStdoutTracesEnabledEnvVar         = "NOTIFICATION_OTEL_STDOUT_TRACES_ENABLED"
	otelStdoutMetricsEnabledEnvVar        = "NOTIFICATION_OTEL_STDOUT_METRICS_ENABLED"

	// Defaults applied when the corresponding environment variable is unset.
	defaultShutdownTimeout       = 5 * time.Second
	defaultLogLevel              = "info"
	defaultInternalHTTPAddr      = ":8092"
	defaultReadHeaderTimeout     = 2 * time.Second
	defaultReadTimeout           = 10 * time.Second
	defaultIdleTimeout           = time.Minute
	defaultRedisDB               = 0
	defaultRedisOperationTimeout = 250 * time.Millisecond

	defaultIntentsStream                         = "notification:intents"
	defaultIntentsReadBlockTimeout               = 2 * time.Second
	defaultGatewayClientEventsStream             = "gateway:client-events"
	defaultGatewayClientEventsStreamMaxLen int64 = 1024
	defaultMailDeliveryCommandsStream            = "mail:delivery_commands"

	defaultPushRetryMaxAttempts  = 3
	defaultEmailRetryMaxAttempts = 7
	defaultRouteLeaseTTL         = 5 * time.Second
	defaultRouteBackoffMin       = time.Second
	defaultRouteBackoffMax       = 5 * time.Minute
	defaultDeadLetterTTL         = 720 * time.Hour // 30 days
	defaultRecordTTL             = 720 * time.Hour // 30 days
	defaultIdempotencyTTL        = 168 * time.Hour // 7 days

	defaultUserServiceTimeout = time.Second
	defaultOTelServiceName    = "galaxy-notification"

	// Recognized OpenTelemetry exporter and protocol values.
	otelExporterNone         = "none"
	otelExporterOTLP         = "otlp"
	otelProtocolHTTPProtobuf = "http/protobuf"
	otelProtocolGRPC         = "grpc"
)

// Config stores the full Notification Service process configuration.
type Config struct {
	// ShutdownTimeout bounds graceful shutdown of every long-lived component.
	ShutdownTimeout time.Duration

	// Logging configures the process-wide structured logger.
	Logging LoggingConfig

	// InternalHTTP configures the private probe HTTP listener.
	InternalHTTP InternalHTTPConfig

	// Redis configures the shared Redis client used by the process.
	Redis RedisConfig

	// Streams stores the stable stream names reserved for notification ingress
	// and downstream publication.
	Streams StreamsConfig

	// IntentsReadBlockTimeout stores the maximum Redis Streams blocking read
	// window used by the intent consumer.
	IntentsReadBlockTimeout time.Duration

	// Retry stores the frozen retry and retention settings.
	Retry RetryConfig

	// UserService configures the trusted user-enrichment dependency.
	UserService UserServiceConfig

	// AdminRouting stores the type-specific configured administrator email
	// lists.
	AdminRouting AdminRoutingConfig

	// Telemetry configures the process-wide OpenTelemetry runtime.
	Telemetry TelemetryConfig
}

// LoggingConfig configures the process-wide structured logger.
type LoggingConfig struct {
	// Level stores the process log level accepted by log/slog.
	Level string
}

// InternalHTTPConfig configures the private probe HTTP listener.
type InternalHTTPConfig struct {
	// Addr stores the TCP listen address.
	Addr string

	// ReadHeaderTimeout bounds request-header reading.
	ReadHeaderTimeout time.Duration

	// ReadTimeout bounds reading one request.
	ReadTimeout time.Duration

	// IdleTimeout bounds how long keep-alive connections stay open.
	IdleTimeout time.Duration
}

// Validate reports whether cfg stores a usable internal HTTP listener
// configuration.
+func (cfg InternalHTTPConfig) Validate() error { + switch { + case strings.TrimSpace(cfg.Addr) == "": + return fmt.Errorf("internal HTTP addr must not be empty") + case !isTCPAddr(cfg.Addr): + return fmt.Errorf("internal HTTP addr %q must use host:port form", cfg.Addr) + case cfg.ReadHeaderTimeout <= 0: + return fmt.Errorf("internal HTTP read header timeout must be positive") + case cfg.ReadTimeout <= 0: + return fmt.Errorf("internal HTTP read timeout must be positive") + case cfg.IdleTimeout <= 0: + return fmt.Errorf("internal HTTP idle timeout must be positive") + default: + return nil + } +} + +// RedisConfig configures the shared Redis client and its connection settings. +type RedisConfig struct { + // Addr stores the Redis network address. + Addr string + + // Username stores the optional Redis ACL username. + Username string + + // Password stores the optional Redis ACL password. + Password string + + // DB stores the Redis logical database index. + DB int + + // TLSEnabled reports whether TLS must be used for Redis connections. + TLSEnabled bool + + // OperationTimeout bounds one Redis round trip including the startup PING. + OperationTimeout time.Duration +} + +// TLSConfig returns the conservative TLS configuration used by the Redis +// client when TLSEnabled is true. +func (cfg RedisConfig) TLSConfig() *tls.Config { + if !cfg.TLSEnabled { + return nil + } + + return &tls.Config{MinVersion: tls.VersionTLS12} +} + +// Validate reports whether cfg stores a usable Redis configuration. 
func (cfg RedisConfig) Validate() error {
	// Checks run in declaration order; the first failure wins.
	switch {
	case strings.TrimSpace(cfg.Addr) == "":
		return fmt.Errorf("redis addr must not be empty")
	case !isTCPAddr(cfg.Addr):
		return fmt.Errorf("redis addr %q must use host:port form", cfg.Addr)
	case cfg.DB < 0:
		return fmt.Errorf("redis db must not be negative")
	case cfg.OperationTimeout <= 0:
		return fmt.Errorf("redis operation timeout must be positive")
	default:
		return nil
	}
}

// StreamsConfig stores the stable Redis Stream names used by Notification
// Service.
type StreamsConfig struct {
	// Intents stores the ingress intent stream.
	Intents string

	// GatewayClientEvents stores the downstream Gateway client-events stream.
	GatewayClientEvents string

	// GatewayClientEventsStreamMaxLen bounds the downstream Gateway
	// client-events stream with approximate trimming.
	GatewayClientEventsStreamMaxLen int64

	// MailDeliveryCommands stores the downstream Mail Service command stream.
	MailDeliveryCommands string
}

// Validate reports whether cfg stores usable stream names.
func (cfg StreamsConfig) Validate() error {
	switch {
	case strings.TrimSpace(cfg.Intents) == "":
		return fmt.Errorf("intents stream must not be empty")
	case strings.TrimSpace(cfg.GatewayClientEvents) == "":
		return fmt.Errorf("gateway client-events stream must not be empty")
	case cfg.GatewayClientEventsStreamMaxLen <= 0:
		return fmt.Errorf("gateway client-events stream max len must be positive")
	case strings.TrimSpace(cfg.MailDeliveryCommands) == "":
		return fmt.Errorf("mail delivery-commands stream must not be empty")
	default:
		return nil
	}
}

// RetryConfig stores the frozen retry budgets, backoff settings, and retention
// periods used by the service.
type RetryConfig struct {
	// PushMaxAttempts stores the route retry budget for the `push` channel.
	PushMaxAttempts int

	// EmailMaxAttempts stores the route retry budget for the `email` channel.
	EmailMaxAttempts int

	// RouteLeaseTTL stores the temporary route-lease lifetime used to avoid
	// duplicate publication across replicas.
	RouteLeaseTTL time.Duration

	// RouteBackoffMin stores the minimum retry backoff.
	RouteBackoffMin time.Duration

	// RouteBackoffMax stores the maximum retry backoff.
	RouteBackoffMax time.Duration

	// DeadLetterTTL stores the retention period for dead-letter and malformed
	// intent records.
	DeadLetterTTL time.Duration

	// RecordTTL stores the retention period for notification and route records.
	RecordTTL time.Duration

	// IdempotencyTTL stores the retention period for idempotency records.
	IdempotencyTTL time.Duration
}

// Validate reports whether cfg stores usable retry and retention settings.
func (cfg RetryConfig) Validate() error {
	// Positivity is checked per field first, then the min/max backoff
	// relationship, so error messages point at the specific bad setting.
	switch {
	case cfg.PushMaxAttempts <= 0:
		return fmt.Errorf("push retry max attempts must be positive")
	case cfg.EmailMaxAttempts <= 0:
		return fmt.Errorf("email retry max attempts must be positive")
	case cfg.RouteLeaseTTL <= 0:
		return fmt.Errorf("route lease ttl must be positive")
	case cfg.RouteBackoffMin <= 0:
		return fmt.Errorf("route backoff min must be positive")
	case cfg.RouteBackoffMax <= 0:
		return fmt.Errorf("route backoff max must be positive")
	case cfg.RouteBackoffMin > cfg.RouteBackoffMax:
		return fmt.Errorf("route backoff min must not exceed route backoff max")
	case cfg.DeadLetterTTL <= 0:
		return fmt.Errorf("dead-letter ttl must be positive")
	case cfg.RecordTTL <= 0:
		return fmt.Errorf("record ttl must be positive")
	case cfg.IdempotencyTTL <= 0:
		return fmt.Errorf("idempotency ttl must be positive")
	default:
		return nil
	}
}

// UserServiceConfig configures the trusted user-enrichment dependency.
type UserServiceConfig struct {
	// BaseURL stores the absolute base URL of the trusted User Service.
	BaseURL string

	// Timeout bounds one outbound User Service request.
	Timeout time.Duration
}

// Validate reports whether cfg stores a usable User Service configuration.
func (cfg UserServiceConfig) Validate() error {
	switch {
	case strings.TrimSpace(cfg.BaseURL) == "":
		return fmt.Errorf("user service base URL must not be empty")
	case !isAbsoluteHTTPURL(cfg.BaseURL):
		return fmt.Errorf("user service base URL %q must be an absolute http(s) URL", cfg.BaseURL)
	case cfg.Timeout <= 0:
		return fmt.Errorf("user service timeout must be positive")
	default:
		return nil
	}
}

// AdminRoutingConfig stores the type-specific configured administrator email
// lists. Lists are expected to already be normalized (lowercase, bare
// addresses, no duplicates) — LoadFromEnv produces them in that form.
type AdminRoutingConfig struct {
	// GeoReviewRecommended stores recipients for
	// `geo.review_recommended`.
	GeoReviewRecommended []string

	// GameGenerationFailed stores recipients for
	// `game.generation_failed`.
	GameGenerationFailed []string

	// LobbyRuntimePausedAfterStart stores recipients for
	// `lobby.runtime_paused_after_start`.
	LobbyRuntimePausedAfterStart []string

	// LobbyApplicationSubmitted stores recipients for public
	// `lobby.application.submitted` notifications.
	LobbyApplicationSubmitted []string
}

// Validate reports whether cfg stores valid normalized administrator email
// lists.
func (cfg AdminRoutingConfig) Validate() error {
	if err := validateNormalizedEmailList("geo.review_recommended", cfg.GeoReviewRecommended); err != nil {
		return err
	}
	if err := validateNormalizedEmailList("game.generation_failed", cfg.GameGenerationFailed); err != nil {
		return err
	}
	if err := validateNormalizedEmailList("lobby.runtime_paused_after_start", cfg.LobbyRuntimePausedAfterStart); err != nil {
		return err
	}
	if err := validateNormalizedEmailList("lobby.application.submitted", cfg.LobbyApplicationSubmitted); err != nil {
		return err
	}

	return nil
}

// TelemetryConfig configures the Notification Service OpenTelemetry runtime.
+type TelemetryConfig struct { + // ServiceName overrides the default OpenTelemetry service name. + ServiceName string + + // TracesExporter selects the external traces exporter. Supported values are + // `none` and `otlp`. + TracesExporter string + + // MetricsExporter selects the external metrics exporter. Supported values + // are `none` and `otlp`. + MetricsExporter string + + // TracesProtocol selects the OTLP traces protocol when TracesExporter is + // `otlp`. + TracesProtocol string + + // MetricsProtocol selects the OTLP metrics protocol when MetricsExporter is + // `otlp`. + MetricsProtocol string + + // StdoutTracesEnabled enables the additional stdout trace exporter used for + // local development and debugging. + StdoutTracesEnabled bool + + // StdoutMetricsEnabled enables the additional stdout metric exporter used + // for local development and debugging. + StdoutMetricsEnabled bool +} + +// Validate reports whether cfg contains a supported OpenTelemetry +// configuration. +func (cfg TelemetryConfig) Validate() error { + return telemetry.ProcessConfig{ + ServiceName: cfg.ServiceName, + TracesExporter: cfg.TracesExporter, + MetricsExporter: cfg.MetricsExporter, + TracesProtocol: cfg.TracesProtocol, + MetricsProtocol: cfg.MetricsProtocol, + StdoutTracesEnabled: cfg.StdoutTracesEnabled, + StdoutMetricsEnabled: cfg.StdoutMetricsEnabled, + }.Validate() +} + +// DefaultConfig returns the default Notification Service process +// configuration. 
+func DefaultConfig() Config { + return Config{ + ShutdownTimeout: defaultShutdownTimeout, + Logging: LoggingConfig{ + Level: defaultLogLevel, + }, + InternalHTTP: InternalHTTPConfig{ + Addr: defaultInternalHTTPAddr, + ReadHeaderTimeout: defaultReadHeaderTimeout, + ReadTimeout: defaultReadTimeout, + IdleTimeout: defaultIdleTimeout, + }, + Redis: RedisConfig{ + DB: defaultRedisDB, + OperationTimeout: defaultRedisOperationTimeout, + }, + Streams: StreamsConfig{ + Intents: defaultIntentsStream, + GatewayClientEvents: defaultGatewayClientEventsStream, + GatewayClientEventsStreamMaxLen: defaultGatewayClientEventsStreamMaxLen, + MailDeliveryCommands: defaultMailDeliveryCommandsStream, + }, + IntentsReadBlockTimeout: defaultIntentsReadBlockTimeout, + Retry: RetryConfig{ + PushMaxAttempts: defaultPushRetryMaxAttempts, + EmailMaxAttempts: defaultEmailRetryMaxAttempts, + RouteLeaseTTL: defaultRouteLeaseTTL, + RouteBackoffMin: defaultRouteBackoffMin, + RouteBackoffMax: defaultRouteBackoffMax, + DeadLetterTTL: defaultDeadLetterTTL, + RecordTTL: defaultRecordTTL, + IdempotencyTTL: defaultIdempotencyTTL, + }, + UserService: UserServiceConfig{ + Timeout: defaultUserServiceTimeout, + }, + Telemetry: TelemetryConfig{ + ServiceName: defaultOTelServiceName, + TracesExporter: otelExporterNone, + MetricsExporter: otelExporterNone, + }, + } +} + +// LoadFromEnv loads the Notification Service process configuration from +// environment variables, applying documented defaults where appropriate. 
func LoadFromEnv() (Config, error) {
	cfg := DefaultConfig()

	var err error

	// Process-level settings.
	cfg.ShutdownTimeout, err = loadDurationEnvWithDefault(shutdownTimeoutEnvVar, cfg.ShutdownTimeout)
	if err != nil {
		return Config{}, fmt.Errorf("load notification config: %w", err)
	}

	// Logging: the level string is validated eagerly so a typo fails startup
	// instead of surfacing later when the logger is built.
	cfg.Logging.Level = loadStringEnvWithDefault(logLevelEnvVar, cfg.Logging.Level)
	if err := validateLogLevel(cfg.Logging.Level); err != nil {
		return Config{}, fmt.Errorf("load notification config: %s: %w", logLevelEnvVar, err)
	}

	// Internal probe HTTP listener.
	cfg.InternalHTTP.Addr = loadStringEnvWithDefault(internalHTTPAddrEnvVar, cfg.InternalHTTP.Addr)
	cfg.InternalHTTP.ReadHeaderTimeout, err = loadDurationEnvWithDefault(internalHTTPReadHeaderTimeoutEnvVar, cfg.InternalHTTP.ReadHeaderTimeout)
	if err != nil {
		return Config{}, fmt.Errorf("load notification config: %w", err)
	}
	cfg.InternalHTTP.ReadTimeout, err = loadDurationEnvWithDefault(internalHTTPReadTimeoutEnvVar, cfg.InternalHTTP.ReadTimeout)
	if err != nil {
		return Config{}, fmt.Errorf("load notification config: %w", err)
	}
	cfg.InternalHTTP.IdleTimeout, err = loadDurationEnvWithDefault(internalHTTPIdleTimeoutEnvVar, cfg.InternalHTTP.IdleTimeout)
	if err != nil {
		return Config{}, fmt.Errorf("load notification config: %w", err)
	}

	// Redis connection. Username and password are read raw via os.Getenv
	// (no trimming), unlike the other string settings — presumably deliberate
	// because they are credentials; confirm before changing.
	cfg.Redis.Addr = loadStringEnvWithDefault(redisAddrEnvVar, cfg.Redis.Addr)
	cfg.Redis.Username = os.Getenv(redisUsernameEnvVar)
	cfg.Redis.Password = os.Getenv(redisPasswordEnvVar)
	cfg.Redis.DB, err = loadIntEnvWithDefault(redisDBEnvVar, cfg.Redis.DB)
	if err != nil {
		return Config{}, fmt.Errorf("load notification config: %w", err)
	}
	cfg.Redis.TLSEnabled, err = loadBoolEnvWithDefault(redisTLSEnabledEnvVar, cfg.Redis.TLSEnabled)
	if err != nil {
		return Config{}, fmt.Errorf("load notification config: %w", err)
	}
	cfg.Redis.OperationTimeout, err = loadDurationEnvWithDefault(redisOperationTimeoutEnvVar, cfg.Redis.OperationTimeout)
	if err != nil {
		return Config{}, fmt.Errorf("load notification config: %w", err)
	}

	// Stream names and the ingress blocking-read window.
	cfg.Streams.Intents = loadStringEnvWithDefault(intentsStreamEnvVar, cfg.Streams.Intents)
	cfg.Streams.GatewayClientEvents = loadStringEnvWithDefault(gatewayClientEventsStreamEnvVar, cfg.Streams.GatewayClientEvents)
	cfg.Streams.GatewayClientEventsStreamMaxLen, err = loadInt64EnvWithDefault(gatewayClientEventsStreamMaxEnvVar, cfg.Streams.GatewayClientEventsStreamMaxLen)
	if err != nil {
		return Config{}, fmt.Errorf("load notification config: %w", err)
	}
	cfg.Streams.MailDeliveryCommands = loadStringEnvWithDefault(mailDeliveryCommandsStreamEnvVar, cfg.Streams.MailDeliveryCommands)
	cfg.IntentsReadBlockTimeout, err = loadDurationEnvWithDefault(intentsReadBlockTimeoutEnvVar, cfg.IntentsReadBlockTimeout)
	if err != nil {
		return Config{}, fmt.Errorf("load notification config: %w", err)
	}

	// Retry budgets and retention periods.
	cfg.Retry.PushMaxAttempts, err = loadIntEnvWithDefault(pushRetryMaxAttemptsEnvVar, cfg.Retry.PushMaxAttempts)
	if err != nil {
		return Config{}, fmt.Errorf("load notification config: %w", err)
	}
	cfg.Retry.EmailMaxAttempts, err = loadIntEnvWithDefault(emailRetryMaxAttemptsEnvVar, cfg.Retry.EmailMaxAttempts)
	if err != nil {
		return Config{}, fmt.Errorf("load notification config: %w", err)
	}
	cfg.Retry.RouteLeaseTTL, err = loadDurationEnvWithDefault(routeLeaseTTLEnvVar, cfg.Retry.RouteLeaseTTL)
	if err != nil {
		return Config{}, fmt.Errorf("load notification config: %w", err)
	}
	cfg.Retry.RouteBackoffMin, err = loadDurationEnvWithDefault(routeBackoffMinEnvVar, cfg.Retry.RouteBackoffMin)
	if err != nil {
		return Config{}, fmt.Errorf("load notification config: %w", err)
	}
	cfg.Retry.RouteBackoffMax, err = loadDurationEnvWithDefault(routeBackoffMaxEnvVar, cfg.Retry.RouteBackoffMax)
	if err != nil {
		return Config{}, fmt.Errorf("load notification config: %w", err)
	}
	cfg.Retry.DeadLetterTTL, err = loadDurationEnvWithDefault(deadLetterTTLEnvVar, cfg.Retry.DeadLetterTTL)
	if err != nil {
		return Config{}, fmt.Errorf("load notification config: %w", err)
	}
	cfg.Retry.RecordTTL, err = loadDurationEnvWithDefault(recordTTLEnvVar, cfg.Retry.RecordTTL)
	if err != nil {
		return Config{}, fmt.Errorf("load notification config: %w", err)
	}
	cfg.Retry.IdempotencyTTL, err = loadDurationEnvWithDefault(idempotencyTTLEnvVar, cfg.Retry.IdempotencyTTL)
	if err != nil {
		return Config{}, fmt.Errorf("load notification config: %w", err)
	}

	// User Service enrichment dependency; trailing slashes are stripped so
	// later path joins are stable.
	cfg.UserService.BaseURL = normalizeBaseURL(loadStringEnvWithDefault(userServiceBaseURLEnvVar, cfg.UserService.BaseURL))
	cfg.UserService.Timeout, err = loadDurationEnvWithDefault(userServiceTimeoutEnvVar, cfg.UserService.Timeout)
	if err != nil {
		return Config{}, fmt.Errorf("load notification config: %w", err)
	}

	// Administrator routing lists (parsed into normalized, deduplicated
	// lowercase addresses).
	cfg.AdminRouting.GeoReviewRecommended, err = loadEmailListEnv(adminEmailsGeoReviewRecommendedEnvVar, cfg.AdminRouting.GeoReviewRecommended)
	if err != nil {
		return Config{}, fmt.Errorf("load notification config: %w", err)
	}
	cfg.AdminRouting.GameGenerationFailed, err = loadEmailListEnv(adminEmailsGameGenerationFailedEnvVar, cfg.AdminRouting.GameGenerationFailed)
	if err != nil {
		return Config{}, fmt.Errorf("load notification config: %w", err)
	}
	cfg.AdminRouting.LobbyRuntimePausedAfterStart, err = loadEmailListEnv(adminEmailsLobbyRuntimePausedAfterEnvVar, cfg.AdminRouting.LobbyRuntimePausedAfterStart)
	if err != nil {
		return Config{}, fmt.Errorf("load notification config: %w", err)
	}
	cfg.AdminRouting.LobbyApplicationSubmitted, err = loadEmailListEnv(adminEmailsLobbyApplicationSubmittedEnvVar, cfg.AdminRouting.LobbyApplicationSubmitted)
	if err != nil {
		return Config{}, fmt.Errorf("load notification config: %w", err)
	}

	// OpenTelemetry runtime. Per-signal protocol variables win over the
	// shared OTLP protocol fallback (see loadOTLPProtocol).
	cfg.Telemetry.ServiceName = loadStringEnvWithDefault(otelServiceNameEnvVar, cfg.Telemetry.ServiceName)
	cfg.Telemetry.TracesExporter = normalizeExporterValue(loadStringEnvWithDefault(otelTracesExporterEnvVar, cfg.Telemetry.TracesExporter))
	cfg.Telemetry.MetricsExporter = normalizeExporterValue(loadStringEnvWithDefault(otelMetricsExporterEnvVar, cfg.Telemetry.MetricsExporter))
	cfg.Telemetry.TracesProtocol = loadOTLPProtocol(
		os.Getenv(otelExporterOTLPTracesProtocolEnvVar),
		os.Getenv(otelExporterOTLPProtocolEnvVar),
		cfg.Telemetry.TracesExporter,
	)
	cfg.Telemetry.MetricsProtocol = loadOTLPProtocol(
		os.Getenv(otelExporterOTLPMetricsProtocolEnvVar),
		os.Getenv(otelExporterOTLPProtocolEnvVar),
		cfg.Telemetry.MetricsExporter,
	)
	cfg.Telemetry.StdoutTracesEnabled, err = loadBoolEnvWithDefault(otelStdoutTracesEnabledEnvVar, cfg.Telemetry.StdoutTracesEnabled)
	if err != nil {
		return Config{}, fmt.Errorf("load notification config: %w", err)
	}
	cfg.Telemetry.StdoutMetricsEnabled, err = loadBoolEnvWithDefault(otelStdoutMetricsEnabledEnvVar, cfg.Telemetry.StdoutMetricsEnabled)
	if err != nil {
		return Config{}, fmt.Errorf("load notification config: %w", err)
	}

	// Full cross-field validation happens once, after every value is loaded.
	if err := cfg.Validate(); err != nil {
		return Config{}, err
	}

	return cfg, nil
}

// Validate reports whether cfg contains a consistent Notification Service
// process configuration.
+func (cfg Config) Validate() error { + switch { + case cfg.ShutdownTimeout <= 0: + return fmt.Errorf("load notification config: %s must be positive", shutdownTimeoutEnvVar) + case strings.TrimSpace(cfg.Redis.Addr) == "": + return fmt.Errorf("load notification config: %s must not be empty", redisAddrEnvVar) + case strings.TrimSpace(cfg.UserService.BaseURL) == "": + return fmt.Errorf("load notification config: %s must not be empty", userServiceBaseURLEnvVar) + } + + if err := cfg.InternalHTTP.Validate(); err != nil { + return fmt.Errorf("load notification config: %s", err) + } + if err := cfg.Redis.Validate(); err != nil { + return fmt.Errorf("load notification config: %s", err) + } + if err := cfg.Streams.Validate(); err != nil { + return fmt.Errorf("load notification config: %s", err) + } + if cfg.IntentsReadBlockTimeout <= 0 { + return fmt.Errorf("load notification config: %s must be positive", intentsReadBlockTimeoutEnvVar) + } + if err := cfg.Retry.Validate(); err != nil { + return fmt.Errorf("load notification config: %s", err) + } + if err := cfg.UserService.Validate(); err != nil { + return fmt.Errorf("load notification config: %s", err) + } + if err := cfg.AdminRouting.Validate(); err != nil { + return fmt.Errorf("load notification config: %s", err) + } + if err := cfg.Telemetry.Validate(); err != nil { + return fmt.Errorf("load notification config: %w", err) + } + + return nil +} + +func loadStringEnvWithDefault(name string, value string) string { + if raw, ok := os.LookupEnv(name); ok { + return strings.TrimSpace(raw) + } + + return value +} + +func loadDurationEnvWithDefault(name string, value time.Duration) (time.Duration, error) { + raw, ok := os.LookupEnv(name) + if !ok { + return value, nil + } + + parsed, err := time.ParseDuration(strings.TrimSpace(raw)) + if err != nil { + return 0, fmt.Errorf("%s: %w", name, err) + } + + return parsed, nil +} + +func loadIntEnvWithDefault(name string, value int) (int, error) { + raw, ok := os.LookupEnv(name) + if 
!ok { + return value, nil + } + + parsed, err := strconv.Atoi(strings.TrimSpace(raw)) + if err != nil { + return 0, fmt.Errorf("%s: %w", name, err) + } + + return parsed, nil +} + +func loadInt64EnvWithDefault(name string, value int64) (int64, error) { + raw, ok := os.LookupEnv(name) + if !ok { + return value, nil + } + + parsed, err := strconv.ParseInt(strings.TrimSpace(raw), 10, 64) + if err != nil { + return 0, fmt.Errorf("%s: %w", name, err) + } + + return parsed, nil +} + +func loadBoolEnvWithDefault(name string, value bool) (bool, error) { + raw, ok := os.LookupEnv(name) + if !ok { + return value, nil + } + + parsed, err := strconv.ParseBool(strings.TrimSpace(raw)) + if err != nil { + return false, fmt.Errorf("%s: %w", name, err) + } + + return parsed, nil +} + +func loadEmailListEnv(name string, value []string) ([]string, error) { + raw, ok := os.LookupEnv(name) + if !ok { + return append([]string(nil), value...), nil + } + + return parseEmailList(name, raw) +} + +func parseEmailList(name string, raw string) ([]string, error) { + trimmed := strings.TrimSpace(raw) + if trimmed == "" { + return nil, nil + } + + parts := strings.Split(trimmed, ",") + addresses := make([]string, 0, len(parts)) + seen := make(map[string]struct{}, len(parts)) + for index, part := range parts { + normalized, err := normalizeMailboxAddress(part) + if err != nil { + return nil, fmt.Errorf("%s[%d]: %w", name, index, err) + } + if _, ok := seen[normalized]; ok { + continue + } + seen[normalized] = struct{}{} + addresses = append(addresses, normalized) + } + + return addresses, nil +} + +func normalizeMailboxAddress(value string) (string, error) { + trimmed := strings.TrimSpace(value) + if trimmed == "" { + return "", fmt.Errorf("email address must not be empty") + } + + parsed, err := netmail.ParseAddress(trimmed) + if err != nil { + return "", fmt.Errorf("invalid email address %q: %w", trimmed, err) + } + if parsed.Name != "" { + return "", fmt.Errorf("email address %q must not 
include a display name", trimmed) + } + + return strings.ToLower(parsed.Address), nil +} + +func validateNormalizedEmailList(name string, values []string) error { + for index, value := range values { + normalized, err := normalizeMailboxAddress(value) + if err != nil { + return fmt.Errorf("%s[%d]: %w", name, index, err) + } + if normalized != value { + return fmt.Errorf("%s[%d]: email address must already be normalized", name, index) + } + } + + return nil +} + +func validateLogLevel(value string) error { + var level slog.Level + return level.UnmarshalText([]byte(strings.TrimSpace(value))) +} + +func normalizeExporterValue(value string) string { + switch strings.TrimSpace(value) { + case "", otelExporterNone: + return otelExporterNone + default: + return strings.TrimSpace(value) + } +} + +func loadOTLPProtocol(primary string, fallback string, exporter string) string { + protocol := strings.TrimSpace(primary) + if protocol == "" { + protocol = strings.TrimSpace(fallback) + } + if protocol == "" && exporter == otelExporterOTLP { + return otelProtocolHTTPProtobuf + } + + return protocol +} + +func normalizeBaseURL(value string) string { + trimmed := strings.TrimSpace(value) + if trimmed == "" { + return "" + } + + return strings.TrimRight(trimmed, "/") +} + +func isAbsoluteHTTPURL(value string) bool { + parsed, err := url.Parse(strings.TrimSpace(value)) + if err != nil { + return false + } + + if parsed.Scheme != "http" && parsed.Scheme != "https" { + return false + } + + return parsed.Host != "" +} + +func isTCPAddr(value string) bool { + host, port, err := net.SplitHostPort(strings.TrimSpace(value)) + if err != nil { + return false + } + + if port == "" { + return false + } + if host == "" { + return true + } + + return true +} diff --git a/notification/internal/config/config_test.go b/notification/internal/config/config_test.go new file mode 100644 index 0000000..269927e --- /dev/null +++ b/notification/internal/config/config_test.go @@ -0,0 +1,252 @@ +package 
config + +import ( + "testing" + "time" + + "github.com/stretchr/testify/require" +) + +func TestLoadFromEnvUsesDefaults(t *testing.T) { + t.Setenv(redisAddrEnvVar, "127.0.0.1:6379") + t.Setenv(userServiceBaseURLEnvVar, "http://user-service.internal") + + cfg, err := LoadFromEnv() + require.NoError(t, err) + + defaults := DefaultConfig() + require.Equal(t, defaults.ShutdownTimeout, cfg.ShutdownTimeout) + require.Equal(t, defaults.Logging, cfg.Logging) + require.Equal(t, defaults.InternalHTTP, cfg.InternalHTTP) + require.Equal(t, "127.0.0.1:6379", cfg.Redis.Addr) + require.Equal(t, defaults.Redis.DB, cfg.Redis.DB) + require.Equal(t, defaults.Redis.OperationTimeout, cfg.Redis.OperationTimeout) + require.Equal(t, defaults.Streams, cfg.Streams) + require.Equal(t, defaults.Retry, cfg.Retry) + require.Equal(t, UserServiceConfig{ + BaseURL: "http://user-service.internal", + Timeout: defaults.UserService.Timeout, + }, cfg.UserService) + require.Equal(t, defaults.AdminRouting, cfg.AdminRouting) + require.Equal(t, defaults.Telemetry, cfg.Telemetry) +} + +func TestLoadFromEnvAppliesOverrides(t *testing.T) { + t.Setenv(shutdownTimeoutEnvVar, "9s") + t.Setenv(logLevelEnvVar, "debug") + t.Setenv(internalHTTPAddrEnvVar, "127.0.0.1:18092") + t.Setenv(internalHTTPReadHeaderTimeoutEnvVar, "3s") + t.Setenv(internalHTTPReadTimeoutEnvVar, "11s") + t.Setenv(internalHTTPIdleTimeoutEnvVar, "61s") + t.Setenv(redisAddrEnvVar, "127.0.0.1:6380") + t.Setenv(redisUsernameEnvVar, "alice") + t.Setenv(redisPasswordEnvVar, "secret") + t.Setenv(redisDBEnvVar, "3") + t.Setenv(redisTLSEnabledEnvVar, "true") + t.Setenv(redisOperationTimeoutEnvVar, "750ms") + t.Setenv(intentsStreamEnvVar, "notification:test_intents") + t.Setenv(intentsReadBlockTimeoutEnvVar, "3500ms") + t.Setenv(gatewayClientEventsStreamEnvVar, "gateway:test_client-events") + t.Setenv(gatewayClientEventsStreamMaxEnvVar, "2048") + t.Setenv(mailDeliveryCommandsStreamEnvVar, "mail:test_delivery_commands") + 
t.Setenv(pushRetryMaxAttemptsEnvVar, "5") + t.Setenv(emailRetryMaxAttemptsEnvVar, "9") + t.Setenv(routeLeaseTTLEnvVar, "7s") + t.Setenv(routeBackoffMinEnvVar, "2s") + t.Setenv(routeBackoffMaxEnvVar, "7m") + t.Setenv(deadLetterTTLEnvVar, "120h") + t.Setenv(recordTTLEnvVar, "240h") + t.Setenv(idempotencyTTLEnvVar, "48h") + t.Setenv(userServiceBaseURLEnvVar, "https://user-service.internal/api/") + t.Setenv(userServiceTimeoutEnvVar, "1500ms") + t.Setenv(adminEmailsGeoReviewRecommendedEnvVar, "First@example.com, second@example.com, first@example.com") + t.Setenv(adminEmailsGameGenerationFailedEnvVar, "ops@example.com") + t.Setenv(adminEmailsLobbyRuntimePausedAfterEnvVar, "pause@example.com, PAUSE@example.com") + t.Setenv(adminEmailsLobbyApplicationSubmittedEnvVar, "owner@example.com, OWNER@example.com") + t.Setenv(otelServiceNameEnvVar, "custom-notification") + t.Setenv(otelTracesExporterEnvVar, "otlp") + t.Setenv(otelMetricsExporterEnvVar, "otlp") + t.Setenv(otelExporterOTLPProtocolEnvVar, "grpc") + t.Setenv(otelStdoutTracesEnabledEnvVar, "true") + t.Setenv(otelStdoutMetricsEnabledEnvVar, "true") + + cfg, err := LoadFromEnv() + require.NoError(t, err) + + require.Equal(t, 9*time.Second, cfg.ShutdownTimeout) + require.Equal(t, "debug", cfg.Logging.Level) + require.Equal(t, InternalHTTPConfig{ + Addr: "127.0.0.1:18092", + ReadHeaderTimeout: 3 * time.Second, + ReadTimeout: 11 * time.Second, + IdleTimeout: 61 * time.Second, + }, cfg.InternalHTTP) + require.Equal(t, RedisConfig{ + Addr: "127.0.0.1:6380", + Username: "alice", + Password: "secret", + DB: 3, + TLSEnabled: true, + OperationTimeout: 750 * time.Millisecond, + }, cfg.Redis) + require.Equal(t, StreamsConfig{ + Intents: "notification:test_intents", + GatewayClientEvents: "gateway:test_client-events", + GatewayClientEventsStreamMaxLen: 2048, + MailDeliveryCommands: "mail:test_delivery_commands", + }, cfg.Streams) + require.Equal(t, 3500*time.Millisecond, cfg.IntentsReadBlockTimeout) + require.Equal(t, RetryConfig{ + 
PushMaxAttempts: 5, + EmailMaxAttempts: 9, + RouteLeaseTTL: 7 * time.Second, + RouteBackoffMin: 2 * time.Second, + RouteBackoffMax: 7 * time.Minute, + DeadLetterTTL: 120 * time.Hour, + RecordTTL: 240 * time.Hour, + IdempotencyTTL: 48 * time.Hour, + }, cfg.Retry) + require.Equal(t, UserServiceConfig{ + BaseURL: "https://user-service.internal/api", + Timeout: 1500 * time.Millisecond, + }, cfg.UserService) + require.Equal(t, AdminRoutingConfig{ + GeoReviewRecommended: []string{"first@example.com", "second@example.com"}, + GameGenerationFailed: []string{"ops@example.com"}, + LobbyRuntimePausedAfterStart: []string{"pause@example.com"}, + LobbyApplicationSubmitted: []string{"owner@example.com"}, + }, cfg.AdminRouting) + require.Equal(t, TelemetryConfig{ + ServiceName: "custom-notification", + TracesExporter: "otlp", + MetricsExporter: "otlp", + TracesProtocol: "grpc", + MetricsProtocol: "grpc", + StdoutTracesEnabled: true, + StdoutMetricsEnabled: true, + }, cfg.Telemetry) +} + +func TestLoadFromEnvRejectsInvalidValues(t *testing.T) { + tests := []struct { + name string + envName string + envVal string + }{ + {name: "invalid duration", envName: shutdownTimeoutEnvVar, envVal: "later"}, + {name: "invalid log level", envName: logLevelEnvVar, envVal: "verbose"}, + {name: "invalid redis db", envName: redisDBEnvVar, envVal: "db-three"}, + {name: "invalid redis tls", envName: redisTLSEnabledEnvVar, envVal: "sometimes"}, + {name: "invalid push retries", envName: pushRetryMaxAttemptsEnvVar, envVal: "many"}, + {name: "invalid email retries", envName: emailRetryMaxAttemptsEnvVar, envVal: "several"}, + {name: "invalid gateway client events stream max len", envName: gatewayClientEventsStreamMaxEnvVar, envVal: "many"}, + {name: "invalid user service timeout", envName: userServiceTimeoutEnvVar, envVal: "soon"}, + {name: "invalid intents read block timeout", envName: intentsReadBlockTimeoutEnvVar, envVal: "later"}, + {name: "invalid route lease ttl", envName: routeLeaseTTLEnvVar, envVal: 
"eventually"}, + {name: "invalid traces exporter", envName: otelTracesExporterEnvVar, envVal: "stdout"}, + {name: "invalid metrics protocol", envName: otelExporterOTLPMetricsProtocolEnvVar, envVal: "udp"}, + {name: "invalid stdout traces", envName: otelStdoutTracesEnabledEnvVar, envVal: "sometimes"}, + } + + for _, tt := range tests { + tt := tt + + t.Run(tt.name, func(t *testing.T) { + t.Setenv(redisAddrEnvVar, "127.0.0.1:6379") + t.Setenv(userServiceBaseURLEnvVar, "http://user-service.internal") + t.Setenv(tt.envName, tt.envVal) + + _, err := LoadFromEnv() + require.Error(t, err) + }) + } +} + +func TestLoadFromEnvRejectsMissingRequiredValues(t *testing.T) { + t.Run("missing redis addr", func(t *testing.T) { + t.Setenv(userServiceBaseURLEnvVar, "http://user-service.internal") + + _, err := LoadFromEnv() + require.Error(t, err) + require.Contains(t, err.Error(), redisAddrEnvVar) + }) + + t.Run("missing user service base url", func(t *testing.T) { + t.Setenv(redisAddrEnvVar, "127.0.0.1:6379") + + _, err := LoadFromEnv() + require.Error(t, err) + require.Contains(t, err.Error(), userServiceBaseURLEnvVar) + }) +} + +func TestLoadFromEnvRejectsInvalidConfiguration(t *testing.T) { + tests := []struct { + name string + envName string + envVal string + want string + }{ + {name: "invalid internal http addr", envName: internalHTTPAddrEnvVar, envVal: "127.0.0.1", want: "internal HTTP addr"}, + {name: "invalid redis addr", envName: redisAddrEnvVar, envVal: "127.0.0.1", want: "redis addr"}, + {name: "relative user service url", envName: userServiceBaseURLEnvVar, envVal: "/internal/users", want: "absolute http(s) URL"}, + {name: "invalid admin email", envName: adminEmailsGeoReviewRecommendedEnvVar, envVal: "broken-email", want: "invalid email address"}, + {name: "blank admin email slot", envName: adminEmailsGameGenerationFailedEnvVar, envVal: "ops@example.com, , second@example.com", want: "must not be empty"}, + {name: "invalid public application admin email", envName: 
adminEmailsLobbyApplicationSubmittedEnvVar, envVal: "Owner ", want: "must not include a display name"}, + {name: "nonpositive gateway client events stream max len", envName: gatewayClientEventsStreamMaxEnvVar, envVal: "0", want: "must be positive"}, + {name: "backoff min above max", envName: routeBackoffMinEnvVar, envVal: "10m", want: "must not exceed"}, + } + + for _, tt := range tests { + tt := tt + + t.Run(tt.name, func(t *testing.T) { + t.Setenv(redisAddrEnvVar, "127.0.0.1:6379") + t.Setenv(userServiceBaseURLEnvVar, "http://user-service.internal") + t.Setenv(routeBackoffMaxEnvVar, "5m") + t.Setenv(tt.envName, tt.envVal) + + _, err := LoadFromEnv() + require.Error(t, err) + require.Contains(t, err.Error(), tt.want) + }) + } +} + +func TestLoadFromEnvRejectsNonPositiveValues(t *testing.T) { + tests := []struct { + name string + envName string + envVal string + }{ + {name: "shutdown timeout", envName: shutdownTimeoutEnvVar, envVal: "0s"}, + {name: "read header timeout", envName: internalHTTPReadHeaderTimeoutEnvVar, envVal: "0s"}, + {name: "read timeout", envName: internalHTTPReadTimeoutEnvVar, envVal: "0s"}, + {name: "idle timeout", envName: internalHTTPIdleTimeoutEnvVar, envVal: "0s"}, + {name: "redis timeout", envName: redisOperationTimeoutEnvVar, envVal: "0s"}, + {name: "intents read block timeout", envName: intentsReadBlockTimeoutEnvVar, envVal: "0s"}, + {name: "push retries", envName: pushRetryMaxAttemptsEnvVar, envVal: "0"}, + {name: "email retries", envName: emailRetryMaxAttemptsEnvVar, envVal: "0"}, + {name: "gateway client events stream max len", envName: gatewayClientEventsStreamMaxEnvVar, envVal: "0"}, + {name: "route lease ttl", envName: routeLeaseTTLEnvVar, envVal: "0s"}, + {name: "route backoff min", envName: routeBackoffMinEnvVar, envVal: "0s"}, + {name: "route backoff max", envName: routeBackoffMaxEnvVar, envVal: "0s"}, + {name: "dead letter ttl", envName: deadLetterTTLEnvVar, envVal: "0s"}, + {name: "record ttl", envName: recordTTLEnvVar, envVal: 
"0s"}, + {name: "idempotency ttl", envName: idempotencyTTLEnvVar, envVal: "0s"}, + {name: "user service timeout", envName: userServiceTimeoutEnvVar, envVal: "0s"}, + } + + for _, tt := range tests { + tt := tt + + t.Run(tt.name, func(t *testing.T) { + t.Setenv(redisAddrEnvVar, "127.0.0.1:6379") + t.Setenv(userServiceBaseURLEnvVar, "http://user-service.internal") + t.Setenv(tt.envName, tt.envVal) + + _, err := LoadFromEnv() + require.Error(t, err) + }) + } +} diff --git a/notification/internal/logging/logger.go b/notification/internal/logging/logger.go new file mode 100644 index 0000000..058cae1 --- /dev/null +++ b/notification/internal/logging/logger.go @@ -0,0 +1,112 @@ +// Package logging configures the Notification Service process logger and +// provides context-aware helpers for trace fields. +package logging + +import ( + "context" + "fmt" + "log/slog" + "os" + "strings" + + "galaxy/notification/internal/api/intentstream" + + "go.opentelemetry.io/otel/trace" +) + +// New constructs the process-wide JSON logger from level. +func New(level string) (*slog.Logger, error) { + var slogLevel slog.Level + if err := slogLevel.UnmarshalText([]byte(strings.TrimSpace(level))); err != nil { + return nil, fmt.Errorf("build logger: %w", err) + } + + return slog.New(slog.NewJSONHandler(os.Stdout, &slog.HandlerOptions{ + Level: slogLevel, + })), nil +} + +// TraceAttrsFromContext returns slog key-value pairs for the active +// OpenTelemetry span when ctx carries a valid span context. +func TraceAttrsFromContext(ctx context.Context) []any { + if ctx == nil { + return nil + } + + spanContext := trace.SpanContextFromContext(ctx) + if !spanContext.IsValid() { + return nil + } + + return []any{ + "otel_trace_id", spanContext.TraceID().String(), + "otel_span_id", spanContext.SpanID().String(), + } +} + +// NotificationAttrs returns structured notification-identifying log fields. 
+func NotificationAttrs( + notificationID string, + notificationType intentstream.NotificationType, + producer intentstream.Producer, + audienceKind intentstream.AudienceKind, + idempotencyKey string, + requestID string, + traceID string, +) []any { + attrs := []any{ + "notification_id", notificationID, + "notification_type", string(notificationType), + "producer", string(producer), + "audience_kind", string(audienceKind), + "idempotency_key", idempotencyKey, + } + if strings.TrimSpace(requestID) != "" { + attrs = append(attrs, "request_id", requestID) + } + if strings.TrimSpace(traceID) != "" { + attrs = append(attrs, "trace_id", traceID) + } + + return attrs +} + +// IntentAttrs returns structured intent-identifying log fields when a durable +// notification record does not yet exist. +func IntentAttrs(intent intentstream.Intent) []any { + attrs := []any{ + "notification_type", string(intent.NotificationType), + "producer", string(intent.Producer), + "audience_kind", string(intent.AudienceKind), + "idempotency_key", intent.IdempotencyKey, + } + if strings.TrimSpace(intent.RequestID) != "" { + attrs = append(attrs, "request_id", intent.RequestID) + } + if strings.TrimSpace(intent.TraceID) != "" { + attrs = append(attrs, "trace_id", intent.TraceID) + } + + return attrs +} + +// RouteAttrs returns structured route-identifying log fields. 
+func RouteAttrs( + notificationID string, + notificationType intentstream.NotificationType, + producer intentstream.Producer, + audienceKind intentstream.AudienceKind, + idempotencyKey string, + requestID string, + traceID string, + routeID string, + channel intentstream.Channel, +) []any { + attrs := NotificationAttrs(notificationID, notificationType, producer, audienceKind, idempotencyKey, requestID, traceID) + attrs = append(attrs, + "route_id", routeID, + "channel", string(channel), + ) + + return attrs +} diff --git a/notification/internal/service/acceptintent/service.go b/notification/internal/service/acceptintent/service.go new file mode 100644 index 0000000..23a7f46 --- /dev/null +++ b/notification/internal/service/acceptintent/service.go @@ -0,0 +1,946 @@ +// Package acceptintent implements durable idempotent acceptance of normalized +// notification intents. +package acceptintent + +import ( + "context" + "crypto/sha256" + "encoding/hex" + "encoding/json" + "errors" + "fmt" + "log/slog" + netmail "net/mail" + "strings" + "time" + + "galaxy/notification/internal/api/intentstream" + "galaxy/notification/internal/config" + "galaxy/notification/internal/logging" +) + +var ( + // ErrConflict reports that an idempotency scope already exists for + // different normalized content. + ErrConflict = errors.New("accept intent conflict") + + // ErrRecipientNotFound reports that at least one user-targeted recipient + // does not exist in the trusted User Service directory. + ErrRecipientNotFound = errors.New("accept intent recipient not found") + + // ErrServiceUnavailable reports that durable acceptance could not be + // completed or recovered safely. + ErrServiceUnavailable = errors.New("accept intent service unavailable") +) + +// Outcome identifies the coarse intent-acceptance outcome. +type Outcome string + +const ( + // OutcomeAccepted reports that the intent was durably accepted into local + // notification state. 
+ OutcomeAccepted Outcome = "accepted" + + // OutcomeDuplicate reports that the intent matched already accepted + // normalized content and therefore became a replay no-op. + OutcomeDuplicate Outcome = "duplicate" +) + +// RouteStatus identifies one stable notification-route state. +type RouteStatus string + +const ( + // RouteStatusPending reports that the route is ready for first publication. + RouteStatusPending RouteStatus = "pending" + + // RouteStatusPublished reports that the route was durably handed off. + RouteStatusPublished RouteStatus = "published" + + // RouteStatusFailed reports that the last publish attempt failed and a + // retry is scheduled. + RouteStatusFailed RouteStatus = "failed" + + // RouteStatusDeadLetter reports that the route exhausted its retry budget. + RouteStatusDeadLetter RouteStatus = "dead_letter" + + // RouteStatusSkipped reports that the route slot was durably materialized + // but intentionally not emitted. + RouteStatusSkipped RouteStatus = "skipped" +) + +// Result stores the coarse outcome of one intent-acceptance attempt. +type Result struct { + // Outcome stores the stable intent-acceptance outcome. + Outcome Outcome +} + +// NotificationRecord stores the primary durable notification record accepted +// from one normalized intent. +type NotificationRecord struct { + // NotificationID stores the stable notification identifier. + NotificationID string + + // NotificationType stores the frozen notification vocabulary value. + NotificationType intentstream.NotificationType + + // Producer stores the frozen producer identifier. + Producer intentstream.Producer + + // AudienceKind stores the normalized audience selector. + AudienceKind intentstream.AudienceKind + + // RecipientUserIDs stores the normalized recipient user set for + // user-targeted intents. + RecipientUserIDs []string + + // PayloadJSON stores the canonical normalized payload JSON string. 
+ PayloadJSON string + + // IdempotencyKey stores the producer-owned idempotency key. + IdempotencyKey string + + // RequestFingerprint stores the stable normalized request fingerprint. + RequestFingerprint string + + // RequestID stores the optional tracing request identifier. + RequestID string + + // TraceID stores the optional tracing trace identifier. + TraceID string + + // OccurredAt stores when the producer says the event happened. + OccurredAt time.Time + + // AcceptedAt stores when Notification Service durably accepted the intent. + AcceptedAt time.Time + + // UpdatedAt stores the last notification-record mutation timestamp. + UpdatedAt time.Time +} + +// NotificationRoute stores one durable route slot derived from an accepted +// notification. +type NotificationRoute struct { + // NotificationID stores the owning notification identifier. + NotificationID string + + // RouteID stores the stable `<channel>:<recipient_ref>` identifier. + RouteID string + + // Channel stores the route channel slot. + Channel intentstream.Channel + + // RecipientRef stores the stable target slot identifier. + RecipientRef string + + // Status stores the current route status. + Status RouteStatus + + // AttemptCount stores how many publication attempts already ran. + AttemptCount int + + // MaxAttempts stores the total retry budget for Channel. + MaxAttempts int + + // NextAttemptAt stores the next scheduled publication time when Status is + // RouteStatusPending or RouteStatusFailed. + NextAttemptAt time.Time + + // ResolvedEmail stores the already-known email target when available. + ResolvedEmail string + + // ResolvedLocale stores the already-known locale when available. + ResolvedLocale string + + // LastErrorClassification stores the optional last classified route error. + LastErrorClassification string + + // LastErrorMessage stores the optional last route error message. + LastErrorMessage string + + // LastErrorAt stores when the last route error happened.
+ LastErrorAt time.Time + + // CreatedAt stores when the route was materialized. + CreatedAt time.Time + + // UpdatedAt stores the last route mutation timestamp. + UpdatedAt time.Time + + // PublishedAt stores when the route reached published. + PublishedAt time.Time + + // DeadLetteredAt stores when the route reached dead_letter. + DeadLetteredAt time.Time + + // SkippedAt stores when the route reached skipped. + SkippedAt time.Time +} + +// IdempotencyRecord stores one durable `(producer, idempotency_key)` +// reservation. +type IdempotencyRecord struct { + // Producer stores the owning producer identifier. + Producer intentstream.Producer + + // IdempotencyKey stores the producer-owned idempotency key. + IdempotencyKey string + + // NotificationID stores the accepted notification identifier. + NotificationID string + + // RequestFingerprint stores the stable normalized request fingerprint. + RequestFingerprint string + + // CreatedAt stores when the reservation was created. + CreatedAt time.Time + + // ExpiresAt stores when the reservation expires. + ExpiresAt time.Time +} + +// AcceptInput stores one normalized intent plus its chosen notification +// identifier. +type AcceptInput struct { + // NotificationID stores the stable accepted notification identifier. + NotificationID string + + // Intent stores the normalized decoded ingress intent. + Intent intentstream.Intent +} + +// CreateAcceptanceInput stores the durable write set required to accept one +// notification intent. +type CreateAcceptanceInput struct { + // Notification stores the accepted notification record. + Notification NotificationRecord + + // Routes stores every durable route slot derived from Notification. + Routes []NotificationRoute + + // Idempotency stores the idempotency reservation bound to Notification. + Idempotency IdempotencyRecord +} + +// Store describes the durable storage required by the intent-acceptance use +// case. 
+type Store interface { + // CreateAcceptance stores the complete durable write set for one intent + // acceptance attempt. Implementations must wrap ErrConflict when the write + // set races with already accepted state. + CreateAcceptance(context.Context, CreateAcceptanceInput) error + + // GetIdempotency loads one existing idempotency reservation. + GetIdempotency(context.Context, intentstream.Producer, string) (IdempotencyRecord, bool, error) + + // GetNotification loads one accepted notification by NotificationID. + GetNotification(context.Context, string) (NotificationRecord, bool, error) +} + +// UserRecord stores the enrichment data resolved for one recipient user. +type UserRecord struct { + // Email stores the current user email address. + Email string + + // PreferredLanguage stores the current user preferred language tag. + PreferredLanguage string +} + +// Validate reports whether record contains usable recipient enrichment data. +func (record UserRecord) Validate() error { + if strings.TrimSpace(record.Email) == "" { + return errors.New("user record email must not be empty") + } + if _, err := netmail.ParseAddress(record.Email); err != nil { + return fmt.Errorf("user record email: %w", err) + } + + return nil +} + +// UserDirectory resolves trusted recipient data from User Service. Missing +// users must wrap ErrRecipientNotFound. Other failures are treated as +// dependency unavailability. +type UserDirectory interface { + // GetUserByID loads one user by stable user identifier. + GetUserByID(context.Context, string) (UserRecord, error) +} + +// Telemetry records low-cardinality intent-acceptance and user-enrichment +// outcomes. +type Telemetry interface { + // RecordIntentOutcome records one accepted notification-intent outcome. + RecordIntentOutcome(context.Context, string, string, string, string) + + // RecordUserEnrichmentAttempt records one User Service enrichment lookup + // outcome. 
+ RecordUserEnrichmentAttempt(context.Context, string, string) +} + +// Clock provides the current wall-clock time. +type Clock interface { + // Now returns the current time. + Now() time.Time +} + +type systemClock struct{} + +func (systemClock) Now() time.Time { + return time.Now() +} + +// Config stores the dependencies and policies used by Service. +type Config struct { + // Store owns the durable accepted state. + Store Store + + // UserDirectory resolves recipient email and locale from User Service. + UserDirectory UserDirectory + + // Clock provides wall-clock timestamps. + Clock Clock + + // Logger writes structured acceptance logs. + Logger *slog.Logger + + // Telemetry records low-cardinality acceptance and enrichment outcomes. + Telemetry Telemetry + + // PushMaxAttempts stores the retry budget for push routes. + PushMaxAttempts int + + // EmailMaxAttempts stores the retry budget for email routes. + EmailMaxAttempts int + + // IdempotencyTTL stores how long accepted idempotency scopes remain valid. + IdempotencyTTL time.Duration + + // AdminRouting stores the type-specific administrator email lists. + AdminRouting config.AdminRoutingConfig +} + +// Service durably accepts normalized notification intents. +type Service struct { + store Store + userDirectory UserDirectory + clock Clock + logger *slog.Logger + telemetry Telemetry + pushMaxAttempts int + emailMaxAttempts int + idempotencyTTL time.Duration + adminRouting config.AdminRoutingConfig +} + +// New constructs Service from cfg. 
+func New(cfg Config) (*Service, error) { + if cfg.Store == nil { + return nil, errors.New("new accept intent service: nil store") + } + if cfg.UserDirectory == nil { + return nil, errors.New("new accept intent service: nil user directory") + } + if cfg.Clock == nil { + cfg.Clock = systemClock{} + } + if cfg.PushMaxAttempts <= 0 { + return nil, errors.New("new accept intent service: push max attempts must be positive") + } + if cfg.EmailMaxAttempts <= 0 { + return nil, errors.New("new accept intent service: email max attempts must be positive") + } + if cfg.IdempotencyTTL <= 0 { + return nil, errors.New("new accept intent service: idempotency ttl must be positive") + } + if cfg.Logger == nil { + cfg.Logger = slog.Default() + } + if err := cfg.AdminRouting.Validate(); err != nil { + return nil, fmt.Errorf("new accept intent service: %w", err) + } + + return &Service{ + store: cfg.Store, + userDirectory: cfg.UserDirectory, + clock: cfg.Clock, + logger: cfg.Logger.With("component", "accept_intent"), + telemetry: cfg.Telemetry, + pushMaxAttempts: cfg.PushMaxAttempts, + emailMaxAttempts: cfg.EmailMaxAttempts, + idempotencyTTL: cfg.IdempotencyTTL, + adminRouting: cfg.AdminRouting, + }, nil +} + +// Execute durably accepts one normalized intent. 
+func (service *Service) Execute(ctx context.Context, input AcceptInput) (Result, error) { + if ctx == nil { + return Result{}, errors.New("accept intent: nil context") + } + if service == nil { + return Result{}, errors.New("accept intent: nil service") + } + if err := input.Validate(); err != nil { + return Result{}, fmt.Errorf("accept intent: %w", err) + } + + fingerprint, err := requestFingerprint(input.Intent) + if err != nil { + return Result{}, fmt.Errorf("accept intent: %w", err) + } + + if result, handled, err := service.resolveReplay(ctx, input, fingerprint); handled { + return result, err + } + + createInput, result, err := service.buildCreateInput(ctx, input, fingerprint) + if err != nil { + switch { + case errors.Is(err, ErrRecipientNotFound): + return Result{}, err + case errors.Is(err, ErrServiceUnavailable): + return Result{}, err + default: + return Result{}, fmt.Errorf("accept intent: %w", err) + } + } + + if err := service.store.CreateAcceptance(ctx, createInput); err != nil { + if !errors.Is(err, ErrConflict) { + return Result{}, fmt.Errorf("%w: create acceptance: %v", ErrServiceUnavailable, err) + } + + if replayResult, handled, replayErr := service.resolveReplay(ctx, input, fingerprint); handled { + return replayResult, replayErr + } + + return Result{}, fmt.Errorf("%w: create acceptance conflict without replay state", ErrServiceUnavailable) + } + + service.recordIntentOutcome(ctx, createInput.Notification, string(result.Outcome)) + + logArgs := logging.NotificationAttrs( + createInput.Notification.NotificationID, + createInput.Notification.NotificationType, + createInput.Notification.Producer, + createInput.Notification.AudienceKind, + createInput.Notification.IdempotencyKey, + createInput.Notification.RequestID, + createInput.Notification.TraceID, + ) + logArgs = append(logArgs, + "route_count", len(createInput.Routes), + "outcome", string(result.Outcome), + ) + logArgs = append(logArgs, logging.TraceAttrsFromContext(ctx)...) 
+ service.logger.Info("notification intent accepted", logArgs...) + + return result, nil +} + +// Validate reports whether result stores a supported intent-acceptance +// outcome. +func (result Result) Validate() error { + switch result.Outcome { + case OutcomeAccepted, OutcomeDuplicate: + return nil + default: + return fmt.Errorf("accept intent outcome %q is unsupported", result.Outcome) + } +} + +// Validate reports whether input contains a usable acceptance request. +func (input AcceptInput) Validate() error { + if strings.TrimSpace(input.NotificationID) == "" { + return errors.New("accept input notification id must not be empty") + } + if err := input.Intent.Validate(); err != nil { + return fmt.Errorf("accept input intent: %w", err) + } + + return nil +} + +// Validate reports whether record contains a complete notification record. +func (record NotificationRecord) Validate() error { + if strings.TrimSpace(record.NotificationID) == "" { + return errors.New("notification record notification id must not be empty") + } + if !record.NotificationType.IsKnown() { + return fmt.Errorf("notification record type %q is unsupported", record.NotificationType) + } + if !record.Producer.IsKnown() { + return fmt.Errorf("notification record producer %q is unsupported", record.Producer) + } + if !record.AudienceKind.IsKnown() { + return fmt.Errorf("notification record audience kind %q is unsupported", record.AudienceKind) + } + if strings.TrimSpace(record.PayloadJSON) == "" { + return errors.New("notification record payload json must not be empty") + } + if strings.TrimSpace(record.IdempotencyKey) == "" { + return errors.New("notification record idempotency key must not be empty") + } + if strings.TrimSpace(record.RequestFingerprint) == "" { + return errors.New("notification record request fingerprint must not be empty") + } + if err := validateTimestamp("notification record occurred at", record.OccurredAt); err != nil { + return err + } + if err := 
validateTimestamp("notification record accepted at", record.AcceptedAt); err != nil { + return err + } + if err := validateTimestamp("notification record updated at", record.UpdatedAt); err != nil { + return err + } + if record.AudienceKind == intentstream.AudienceKindUser && len(record.RecipientUserIDs) == 0 { + return errors.New("notification record recipient user ids must not be empty for audience kind user") + } + if record.AudienceKind == intentstream.AudienceKindAdminEmail && len(record.RecipientUserIDs) > 0 { + return errors.New("notification record recipient user ids must be empty for audience kind admin_email") + } + + return nil +} + +// Validate reports whether route contains a complete route record. +func (route NotificationRoute) Validate() error { + if strings.TrimSpace(route.NotificationID) == "" { + return errors.New("notification route notification id must not be empty") + } + if strings.TrimSpace(route.RouteID) == "" { + return errors.New("notification route route id must not be empty") + } + if !route.Channel.IsKnown() { + return fmt.Errorf("notification route channel %q is unsupported", route.Channel) + } + if strings.TrimSpace(route.RecipientRef) == "" { + return errors.New("notification route recipient ref must not be empty") + } + if !route.Status.IsKnown() { + return fmt.Errorf("notification route status %q is unsupported", route.Status) + } + if route.AttemptCount < 0 { + return errors.New("notification route attempt count must not be negative") + } + if route.MaxAttempts <= 0 { + return errors.New("notification route max attempts must be positive") + } + if err := validateTimestamp("notification route created at", route.CreatedAt); err != nil { + return err + } + if err := validateTimestamp("notification route updated at", route.UpdatedAt); err != nil { + return err + } + switch route.Status { + case RouteStatusPending, RouteStatusFailed: + if err := validateTimestamp("notification route next attempt at", route.NextAttemptAt); err != nil { 
+ return err + } + case RouteStatusSkipped: + if !route.NextAttemptAt.IsZero() { + return errors.New("notification route next attempt at must be zero for skipped routes") + } + if err := validateTimestamp("notification route skipped at", route.SkippedAt); err != nil { + return err + } + } + + return nil +} + +// IsKnown reports whether status belongs to the frozen route-status surface. +func (status RouteStatus) IsKnown() bool { + switch status { + case RouteStatusPending, + RouteStatusPublished, + RouteStatusFailed, + RouteStatusDeadLetter, + RouteStatusSkipped: + return true + default: + return false + } +} + +// Validate reports whether record contains a complete idempotency record. +func (record IdempotencyRecord) Validate() error { + if !record.Producer.IsKnown() { + return fmt.Errorf("idempotency record producer %q is unsupported", record.Producer) + } + if strings.TrimSpace(record.IdempotencyKey) == "" { + return errors.New("idempotency record idempotency key must not be empty") + } + if strings.TrimSpace(record.NotificationID) == "" { + return errors.New("idempotency record notification id must not be empty") + } + if strings.TrimSpace(record.RequestFingerprint) == "" { + return errors.New("idempotency record request fingerprint must not be empty") + } + if err := validateTimestamp("idempotency record created at", record.CreatedAt); err != nil { + return err + } + if err := validateTimestamp("idempotency record expires at", record.ExpiresAt); err != nil { + return err + } + if !record.ExpiresAt.After(record.CreatedAt) { + return errors.New("idempotency record expires at must be after created at") + } + + return nil +} + +// Validate reports whether input contains a consistent durable write set. 
+func (input CreateAcceptanceInput) Validate() error { + if err := input.Notification.Validate(); err != nil { + return fmt.Errorf("notification: %w", err) + } + if err := input.Idempotency.Validate(); err != nil { + return fmt.Errorf("idempotency: %w", err) + } + if input.Idempotency.NotificationID != input.Notification.NotificationID { + return errors.New("idempotency notification id must match notification record") + } + if input.Idempotency.Producer != input.Notification.Producer { + return errors.New("idempotency producer must match notification record") + } + if input.Idempotency.IdempotencyKey != input.Notification.IdempotencyKey { + return errors.New("idempotency key must match notification record") + } + if input.Idempotency.RequestFingerprint != input.Notification.RequestFingerprint { + return errors.New("idempotency request fingerprint must match notification record") + } + + seenRouteIDs := make(map[string]struct{}, len(input.Routes)) + for index, route := range input.Routes { + if err := route.Validate(); err != nil { + return fmt.Errorf("routes[%d]: %w", index, err) + } + if route.NotificationID != input.Notification.NotificationID { + return fmt.Errorf("routes[%d]: notification id must match notification record", index) + } + if _, ok := seenRouteIDs[route.RouteID]; ok { + return fmt.Errorf("routes[%d]: route id %q is duplicated", index, route.RouteID) + } + seenRouteIDs[route.RouteID] = struct{}{} + if input.Notification.AudienceKind == intentstream.AudienceKindUser { + if !strings.HasPrefix(route.RecipientRef, "user:") { + return fmt.Errorf("routes[%d]: recipient ref must use user: prefix for audience kind user", index) + } + if strings.TrimSpace(route.ResolvedEmail) == "" { + return fmt.Errorf("routes[%d]: resolved email must not be empty for audience kind user", index) + } + if strings.TrimSpace(route.ResolvedLocale) == "" { + return fmt.Errorf("routes[%d]: resolved locale must not be empty for audience kind user", index) + } + } + } + + return 
nil +} + +func (service *Service) buildCreateInput(ctx context.Context, input AcceptInput, fingerprint string) (CreateAcceptanceInput, Result, error) { + now := service.clock.Now().UTC().Truncate(time.Millisecond) + + record := NotificationRecord{ + NotificationID: input.NotificationID, + NotificationType: input.Intent.NotificationType, + Producer: input.Intent.Producer, + AudienceKind: input.Intent.AudienceKind, + RecipientUserIDs: append([]string(nil), input.Intent.RecipientUserIDs...), + PayloadJSON: input.Intent.PayloadJSON, + IdempotencyKey: input.Intent.IdempotencyKey, + RequestFingerprint: fingerprint, + RequestID: input.Intent.RequestID, + TraceID: input.Intent.TraceID, + OccurredAt: input.Intent.OccurredAt, + AcceptedAt: now, + UpdatedAt: now, + } + + routes, err := service.materializeRoutes(ctx, record, now) + if err != nil { + return CreateAcceptanceInput{}, Result{}, fmt.Errorf("materialize routes: %w", err) + } + + createInput := CreateAcceptanceInput{ + Notification: record, + Routes: routes, + Idempotency: IdempotencyRecord{ + Producer: record.Producer, + IdempotencyKey: record.IdempotencyKey, + NotificationID: record.NotificationID, + RequestFingerprint: fingerprint, + CreatedAt: now, + ExpiresAt: now.Add(service.idempotencyTTL), + }, + } + if err := createInput.Validate(); err != nil { + return CreateAcceptanceInput{}, Result{}, fmt.Errorf("build create acceptance input: %w", err) + } + + result := Result{Outcome: OutcomeAccepted} + if err := result.Validate(); err != nil { + return CreateAcceptanceInput{}, Result{}, fmt.Errorf("build acceptance result: %w", err) + } + + return createInput, result, nil +} + +func (service *Service) materializeRoutes(ctx context.Context, record NotificationRecord, now time.Time) ([]NotificationRoute, error) { + switch record.AudienceKind { + case intentstream.AudienceKindUser: + recipients, err := service.resolveRecipients(ctx, record.NotificationType, record.RecipientUserIDs) + if err != nil { + return nil, err + } 
+ + routes := make([]NotificationRoute, 0, len(record.RecipientUserIDs)*2) + for _, userID := range record.RecipientUserIDs { + recipient := recipients[userID] + recipientRef := "user:" + userID + routes = append(routes, + service.newRoute(record, now, intentstream.ChannelPush, recipientRef, recipient.Email, resolveLocale(recipient.PreferredLanguage)), + service.newRoute(record, now, intentstream.ChannelEmail, recipientRef, recipient.Email, resolveLocale(recipient.PreferredLanguage)), + ) + } + return routes, nil + case intentstream.AudienceKindAdminEmail: + adminEmails := service.adminEmailsFor(record.NotificationType) + if len(adminEmails) == 0 { + return []NotificationRoute{ + service.newSyntheticAdminConfigRoute(record, now), + }, nil + } + + routes := make([]NotificationRoute, 0, len(adminEmails)*2) + for _, email := range adminEmails { + recipientRef := "email:" + email + routes = append(routes, + service.newRoute(record, now, intentstream.ChannelPush, recipientRef, email, intentstream.DefaultResolvedLocale()), + service.newRoute(record, now, intentstream.ChannelEmail, recipientRef, email, intentstream.DefaultResolvedLocale()), + ) + } + return routes, nil + default: + return nil, fmt.Errorf("unsupported audience kind %q", record.AudienceKind) + } +} + +func (service *Service) resolveRecipients(ctx context.Context, notificationType intentstream.NotificationType, userIDs []string) (map[string]UserRecord, error) { + recipients := make(map[string]UserRecord, len(userIDs)) + for _, userID := range userIDs { + record, err := service.userDirectory.GetUserByID(ctx, userID) + switch { + case err == nil: + if err := record.Validate(); err != nil { + service.recordUserEnrichmentAttempt(ctx, notificationType, "service_unavailable") + return nil, fmt.Errorf("%w: resolve recipient %q: %v", ErrServiceUnavailable, userID, err) + } + service.recordUserEnrichmentAttempt(ctx, notificationType, "success") + recipients[userID] = record + case errors.Is(err, 
ErrRecipientNotFound): + service.recordUserEnrichmentAttempt(ctx, notificationType, "recipient_not_found") + return nil, fmt.Errorf("%w: resolve recipient %q: %v", ErrRecipientNotFound, userID, err) + default: + service.recordUserEnrichmentAttempt(ctx, notificationType, "service_unavailable") + return nil, fmt.Errorf("%w: resolve recipient %q: %v", ErrServiceUnavailable, userID, err) + } + } + + return recipients, nil +} + +func (service *Service) newRoute( + record NotificationRecord, + now time.Time, + channel intentstream.Channel, + recipientRef string, + resolvedEmail string, + resolvedLocale string, +) NotificationRoute { + route := NotificationRoute{ + NotificationID: record.NotificationID, + RouteID: string(channel) + ":" + recipientRef, + Channel: channel, + RecipientRef: recipientRef, + AttemptCount: 0, + MaxAttempts: service.maxAttempts(channel), + ResolvedEmail: resolvedEmail, + ResolvedLocale: resolvedLocale, + CreatedAt: now, + UpdatedAt: now, + } + + if record.NotificationType.SupportsChannel(record.AudienceKind, channel) { + route.Status = RouteStatusPending + route.NextAttemptAt = now + return route + } + + route.Status = RouteStatusSkipped + route.SkippedAt = now + return route +} + +func (service *Service) newSyntheticAdminConfigRoute(record NotificationRecord, now time.Time) NotificationRoute { + recipientRef := "config:" + string(record.NotificationType) + return NotificationRoute{ + NotificationID: record.NotificationID, + RouteID: string(intentstream.ChannelEmail) + ":" + recipientRef, + Channel: intentstream.ChannelEmail, + RecipientRef: recipientRef, + Status: RouteStatusSkipped, + AttemptCount: 0, + MaxAttempts: service.emailMaxAttempts, + CreatedAt: now, + UpdatedAt: now, + SkippedAt: now, + } +} + +func (service *Service) adminEmailsFor(notificationType intentstream.NotificationType) []string { + switch notificationType { + case intentstream.NotificationTypeGeoReviewRecommended: + return append([]string(nil), 
service.adminRouting.GeoReviewRecommended...) + case intentstream.NotificationTypeGameGenerationFailed: + return append([]string(nil), service.adminRouting.GameGenerationFailed...) + case intentstream.NotificationTypeLobbyRuntimePausedAfterStart: + return append([]string(nil), service.adminRouting.LobbyRuntimePausedAfterStart...) + case intentstream.NotificationTypeLobbyApplicationSubmitted: + return append([]string(nil), service.adminRouting.LobbyApplicationSubmitted...) + default: + return nil + } +} + +func (service *Service) maxAttempts(channel intentstream.Channel) int { + switch channel { + case intentstream.ChannelPush: + return service.pushMaxAttempts + case intentstream.ChannelEmail: + return service.emailMaxAttempts + default: + return 0 + } +} + +func resolveLocale(preferredLanguage string) string { + if preferredLanguage == intentstream.DefaultResolvedLocale() { + return intentstream.DefaultResolvedLocale() + } + + return intentstream.DefaultResolvedLocale() +} + +func (service *Service) resolveReplay(ctx context.Context, input AcceptInput, fingerprint string) (Result, bool, error) { + record, found, err := service.store.GetIdempotency(ctx, input.Intent.Producer, input.Intent.IdempotencyKey) + if err != nil { + return Result{}, true, fmt.Errorf("%w: load idempotency: %v", ErrServiceUnavailable, err) + } + if !found { + return Result{}, false, nil + } + if record.RequestFingerprint != fingerprint { + return Result{}, true, fmt.Errorf("%w: request conflicts with current state", ErrConflict) + } + + notificationRecord, found, err := service.store.GetNotification(ctx, record.NotificationID) + if err != nil { + return Result{}, true, fmt.Errorf("%w: load notification: %v", ErrServiceUnavailable, err) + } + if !found { + return Result{}, true, fmt.Errorf("%w: notification %q is missing for idempotency scope", ErrServiceUnavailable, record.NotificationID) + } + + if notificationRecord.NotificationID != record.NotificationID { + return Result{}, true, 
fmt.Errorf("%w: replay notification id mismatch", ErrServiceUnavailable) + } + + result := Result{Outcome: OutcomeDuplicate} + if err := result.Validate(); err != nil { + return Result{}, true, fmt.Errorf("%w: %v", ErrServiceUnavailable, err) + } + + service.recordIntentOutcome(ctx, notificationRecord, string(result.Outcome)) + + logArgs := logging.NotificationAttrs( + notificationRecord.NotificationID, + notificationRecord.NotificationType, + notificationRecord.Producer, + notificationRecord.AudienceKind, + notificationRecord.IdempotencyKey, + notificationRecord.RequestID, + notificationRecord.TraceID, + ) + logArgs = append(logArgs, + "outcome", string(result.Outcome), + ) + logArgs = append(logArgs, logging.TraceAttrsFromContext(ctx)...) + service.logger.Info("notification intent replay resolved", logArgs...) + + return result, true, nil +} + +func requestFingerprint(intent intentstream.Intent) (string, error) { + if err := intent.Validate(); err != nil { + return "", err + } + + normalized := struct { + NotificationType intentstream.NotificationType `json:"notification_type"` + AudienceKind intentstream.AudienceKind `json:"audience_kind"` + RecipientUserIDs []string `json:"recipient_user_ids,omitempty"` + PayloadJSON json.RawMessage `json:"payload_json"` + }{ + NotificationType: intent.NotificationType, + AudienceKind: intent.AudienceKind, + RecipientUserIDs: append([]string(nil), intent.RecipientUserIDs...), + PayloadJSON: json.RawMessage(intent.PayloadJSON), + } + + payload, err := json.Marshal(normalized) + if err != nil { + return "", fmt.Errorf("marshal request fingerprint: %w", err) + } + + sum := sha256.Sum256(payload) + + return "sha256:" + hex.EncodeToString(sum[:]), nil +} + +func (service *Service) recordIntentOutcome(ctx context.Context, record NotificationRecord, outcome string) { + if service == nil || service.telemetry == nil || strings.TrimSpace(outcome) == "" { + return + } + + service.telemetry.RecordIntentOutcome( + ctx, + 
string(record.NotificationType), + string(record.Producer), + string(record.AudienceKind), + outcome, + ) +} + +func (service *Service) recordUserEnrichmentAttempt(ctx context.Context, notificationType intentstream.NotificationType, result string) { + if service == nil || service.telemetry == nil || strings.TrimSpace(result) == "" { + return + } + + service.telemetry.RecordUserEnrichmentAttempt(ctx, string(notificationType), result) +} + +func validateTimestamp(name string, value time.Time) error { + if value.IsZero() { + return fmt.Errorf("%s must not be zero", name) + } + if !value.Equal(value.UTC()) { + return fmt.Errorf("%s must be UTC", name) + } + if !value.Equal(value.Truncate(time.Millisecond)) { + return fmt.Errorf("%s must use millisecond precision", name) + } + + return nil +} diff --git a/notification/internal/service/acceptintent/service_test.go b/notification/internal/service/acceptintent/service_test.go new file mode 100644 index 0000000..eb0bcef --- /dev/null +++ b/notification/internal/service/acceptintent/service_test.go @@ -0,0 +1,613 @@ +package acceptintent + +import ( + "context" + "errors" + "testing" + "time" + + "galaxy/notification/internal/api/intentstream" + "galaxy/notification/internal/config" + + "github.com/stretchr/testify/require" +) + +func TestServiceAcceptsIntentAndMaterializesUserRoutes(t *testing.T) { + t.Parallel() + + store := newRecordingStore() + directory := newStaticUserDirectory(map[string]UserRecord{ + "user-1": {Email: "one@example.com", PreferredLanguage: "en"}, + "user-2": {Email: "two@example.com", PreferredLanguage: "en-US"}, + }) + service, err := New(Config{ + Store: store, + UserDirectory: directory, + Clock: fixedClock{now: time.UnixMilli(1775121700000).UTC()}, + PushMaxAttempts: 3, + EmailMaxAttempts: 7, + IdempotencyTTL: 7 * 24 * time.Hour, + }) + require.NoError(t, err) + + result, err := service.Execute(context.Background(), AcceptInput{ + NotificationID: "1775121700000-0", + Intent: 
validTurnReadyIntent(`{"game_id":"game-123","game_name":"Nebula Clash","turn_number":54}`, []string{"user-2", "user-1"}, "request-123", "trace-123", time.UnixMilli(1775121700001).UTC()), + }) + require.NoError(t, err) + require.Equal(t, OutcomeAccepted, result.Outcome) + require.Len(t, store.createInputs, 1) + + createInput := store.createInputs[0] + require.Equal(t, "1775121700000-0", createInput.Notification.NotificationID) + require.Equal(t, []string{"user-1", "user-2"}, createInput.Notification.RecipientUserIDs) + require.Equal(t, `{"game_id":"game-123","game_name":"Nebula Clash","turn_number":54}`, createInput.Notification.PayloadJSON) + require.Len(t, createInput.Routes, 4) + pushUser1 := routeByID(t, createInput.Routes, "push:user:user-1") + emailUser1 := routeByID(t, createInput.Routes, "email:user:user-1") + pushUser2 := routeByID(t, createInput.Routes, "push:user:user-2") + emailUser2 := routeByID(t, createInput.Routes, "email:user:user-2") + require.Equal(t, RouteStatusPending, pushUser1.Status) + require.Equal(t, 3, pushUser1.MaxAttempts) + require.Equal(t, "one@example.com", pushUser1.ResolvedEmail) + require.Equal(t, "en", pushUser1.ResolvedLocale) + require.Equal(t, RouteStatusPending, emailUser1.Status) + require.Equal(t, 7, emailUser1.MaxAttempts) + require.Equal(t, "one@example.com", emailUser1.ResolvedEmail) + require.Equal(t, "en", emailUser1.ResolvedLocale) + require.Equal(t, "two@example.com", pushUser2.ResolvedEmail) + require.Equal(t, "en", pushUser2.ResolvedLocale) + require.Equal(t, "two@example.com", emailUser2.ResolvedEmail) + require.Equal(t, "en", emailUser2.ResolvedLocale) + require.Equal(t, []string{"user-1", "user-2"}, directory.lookups) +} + +func TestServiceTreatsEquivalentReplayAsDuplicate(t *testing.T) { + t.Parallel() + + store := newRecordingStore() + directory := newStaticUserDirectory(map[string]UserRecord{ + "user-1": {Email: "one@example.com", PreferredLanguage: "en"}, + }) + service, err := New(Config{ + Store: store, + 
UserDirectory: directory, + Clock: fixedClock{now: time.UnixMilli(1775121700000).UTC()}, + PushMaxAttempts: 3, + EmailMaxAttempts: 7, + IdempotencyTTL: 7 * 24 * time.Hour, + }) + require.NoError(t, err) + + firstInput := AcceptInput{ + NotificationID: "1775121700000-0", + Intent: validTurnReadyIntent(`{"game_id":"game-123","game_name":"Nebula Clash","turn_number":54}`, []string{"user-1"}, "request-1", "trace-1", time.UnixMilli(1775121700001).UTC()), + } + secondInput := AcceptInput{ + NotificationID: "1775121700001-0", + Intent: validTurnReadyIntent(`{"game_id":"game-123","game_name":"Nebula Clash","turn_number":54}`, []string{"user-1"}, "request-2", "trace-2", time.UnixMilli(1775121799999).UTC()), + } + + firstResult, err := service.Execute(context.Background(), firstInput) + require.NoError(t, err) + require.Equal(t, OutcomeAccepted, firstResult.Outcome) + + secondResult, err := service.Execute(context.Background(), secondInput) + require.NoError(t, err) + require.Equal(t, OutcomeDuplicate, secondResult.Outcome) + require.Len(t, store.createInputs, 1) + require.Equal(t, []string{"user-1"}, directory.lookups) +} + +func TestServiceRejectsConflictOnSameIdempotencyScope(t *testing.T) { + t.Parallel() + + store := newRecordingStore() + directory := newStaticUserDirectory(map[string]UserRecord{ + "user-1": {Email: "one@example.com", PreferredLanguage: "en"}, + }) + service, err := New(Config{ + Store: store, + UserDirectory: directory, + Clock: fixedClock{now: time.UnixMilli(1775121700000).UTC()}, + PushMaxAttempts: 3, + EmailMaxAttempts: 7, + IdempotencyTTL: 7 * 24 * time.Hour, + }) + require.NoError(t, err) + + _, err = service.Execute(context.Background(), AcceptInput{ + NotificationID: "1775121700000-0", + Intent: validTurnReadyIntent(`{"game_id":"game-123","game_name":"Nebula Clash","turn_number":54}`, []string{"user-1"}, "", "", time.UnixMilli(1775121700001).UTC()), + }) + require.NoError(t, err) + + _, err = service.Execute(context.Background(), AcceptInput{ + 
NotificationID: "1775121700002-0", + Intent: validTurnReadyIntent(`{"game_id":"game-123","game_name":"Nebula Clash","turn_number":55}`, []string{"user-1"}, "", "", time.UnixMilli(1775121700002).UTC()), + }) + require.ErrorIs(t, err, ErrConflict) +} + +func TestServiceMaterializesPublicLobbyApplicationAdminRoutes(t *testing.T) { + t.Parallel() + + store := newRecordingStore() + directory := newStaticUserDirectory(nil) + service, err := New(Config{ + Store: store, + UserDirectory: directory, + Clock: fixedClock{now: time.UnixMilli(1775121700000).UTC()}, + PushMaxAttempts: 3, + EmailMaxAttempts: 7, + IdempotencyTTL: 7 * 24 * time.Hour, + AdminRouting: config.AdminRoutingConfig{ + LobbyApplicationSubmitted: []string{"owner@example.com"}, + }, + }) + require.NoError(t, err) + + result, err := service.Execute(context.Background(), AcceptInput{ + NotificationID: "1775121700000-0", + Intent: validPublicApplicationIntent(), + }) + require.NoError(t, err) + require.Equal(t, OutcomeAccepted, result.Outcome) + require.Len(t, store.createInputs, 1) + require.Len(t, store.createInputs[0].Routes, 2) + + pushRoute := routeByID(t, store.createInputs[0].Routes, "push:email:owner@example.com") + emailRoute := routeByID(t, store.createInputs[0].Routes, "email:email:owner@example.com") + + require.Equal(t, RouteStatusSkipped, pushRoute.Status) + require.Equal(t, "owner@example.com", pushRoute.ResolvedEmail) + require.Equal(t, "en", pushRoute.ResolvedLocale) + require.Equal(t, RouteStatusPending, emailRoute.Status) + require.Equal(t, "owner@example.com", emailRoute.ResolvedEmail) + require.Equal(t, "en", emailRoute.ResolvedLocale) + require.Empty(t, directory.lookups) +} + +func TestServiceMaterializesSyntheticAdminConfigRouteWhenListIsEmpty(t *testing.T) { + t.Parallel() + + store := newRecordingStore() + directory := newStaticUserDirectory(nil) + service, err := New(Config{ + Store: store, + UserDirectory: directory, + Clock: fixedClock{now: time.UnixMilli(1775121700000).UTC()}, + 
PushMaxAttempts: 3, + EmailMaxAttempts: 7, + IdempotencyTTL: 7 * 24 * time.Hour, + }) + require.NoError(t, err) + + result, err := service.Execute(context.Background(), AcceptInput{ + NotificationID: "1775121700000-0", + Intent: validPublicApplicationIntent(), + }) + require.NoError(t, err) + require.Equal(t, OutcomeAccepted, result.Outcome) + require.Len(t, store.createInputs, 1) + require.Len(t, store.createInputs[0].Routes, 1) + + route := store.createInputs[0].Routes[0] + require.Equal(t, "email:config:lobby.application.submitted", route.RouteID) + require.Equal(t, RouteStatusSkipped, route.Status) + require.Equal(t, 7, route.MaxAttempts) + require.True(t, route.NextAttemptAt.IsZero()) + require.Empty(t, directory.lookups) +} + +func TestServiceMaterializesChannelMatrixAndRetryBudgets(t *testing.T) { + t.Parallel() + + now := time.UnixMilli(1775121700000).UTC() + tests := []struct { + name string + intent intentstream.Intent + adminRouting config.AdminRoutingConfig + wantRoutes map[string]struct { + status RouteStatus + maxAttempts int + } + }{ + { + name: "user push and email", + intent: validTurnReadyIntent( + `{"game_id":"game-123","game_name":"Nebula Clash","turn_number":54}`, + []string{"user-1"}, + "", + "", + now, + ), + wantRoutes: map[string]struct { + status RouteStatus + maxAttempts int + }{ + "push:user:user-1": {status: RouteStatusPending, maxAttempts: 3}, + "email:user:user-1": {status: RouteStatusPending, maxAttempts: 7}, + }, + }, + { + name: "user email only", + intent: intentstream.Intent{ + NotificationType: intentstream.NotificationTypeLobbyInviteExpired, + Producer: intentstream.ProducerGameLobby, + AudienceKind: intentstream.AudienceKindUser, + RecipientUserIDs: []string{"user-1"}, + IdempotencyKey: "game-123:invite-expired", + OccurredAt: now, + PayloadJSON: `{"game_id":"game-123","game_name":"Nebula Clash","invitee_name":"Nova Pilot","invitee_user_id":"user-2"}`, + }, + wantRoutes: map[string]struct { + status RouteStatus + maxAttempts 
int + }{ + "push:user:user-1": {status: RouteStatusSkipped, maxAttempts: 3}, + "email:user:user-1": {status: RouteStatusPending, maxAttempts: 7}, + }, + }, + { + name: "admin email only", + intent: intentstream.Intent{ + NotificationType: intentstream.NotificationTypeGeoReviewRecommended, + Producer: intentstream.ProducerGeoProfile, + AudienceKind: intentstream.AudienceKindAdminEmail, + IdempotencyKey: "geo:user-1", + OccurredAt: now, + PayloadJSON: `{"observed_country":"DE","review_reason":"country_mismatch","usual_connection_country":"PL","user_email":"pilot@example.com","user_id":"user-1"}`, + }, + adminRouting: config.AdminRoutingConfig{ + GeoReviewRecommended: []string{"admin@example.com"}, + }, + wantRoutes: map[string]struct { + status RouteStatus + maxAttempts int + }{ + "push:email:admin@example.com": {status: RouteStatusSkipped, maxAttempts: 3}, + "email:email:admin@example.com": {status: RouteStatusPending, maxAttempts: 7}, + }, + }, + } + + for _, tt := range tests { + tt := tt + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + store := newRecordingStore() + directory := newStaticUserDirectory(map[string]UserRecord{ + "user-1": {Email: "pilot@example.com", PreferredLanguage: "fr-FR"}, + }) + service, err := New(Config{ + Store: store, + UserDirectory: directory, + Clock: fixedClock{now: now}, + PushMaxAttempts: 3, + EmailMaxAttempts: 7, + IdempotencyTTL: 7 * 24 * time.Hour, + AdminRouting: tt.adminRouting, + }) + require.NoError(t, err) + + result, err := service.Execute(context.Background(), AcceptInput{ + NotificationID: "1775121700000-0", + Intent: tt.intent, + }) + require.NoError(t, err) + require.Equal(t, OutcomeAccepted, result.Outcome) + require.Len(t, store.createInputs, 1) + require.Len(t, store.createInputs[0].Routes, len(tt.wantRoutes)) + + for routeID, want := range tt.wantRoutes { + route := routeByID(t, store.createInputs[0].Routes, routeID) + require.Equal(t, want.status, route.Status) + require.Equal(t, want.maxAttempts, 
route.MaxAttempts) + } + }) + } +} + +func TestServiceReturnsRecipientNotFoundForMissingUser(t *testing.T) { + t.Parallel() + + store := newRecordingStore() + directory := newStaticUserDirectory(nil) + service, err := New(Config{ + Store: store, + UserDirectory: directory, + Clock: fixedClock{now: time.UnixMilli(1775121700000).UTC()}, + PushMaxAttempts: 3, + EmailMaxAttempts: 7, + IdempotencyTTL: 7 * 24 * time.Hour, + }) + require.NoError(t, err) + + _, err = service.Execute(context.Background(), AcceptInput{ + NotificationID: "1775121700000-0", + Intent: validTurnReadyIntent(`{"game_id":"game-123","game_name":"Nebula Clash","turn_number":54}`, []string{"user-missing"}, "", "", time.UnixMilli(1775121700001).UTC()), + }) + require.ErrorIs(t, err, ErrRecipientNotFound) + require.Empty(t, store.createInputs) + require.Equal(t, []string{"user-missing"}, directory.lookups) +} + +func TestServiceReturnsServiceUnavailableWhenDirectoryFails(t *testing.T) { + t.Parallel() + + store := newRecordingStore() + directory := newStaticUserDirectory(nil) + directory.errByUserID["user-1"] = errors.New("user service unavailable") + service, err := New(Config{ + Store: store, + UserDirectory: directory, + Clock: fixedClock{now: time.UnixMilli(1775121700000).UTC()}, + PushMaxAttempts: 3, + EmailMaxAttempts: 7, + IdempotencyTTL: 7 * 24 * time.Hour, + }) + require.NoError(t, err) + + _, err = service.Execute(context.Background(), AcceptInput{ + NotificationID: "1775121700000-0", + Intent: validTurnReadyIntent(`{"game_id":"game-123","game_name":"Nebula Clash","turn_number":54}`, []string{"user-1"}, "", "", time.UnixMilli(1775121700001).UTC()), + }) + require.ErrorIs(t, err, ErrServiceUnavailable) + require.Empty(t, store.createInputs) +} + +func TestServiceRecordsIntentAndUserEnrichmentTelemetry(t *testing.T) { + t.Parallel() + + store := newRecordingStore() + directory := newStaticUserDirectory(map[string]UserRecord{ + "user-1": {Email: "one@example.com", PreferredLanguage: "en"}, + }) + 
telemetry := &recordingTelemetry{} + service, err := New(Config{ + Store: store, + UserDirectory: directory, + Clock: fixedClock{now: time.UnixMilli(1775121700000).UTC()}, + Telemetry: telemetry, + PushMaxAttempts: 3, + EmailMaxAttempts: 7, + IdempotencyTTL: 7 * 24 * time.Hour, + }) + require.NoError(t, err) + + input := AcceptInput{ + NotificationID: "1775121700000-0", + Intent: validTurnReadyIntent(`{"game_id":"game-123","game_name":"Nebula Clash","turn_number":54}`, []string{"user-1"}, "", "", time.UnixMilli(1775121700001).UTC()), + } + result, err := service.Execute(context.Background(), input) + require.NoError(t, err) + require.Equal(t, OutcomeAccepted, result.Outcome) + + duplicateInput := input + duplicateInput.NotificationID = "1775121700001-0" + result, err = service.Execute(context.Background(), duplicateInput) + require.NoError(t, err) + require.Equal(t, OutcomeDuplicate, result.Outcome) + + require.Equal(t, []intentOutcomeRecord{ + { + notificationType: "game.turn.ready", + producer: "game_master", + audienceKind: "user", + outcome: "accepted", + }, + { + notificationType: "game.turn.ready", + producer: "game_master", + audienceKind: "user", + outcome: "duplicate", + }, + }, telemetry.intentOutcomes) + require.Equal(t, []userEnrichmentRecord{ + {notificationType: "game.turn.ready", result: "success"}, + }, telemetry.userEnrichment) +} + +func TestServiceRecordsUserEnrichmentFailureTelemetry(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + directory *staticUserDirectory + want string + }{ + { + name: "recipient not found", + directory: newStaticUserDirectory(nil), + want: "recipient_not_found", + }, + { + name: "service unavailable", + directory: func() *staticUserDirectory { + directory := newStaticUserDirectory(nil) + directory.errByUserID["user-1"] = errors.New("user service unavailable") + return directory + }(), + want: "service_unavailable", + }, + } + + for _, tt := range tests { + tt := tt + t.Run(tt.name, func(t 
*testing.T) { + t.Parallel() + + telemetry := &recordingTelemetry{} + service, err := New(Config{ + Store: newRecordingStore(), + UserDirectory: tt.directory, + Clock: fixedClock{now: time.UnixMilli(1775121700000).UTC()}, + Telemetry: telemetry, + PushMaxAttempts: 3, + EmailMaxAttempts: 7, + IdempotencyTTL: 7 * 24 * time.Hour, + }) + require.NoError(t, err) + + _, err = service.Execute(context.Background(), AcceptInput{ + NotificationID: "1775121700000-0", + Intent: validTurnReadyIntent(`{"game_id":"game-123","game_name":"Nebula Clash","turn_number":54}`, []string{"user-1"}, "", "", time.UnixMilli(1775121700001).UTC()), + }) + require.Error(t, err) + require.Equal(t, []userEnrichmentRecord{ + {notificationType: "game.turn.ready", result: tt.want}, + }, telemetry.userEnrichment) + }) + } +} + +type recordingStore struct { + createInputs []CreateAcceptanceInput + idempotency map[string]IdempotencyRecord + notifications map[string]NotificationRecord +} + +func newRecordingStore() *recordingStore { + return &recordingStore{ + idempotency: make(map[string]IdempotencyRecord), + notifications: make(map[string]NotificationRecord), + } +} + +func (store *recordingStore) CreateAcceptance(_ context.Context, input CreateAcceptanceInput) error { + if err := input.Validate(); err != nil { + return err + } + + key := string(input.Idempotency.Producer) + ":" + input.Idempotency.IdempotencyKey + if _, ok := store.idempotency[key]; ok { + return ErrConflict + } + + store.createInputs = append(store.createInputs, input) + store.idempotency[key] = input.Idempotency + store.notifications[input.Notification.NotificationID] = input.Notification + + return nil +} + +func (store *recordingStore) GetIdempotency(_ context.Context, producer intentstream.Producer, idempotencyKey string) (IdempotencyRecord, bool, error) { + record, ok := store.idempotency[string(producer)+":"+idempotencyKey] + return record, ok, nil +} + +func (store *recordingStore) GetNotification(_ context.Context, 
notificationID string) (NotificationRecord, bool, error) { + record, ok := store.notifications[notificationID] + return record, ok, nil +} + +type fixedClock struct { + now time.Time +} + +func (clock fixedClock) Now() time.Time { + return clock.now +} + +func validTurnReadyIntent(payload string, recipients []string, requestID string, traceID string, occurredAt time.Time) intentstream.Intent { + sorted := append([]string(nil), recipients...) + if len(sorted) == 2 && sorted[0] == "user-2" { + sorted[0], sorted[1] = sorted[1], sorted[0] + } + return intentstream.Intent{ + NotificationType: intentstream.NotificationTypeGameTurnReady, + Producer: intentstream.ProducerGameMaster, + AudienceKind: intentstream.AudienceKindUser, + RecipientUserIDs: sorted, + IdempotencyKey: "game-123:turn-54", + OccurredAt: occurredAt.UTC().Truncate(time.Millisecond), + RequestID: requestID, + TraceID: traceID, + PayloadJSON: payload, + } +} + +func validPublicApplicationIntent() intentstream.Intent { + return intentstream.Intent{ + NotificationType: intentstream.NotificationTypeLobbyApplicationSubmitted, + Producer: intentstream.ProducerGameLobby, + AudienceKind: intentstream.AudienceKindAdminEmail, + IdempotencyKey: "game-456:application-submitted:user-42", + OccurredAt: time.UnixMilli(1775121700002).UTC(), + PayloadJSON: `{"applicant_name":"Nova Pilot","applicant_user_id":"user-42","game_id":"game-456","game_name":"Orion Front"}`, + } +} + +func routeByID(t *testing.T, routes []NotificationRoute, routeID string) NotificationRoute { + t.Helper() + + for _, route := range routes { + if route.RouteID == routeID { + return route + } + } + + t.Fatalf("route %q not found", routeID) + return NotificationRoute{} +} + +type staticUserDirectory struct { + records map[string]UserRecord + errByUserID map[string]error + lookups []string +} + +func newStaticUserDirectory(records map[string]UserRecord) *staticUserDirectory { + return &staticUserDirectory{ + records: records, + errByUserID: 
make(map[string]error), + } +} + +func (directory *staticUserDirectory) GetUserByID(_ context.Context, userID string) (UserRecord, error) { + directory.lookups = append(directory.lookups, userID) + if err, ok := directory.errByUserID[userID]; ok { + return UserRecord{}, err + } + record, ok := directory.records[userID] + if !ok { + return UserRecord{}, ErrRecipientNotFound + } + + return record, nil +} + +type recordingTelemetry struct { + intentOutcomes []intentOutcomeRecord + userEnrichment []userEnrichmentRecord +} + +func (telemetry *recordingTelemetry) RecordIntentOutcome(_ context.Context, notificationType string, producer string, audienceKind string, outcome string) { + telemetry.intentOutcomes = append(telemetry.intentOutcomes, intentOutcomeRecord{ + notificationType: notificationType, + producer: producer, + audienceKind: audienceKind, + outcome: outcome, + }) +} + +func (telemetry *recordingTelemetry) RecordUserEnrichmentAttempt(_ context.Context, notificationType string, result string) { + telemetry.userEnrichment = append(telemetry.userEnrichment, userEnrichmentRecord{ + notificationType: notificationType, + result: result, + }) +} + +type intentOutcomeRecord struct { + notificationType string + producer string + audienceKind string + outcome string +} + +type userEnrichmentRecord struct { + notificationType string + result string +} diff --git a/notification/internal/service/doc.go b/notification/internal/service/doc.go new file mode 100644 index 0000000..ffb9ea6 --- /dev/null +++ b/notification/internal/service/doc.go @@ -0,0 +1,3 @@ +// Package service reserves the application-service namespace of Notification +// Service. 
+package service diff --git a/notification/internal/service/malformedintent/model.go b/notification/internal/service/malformedintent/model.go new file mode 100644 index 0000000..31d87f7 --- /dev/null +++ b/notification/internal/service/malformedintent/model.go @@ -0,0 +1,135 @@ +// Package malformedintent defines the operator-visible record used for +// malformed notification intents. +package malformedintent + +import ( + "encoding/json" + "fmt" + "strings" + "time" +) + +// FailureCode identifies one stable malformed-intent rejection reason. +type FailureCode string + +const ( + // FailureCodeInvalidIntent reports malformed top-level intent fields or an + // invalid normalized envelope. + FailureCodeInvalidIntent FailureCode = "invalid_intent" + + // FailureCodeInvalidPayload reports malformed or schema-invalid + // `payload_json`. + FailureCodeInvalidPayload FailureCode = "invalid_payload" + + // FailureCodeIdempotencyConflict reports a duplicate idempotency scope that + // conflicts with already accepted normalized content. + FailureCodeIdempotencyConflict FailureCode = "idempotency_conflict" + + // FailureCodeRecipientNotFound reports that a user-targeted recipient user + // id could not be resolved through User Service. + FailureCodeRecipientNotFound FailureCode = "recipient_not_found" +) + +// Entry stores one operator-visible malformed notification-intent record. +type Entry struct { + // StreamEntryID stores the Redis Stream entry identifier of the rejected + // intent. + StreamEntryID string + + // NotificationType stores the optional raw notification type extracted from + // the rejected entry. + NotificationType string + + // Producer stores the optional raw producer value extracted from the + // rejected entry. + Producer string + + // IdempotencyKey stores the optional raw idempotency key extracted from the + // rejected entry. + IdempotencyKey string + + // FailureCode stores the stable rejection classification. 
+ FailureCode FailureCode + + // FailureMessage stores the detailed validation or decode failure. + FailureMessage string + + // RawFields stores the raw top-level stream fields captured for operator + // inspection. + RawFields map[string]any + + // RecordedAt stores when the malformed intent was durably recorded. + RecordedAt time.Time +} + +// IsKnown reports whether code belongs to the frozen malformed-intent +// rejection surface. +func (code FailureCode) IsKnown() bool { + switch code { + case FailureCodeInvalidIntent, FailureCodeInvalidPayload, FailureCodeIdempotencyConflict, FailureCodeRecipientNotFound: + return true + default: + return false + } +} + +// Validate reports whether entry contains a complete malformed-intent record. +func (entry Entry) Validate() error { + if strings.TrimSpace(entry.StreamEntryID) == "" { + return fmt.Errorf("malformed intent stream entry id must not be empty") + } + if !entry.FailureCode.IsKnown() { + return fmt.Errorf("malformed intent failure code %q is unsupported", entry.FailureCode) + } + if strings.TrimSpace(entry.FailureMessage) == "" { + return fmt.Errorf("malformed intent failure message must not be empty") + } + if strings.TrimSpace(entry.FailureMessage) != entry.FailureMessage { + return fmt.Errorf("malformed intent failure message must not contain surrounding whitespace") + } + if entry.RawFields == nil { + return fmt.Errorf("malformed intent raw fields must not be nil") + } + if err := validateJSONObject("malformed intent raw fields", entry.RawFields); err != nil { + return err + } + if err := validateTimestamp("malformed intent recorded at", entry.RecordedAt); err != nil { + return err + } + + return nil +} + +func validateJSONObject(name string, value map[string]any) error { + payload, err := json.Marshal(value) + if err != nil { + return fmt.Errorf("%s: %w", name, err) + } + if string(payload) == "null" { + return fmt.Errorf("%s must encode as a JSON object", name) + } + + var decoded map[string]any + if err 
:= json.Unmarshal(payload, &decoded); err != nil { + return fmt.Errorf("%s: %w", name, err) + } + if decoded == nil { + return fmt.Errorf("%s must encode as a JSON object", name) + } + + return nil +} + +func validateTimestamp(name string, value time.Time) error { + if value.IsZero() { + return fmt.Errorf("%s must not be zero", name) + } + if !value.Equal(value.UTC()) { + return fmt.Errorf("%s must be UTC", name) + } + if !value.Equal(value.Truncate(time.Millisecond)) { + return fmt.Errorf("%s must use millisecond precision", name) + } + + return nil +} diff --git a/notification/internal/service/publishmail/encoder.go b/notification/internal/service/publishmail/encoder.go new file mode 100644 index 0000000..b0088f0 --- /dev/null +++ b/notification/internal/service/publishmail/encoder.go @@ -0,0 +1,178 @@ +// Package publishmail encodes accepted email routes into Mail Service generic +// asynchronous template commands. +package publishmail + +import ( + "encoding/json" + "fmt" + netmail "net/mail" + "strconv" + "strings" + "time" + + "galaxy/notification/internal/api/intentstream" + "galaxy/notification/internal/service/acceptintent" +) + +const ( + commandSourceNotification = "notification" + commandPayloadModeTemplate = "template" +) + +// Command stores one Mail Service-compatible template delivery command +// produced from a durable notification email route. +type Command struct { + // DeliveryID stores the stable route-level delivery identifier. + DeliveryID string + + // IdempotencyKey stores the stable Mail Service deduplication key. + IdempotencyKey string + + // RequestedAt stores when Notification Service durably accepted the + // notification intent. + RequestedAt time.Time + + // PayloadJSON stores the fully encoded template-mode command payload. + PayloadJSON string + + // RequestID stores the optional correlation identifier. + RequestID string + + // TraceID stores the optional tracing correlation identifier. 
+ TraceID string +} + +// Values returns the Redis Stream fields appended to the Mail Service command +// stream for Command. +func (command Command) Values() map[string]any { + values := map[string]any{ + "delivery_id": command.DeliveryID, + "source": commandSourceNotification, + "payload_mode": commandPayloadModeTemplate, + "idempotency_key": command.IdempotencyKey, + "requested_at_ms": strconv.FormatInt(command.RequestedAt.UTC().UnixMilli(), 10), + "payload_json": command.PayloadJSON, + } + if command.RequestID != "" { + values["request_id"] = command.RequestID + } + if command.TraceID != "" { + values["trace_id"] = command.TraceID + } + + return values +} + +// Encoder converts one accepted notification record plus its email route into +// one Mail Service-compatible generic template command. +type Encoder struct{} + +// Encode converts notification plus route into one template delivery command. +func (Encoder) Encode(notification acceptintent.NotificationRecord, route acceptintent.NotificationRoute) (Command, error) { + if err := notification.Validate(); err != nil { + return Command{}, fmt.Errorf("encode mail command: %w", err) + } + if err := route.Validate(); err != nil { + return Command{}, fmt.Errorf("encode mail command: %w", err) + } + if notification.NotificationID != route.NotificationID { + return Command{}, fmt.Errorf("encode mail command: notification id %q does not match route notification id %q", notification.NotificationID, route.NotificationID) + } + if route.Channel != intentstream.ChannelEmail { + return Command{}, fmt.Errorf("encode mail command: route channel %q is unsupported", route.Channel) + } + if !notification.NotificationType.SupportsChannel(notification.AudienceKind, intentstream.ChannelEmail) { + return Command{}, fmt.Errorf("encode mail command: payload_encoding_failed: notification type %q does not support email", notification.NotificationType) + } + + recipientEmail, err := normalizedRecipientEmail(route.ResolvedEmail) + if err 
!= nil { + return Command{}, fmt.Errorf("encode mail command: payload_encoding_failed: %w", err) + } + locale, err := normalizedLocale(route.ResolvedLocale) + if err != nil { + return Command{}, fmt.Errorf("encode mail command: payload_encoding_failed: %w", err) + } + variables, err := payloadVariables(notification.PayloadJSON) + if err != nil { + return Command{}, fmt.Errorf("encode mail command: payload_encoding_failed: %w", err) + } + + payloadJSON, err := json.Marshal(templatePayloadJSON{ + To: []string{recipientEmail}, + Cc: []string{}, + Bcc: []string{}, + ReplyTo: []string{}, + TemplateID: string(notification.NotificationType), + Locale: locale, + Variables: variables, + Attachments: []templateAttachmentJSON{}, + }) + if err != nil { + return Command{}, fmt.Errorf("encode mail command: payload_encoding_failed: marshal payload_json: %w", err) + } + + return Command{ + DeliveryID: notification.NotificationID + "/" + route.RouteID, + IdempotencyKey: "notification:" + notification.NotificationID + "/" + route.RouteID, + RequestedAt: notification.AcceptedAt, + PayloadJSON: string(payloadJSON), + RequestID: notification.RequestID, + TraceID: notification.TraceID, + }, nil +} + +type templatePayloadJSON struct { + To []string `json:"to"` + Cc []string `json:"cc"` + Bcc []string `json:"bcc"` + ReplyTo []string `json:"reply_to"` + TemplateID string `json:"template_id"` + Locale string `json:"locale"` + Variables json.RawMessage `json:"variables"` + Attachments []templateAttachmentJSON `json:"attachments"` +} + +type templateAttachmentJSON struct { + Filename string `json:"filename"` + ContentType string `json:"content_type"` + ContentBase64 string `json:"content_base64"` +} + +func normalizedRecipientEmail(value string) (string, error) { + if strings.TrimSpace(value) == "" { + return "", fmt.Errorf("resolved email must not be empty") + } + parsed, err := netmail.ParseAddress(value) + if err != nil { + return "", fmt.Errorf("resolved email %q must be valid: %w", 
value, err) + } + if parsed.Name != "" || parsed.Address != value { + return "", fmt.Errorf("resolved email %q must not include a display name", value) + } + + return value, nil +} + +func normalizedLocale(value string) (string, error) { + switch { + case strings.TrimSpace(value) == "": + return "", fmt.Errorf("resolved locale must not be empty") + case strings.TrimSpace(value) != value: + return "", fmt.Errorf("resolved locale %q must not contain surrounding whitespace", value) + default: + return value, nil + } +} + +func payloadVariables(payloadJSON string) (json.RawMessage, error) { + var payloadObject map[string]json.RawMessage + if err := json.Unmarshal([]byte(payloadJSON), &payloadObject); err != nil { + return nil, fmt.Errorf("decode payload_json: %w", err) + } + if payloadObject == nil { + return nil, fmt.Errorf("payload_json must be a JSON object") + } + + return json.RawMessage(payloadJSON), nil +} diff --git a/notification/internal/service/publishmail/encoder_test.go b/notification/internal/service/publishmail/encoder_test.go new file mode 100644 index 0000000..ab400d0 --- /dev/null +++ b/notification/internal/service/publishmail/encoder_test.go @@ -0,0 +1,275 @@ +package publishmail + +import ( + "encoding/json" + "testing" + "time" + + "galaxy/notification/internal/api/intentstream" + "galaxy/notification/internal/service/acceptintent" + + "github.com/stretchr/testify/require" +) + +func TestEncoderEncodesUserAndAdminEmailCommands(t *testing.T) { + t.Parallel() + + now := time.UnixMilli(1775121700000).UTC() + tests := []struct { + name string + notification acceptintent.NotificationRecord + route acceptintent.NotificationRoute + wantDeliveryID string + wantIdempotency string + wantPayloadJSON string + }{ + { + name: "user route", + notification: acceptintent.NotificationRecord{ + NotificationID: "1775121700000-0", + NotificationType: intentstream.NotificationTypeGameTurnReady, + Producer: intentstream.ProducerGameMaster, + AudienceKind: 
intentstream.AudienceKindUser, + RecipientUserIDs: []string{"user-1"}, + PayloadJSON: `{"game_id":"game-123","game_name":"Nebula Clash","turn_number":54}`, + IdempotencyKey: "game-123:turn-54", + RequestFingerprint: "sha256:deadbeef", + AcceptedAt: now, + OccurredAt: now, + UpdatedAt: now, + }, + route: acceptintent.NotificationRoute{ + NotificationID: "1775121700000-0", + RouteID: "email:user:user-1", + Channel: intentstream.ChannelEmail, + RecipientRef: "user:user-1", + Status: acceptintent.RouteStatusPending, + MaxAttempts: 7, + NextAttemptAt: now, + ResolvedEmail: "pilot@example.com", + ResolvedLocale: "en", + CreatedAt: now, + UpdatedAt: now, + }, + wantDeliveryID: "1775121700000-0/email:user:user-1", + wantIdempotency: "notification:1775121700000-0/email:user:user-1", + wantPayloadJSON: `{"to":["pilot@example.com"],"cc":[],"bcc":[],"reply_to":[],"template_id":"game.turn.ready","locale":"en","variables":{"game_id":"game-123","game_name":"Nebula Clash","turn_number":54},"attachments":[]}`, + }, + { + name: "admin route", + notification: acceptintent.NotificationRecord{ + NotificationID: "1775121700001-0", + NotificationType: intentstream.NotificationTypeLobbyApplicationSubmitted, + Producer: intentstream.ProducerGameLobby, + AudienceKind: intentstream.AudienceKindAdminEmail, + PayloadJSON: `{"applicant_name":"Nova Pilot","applicant_user_id":"user-42","game_id":"game-456","game_name":"Orion Front"}`, + IdempotencyKey: "game-456:application-submitted:user-42", + RequestFingerprint: "sha256:cafebabe", + AcceptedAt: now, + OccurredAt: now, + UpdatedAt: now, + }, + route: acceptintent.NotificationRoute{ + NotificationID: "1775121700001-0", + RouteID: "email:email:owner@example.com", + Channel: intentstream.ChannelEmail, + RecipientRef: "email:owner@example.com", + Status: acceptintent.RouteStatusPending, + MaxAttempts: 7, + NextAttemptAt: now, + ResolvedEmail: "owner@example.com", + ResolvedLocale: "en", + CreatedAt: now, + UpdatedAt: now, + }, + wantDeliveryID: 
"1775121700001-0/email:email:owner@example.com", + wantIdempotency: "notification:1775121700001-0/email:email:owner@example.com", + wantPayloadJSON: `{"to":["owner@example.com"],"cc":[],"bcc":[],"reply_to":[],"template_id":"lobby.application.submitted","locale":"en","variables":{"applicant_name":"Nova Pilot","applicant_user_id":"user-42","game_id":"game-456","game_name":"Orion Front"},"attachments":[]}`, + }, + } + + for _, tt := range tests { + tt := tt + + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + command, err := Encoder{}.Encode(tt.notification, tt.route) + require.NoError(t, err) + require.Equal(t, tt.wantDeliveryID, command.DeliveryID) + require.Equal(t, tt.wantIdempotency, command.IdempotencyKey) + require.Equal(t, now, command.RequestedAt) + require.JSONEq(t, tt.wantPayloadJSON, command.PayloadJSON) + + values := command.Values() + require.Equal(t, tt.wantDeliveryID, values["delivery_id"]) + require.Equal(t, "notification", values["source"]) + require.Equal(t, "template", values["payload_mode"]) + require.Equal(t, tt.wantIdempotency, values["idempotency_key"]) + require.Equal(t, "1775121700000", values["requested_at_ms"]) + }) + } +} + +func TestEncoderPropagatesTracingMetadata(t *testing.T) { + t.Parallel() + + now := time.UnixMilli(1775121700000).UTC() + command, err := Encoder{}.Encode( + acceptintent.NotificationRecord{ + NotificationID: "1775121700000-0", + NotificationType: intentstream.NotificationTypeGameTurnReady, + Producer: intentstream.ProducerGameMaster, + AudienceKind: intentstream.AudienceKindUser, + RecipientUserIDs: []string{"user-1"}, + PayloadJSON: `{"game_id":"game-123","game_name":"Nebula Clash","turn_number":54}`, + IdempotencyKey: "game-123:turn-54", + RequestFingerprint: "sha256:deadbeef", + RequestID: "request-1", + TraceID: "trace-1", + AcceptedAt: now, + OccurredAt: now, + UpdatedAt: now, + }, + acceptintent.NotificationRoute{ + NotificationID: "1775121700000-0", + RouteID: "email:user:user-1", + Channel: 
intentstream.ChannelEmail, + RecipientRef: "user:user-1", + Status: acceptintent.RouteStatusPending, + MaxAttempts: 7, + NextAttemptAt: now, + ResolvedEmail: "pilot@example.com", + ResolvedLocale: "en", + CreatedAt: now, + UpdatedAt: now, + }, + ) + require.NoError(t, err) + + values := command.Values() + require.Equal(t, "request-1", values["request_id"]) + require.Equal(t, "trace-1", values["trace_id"]) +} + +func TestEncoderPreservesNormalizedPayloadAsTemplateVariables(t *testing.T) { + t.Parallel() + + now := time.UnixMilli(1775121700000).UTC() + command, err := Encoder{}.Encode( + acceptintent.NotificationRecord{ + NotificationID: "1775121700000-0", + NotificationType: intentstream.NotificationTypeGameFinished, + Producer: intentstream.ProducerGameMaster, + AudienceKind: intentstream.AudienceKindUser, + RecipientUserIDs: []string{"user-1"}, + PayloadJSON: `{"final_turn_number":81,"game_id":"game-123","game_name":"Nebula Clash"}`, + IdempotencyKey: "game-123:final", + RequestFingerprint: "sha256:deadbeef", + AcceptedAt: now, + OccurredAt: now, + UpdatedAt: now, + }, + acceptintent.NotificationRoute{ + NotificationID: "1775121700000-0", + RouteID: "email:user:user-1", + Channel: intentstream.ChannelEmail, + RecipientRef: "user:user-1", + Status: acceptintent.RouteStatusPending, + MaxAttempts: 7, + NextAttemptAt: now, + ResolvedEmail: "pilot@example.com", + ResolvedLocale: "en", + CreatedAt: now, + UpdatedAt: now, + }, + ) + require.NoError(t, err) + + var payload struct { + Variables map[string]any `json:"variables"` + } + require.NoError(t, json.Unmarshal([]byte(command.PayloadJSON), &payload)) + require.Equal(t, map[string]any{ + "final_turn_number": float64(81), + "game_id": "game-123", + "game_name": "Nebula Clash", + }, payload.Variables) +} + +func TestEncoderUsesEmptyAncillaryEnvelopeFields(t *testing.T) { + t.Parallel() + + now := time.UnixMilli(1775121700000).UTC() + command, err := Encoder{}.Encode( + acceptintent.NotificationRecord{ + NotificationID: 
"1775121700000-0", + NotificationType: intentstream.NotificationTypeLobbyInviteExpired, + Producer: intentstream.ProducerGameLobby, + AudienceKind: intentstream.AudienceKindUser, + RecipientUserIDs: []string{"user-1"}, + PayloadJSON: `{"game_id":"game-123","game_name":"Nebula Clash","invitee_name":"Nova Pilot","invitee_user_id":"user-2"}`, + IdempotencyKey: "game-123:invite-expired", + RequestFingerprint: "sha256:deadbeef", + AcceptedAt: now, + OccurredAt: now, + UpdatedAt: now, + }, + acceptintent.NotificationRoute{ + NotificationID: "1775121700000-0", + RouteID: "email:user:user-1", + Channel: intentstream.ChannelEmail, + RecipientRef: "user:user-1", + Status: acceptintent.RouteStatusPending, + MaxAttempts: 7, + NextAttemptAt: now, + ResolvedEmail: "pilot@example.com", + ResolvedLocale: "en", + CreatedAt: now, + UpdatedAt: now, + }, + ) + require.NoError(t, err) + + require.JSONEq( + t, + `{"to":["pilot@example.com"],"cc":[],"bcc":[],"reply_to":[],"template_id":"lobby.invite.expired","locale":"en","variables":{"game_id":"game-123","game_name":"Nebula Clash","invitee_name":"Nova Pilot","invitee_user_id":"user-2"},"attachments":[]}`, + command.PayloadJSON, + ) +} + +func TestEncoderRejectsInvalidRouteForMailPublication(t *testing.T) { + t.Parallel() + + now := time.UnixMilli(1775121700000).UTC() + _, err := Encoder{}.Encode( + acceptintent.NotificationRecord{ + NotificationID: "1775121700000-0", + NotificationType: intentstream.NotificationTypeGameTurnReady, + Producer: intentstream.ProducerGameMaster, + AudienceKind: intentstream.AudienceKindUser, + RecipientUserIDs: []string{"user-1"}, + PayloadJSON: `{"game_id":"game-123","game_name":"Nebula Clash","turn_number":54}`, + IdempotencyKey: "game-123:turn-54", + RequestFingerprint: "sha256:deadbeef", + AcceptedAt: now, + OccurredAt: now, + UpdatedAt: now, + }, + acceptintent.NotificationRoute{ + NotificationID: "1775121700000-0", + RouteID: "push:user:user-1", + Channel: intentstream.ChannelPush, + RecipientRef: 
"user:user-1", + Status: acceptintent.RouteStatusPending, + MaxAttempts: 3, + NextAttemptAt: now, + ResolvedEmail: "pilot@example.com", + ResolvedLocale: "en", + CreatedAt: now, + UpdatedAt: now, + }, + ) + require.Error(t, err) + require.ErrorContains(t, err, `route channel "push" is unsupported`) +} diff --git a/notification/internal/service/publishpush/encoder.go b/notification/internal/service/publishpush/encoder.go new file mode 100644 index 0000000..58beba7 --- /dev/null +++ b/notification/internal/service/publishpush/encoder.go @@ -0,0 +1,221 @@ +// Package publishpush encodes user-facing notification routes into Gateway +// client-event payloads. +package publishpush + +import ( + "encoding/json" + "errors" + "fmt" + "strings" + + "galaxy/notification/internal/api/intentstream" + "galaxy/notification/internal/service/acceptintent" + "galaxy/transcoder" +) + +// Event stores one Gateway-compatible client event produced from a +// user-targeted notification route. +type Event struct { + // UserID stores the authenticated user fan-out target. + UserID string + + // EventType stores the stable client-facing event type. + EventType string + + // EventID stores the stable route-level event identifier. + EventID string + + // PayloadBytes stores the encoded FlatBuffers payload bytes. + PayloadBytes []byte + + // RequestID stores the optional correlation identifier. + RequestID string + + // TraceID stores the optional tracing correlation identifier. + TraceID string +} + +// Encoder maps one supported notification_type to the corresponding checked-in +// FlatBuffers payload encoder. +type Encoder struct{} + +// Encode converts one accepted notification record plus its push route into a +// Gateway-compatible client event. 
+func (Encoder) Encode(notification acceptintent.NotificationRecord, route acceptintent.NotificationRoute) (Event, error) { + if err := notification.Validate(); err != nil { + return Event{}, fmt.Errorf("encode push event: %w", err) + } + if err := route.Validate(); err != nil { + return Event{}, fmt.Errorf("encode push event: %w", err) + } + if route.Channel != intentstream.ChannelPush { + return Event{}, fmt.Errorf("encode push event: route channel %q is unsupported", route.Channel) + } + + userID, err := userIDFromRecipientRef(route.RecipientRef) + if err != nil { + return Event{}, fmt.Errorf("encode push event: %w", err) + } + + payloadBytes, err := encodePayload(notification.NotificationType, notification.PayloadJSON) + if err != nil { + return Event{}, fmt.Errorf("encode push event: %w", err) + } + + return Event{ + UserID: userID, + EventType: string(notification.NotificationType), + EventID: notification.NotificationID + "/" + route.RouteID, + PayloadBytes: payloadBytes, + RequestID: notification.RequestID, + TraceID: notification.TraceID, + }, nil +} + +func encodePayload(notificationType intentstream.NotificationType, payloadJSON string) ([]byte, error) { + switch notificationType { + case intentstream.NotificationTypeGameTurnReady: + var payload struct { + GameID string `json:"game_id"` + TurnNumber int64 `json:"turn_number"` + } + if err := decodePayload(payloadJSON, &payload); err != nil { + return nil, err + } + if payload.GameID == "" { + return nil, errors.New("payload_encoding_failed: game_id is empty") + } + if payload.TurnNumber < 1 { + return nil, errors.New("payload_encoding_failed: turn_number must be at least 1") + } + return wrapPayloadEncoding(transcoder.GameTurnReadyEventToPayload(&transcoder.GameTurnReadyEvent{ + GameID: payload.GameID, + TurnNumber: payload.TurnNumber, + })) + case intentstream.NotificationTypeGameFinished: + var payload struct { + GameID string `json:"game_id"` + FinalTurnNumber int64 `json:"final_turn_number"` + } + if 
err := decodePayload(payloadJSON, &payload); err != nil { + return nil, err + } + if payload.GameID == "" { + return nil, errors.New("payload_encoding_failed: game_id is empty") + } + if payload.FinalTurnNumber < 1 { + return nil, errors.New("payload_encoding_failed: final_turn_number must be at least 1") + } + return wrapPayloadEncoding(transcoder.GameFinishedEventToPayload(&transcoder.GameFinishedEvent{ + GameID: payload.GameID, + FinalTurnNumber: payload.FinalTurnNumber, + })) + case intentstream.NotificationTypeLobbyApplicationSubmitted: + var payload struct { + GameID string `json:"game_id"` + ApplicantUserID string `json:"applicant_user_id"` + } + if err := decodePayload(payloadJSON, &payload); err != nil { + return nil, err + } + if payload.GameID == "" { + return nil, errors.New("payload_encoding_failed: game_id is empty") + } + if payload.ApplicantUserID == "" { + return nil, errors.New("payload_encoding_failed: applicant_user_id is empty") + } + return wrapPayloadEncoding(transcoder.LobbyApplicationSubmittedEventToPayload(&transcoder.LobbyApplicationSubmittedEvent{ + GameID: payload.GameID, + ApplicantUserID: payload.ApplicantUserID, + })) + case intentstream.NotificationTypeLobbyMembershipApproved: + var payload struct { + GameID string `json:"game_id"` + } + if err := decodePayload(payloadJSON, &payload); err != nil { + return nil, err + } + if payload.GameID == "" { + return nil, errors.New("payload_encoding_failed: game_id is empty") + } + return wrapPayloadEncoding(transcoder.LobbyMembershipApprovedEventToPayload(&transcoder.LobbyMembershipApprovedEvent{ + GameID: payload.GameID, + })) + case intentstream.NotificationTypeLobbyMembershipRejected: + var payload struct { + GameID string `json:"game_id"` + } + if err := decodePayload(payloadJSON, &payload); err != nil { + return nil, err + } + if payload.GameID == "" { + return nil, errors.New("payload_encoding_failed: game_id is empty") + } + return 
wrapPayloadEncoding(transcoder.LobbyMembershipRejectedEventToPayload(&transcoder.LobbyMembershipRejectedEvent{ + GameID: payload.GameID, + })) + case intentstream.NotificationTypeLobbyInviteCreated: + var payload struct { + GameID string `json:"game_id"` + InviterUserID string `json:"inviter_user_id"` + } + if err := decodePayload(payloadJSON, &payload); err != nil { + return nil, err + } + if payload.GameID == "" { + return nil, errors.New("payload_encoding_failed: game_id is empty") + } + if payload.InviterUserID == "" { + return nil, errors.New("payload_encoding_failed: inviter_user_id is empty") + } + return wrapPayloadEncoding(transcoder.LobbyInviteCreatedEventToPayload(&transcoder.LobbyInviteCreatedEvent{ + GameID: payload.GameID, + InviterUserID: payload.InviterUserID, + })) + case intentstream.NotificationTypeLobbyInviteRedeemed: + var payload struct { + GameID string `json:"game_id"` + InviteeUserID string `json:"invitee_user_id"` + } + if err := decodePayload(payloadJSON, &payload); err != nil { + return nil, err + } + if payload.GameID == "" { + return nil, errors.New("payload_encoding_failed: game_id is empty") + } + if payload.InviteeUserID == "" { + return nil, errors.New("payload_encoding_failed: invitee_user_id is empty") + } + return wrapPayloadEncoding(transcoder.LobbyInviteRedeemedEventToPayload(&transcoder.LobbyInviteRedeemedEvent{ + GameID: payload.GameID, + InviteeUserID: payload.InviteeUserID, + })) + default: + return nil, fmt.Errorf("payload_encoding_failed: notification type %q does not support push", notificationType) + } +} + +func decodePayload(payloadJSON string, target any) error { + if err := json.Unmarshal([]byte(payloadJSON), target); err != nil { + return fmt.Errorf("payload_encoding_failed: decode payload_json: %w", err) + } + + return nil +} + +func wrapPayloadEncoding(payload []byte, err error) ([]byte, error) { + if err != nil { + return nil, fmt.Errorf("payload_encoding_failed: %w", err) + } + + return payload, nil +} + +func 
userIDFromRecipientRef(recipientRef string) (string, error) { + userID, ok := strings.CutPrefix(recipientRef, "user:") + if !ok || userID == "" { + return "", fmt.Errorf("recipient_ref %q is not user-targeted", recipientRef) + } + + return userID, nil +} diff --git a/notification/internal/service/publishpush/encoder_test.go b/notification/internal/service/publishpush/encoder_test.go new file mode 100644 index 0000000..2b45678 --- /dev/null +++ b/notification/internal/service/publishpush/encoder_test.go @@ -0,0 +1,186 @@ +package publishpush + +import ( + "testing" + "time" + + "galaxy/notification/internal/api/intentstream" + "galaxy/notification/internal/service/acceptintent" + "galaxy/transcoder" + + "github.com/stretchr/testify/require" +) + +func TestEncoderEncodesSupportedPushNotificationTypes(t *testing.T) { + t.Parallel() + + now := time.UnixMilli(1775121700000).UTC() + tests := []struct { + name string + notificationType intentstream.NotificationType + payloadJSON string + assertPayload func(*testing.T, []byte) + }{ + { + name: "game turn ready", + notificationType: intentstream.NotificationTypeGameTurnReady, + payloadJSON: `{"game_id":"game-1","game_name":"Nebula Clash","turn_number":54}`, + assertPayload: func(t *testing.T, payload []byte) { + t.Helper() + event, err := transcoder.PayloadToGameTurnReadyEvent(payload) + require.NoError(t, err) + require.Equal(t, "game-1", event.GameID) + require.Equal(t, int64(54), event.TurnNumber) + }, + }, + { + name: "game finished", + notificationType: intentstream.NotificationTypeGameFinished, + payloadJSON: `{"final_turn_number":81,"game_id":"game-2","game_name":"Nova"}`, + assertPayload: func(t *testing.T, payload []byte) { + t.Helper() + event, err := transcoder.PayloadToGameFinishedEvent(payload) + require.NoError(t, err) + require.Equal(t, "game-2", event.GameID) + require.Equal(t, int64(81), event.FinalTurnNumber) + }, + }, + { + name: "lobby application submitted", + notificationType: 
intentstream.NotificationTypeLobbyApplicationSubmitted, + payloadJSON: `{"applicant_name":"Nova Pilot","applicant_user_id":"user-2","game_id":"game-3","game_name":"Orion Front"}`, + assertPayload: func(t *testing.T, payload []byte) { + t.Helper() + event, err := transcoder.PayloadToLobbyApplicationSubmittedEvent(payload) + require.NoError(t, err) + require.Equal(t, "game-3", event.GameID) + require.Equal(t, "user-2", event.ApplicantUserID) + }, + }, + { + name: "lobby membership approved", + notificationType: intentstream.NotificationTypeLobbyMembershipApproved, + payloadJSON: `{"game_id":"game-4","game_name":"Ares"}`, + assertPayload: func(t *testing.T, payload []byte) { + t.Helper() + event, err := transcoder.PayloadToLobbyMembershipApprovedEvent(payload) + require.NoError(t, err) + require.Equal(t, "game-4", event.GameID) + }, + }, + { + name: "lobby membership rejected", + notificationType: intentstream.NotificationTypeLobbyMembershipRejected, + payloadJSON: `{"game_id":"game-5","game_name":"Atlas"}`, + assertPayload: func(t *testing.T, payload []byte) { + t.Helper() + event, err := transcoder.PayloadToLobbyMembershipRejectedEvent(payload) + require.NoError(t, err) + require.Equal(t, "game-5", event.GameID) + }, + }, + { + name: "lobby invite created", + notificationType: intentstream.NotificationTypeLobbyInviteCreated, + payloadJSON: `{"game_id":"game-6","game_name":"Vega","inviter_name":"Nova Pilot","inviter_user_id":"user-9"}`, + assertPayload: func(t *testing.T, payload []byte) { + t.Helper() + event, err := transcoder.PayloadToLobbyInviteCreatedEvent(payload) + require.NoError(t, err) + require.Equal(t, "game-6", event.GameID) + require.Equal(t, "user-9", event.InviterUserID) + }, + }, + { + name: "lobby invite redeemed", + notificationType: intentstream.NotificationTypeLobbyInviteRedeemed, + payloadJSON: `{"game_id":"game-7","game_name":"Lyra","invitee_name":"Skipper","invitee_user_id":"user-10"}`, + assertPayload: func(t *testing.T, payload []byte) { + 
t.Helper() + event, err := transcoder.PayloadToLobbyInviteRedeemedEvent(payload) + require.NoError(t, err) + require.Equal(t, "game-7", event.GameID) + require.Equal(t, "user-10", event.InviteeUserID) + }, + }, + } + + for _, tt := range tests { + tt := tt + + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + event, err := Encoder{}.Encode( + acceptintent.NotificationRecord{ + NotificationID: "1775121700000-0", + NotificationType: tt.notificationType, + Producer: tt.notificationType.ExpectedProducer(), + AudienceKind: intentstream.AudienceKindUser, + RecipientUserIDs: []string{"user-1"}, + PayloadJSON: tt.payloadJSON, + IdempotencyKey: "idem-1", + RequestFingerprint: "sha256:deadbeef", + RequestID: "request-1", + TraceID: "trace-1", + OccurredAt: now, + AcceptedAt: now, + UpdatedAt: now, + }, + acceptintent.NotificationRoute{ + NotificationID: "1775121700000-0", + RouteID: "push:user:user-1", + Channel: intentstream.ChannelPush, + RecipientRef: "user:user-1", + Status: acceptintent.RouteStatusPending, + MaxAttempts: 3, + NextAttemptAt: now, + CreatedAt: now, + UpdatedAt: now, + }, + ) + require.NoError(t, err) + require.Equal(t, "user-1", event.UserID) + require.Equal(t, string(tt.notificationType), event.EventType) + require.Equal(t, "1775121700000-0/push:user:user-1", event.EventID) + require.Equal(t, "request-1", event.RequestID) + require.Equal(t, "trace-1", event.TraceID) + require.NotEmpty(t, event.PayloadBytes) + tt.assertPayload(t, event.PayloadBytes) + }) + } +} + +func TestEncoderRejectsInvalidStoredPayload(t *testing.T) { + t.Parallel() + + now := time.UnixMilli(1775121700000).UTC() + _, err := Encoder{}.Encode( + acceptintent.NotificationRecord{ + NotificationID: "1775121700000-0", + NotificationType: intentstream.NotificationTypeGameTurnReady, + Producer: intentstream.ProducerGameMaster, + AudienceKind: intentstream.AudienceKindUser, + RecipientUserIDs: []string{"user-1"}, + PayloadJSON: `{"game_id":"","game_name":"Nebula Clash","turn_number":0}`, 
+ IdempotencyKey: "idem-1", + RequestFingerprint: "sha256:deadbeef", + OccurredAt: now, + AcceptedAt: now, + UpdatedAt: now, + }, + acceptintent.NotificationRoute{ + NotificationID: "1775121700000-0", + RouteID: "push:user:user-1", + Channel: intentstream.ChannelPush, + RecipientRef: "user:user-1", + Status: acceptintent.RouteStatusPending, + MaxAttempts: 3, + NextAttemptAt: now, + CreatedAt: now, + UpdatedAt: now, + }, + ) + require.Error(t, err) + require.ErrorContains(t, err, "payload_encoding_failed") +} diff --git a/notification/internal/telemetry/runtime.go b/notification/internal/telemetry/runtime.go new file mode 100644 index 0000000..4a1373e --- /dev/null +++ b/notification/internal/telemetry/runtime.go @@ -0,0 +1,694 @@ +// Package telemetry provides lightweight OpenTelemetry helpers and +// low-cardinality Notification Service instruments. +package telemetry + +import ( + "context" + "errors" + "fmt" + "log/slog" + "os" + "strings" + "sync" + "time" + + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc" + "go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp" + "go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc" + "go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp" + "go.opentelemetry.io/otel/exporters/stdout/stdoutmetric" + "go.opentelemetry.io/otel/exporters/stdout/stdouttrace" + "go.opentelemetry.io/otel/metric" + "go.opentelemetry.io/otel/propagation" + sdkmetric "go.opentelemetry.io/otel/sdk/metric" + "go.opentelemetry.io/otel/sdk/resource" + sdktrace "go.opentelemetry.io/otel/sdk/trace" + oteltrace "go.opentelemetry.io/otel/trace" +) + +const meterName = "galaxy/notification" + +const ( + defaultServiceName = "galaxy-notification" + + processExporterNone = "none" + processExporterOTLP = "otlp" + processProtocolHTTPProtobuf = "http/protobuf" + processProtocolGRPC = "grpc" +) + +// ProcessConfig configures the process-wide 
OpenTelemetry runtime. +type ProcessConfig struct { + // ServiceName overrides the default OpenTelemetry service name. + ServiceName string + + // TracesExporter selects the external traces exporter. Supported values are + // `none` and `otlp`. + TracesExporter string + + // MetricsExporter selects the external metrics exporter. Supported values + // are `none` and `otlp`. + MetricsExporter string + + // TracesProtocol selects the OTLP traces protocol when TracesExporter is + // `otlp`. + TracesProtocol string + + // MetricsProtocol selects the OTLP metrics protocol when MetricsExporter is + // `otlp`. + MetricsProtocol string + + // StdoutTracesEnabled enables the additional stdout trace exporter used for + // local development and debugging. + StdoutTracesEnabled bool + + // StdoutMetricsEnabled enables the additional stdout metric exporter used + // for local development and debugging. + StdoutMetricsEnabled bool +} + +// Validate reports whether cfg contains a supported OpenTelemetry exporter +// configuration. +func (cfg ProcessConfig) Validate() error { + switch cfg.TracesExporter { + case processExporterNone, processExporterOTLP: + default: + return fmt.Errorf("unsupported traces exporter %q", cfg.TracesExporter) + } + + switch cfg.MetricsExporter { + case processExporterNone, processExporterOTLP: + default: + return fmt.Errorf("unsupported metrics exporter %q", cfg.MetricsExporter) + } + + if cfg.TracesProtocol != "" && cfg.TracesProtocol != processProtocolHTTPProtobuf && cfg.TracesProtocol != processProtocolGRPC { + return fmt.Errorf("unsupported OTLP traces protocol %q", cfg.TracesProtocol) + } + if cfg.MetricsProtocol != "" && cfg.MetricsProtocol != processProtocolHTTPProtobuf && cfg.MetricsProtocol != processProtocolGRPC { + return fmt.Errorf("unsupported OTLP metrics protocol %q", cfg.MetricsProtocol) + } + + return nil +} + +// Runtime owns the Notification Service OpenTelemetry providers and +// low-cardinality custom instruments. 
+type Runtime struct { + tracerProvider oteltrace.TracerProvider + meterProvider metric.MeterProvider + + shutdownMu sync.Mutex + shutdownDone bool + shutdownErr error + shutdownFns []func(context.Context) error + + routeScheduleReaderMu sync.RWMutex + routeScheduleReader RouteScheduleSnapshotReader + + intentStreamLagReaderMu sync.RWMutex + intentStreamLagReader IntentStreamLagSnapshotReader + + internalHTTPRequests metric.Int64Counter + internalHTTPDuration metric.Float64Histogram + internalHTTPLifecycle metric.Int64Counter + intentOutcomes metric.Int64Counter + malformedIntents metric.Int64Counter + userEnrichment metric.Int64Counter + routePublishAttempts metric.Int64Counter + routeRetries metric.Int64Counter + routeDeadLetters metric.Int64Counter +} + +// RouteScheduleSnapshot stores the current observable state of the durable +// notification route schedule. +type RouteScheduleSnapshot struct { + // Depth stores how many route keys are currently present in the route + // schedule. + Depth int64 + + // OldestScheduledFor stores the oldest currently scheduled due time when + // one exists. + OldestScheduledFor *time.Time +} + +// RouteScheduleSnapshotReader loads one current route-schedule snapshot for +// observable gauge reporting. +type RouteScheduleSnapshotReader interface { + // ReadRouteScheduleSnapshot returns the current route-schedule depth and + // its oldest scheduled timestamp when one exists. + ReadRouteScheduleSnapshot(context.Context) (RouteScheduleSnapshot, error) +} + +// IntentStreamLagSnapshot stores the current observable lag of the plain-XREAD +// notification-intent consumer. +type IntentStreamLagSnapshot struct { + // OldestUnprocessedAt stores the Redis Stream timestamp of the oldest + // entry that has not yet been durably processed. + OldestUnprocessedAt *time.Time +} + +// IntentStreamLagSnapshotReader loads one current intent-stream lag snapshot +// for observable gauge reporting. 
+type IntentStreamLagSnapshotReader interface { + // ReadIntentStreamLagSnapshot returns the oldest unprocessed stream entry + // timestamp when one exists. + ReadIntentStreamLagSnapshot(context.Context) (IntentStreamLagSnapshot, error) +} + +// New constructs a lightweight telemetry runtime around meterProvider for +// tests and embedded use cases that do not need process-level exporter wiring. +func New(meterProvider metric.MeterProvider) (*Runtime, error) { + return NewWithProviders(meterProvider, nil) +} + +// NewWithProviders constructs a telemetry runtime around explicitly supplied +// meterProvider and tracerProvider values. +func NewWithProviders(meterProvider metric.MeterProvider, tracerProvider oteltrace.TracerProvider) (*Runtime, error) { + if meterProvider == nil { + meterProvider = otel.GetMeterProvider() + } + if tracerProvider == nil { + tracerProvider = otel.GetTracerProvider() + } + if meterProvider == nil { + return nil, errors.New("new notification telemetry runtime: nil meter provider") + } + if tracerProvider == nil { + return nil, errors.New("new notification telemetry runtime: nil tracer provider") + } + + return buildRuntime(meterProvider, tracerProvider, nil) +} + +// NewProcess constructs the process-wide Notification Service OpenTelemetry +// runtime from cfg, installs the resulting providers globally, and returns the +// runtime. 
+func NewProcess(ctx context.Context, cfg ProcessConfig, logger *slog.Logger) (*Runtime, error) { + if ctx == nil { + return nil, errors.New("new notification telemetry process: nil context") + } + if err := cfg.Validate(); err != nil { + return nil, fmt.Errorf("new notification telemetry process: %w", err) + } + if logger == nil { + logger = slog.Default() + } + + serviceName := strings.TrimSpace(cfg.ServiceName) + if serviceName == "" { + serviceName = defaultServiceName + } + + res := resource.NewSchemaless(attribute.String("service.name", serviceName)) + + tracerProvider, err := newTracerProvider(ctx, res, cfg) + if err != nil { + return nil, fmt.Errorf("new notification telemetry process: tracer provider: %w", err) + } + meterProvider, err := newMeterProvider(ctx, res, cfg) + if err != nil { + return nil, fmt.Errorf("new notification telemetry process: meter provider: %w", err) + } + + otel.SetTracerProvider(tracerProvider) + otel.SetMeterProvider(meterProvider) + otel.SetTextMapPropagator(propagation.NewCompositeTextMapPropagator( + propagation.TraceContext{}, + propagation.Baggage{}, + )) + + runtime, err := buildRuntime(meterProvider, tracerProvider, []func(context.Context) error{ + meterProvider.Shutdown, + tracerProvider.Shutdown, + }) + if err != nil { + return nil, fmt.Errorf("new notification telemetry process: runtime: %w", err) + } + + logger.Info("notification telemetry configured", + "service_name", serviceName, + "traces_exporter", cfg.TracesExporter, + "metrics_exporter", cfg.MetricsExporter, + ) + + return runtime, nil +} + +// TracerProvider returns the runtime tracer provider. +func (runtime *Runtime) TracerProvider() oteltrace.TracerProvider { + if runtime == nil || runtime.tracerProvider == nil { + return otel.GetTracerProvider() + } + + return runtime.tracerProvider +} + +// MeterProvider returns the runtime meter provider. 
+func (runtime *Runtime) MeterProvider() metric.MeterProvider { + if runtime == nil || runtime.meterProvider == nil { + return otel.GetMeterProvider() + } + + return runtime.meterProvider +} + +// Shutdown flushes and stops the configured telemetry providers. Shutdown is +// idempotent. +func (runtime *Runtime) Shutdown(ctx context.Context) error { + if runtime == nil { + return nil + } + + runtime.shutdownMu.Lock() + if runtime.shutdownDone { + err := runtime.shutdownErr + runtime.shutdownMu.Unlock() + return err + } + runtime.shutdownDone = true + runtime.shutdownMu.Unlock() + + var shutdownErr error + for index := len(runtime.shutdownFns) - 1; index >= 0; index-- { + shutdownErr = errors.Join(shutdownErr, runtime.shutdownFns[index](ctx)) + } + + runtime.shutdownMu.Lock() + runtime.shutdownErr = shutdownErr + runtime.shutdownMu.Unlock() + + return shutdownErr +} + +// RecordInternalHTTPRequest records one internal HTTP request outcome. +func (runtime *Runtime) RecordInternalHTTPRequest(ctx context.Context, attrs []attribute.KeyValue, duration time.Duration) { + if runtime == nil { + return + } + + options := metric.WithAttributes(attrs...) + runtime.internalHTTPRequests.Add(normalizeContext(ctx), 1, options) + runtime.internalHTTPDuration.Record(normalizeContext(ctx), duration.Seconds()*1000, options) +} + +// RecordInternalHTTPEvent records one internal HTTP server lifecycle event. +func (runtime *Runtime) RecordInternalHTTPEvent(ctx context.Context, event string) { + if runtime == nil { + return + } + + runtime.internalHTTPLifecycle.Add( + normalizeContext(ctx), + 1, + metric.WithAttributes(attribute.String("event", strings.TrimSpace(event))), + ) +} + +// RecordIntentOutcome records one accepted notification-intent outcome. 
+func (runtime *Runtime) RecordIntentOutcome(ctx context.Context, notificationType string, producer string, audienceKind string, outcome string) { + if runtime == nil { + return + } + + runtime.intentOutcomes.Add( + normalizeContext(ctx), + 1, + metric.WithAttributes( + attribute.String("notification_type", cleanAttribute(notificationType, "unknown")), + attribute.String("producer", cleanAttribute(producer, "unknown")), + attribute.String("audience_kind", cleanAttribute(audienceKind, "unknown")), + attribute.String("outcome", cleanAttribute(outcome, "unknown")), + ), + ) +} + +// RecordMalformedIntent records one malformed or rejected notification intent. +func (runtime *Runtime) RecordMalformedIntent(ctx context.Context, failureCode string, notificationType string, producer string) { + if runtime == nil { + return + } + + runtime.malformedIntents.Add( + normalizeContext(ctx), + 1, + metric.WithAttributes( + attribute.String("failure_code", cleanAttribute(failureCode, "unknown")), + attribute.String("notification_type", cleanAttribute(notificationType, "unknown")), + attribute.String("producer", cleanAttribute(producer, "unknown")), + ), + ) +} + +// RecordUserEnrichmentAttempt records one User Service enrichment lookup +// outcome. +func (runtime *Runtime) RecordUserEnrichmentAttempt(ctx context.Context, notificationType string, result string) { + if runtime == nil { + return + } + + runtime.userEnrichment.Add( + normalizeContext(ctx), + 1, + metric.WithAttributes( + attribute.String("notification_type", cleanAttribute(notificationType, "unknown")), + attribute.String("result", cleanAttribute(result, "unknown")), + ), + ) +} + +// RecordRoutePublishAttempt records one route publication attempt outcome. 
+func (runtime *Runtime) RecordRoutePublishAttempt(ctx context.Context, channel string, notificationType string, result string, failureClassification string) { + if runtime == nil { + return + } + + runtime.routePublishAttempts.Add( + normalizeContext(ctx), + 1, + metric.WithAttributes( + attribute.String("channel", cleanAttribute(channel, "unknown")), + attribute.String("notification_type", cleanAttribute(notificationType, "unknown")), + attribute.String("result", cleanAttribute(result, "unknown")), + attribute.String("failure_classification", cleanAttribute(failureClassification, "none")), + ), + ) +} + +// RecordRouteRetry records one route retry scheduling event. +func (runtime *Runtime) RecordRouteRetry(ctx context.Context, channel string, notificationType string) { + if runtime == nil { + return + } + + runtime.routeRetries.Add( + normalizeContext(ctx), + 1, + metric.WithAttributes( + attribute.String("channel", cleanAttribute(channel, "unknown")), + attribute.String("notification_type", cleanAttribute(notificationType, "unknown")), + ), + ) +} + +// RecordRouteDeadLetter records one route transition to dead_letter. +func (runtime *Runtime) RecordRouteDeadLetter(ctx context.Context, channel string, notificationType string, failureClassification string) { + if runtime == nil { + return + } + + runtime.routeDeadLetters.Add( + normalizeContext(ctx), + 1, + metric.WithAttributes( + attribute.String("channel", cleanAttribute(channel, "unknown")), + attribute.String("notification_type", cleanAttribute(notificationType, "unknown")), + attribute.String("failure_classification", cleanAttribute(failureClassification, "unknown")), + ), + ) +} + +// SetRouteScheduleSnapshotReader installs the route-schedule reader used by +// the observable route schedule gauges. 
+func (runtime *Runtime) SetRouteScheduleSnapshotReader(reader RouteScheduleSnapshotReader) { + if runtime == nil { + return + } + + runtime.routeScheduleReaderMu.Lock() + runtime.routeScheduleReader = reader + runtime.routeScheduleReaderMu.Unlock() +} + +// SetIntentStreamLagSnapshotReader installs the intent-stream lag reader used +// by the observable lag gauge. +func (runtime *Runtime) SetIntentStreamLagSnapshotReader(reader IntentStreamLagSnapshotReader) { + if runtime == nil { + return + } + + runtime.intentStreamLagReaderMu.Lock() + runtime.intentStreamLagReader = reader + runtime.intentStreamLagReaderMu.Unlock() +} + +func buildRuntime(meterProvider metric.MeterProvider, tracerProvider oteltrace.TracerProvider, shutdownFns []func(context.Context) error) (*Runtime, error) { + meter := meterProvider.Meter(meterName) + runtime := &Runtime{ + tracerProvider: tracerProvider, + meterProvider: meterProvider, + shutdownFns: append([]func(context.Context) error(nil), shutdownFns...), + } + + internalHTTPRequests, err := meter.Int64Counter("notification.internal_http.requests") + if err != nil { + return nil, fmt.Errorf("build notification telemetry runtime: internal_http.requests: %w", err) + } + internalHTTPDuration, err := meter.Float64Histogram("notification.internal_http.duration_ms", metric.WithUnit("ms")) + if err != nil { + return nil, fmt.Errorf("build notification telemetry runtime: internal_http.duration_ms: %w", err) + } + internalHTTPLifecycle, err := meter.Int64Counter("notification.internal_http.lifecycle") + if err != nil { + return nil, fmt.Errorf("build notification telemetry runtime: internal_http.lifecycle: %w", err) + } + intentOutcomes, err := meter.Int64Counter("notification.intent.outcomes") + if err != nil { + return nil, fmt.Errorf("build notification telemetry runtime: intent.outcomes: %w", err) + } + malformedIntents, err := meter.Int64Counter("notification.intent.malformed") + if err != nil { + return nil, fmt.Errorf("build notification 
telemetry runtime: intent.malformed: %w", err) + } + userEnrichment, err := meter.Int64Counter("notification.user_enrichment.attempts") + if err != nil { + return nil, fmt.Errorf("build notification telemetry runtime: user_enrichment.attempts: %w", err) + } + routePublishAttempts, err := meter.Int64Counter("notification.route.publish_attempts") + if err != nil { + return nil, fmt.Errorf("build notification telemetry runtime: route.publish_attempts: %w", err) + } + routeRetries, err := meter.Int64Counter("notification.route.retries") + if err != nil { + return nil, fmt.Errorf("build notification telemetry runtime: route.retries: %w", err) + } + routeDeadLetters, err := meter.Int64Counter("notification.route.dead_letters") + if err != nil { + return nil, fmt.Errorf("build notification telemetry runtime: route.dead_letters: %w", err) + } + routeScheduleDepth, err := meter.Int64ObservableGauge("notification.route_schedule.depth") + if err != nil { + return nil, fmt.Errorf("build notification telemetry runtime: route_schedule.depth: %w", err) + } + routeScheduleOldestAge, err := meter.Int64ObservableGauge("notification.route_schedule.oldest_age_ms", metric.WithUnit("ms")) + if err != nil { + return nil, fmt.Errorf("build notification telemetry runtime: route_schedule.oldest_age_ms: %w", err) + } + intentStreamOldestUnprocessedAge, err := meter.Int64ObservableGauge("notification.intent_stream.oldest_unprocessed_age_ms", metric.WithUnit("ms")) + if err != nil { + return nil, fmt.Errorf("build notification telemetry runtime: intent_stream.oldest_unprocessed_age_ms: %w", err) + } + registration, err := meter.RegisterCallback(func(ctx context.Context, observer metric.Observer) error { + runtime.observeRouteSchedule(ctx, observer, routeScheduleDepth, routeScheduleOldestAge) + runtime.observeIntentStreamLag(ctx, observer, intentStreamOldestUnprocessedAge) + return nil + }, routeScheduleDepth, routeScheduleOldestAge, intentStreamOldestUnprocessedAge) + if err != nil { + return 
nil, fmt.Errorf("build notification telemetry runtime: observable callbacks: %w", err) + } + runtime.shutdownFns = append(runtime.shutdownFns, func(context.Context) error { + return registration.Unregister() + }) + + runtime.internalHTTPRequests = internalHTTPRequests + runtime.internalHTTPDuration = internalHTTPDuration + runtime.internalHTTPLifecycle = internalHTTPLifecycle + runtime.intentOutcomes = intentOutcomes + runtime.malformedIntents = malformedIntents + runtime.userEnrichment = userEnrichment + runtime.routePublishAttempts = routePublishAttempts + runtime.routeRetries = routeRetries + runtime.routeDeadLetters = routeDeadLetters + + return runtime, nil +} + +func newTracerProvider(ctx context.Context, res *resource.Resource, cfg ProcessConfig) (*sdktrace.TracerProvider, error) { + options := []sdktrace.TracerProviderOption{ + sdktrace.WithResource(res), + } + + if exporter, err := traceExporter(ctx, cfg); err != nil { + return nil, err + } else if exporter != nil { + options = append(options, sdktrace.WithBatcher(exporter)) + } + + if cfg.StdoutTracesEnabled { + exporter, err := stdouttrace.New(stdouttrace.WithWriter(os.Stdout)) + if err != nil { + return nil, fmt.Errorf("stdout traces exporter: %w", err) + } + options = append(options, sdktrace.WithBatcher(exporter)) + } + + return sdktrace.NewTracerProvider(options...), nil +} + +func newMeterProvider(ctx context.Context, res *resource.Resource, cfg ProcessConfig) (*sdkmetric.MeterProvider, error) { + options := []sdkmetric.Option{ + sdkmetric.WithResource(res), + } + + if exporter, err := metricExporter(ctx, cfg); err != nil { + return nil, err + } else if exporter != nil { + options = append(options, sdkmetric.WithReader(sdkmetric.NewPeriodicReader(exporter))) + } + + if cfg.StdoutMetricsEnabled { + exporter, err := stdoutmetric.New(stdoutmetric.WithWriter(os.Stdout)) + if err != nil { + return nil, fmt.Errorf("stdout metrics exporter: %w", err) + } + options = append(options, 
sdkmetric.WithReader(sdkmetric.NewPeriodicReader(exporter))) + } + + return sdkmetric.NewMeterProvider(options...), nil +} + +func traceExporter(ctx context.Context, cfg ProcessConfig) (sdktrace.SpanExporter, error) { + if cfg.TracesExporter != processExporterOTLP { + return nil, nil + } + + switch normalizeProtocol(cfg.TracesProtocol) { + case processProtocolGRPC: + exporter, err := otlptracegrpc.New(ctx) + if err != nil { + return nil, fmt.Errorf("otlp grpc traces exporter: %w", err) + } + return exporter, nil + default: + exporter, err := otlptracehttp.New(ctx) + if err != nil { + return nil, fmt.Errorf("otlp http traces exporter: %w", err) + } + return exporter, nil + } +} + +func metricExporter(ctx context.Context, cfg ProcessConfig) (sdkmetric.Exporter, error) { + if cfg.MetricsExporter != processExporterOTLP { + return nil, nil + } + + switch normalizeProtocol(cfg.MetricsProtocol) { + case processProtocolGRPC: + exporter, err := otlpmetricgrpc.New(ctx) + if err != nil { + return nil, fmt.Errorf("otlp grpc metrics exporter: %w", err) + } + return exporter, nil + default: + exporter, err := otlpmetrichttp.New(ctx) + if err != nil { + return nil, fmt.Errorf("otlp http metrics exporter: %w", err) + } + return exporter, nil + } +} + +func normalizeProtocol(value string) string { + switch strings.TrimSpace(value) { + case processProtocolGRPC: + return processProtocolGRPC + default: + return processProtocolHTTPProtobuf + } +} + +func normalizeContext(ctx context.Context) context.Context { + if ctx == nil { + return context.Background() + } + + return ctx +} + +func cleanAttribute(value string, fallback string) string { + trimmed := strings.TrimSpace(value) + if trimmed == "" { + return fallback + } + + return trimmed +} + +func (runtime *Runtime) observeRouteSchedule( + ctx context.Context, + observer metric.Observer, + depthGauge metric.Int64ObservableGauge, + oldestAgeGauge metric.Int64ObservableGauge, +) { + depth := int64(0) + oldestAge := int64(0) + + reader 
:= runtime.currentRouteScheduleReader() + if reader != nil { + snapshot, err := reader.ReadRouteScheduleSnapshot(ctx) + if err != nil { + otel.Handle(fmt.Errorf("observe notification route schedule: %w", err)) + } else { + if snapshot.Depth > 0 { + depth = snapshot.Depth + } + if snapshot.OldestScheduledFor != nil { + oldestAge = time.Since(snapshot.OldestScheduledFor.UTC()).Milliseconds() + if oldestAge < 0 { + oldestAge = 0 + } + } + } + } + + observer.ObserveInt64(depthGauge, depth) + observer.ObserveInt64(oldestAgeGauge, oldestAge) +} + +func (runtime *Runtime) observeIntentStreamLag( + ctx context.Context, + observer metric.Observer, + oldestUnprocessedAgeGauge metric.Int64ObservableGauge, +) { + oldestAge := int64(0) + + reader := runtime.currentIntentStreamLagReader() + if reader != nil { + snapshot, err := reader.ReadIntentStreamLagSnapshot(ctx) + if err != nil { + otel.Handle(fmt.Errorf("observe notification intent stream lag: %w", err)) + } else if snapshot.OldestUnprocessedAt != nil { + oldestAge = time.Since(snapshot.OldestUnprocessedAt.UTC()).Milliseconds() + if oldestAge < 0 { + oldestAge = 0 + } + } + } + + observer.ObserveInt64(oldestUnprocessedAgeGauge, oldestAge) +} + +func (runtime *Runtime) currentRouteScheduleReader() RouteScheduleSnapshotReader { + runtime.routeScheduleReaderMu.RLock() + defer runtime.routeScheduleReaderMu.RUnlock() + return runtime.routeScheduleReader +} + +func (runtime *Runtime) currentIntentStreamLagReader() IntentStreamLagSnapshotReader { + runtime.intentStreamLagReaderMu.RLock() + defer runtime.intentStreamLagReaderMu.RUnlock() + return runtime.intentStreamLagReader +} diff --git a/notification/internal/telemetry/runtime_test.go b/notification/internal/telemetry/runtime_test.go new file mode 100644 index 0000000..fa7fdab --- /dev/null +++ b/notification/internal/telemetry/runtime_test.go @@ -0,0 +1,228 @@ +package telemetry + +import ( + "context" + "testing" + "time" + + "github.com/stretchr/testify/assert" + 
"github.com/stretchr/testify/require" + "go.opentelemetry.io/otel/attribute" + sdkmetric "go.opentelemetry.io/otel/sdk/metric" + "go.opentelemetry.io/otel/sdk/metric/metricdata" + sdktrace "go.opentelemetry.io/otel/sdk/trace" +) + +func TestRuntimeRecordsMetrics(t *testing.T) { + t.Parallel() + + reader := sdkmetric.NewManualReader() + meterProvider := sdkmetric.NewMeterProvider(sdkmetric.WithReader(reader)) + tracerProvider := sdktrace.NewTracerProvider() + + runtime, err := NewWithProviders(meterProvider, tracerProvider) + require.NoError(t, err) + + runtime.RecordInternalHTTPRequest(context.Background(), []attribute.KeyValue{ + attribute.String("route", "/healthz"), + attribute.String("method", "GET"), + attribute.String("edge_outcome", "success"), + }, 5*time.Millisecond) + runtime.RecordInternalHTTPEvent(context.Background(), "started") + runtime.RecordIntentOutcome(context.Background(), "game.turn.ready", "game_master", "user", "accepted") + runtime.RecordIntentOutcome(context.Background(), "game.turn.ready", "game_master", "user", "duplicate") + runtime.RecordMalformedIntent(context.Background(), "idempotency_conflict", "game.turn.ready", "game_master") + runtime.RecordUserEnrichmentAttempt(context.Background(), "game.turn.ready", "success") + runtime.RecordUserEnrichmentAttempt(context.Background(), "game.turn.ready", "recipient_not_found") + runtime.RecordRoutePublishAttempt(context.Background(), "push", "game.turn.ready", "published", "") + runtime.RecordRoutePublishAttempt(context.Background(), "email", "game.turn.ready", "retry", "mail_stream_publish_failed") + runtime.RecordRouteRetry(context.Background(), "email", "game.turn.ready") + runtime.RecordRouteDeadLetter(context.Background(), "email", "game.turn.ready", "mail_stream_publish_failed") + scheduledAt := time.Now().Add(-time.Second).UTC() + unprocessedAt := time.Now().Add(-2 * time.Second).UTC() + runtime.SetRouteScheduleSnapshotReader(stubRouteScheduleSnapshotReader{ + snapshot: 
RouteScheduleSnapshot{ + Depth: 3, + OldestScheduledFor: &scheduledAt, + }, + }) + runtime.SetIntentStreamLagSnapshotReader(stubIntentStreamLagSnapshotReader{ + snapshot: IntentStreamLagSnapshot{ + OldestUnprocessedAt: &unprocessedAt, + }, + }) + + assertMetricCount(t, reader, "notification.internal_http.requests", map[string]string{ + "route": "/healthz", + "method": "GET", + "edge_outcome": "success", + }, 1) + assertMetricCount(t, reader, "notification.internal_http.lifecycle", map[string]string{ + "event": "started", + }, 1) + assertMetricCount(t, reader, "notification.intent.outcomes", map[string]string{ + "notification_type": "game.turn.ready", + "producer": "game_master", + "audience_kind": "user", + "outcome": "accepted", + }, 1) + assertMetricCount(t, reader, "notification.intent.outcomes", map[string]string{ + "notification_type": "game.turn.ready", + "producer": "game_master", + "audience_kind": "user", + "outcome": "duplicate", + }, 1) + assertMetricCount(t, reader, "notification.intent.malformed", map[string]string{ + "failure_code": "idempotency_conflict", + "notification_type": "game.turn.ready", + "producer": "game_master", + }, 1) + assertMetricCount(t, reader, "notification.user_enrichment.attempts", map[string]string{ + "notification_type": "game.turn.ready", + "result": "success", + }, 1) + assertMetricCount(t, reader, "notification.user_enrichment.attempts", map[string]string{ + "notification_type": "game.turn.ready", + "result": "recipient_not_found", + }, 1) + assertMetricCount(t, reader, "notification.route.publish_attempts", map[string]string{ + "channel": "push", + "notification_type": "game.turn.ready", + "result": "published", + "failure_classification": "none", + }, 1) + assertMetricCount(t, reader, "notification.route.publish_attempts", map[string]string{ + "channel": "email", + "notification_type": "game.turn.ready", + "result": "retry", + "failure_classification": "mail_stream_publish_failed", + }, 1) + assertMetricCount(t, reader, 
"notification.route.retries", map[string]string{ + "channel": "email", + "notification_type": "game.turn.ready", + }, 1) + assertMetricCount(t, reader, "notification.route.dead_letters", map[string]string{ + "channel": "email", + "notification_type": "game.turn.ready", + "failure_classification": "mail_stream_publish_failed", + }, 1) + assertGaugeValue(t, reader, "notification.route_schedule.depth", nil, 3) + assertGaugePositive(t, reader, "notification.route_schedule.oldest_age_ms", nil) + assertGaugePositive(t, reader, "notification.intent_stream.oldest_unprocessed_age_ms", nil) +} + +func assertMetricCount(t *testing.T, reader *sdkmetric.ManualReader, metricName string, wantAttrs map[string]string, wantValue int64) { + t.Helper() + + var resourceMetrics metricdata.ResourceMetrics + require.NoError(t, reader.Collect(context.Background(), &resourceMetrics)) + + for _, scopeMetrics := range resourceMetrics.ScopeMetrics { + for _, metric := range scopeMetrics.Metrics { + if metric.Name != metricName { + continue + } + + sum, ok := metric.Data.(metricdata.Sum[int64]) + require.True(t, ok) + + for _, point := range sum.DataPoints { + if hasMetricAttributes(point.Attributes.ToSlice(), wantAttrs) { + assert.Equal(t, wantValue, point.Value) + return + } + } + } + } + + require.Failf(t, "test failed", "metric %q with attrs %v not found", metricName, wantAttrs) +} + +func assertGaugeValue(t *testing.T, reader *sdkmetric.ManualReader, metricName string, wantAttrs map[string]string, wantValue int64) { + t.Helper() + + var resourceMetrics metricdata.ResourceMetrics + require.NoError(t, reader.Collect(context.Background(), &resourceMetrics)) + + for _, scopeMetrics := range resourceMetrics.ScopeMetrics { + for _, metric := range scopeMetrics.Metrics { + if metric.Name != metricName { + continue + } + + gauge, ok := metric.Data.(metricdata.Gauge[int64]) + require.True(t, ok) + + for _, point := range gauge.DataPoints { + if hasMetricAttributes(point.Attributes.ToSlice(), 
wantAttrs) { + assert.Equal(t, wantValue, point.Value) + return + } + } + } + } + + require.Failf(t, "test failed", "gauge %q with attrs %v not found", metricName, wantAttrs) +} + +func assertGaugePositive(t *testing.T, reader *sdkmetric.ManualReader, metricName string, wantAttrs map[string]string) { + t.Helper() + + var resourceMetrics metricdata.ResourceMetrics + require.NoError(t, reader.Collect(context.Background(), &resourceMetrics)) + + for _, scopeMetrics := range resourceMetrics.ScopeMetrics { + for _, metric := range scopeMetrics.Metrics { + if metric.Name != metricName { + continue + } + + gauge, ok := metric.Data.(metricdata.Gauge[int64]) + require.True(t, ok) + + for _, point := range gauge.DataPoints { + if hasMetricAttributes(point.Attributes.ToSlice(), wantAttrs) { + assert.Greater(t, point.Value, int64(0)) + return + } + } + } + } + + require.Failf(t, "test failed", "gauge %q with attrs %v not found", metricName, wantAttrs) +} + +func hasMetricAttributes(values []attribute.KeyValue, want map[string]string) bool { + if len(want) == 0 { + return len(values) == 0 + } + if len(values) != len(want) { + return false + } + + for _, value := range values { + if want[string(value.Key)] != value.Value.AsString() { + return false + } + } + + return true +} + +type stubRouteScheduleSnapshotReader struct { + snapshot RouteScheduleSnapshot + err error +} + +func (reader stubRouteScheduleSnapshotReader) ReadRouteScheduleSnapshot(context.Context) (RouteScheduleSnapshot, error) { + return reader.snapshot, reader.err +} + +type stubIntentStreamLagSnapshotReader struct { + snapshot IntentStreamLagSnapshot + err error +} + +func (reader stubIntentStreamLagSnapshotReader) ReadIntentStreamLagSnapshot(context.Context) (IntentStreamLagSnapshot, error) { + return reader.snapshot, reader.err +} diff --git a/notification/internal/worker/doc.go b/notification/internal/worker/doc.go new file mode 100644 index 0000000..ad5cafe --- /dev/null +++ 
b/notification/internal/worker/doc.go @@ -0,0 +1,3 @@ +// Package worker provides the long-lived background components used by the +// runnable Notification Service process. +package worker diff --git a/notification/internal/worker/email_publisher.go b/notification/internal/worker/email_publisher.go new file mode 100644 index 0000000..95abb46 --- /dev/null +++ b/notification/internal/worker/email_publisher.go @@ -0,0 +1,421 @@ +package worker + +import ( + "context" + "errors" + "fmt" + "log/slog" + "strings" + "time" + + "galaxy/notification/internal/adapters/redisstate" + "galaxy/notification/internal/api/intentstream" + "galaxy/notification/internal/logging" + "galaxy/notification/internal/service/acceptintent" + "galaxy/notification/internal/service/publishmail" +) + +const ( + emailFailureClassificationPayloadEncoding = "payload_encoding_failed" + emailFailureClassificationMailStreamWrite = "mail_stream_publish_failed" +) + +// EmailRouteStateStore describes the durable route-state operations required +// by EmailPublisher. +type EmailRouteStateStore interface { + // ListDueRoutes loads due scheduled routes. + ListDueRoutes(context.Context, time.Time, int64) ([]redisstate.ScheduledRoute, error) + + // TryAcquireRouteLease attempts to acquire one temporary route lease. + TryAcquireRouteLease(context.Context, string, string, string, time.Duration) (bool, error) + + // ReleaseRouteLease best-effort releases one temporary route lease. + ReleaseRouteLease(context.Context, string, string, string) error + + // GetNotification loads one accepted notification. + GetNotification(context.Context, string) (acceptintent.NotificationRecord, bool, error) + + // GetRoute loads one accepted notification route. + GetRoute(context.Context, string, string) (acceptintent.NotificationRoute, bool, error) + + // CompleteRoutePublished records one successful publication. 
+ CompleteRoutePublished(context.Context, redisstate.CompleteRoutePublishedInput) error + + // CompleteRouteFailed records one retryable publication failure. + CompleteRouteFailed(context.Context, redisstate.CompleteRouteFailedInput) error + + // CompleteRouteDeadLetter records one exhausted publication failure. + CompleteRouteDeadLetter(context.Context, redisstate.CompleteRouteDeadLetterInput) error +} + +// EmailCommandEncoder encodes one email-capable notification route into a +// Mail Service-compatible generic command. +type EmailCommandEncoder interface { + // Encode converts notification plus route to one outbound command. + Encode(acceptintent.NotificationRecord, acceptintent.NotificationRoute) (publishmail.Command, error) +} + +// EmailPublisherConfig stores the dependencies and policies used by +// EmailPublisher. +type EmailPublisherConfig struct { + // Store owns the durable route-state transitions. + Store EmailRouteStateStore + + // MailDeliveryCommandsStream stores the outbound Mail Service command + // stream name. + MailDeliveryCommandsStream string + + // RouteLeaseTTL stores the temporary route-lease lifetime. + RouteLeaseTTL time.Duration + + // RouteBackoffMin stores the minimum retry backoff. + RouteBackoffMin time.Duration + + // RouteBackoffMax stores the maximum retry backoff. + RouteBackoffMax time.Duration + + // PollInterval stores how long the worker waits before the next due-route + // scan when no progress was made. + PollInterval time.Duration + + // BatchSize stores the maximum number of due schedule members loaded per + // scan. + BatchSize int64 + + // Encoder stores the email command encoder. + Encoder EmailCommandEncoder + + // Telemetry records route publication counters. + Telemetry RoutePublisherTelemetry + + // Clock provides wall-clock timestamps. + Clock Clock +} + +// EmailPublisher publishes due email routes into the Mail Service command +// stream with retry and dead-letter handling. 
+type EmailPublisher struct { + store EmailRouteStateStore + mailDeliveryCommandsStream string + routeLeaseTTL time.Duration + routeBackoffMin time.Duration + routeBackoffMax time.Duration + pollInterval time.Duration + batchSize int64 + encoder EmailCommandEncoder + telemetry RoutePublisherTelemetry + clock Clock + workerToken string + logger *slog.Logger +} + +// NewEmailPublisher constructs the email publication worker. +func NewEmailPublisher(cfg EmailPublisherConfig, logger *slog.Logger) (*EmailPublisher, error) { + switch { + case cfg.Store == nil: + return nil, errors.New("new email publisher: nil store") + case strings.TrimSpace(cfg.MailDeliveryCommandsStream) == "": + return nil, errors.New("new email publisher: mail delivery-commands stream must not be empty") + case cfg.RouteLeaseTTL <= 0: + return nil, errors.New("new email publisher: route lease ttl must be positive") + case cfg.RouteBackoffMin <= 0: + return nil, errors.New("new email publisher: route backoff min must be positive") + case cfg.RouteBackoffMax <= 0: + return nil, errors.New("new email publisher: route backoff max must be positive") + case cfg.RouteBackoffMin > cfg.RouteBackoffMax: + return nil, errors.New("new email publisher: route backoff min must not exceed route backoff max") + } + if cfg.PollInterval <= 0 { + cfg.PollInterval = defaultPushPublisherPollInterval + } + if cfg.BatchSize <= 0 { + cfg.BatchSize = defaultPushPublisherBatchSize + } + if cfg.Clock == nil { + cfg.Clock = systemClock{} + } + if cfg.Encoder == nil { + cfg.Encoder = publishmail.Encoder{} + } + if logger == nil { + logger = slog.Default() + } + + workerToken, err := newWorkerToken() + if err != nil { + return nil, fmt.Errorf("new email publisher: %w", err) + } + + return &EmailPublisher{ + store: cfg.Store, + mailDeliveryCommandsStream: cfg.MailDeliveryCommandsStream, + routeLeaseTTL: cfg.RouteLeaseTTL, + routeBackoffMin: cfg.RouteBackoffMin, + routeBackoffMax: cfg.RouteBackoffMax, + pollInterval: 
cfg.PollInterval, + batchSize: cfg.BatchSize, + encoder: cfg.Encoder, + telemetry: cfg.Telemetry, + clock: cfg.Clock, + workerToken: workerToken, + logger: logger.With("component", "email_publisher", "stream", cfg.MailDeliveryCommandsStream), + }, nil +} + +// Run starts the email publication loop and blocks until ctx is canceled or +// an unexpected publication error occurs. +func (publisher *EmailPublisher) Run(ctx context.Context) error { + if ctx == nil { + return errors.New("run email publisher: nil context") + } + if err := ctx.Err(); err != nil { + return err + } + if publisher == nil { + return errors.New("run email publisher: nil publisher") + } + + publisher.logger.Info("email publisher started", + "poll_interval", publisher.pollInterval.String(), + "batch_size", publisher.batchSize, + ) + + for { + progress, err := publisher.publishDueRoutes(ctx) + switch { + case err == nil && progress: + continue + case err == nil: + if waitErr := waitWithContext(ctx, publisher.pollInterval); waitErr != nil { + publisher.logger.Info("email publisher stopped") + return waitErr + } + case ctx.Err() != nil && (errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded)): + publisher.logger.Info("email publisher stopped") + return ctx.Err() + default: + return fmt.Errorf("run email publisher: %w", err) + } + } +} + +// Shutdown stops the email publisher within ctx. The worker relies on context +// cancellation and a bounded polling interval, so it has no dedicated +// resources to release here. 
+func (publisher *EmailPublisher) Shutdown(ctx context.Context) error { + if ctx == nil { + return errors.New("shutdown email publisher: nil context") + } + if publisher == nil { + return nil + } + + return nil +} + +func (publisher *EmailPublisher) publishDueRoutes(ctx context.Context) (bool, error) { + now := publisher.now() + + dueRoutes, err := publisher.store.ListDueRoutes(ctx, now, publisher.batchSize) + if err != nil { + return false, err + } + + progress := false + for _, dueRoute := range dueRoutes { + if !strings.HasPrefix(dueRoute.RouteID, "email:") { + continue + } + + processed, err := publisher.publishRoute(ctx, now, dueRoute) + if err != nil { + return progress, err + } + progress = progress || processed + } + + return progress, nil +} + +func (publisher *EmailPublisher) publishRoute(ctx context.Context, now time.Time, dueRoute redisstate.ScheduledRoute) (bool, error) { + acquired, err := publisher.store.TryAcquireRouteLease(ctx, dueRoute.NotificationID, dueRoute.RouteID, publisher.workerToken, publisher.routeLeaseTTL) + if err != nil { + return false, fmt.Errorf("acquire route lease %q: %w", dueRoute.RouteID, err) + } + if !acquired { + return false, nil + } + defer func() { + releaseCtx, cancel := context.WithTimeout(context.Background(), publisher.routeLeaseTTL) + defer cancel() + _ = publisher.store.ReleaseRouteLease(releaseCtx, dueRoute.NotificationID, dueRoute.RouteID, publisher.workerToken) + }() + + notification, found, err := publisher.store.GetNotification(ctx, dueRoute.NotificationID) + if err != nil { + return false, fmt.Errorf("load notification %q: %w", dueRoute.NotificationID, err) + } + if !found { + return false, fmt.Errorf("notification %q is missing for route %q", dueRoute.NotificationID, dueRoute.RouteID) + } + + route, found, err := publisher.store.GetRoute(ctx, dueRoute.NotificationID, dueRoute.RouteID) + if err != nil { + return false, fmt.Errorf("load route %q: %w", dueRoute.RouteID, err) + } + if !found { + return false, 
fmt.Errorf("route %q is missing for notification %q", dueRoute.RouteID, dueRoute.NotificationID) + } + if route.Channel != intentstream.ChannelEmail { + return false, nil + } + switch route.Status { + case acceptintent.RouteStatusPending, acceptintent.RouteStatusFailed: + default: + return false, nil + } + if route.NextAttemptAt.After(now) { + return false, nil + } + + command, err := publisher.encoder.Encode(notification, route) + if err != nil { + return publisher.recordFailure(ctx, notification, route, emailFailureClassificationPayloadEncoding, err.Error()) + } + + err = publisher.store.CompleteRoutePublished(ctx, redisstate.CompleteRoutePublishedInput{ + ExpectedRoute: route, + LeaseToken: publisher.workerToken, + PublishedAt: publisher.now(), + Stream: publisher.mailDeliveryCommandsStream, + StreamMaxLen: 0, + StreamValues: command.Values(), + }) + switch { + case err == nil: + publisher.recordPublishAttempt(ctx, notification, route, "published", "") + logArgs := logging.RouteAttrs( + notification.NotificationID, + notification.NotificationType, + notification.Producer, + notification.AudienceKind, + notification.IdempotencyKey, + notification.RequestID, + notification.TraceID, + route.RouteID, + route.Channel, + ) + logArgs = append(logArgs, + "delivery_id", command.DeliveryID, + "resolved_email", route.ResolvedEmail, + ) + logArgs = append(logArgs, logging.TraceAttrsFromContext(ctx)...) + publisher.logger.Info("email route published", logArgs...) 
+ return true, nil + case errors.Is(err, redisstate.ErrConflict): + return false, nil + default: + return publisher.recordFailure(ctx, notification, route, emailFailureClassificationMailStreamWrite, err.Error()) + } +} + +func (publisher *EmailPublisher) recordFailure( + ctx context.Context, + notification acceptintent.NotificationRecord, + route acceptintent.NotificationRoute, + classification string, + message string, +) (bool, error) { + failureAt := publisher.now() + attemptNumber := route.AttemptCount + 1 + logArgs := logging.RouteAttrs( + notification.NotificationID, + notification.NotificationType, + notification.Producer, + notification.AudienceKind, + notification.IdempotencyKey, + notification.RequestID, + notification.TraceID, + route.RouteID, + route.Channel, + ) + logArgs = append(logArgs, + "resolved_email", route.ResolvedEmail, + "failure_classification", classification, + "failure_message", strings.TrimSpace(message), + "attempt_number", attemptNumber, + "max_attempts", route.MaxAttempts, + ) + logArgs = append(logArgs, logging.TraceAttrsFromContext(ctx)...) + + if attemptNumber >= route.MaxAttempts { + err := publisher.store.CompleteRouteDeadLetter(ctx, redisstate.CompleteRouteDeadLetterInput{ + ExpectedRoute: route, + LeaseToken: publisher.workerToken, + DeadLetteredAt: failureAt, + FailureClassification: classification, + FailureMessage: strings.TrimSpace(message), + }) + switch { + case err == nil: + publisher.recordPublishAttempt(ctx, notification, route, "dead_letter", classification) + publisher.recordRouteDeadLetter(ctx, notification, route, classification) + publisher.logger.Warn("email route dead-lettered", logArgs...) 
+ return true, nil + case errors.Is(err, redisstate.ErrConflict): + return false, nil + default: + return false, fmt.Errorf("dead-letter route %q: %w", route.RouteID, err) + } + } + + nextAttemptAt := failureAt.Add(routeBackoffDelay(attemptNumber, publisher.routeBackoffMin, publisher.routeBackoffMax)).UTC().Truncate(time.Millisecond) + err := publisher.store.CompleteRouteFailed(ctx, redisstate.CompleteRouteFailedInput{ + ExpectedRoute: route, + LeaseToken: publisher.workerToken, + FailedAt: failureAt, + NextAttemptAt: nextAttemptAt, + FailureClassification: classification, + FailureMessage: strings.TrimSpace(message), + }) + switch { + case err == nil: + publisher.recordPublishAttempt(ctx, notification, route, "retry", classification) + publisher.recordRouteRetry(ctx, notification, route) + logArgs = append(logArgs, "next_attempt_at", nextAttemptAt) + publisher.logger.Warn("email route failed and was rescheduled", logArgs...) + return true, nil + case errors.Is(err, redisstate.ErrConflict): + return false, nil + default: + return false, fmt.Errorf("reschedule route %q: %w", route.RouteID, err) + } +} + +func (publisher *EmailPublisher) now() time.Time { + return publisher.clock.Now().UTC().Truncate(time.Millisecond) +} + +func (publisher *EmailPublisher) recordPublishAttempt(ctx context.Context, notification acceptintent.NotificationRecord, route acceptintent.NotificationRoute, result string, classification string) { + if publisher == nil || publisher.telemetry == nil { + return + } + + publisher.telemetry.RecordRoutePublishAttempt(ctx, string(route.Channel), string(notification.NotificationType), result, classification) +} + +func (publisher *EmailPublisher) recordRouteRetry(ctx context.Context, notification acceptintent.NotificationRecord, route acceptintent.NotificationRoute) { + if publisher == nil || publisher.telemetry == nil { + return + } + + publisher.telemetry.RecordRouteRetry(ctx, string(route.Channel), string(notification.NotificationType)) +} + +func 
(publisher *EmailPublisher) recordRouteDeadLetter(ctx context.Context, notification acceptintent.NotificationRecord, route acceptintent.NotificationRoute, classification string) { + if publisher == nil || publisher.telemetry == nil { + return + } + + publisher.telemetry.RecordRouteDeadLetter(ctx, string(route.Channel), string(notification.NotificationType), classification) +} diff --git a/notification/internal/worker/email_publisher_test.go b/notification/internal/worker/email_publisher_test.go new file mode 100644 index 0000000..a6a278c --- /dev/null +++ b/notification/internal/worker/email_publisher_test.go @@ -0,0 +1,232 @@ +package worker + +import ( + "context" + "testing" + "time" + + redisstate "galaxy/notification/internal/adapters/redisstate" + "galaxy/notification/internal/service/acceptintent" + + "github.com/alicebob/miniredis/v2" + "github.com/redis/go-redis/v9" + "github.com/stretchr/testify/require" +) + +func TestEmailPublisherPublishesDueEmailRouteAndLeavesPushRoutePending(t *testing.T) { + t.Parallel() + + fixture := newEmailPublisherFixture(t) + require.NoError(t, fixture.store.CreateAcceptance(context.Background(), validEmailAcceptanceInput(fixture.now, 0))) + + running := runEmailPublisher(t, fixture.publisher) + defer running.stop(t) + + require.Eventually(t, func() bool { + route, found, err := fixture.store.GetRoute(context.Background(), "1775121700000-0", "email:user:user-1") + return err == nil && found && route.Status == acceptintent.RouteStatusPublished + }, time.Second, 10*time.Millisecond) + + pushRoute, found, err := fixture.store.GetRoute(context.Background(), "1775121700000-0", "push:user:user-1") + require.NoError(t, err) + require.True(t, found) + require.Equal(t, acceptintent.RouteStatusPending, pushRoute.Status) + + messages, err := fixture.client.XRange(context.Background(), fixture.mailStream, "-", "+").Result() + require.NoError(t, err) + require.Len(t, messages, 1) + require.Equal(t, "1775121700000-0/email:user:user-1", 
messages[0].Values["delivery_id"]) + require.Equal(t, "notification", messages[0].Values["source"]) + require.Equal(t, "template", messages[0].Values["payload_mode"]) + require.True(t, fixture.telemetry.hasRoutePublishAttempt("email", "published", "")) +} + +func TestEmailPublisherRetriesMailStreamPublicationFailures(t *testing.T) { + t.Parallel() + + fixture := newEmailPublisherFixture(t) + require.NoError(t, fixture.store.CreateAcceptance(context.Background(), validEmailAcceptanceInput(fixture.now, 0))) + require.NoError(t, fixture.client.Set(context.Background(), fixture.mailStream, "wrong-type", 0).Err()) + + running := runEmailPublisher(t, fixture.publisher) + defer running.stop(t) + + require.Eventually(t, func() bool { + route, found, err := fixture.store.GetRoute(context.Background(), "1775121700000-0", "email:user:user-1") + return err == nil && found && route.Status == acceptintent.RouteStatusFailed && route.AttemptCount == 1 + }, time.Second, 10*time.Millisecond) + require.True(t, fixture.telemetry.hasRoutePublishAttempt("email", "retry", emailFailureClassificationMailStreamWrite)) + require.True(t, fixture.telemetry.hasRouteRetry("email")) + + require.NoError(t, fixture.client.Del(context.Background(), fixture.mailStream).Err()) + + require.Eventually(t, func() bool { + route, found, err := fixture.store.GetRoute(context.Background(), "1775121700000-0", "email:user:user-1") + return err == nil && found && route.Status == acceptintent.RouteStatusPublished && route.AttemptCount == 2 + }, 2*time.Second, 10*time.Millisecond) + + messages, err := fixture.client.XRange(context.Background(), fixture.mailStream, "-", "+").Result() + require.NoError(t, err) + require.Len(t, messages, 1) + require.True(t, fixture.telemetry.hasRoutePublishAttempt("email", "published", "")) +} + +func TestEmailPublisherLeasePreventsDuplicatePublicationAcrossReplicas(t *testing.T) { + t.Parallel() + + fixture := newEmailPublisherFixture(t) + require.NoError(t, 
fixture.store.CreateAcceptance(context.Background(), validEmailAcceptanceInput(fixture.now, 0))) + + otherPublisher, err := NewEmailPublisher(EmailPublisherConfig{ + Store: fixture.store, + MailDeliveryCommandsStream: fixture.mailStream, + RouteLeaseTTL: 200 * time.Millisecond, + RouteBackoffMin: 20 * time.Millisecond, + RouteBackoffMax: 20 * time.Millisecond, + PollInterval: 10 * time.Millisecond, + BatchSize: 16, + Clock: newSteppingClock(fixture.now, time.Millisecond), + }, testWorkerLogger()) + require.NoError(t, err) + + first := runEmailPublisher(t, fixture.publisher) + defer first.stop(t) + second := runEmailPublisher(t, otherPublisher) + defer second.stop(t) + + require.Eventually(t, func() bool { + route, found, err := fixture.store.GetRoute(context.Background(), "1775121700000-0", "email:user:user-1") + return err == nil && found && route.Status == acceptintent.RouteStatusPublished + }, time.Second, 10*time.Millisecond) + + messages, err := fixture.client.XRange(context.Background(), fixture.mailStream, "-", "+").Result() + require.NoError(t, err) + require.Len(t, messages, 1) +} + +func TestEmailPublisherDeadLettersExhaustedRoute(t *testing.T) { + t.Parallel() + + fixture := newEmailPublisherFixture(t) + require.NoError(t, fixture.store.CreateAcceptance(context.Background(), validEmailAcceptanceInput(fixture.now, 6))) + require.NoError(t, fixture.client.Set(context.Background(), fixture.mailStream, "wrong-type", 0).Err()) + + running := runEmailPublisher(t, fixture.publisher) + defer running.stop(t) + + require.Eventually(t, func() bool { + route, found, err := fixture.store.GetRoute(context.Background(), "1775121700000-0", "email:user:user-1") + return err == nil && found && route.Status == acceptintent.RouteStatusDeadLetter && route.AttemptCount == 7 + }, time.Second, 10*time.Millisecond) + + deadLetterPayload, err := fixture.client.Get(context.Background(), redisstate.Keyspace{}.DeadLetter("1775121700000-0", "email:user:user-1")).Bytes() + 
require.NoError(t, err) + deadLetter, err := redisstate.UnmarshalDeadLetter(deadLetterPayload) + require.NoError(t, err) + require.Equal(t, emailFailureClassificationMailStreamWrite, deadLetter.FailureClassification) + require.True(t, fixture.telemetry.hasRoutePublishAttempt("email", "dead_letter", emailFailureClassificationMailStreamWrite)) + require.True(t, fixture.telemetry.hasRouteDeadLetter("email", emailFailureClassificationMailStreamWrite)) +} + +type emailPublisherFixture struct { + client *redis.Client + store *redisstate.AcceptanceStore + publisher *EmailPublisher + mailStream string + now time.Time + clock *steppingClock + telemetry *recordingWorkerTelemetry +} + +func newEmailPublisherFixture(t *testing.T) emailPublisherFixture { + t.Helper() + + server := miniredis.RunT(t) + client := redis.NewClient(&redis.Options{ + Addr: server.Addr(), + Protocol: 2, + DisableIdentity: true, + }) + t.Cleanup(func() { + require.NoError(t, client.Close()) + }) + + store, err := redisstate.NewAcceptanceStore(client, redisstate.AcceptanceConfig{ + RecordTTL: 24 * time.Hour, + DeadLetterTTL: 72 * time.Hour, + IdempotencyTTL: 7 * 24 * time.Hour, + }) + require.NoError(t, err) + + now := time.UnixMilli(1775121700000).UTC() + clock := newSteppingClock(now, time.Millisecond) + telemetry := &recordingWorkerTelemetry{} + publisher, err := NewEmailPublisher(EmailPublisherConfig{ + Store: store, + MailDeliveryCommandsStream: "mail:delivery_commands", + RouteLeaseTTL: 200 * time.Millisecond, + RouteBackoffMin: 20 * time.Millisecond, + RouteBackoffMax: 20 * time.Millisecond, + PollInterval: 10 * time.Millisecond, + BatchSize: 16, + Telemetry: telemetry, + Clock: clock, + }, testWorkerLogger()) + require.NoError(t, err) + + return emailPublisherFixture{ + client: client, + store: store, + publisher: publisher, + mailStream: "mail:delivery_commands", + now: now, + clock: clock, + telemetry: telemetry, + } +} + +func validEmailAcceptanceInput(now time.Time, emailAttemptCount int) 
acceptintent.CreateAcceptanceInput { + input := validPushAcceptanceInput(now) + for index := range input.Routes { + if input.Routes[index].RouteID != "email:user:user-1" { + continue + } + input.Routes[index].AttemptCount = emailAttemptCount + input.Routes[index].MaxAttempts = 7 + } + + return input +} + +type runningEmailPublisher struct { + cancel context.CancelFunc + resultCh chan error +} + +func runEmailPublisher(t *testing.T, publisher *EmailPublisher) runningEmailPublisher { + t.Helper() + + ctx, cancel := context.WithCancel(context.Background()) + resultCh := make(chan error, 1) + go func() { + resultCh <- publisher.Run(ctx) + }() + + return runningEmailPublisher{ + cancel: cancel, + resultCh: resultCh, + } +} + +func (r runningEmailPublisher) stop(t *testing.T) { + t.Helper() + + r.cancel() + + select { + case err := <-r.resultCh: + require.ErrorIs(t, err, context.Canceled) + case <-time.After(time.Second): + require.FailNow(t, "email publisher did not stop") + } +} diff --git a/notification/internal/worker/intent_consumer.go b/notification/internal/worker/intent_consumer.go new file mode 100644 index 0000000..260b86d --- /dev/null +++ b/notification/internal/worker/intent_consumer.go @@ -0,0 +1,331 @@ +package worker + +import ( + "context" + "errors" + "fmt" + "log/slog" + "strings" + "time" + + "galaxy/notification/internal/api/intentstream" + "galaxy/notification/internal/logging" + "galaxy/notification/internal/service/acceptintent" + "galaxy/notification/internal/service/malformedintent" + + "github.com/redis/go-redis/v9" +) + +// AcceptIntentUseCase accepts one normalized notification intent. +type AcceptIntentUseCase interface { + // Execute durably accepts one normalized notification intent. + Execute(context.Context, acceptintent.AcceptInput) (acceptintent.Result, error) +} + +// MalformedIntentRecorder stores one operator-visible malformed-intent record. 
+type MalformedIntentRecorder interface { + // Record persists entry idempotently by stream entry id. + Record(context.Context, malformedintent.Entry) error +} + +// StreamOffsetStore stores the last durably processed entry id of one plain +// XREAD consumer. +type StreamOffsetStore interface { + // Load returns the last processed entry id for stream when one is stored. + Load(context.Context, string) (string, bool, error) + + // Save stores the last processed entry id for stream. + Save(context.Context, string, string) error +} + +// IntentConsumerTelemetry records low-cardinality stream-consumer events. +type IntentConsumerTelemetry interface { + // RecordMalformedIntent records one malformed or rejected notification + // intent. + RecordMalformedIntent(context.Context, string, string, string) +} + +// Clock provides the current wall-clock time. +type Clock interface { + // Now returns the current time. + Now() time.Time +} + +type systemClock struct{} + +func (systemClock) Now() time.Time { + return time.Now() +} + +// IntentConsumerConfig stores the dependencies used by IntentConsumer. +type IntentConsumerConfig struct { + // Client stores the Redis client used for XREAD. + Client *redis.Client + + // Stream stores the Redis Stream name to consume. + Stream string + + // BlockTimeout stores the blocking XREAD timeout. + BlockTimeout time.Duration + + // Acceptor durably accepts valid notification intents. + Acceptor AcceptIntentUseCase + + // MalformedRecorder persists operator-visible malformed-intent entries. + MalformedRecorder MalformedIntentRecorder + + // OffsetStore stores the last durably processed stream entry id. + OffsetStore StreamOffsetStore + + // Telemetry records malformed-intent counters. + Telemetry IntentConsumerTelemetry + + // Clock provides wall-clock timestamps for malformed-intent records. + Clock Clock +} + +// IntentConsumer stores the Redis Streams consumer used for notification +// intent intake. 
+type IntentConsumer struct { + client *redis.Client + stream string + blockTimeout time.Duration + acceptor AcceptIntentUseCase + malformedRecorder MalformedIntentRecorder + offsetStore StreamOffsetStore + telemetry IntentConsumerTelemetry + clock Clock + logger *slog.Logger +} + +// NewIntentConsumer constructs the notification-intent consumer. +func NewIntentConsumer(cfg IntentConsumerConfig, logger *slog.Logger) (*IntentConsumer, error) { + switch { + case cfg.Client == nil: + return nil, errors.New("new intent consumer: nil redis client") + case strings.TrimSpace(cfg.Stream) == "": + return nil, errors.New("new intent consumer: stream must not be empty") + case cfg.BlockTimeout <= 0: + return nil, errors.New("new intent consumer: block timeout must be positive") + case cfg.Acceptor == nil: + return nil, errors.New("new intent consumer: nil acceptor") + case cfg.MalformedRecorder == nil: + return nil, errors.New("new intent consumer: nil malformed recorder") + case cfg.OffsetStore == nil: + return nil, errors.New("new intent consumer: nil offset store") + } + if cfg.Clock == nil { + cfg.Clock = systemClock{} + } + if logger == nil { + logger = slog.Default() + } + + return &IntentConsumer{ + client: cfg.Client, + stream: cfg.Stream, + blockTimeout: cfg.BlockTimeout, + acceptor: cfg.Acceptor, + malformedRecorder: cfg.MalformedRecorder, + offsetStore: cfg.OffsetStore, + telemetry: cfg.Telemetry, + clock: cfg.Clock, + logger: logger.With("component", "intent_consumer", "stream", cfg.Stream), + }, nil +} + +// Run starts the intent consumer and blocks until ctx is canceled or Redis +// returns an unexpected error. 
+func (consumer *IntentConsumer) Run(ctx context.Context) error { + if ctx == nil { + return errors.New("run intent consumer: nil context") + } + if err := ctx.Err(); err != nil { + return err + } + if consumer == nil || consumer.client == nil { + return errors.New("run intent consumer: nil consumer") + } + + lastID, found, err := consumer.offsetStore.Load(ctx, consumer.stream) + if err != nil { + return fmt.Errorf("run intent consumer: load stream offset: %w", err) + } + if !found { + lastID = "0-0" + } + + consumer.logger.Info("intent consumer started", "block_timeout", consumer.blockTimeout.String(), "start_entry_id", lastID) + + for { + streams, err := consumer.client.XRead(ctx, &redis.XReadArgs{ + Streams: []string{consumer.stream, lastID}, + Count: 1, + Block: consumer.blockTimeout, + }).Result() + switch { + case err == nil: + for _, stream := range streams { + for _, message := range stream.Messages { + if err := consumer.handleMessage(ctx, message); err != nil { + return err + } + if err := consumer.offsetStore.Save(ctx, consumer.stream, message.ID); err != nil { + return fmt.Errorf("run intent consumer: save stream offset: %w", err) + } + lastID = message.ID + } + } + case errors.Is(err, redis.Nil): + continue + case ctx.Err() != nil && (errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) || errors.Is(err, redis.ErrClosed)): + consumer.logger.Info("intent consumer stopped") + return ctx.Err() + case errors.Is(err, context.Canceled), errors.Is(err, context.DeadlineExceeded), errors.Is(err, redis.ErrClosed): + return fmt.Errorf("run intent consumer: %w", err) + default: + return fmt.Errorf("run intent consumer: %w", err) + } + } +} + +func (consumer *IntentConsumer) handleMessage(ctx context.Context, message redis.XMessage) error { + rawFields := cloneRawFields(message.Values) + + intent, err := intentstream.DecodeIntent(rawFields) + if err != nil { + return consumer.recordMalformed( + ctx, + message.ID, + rawFields, + 
intentstream.ClassifyDecodeError(err), + err, + ) + } + + result, err := consumer.acceptor.Execute(ctx, acceptintent.AcceptInput{ + NotificationID: message.ID, + Intent: intent, + }) + switch { + case err == nil: + logArgs := []any{ + "stream_entry_id", message.ID, + "notification_id", message.ID, + } + logArgs = append(logArgs, logging.IntentAttrs(intent)...) + logArgs = append(logArgs, + "outcome", string(result.Outcome), + ) + logArgs = append(logArgs, logging.TraceAttrsFromContext(ctx)...) + consumer.logger.Info("notification intent handled", logArgs...) + return nil + case errors.Is(err, acceptintent.ErrConflict): + return consumer.recordMalformed(ctx, message.ID, rawFields, malformedintent.FailureCodeIdempotencyConflict, err) + case errors.Is(err, acceptintent.ErrRecipientNotFound): + return consumer.recordMalformed(ctx, message.ID, rawFields, malformedintent.FailureCodeRecipientNotFound, err) + case errors.Is(err, acceptintent.ErrServiceUnavailable): + return fmt.Errorf("handle intent %q: %w", message.ID, err) + default: + return fmt.Errorf("handle intent %q: %w", message.ID, err) + } +} + +func (consumer *IntentConsumer) recordMalformed( + ctx context.Context, + streamEntryID string, + rawFields map[string]any, + failureCode malformedintent.FailureCode, + cause error, +) error { + entry := malformedintent.Entry{ + StreamEntryID: streamEntryID, + NotificationType: optionalRawString(rawFields, "notification_type"), + Producer: optionalRawString(rawFields, "producer"), + IdempotencyKey: optionalRawString(rawFields, "idempotency_key"), + FailureCode: failureCode, + FailureMessage: strings.TrimSpace(cause.Error()), + RawFields: cloneRawFields(rawFields), + RecordedAt: consumer.clock.Now().UTC().Truncate(time.Millisecond), + } + if err := consumer.malformedRecorder.Record(ctx, entry); err != nil { + return fmt.Errorf("record malformed intent %q: %w", streamEntryID, err) + } + if consumer.telemetry != nil { + consumer.telemetry.RecordMalformedIntent(ctx, 
string(failureCode), entry.NotificationType, entry.Producer) + } + + logArgs := []any{ + "stream_entry_id", streamEntryID, + "notification_type", entry.NotificationType, + "producer", entry.Producer, + "idempotency_key", entry.IdempotencyKey, + "failure_code", string(entry.FailureCode), + "failure_message", entry.FailureMessage, + } + if traceID := optionalRawString(rawFields, "trace_id"); traceID != "" { + logArgs = append(logArgs, "trace_id", traceID) + } + logArgs = append(logArgs, logging.TraceAttrsFromContext(ctx)...) + consumer.logger.Warn("notification intent rejected", logArgs...) + + return nil +} + +func cloneRawFields(values map[string]any) map[string]any { + if values == nil { + return map[string]any{} + } + + cloned := make(map[string]any, len(values)) + for key, value := range values { + cloned[key] = cloneRawValue(value) + } + + return cloned +} + +func cloneRawValue(value any) any { + switch typed := value.(type) { + case map[string]any: + return cloneRawFields(typed) + case []any: + cloned := make([]any, len(typed)) + for index, item := range typed { + cloned[index] = cloneRawValue(item) + } + return cloned + default: + return typed + } +} + +func optionalRawString(values map[string]any, key string) string { + raw, ok := values[key] + if !ok { + return "" + } + + switch typed := raw.(type) { + case string: + return typed + case []byte: + return string(typed) + default: + return "" + } +} + +// Shutdown stops the intent consumer within ctx. The consumer relies on +// context cancellation and a bounded block timeout, so it has no dedicated +// resources to release here. 
+func (consumer *IntentConsumer) Shutdown(ctx context.Context) error { + if ctx == nil { + return errors.New("shutdown intent consumer: nil context") + } + if consumer == nil { + return nil + } + + return nil +} diff --git a/notification/internal/worker/intent_consumer_test.go b/notification/internal/worker/intent_consumer_test.go new file mode 100644 index 0000000..a77ff97 --- /dev/null +++ b/notification/internal/worker/intent_consumer_test.go @@ -0,0 +1,422 @@ +package worker + +import ( + "context" + "errors" + "io" + "log/slog" + "testing" + "time" + + redisstate "galaxy/notification/internal/adapters/redisstate" + "galaxy/notification/internal/config" + "galaxy/notification/internal/service/acceptintent" + "galaxy/notification/internal/service/malformedintent" + + "github.com/alicebob/miniredis/v2" + "github.com/redis/go-redis/v9" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestIntentConsumerStartsFromZeroOffsetWhenNoStoredOffsetExists(t *testing.T) { + t.Parallel() + + fixture := newIntentConsumerFixture(t, stubUserDirectory{ + records: map[string]acceptintent.UserRecord{ + "user-1": {Email: "pilot@example.com", PreferredLanguage: "en"}, + }, + }) + messageID := addValidIntent(t, fixture.client, fixture.stream, `{"turn_number":54,"game_name":"Nebula Clash","game_id":"game-123"}`) + + running := runIntentConsumer(t, fixture.consumer) + defer running.stop(t) + + require.Eventually(t, func() bool { + _, found, err := fixture.acceptanceStore.GetNotification(context.Background(), messageID) + return err == nil && found + }, time.Second, 10*time.Millisecond) +} + +func TestIntentConsumerContinuesFromSavedOffsetAfterRestart(t *testing.T) { + t.Parallel() + + fixture := newIntentConsumerFixture(t, stubUserDirectory{ + records: map[string]acceptintent.UserRecord{ + "user-1": {Email: "pilot@example.com", PreferredLanguage: "en"}, + }, + }) + firstID := addValidIntent(t, fixture.client, fixture.stream, 
`{"turn_number":54,"game_name":"Nebula Clash","game_id":"game-123"}`) + require.NoError(t, fixture.offsetStore.Save(context.Background(), fixture.stream, firstID)) + secondID := addValidIntent(t, fixture.client, fixture.stream, `{"turn_number":55,"game_name":"Nebula Clash","game_id":"game-123"}`) + + running := runIntentConsumer(t, fixture.consumer) + defer running.stop(t) + + require.Eventually(t, func() bool { + _, found, err := fixture.acceptanceStore.GetNotification(context.Background(), secondID) + return err == nil && found + }, time.Second, 10*time.Millisecond) + + _, found, err := fixture.acceptanceStore.GetNotification(context.Background(), firstID) + require.NoError(t, err) + require.False(t, found) +} + +func TestIntentConsumerRecordsIdempotencyConflictsAndAdvancesOffset(t *testing.T) { + t.Parallel() + + fixture := newIntentConsumerFixture(t, stubUserDirectory{ + records: map[string]acceptintent.UserRecord{ + "user-1": {Email: "pilot@example.com", PreferredLanguage: "en"}, + }, + }) + firstID := addValidIntent(t, fixture.client, fixture.stream, `{"turn_number":54,"game_name":"Nebula Clash","game_id":"game-123"}`) + secondID := addValidIntent(t, fixture.client, fixture.stream, `{"turn_number":55,"game_name":"Nebula Clash","game_id":"game-123"}`) + + running := runIntentConsumer(t, fixture.consumer) + defer running.stop(t) + + require.Eventually(t, func() bool { + payload, err := fixture.client.Get(context.Background(), redisstate.Keyspace{}.MalformedIntent(secondID)).Bytes() + if err != nil { + return false + } + entry, err := redisstate.UnmarshalMalformedIntent(payload) + if err != nil { + return false + } + return entry.FailureCode == "idempotency_conflict" + }, time.Second, 10*time.Millisecond) + + offset, found, err := fixture.offsetStore.Load(context.Background(), fixture.stream) + require.NoError(t, err) + require.True(t, found) + require.Equal(t, secondID, offset) + + _, found, err = fixture.acceptanceStore.GetNotification(context.Background(), 
firstID) + require.NoError(t, err) + require.True(t, found) + + _, found, err = fixture.acceptanceStore.GetNotification(context.Background(), secondID) + require.NoError(t, err) + require.False(t, found) +} + +func TestIntentConsumerShutdownInterruptsBlockingRead(t *testing.T) { + t.Parallel() + + fixture := newIntentConsumerFixture(t, stubUserDirectory{}) + + ctx, cancel := context.WithCancel(context.Background()) + resultCh := make(chan error, 1) + go func() { + resultCh <- fixture.consumer.Run(ctx) + }() + + time.Sleep(50 * time.Millisecond) + cancel() + + select { + case err := <-resultCh: + require.ErrorIs(t, err, context.Canceled) + case <-time.After(time.Second): + require.FailNow(t, "intent consumer did not stop after shutdown") + } +} + +func TestIntentConsumerRecordsRecipientNotFoundAndAdvancesOffset(t *testing.T) { + t.Parallel() + + fixture := newIntentConsumerFixture(t, stubUserDirectory{}) + messageID := addValidIntent(t, fixture.client, fixture.stream, `{"turn_number":54,"game_name":"Nebula Clash","game_id":"game-123"}`) + + running := runIntentConsumer(t, fixture.consumer) + defer running.stop(t) + + require.Eventually(t, func() bool { + payload, err := fixture.client.Get(context.Background(), redisstate.Keyspace{}.MalformedIntent(messageID)).Bytes() + if err != nil { + return false + } + entry, err := redisstate.UnmarshalMalformedIntent(payload) + if err != nil { + return false + } + return entry.FailureCode == malformedintent.FailureCodeRecipientNotFound + }, time.Second, 10*time.Millisecond) + + offset, found, err := fixture.offsetStore.Load(context.Background(), fixture.stream) + require.NoError(t, err) + require.True(t, found) + require.Equal(t, messageID, offset) + + _, found, err = fixture.acceptanceStore.GetNotification(context.Background(), messageID) + require.NoError(t, err) + require.False(t, found) +} + +func TestIntentConsumerRecordsMalformedIntentAndAdvancesOffset(t *testing.T) { + t.Parallel() + + fixture := 
newIntentConsumerFixture(t, stubUserDirectory{ + records: map[string]acceptintent.UserRecord{ + "user-1": {Email: "pilot@example.com", PreferredLanguage: "en"}, + }, + }) + messageID, err := fixture.client.XAdd(context.Background(), &redis.XAddArgs{ + Stream: fixture.stream, + Values: map[string]any{ + "notification_type": "game.turn.ready", + "producer": "game_master", + "audience_kind": "user", + "recipient_user_ids_json": `["user-1"]`, + "idempotency_key": "game-123:turn-ready", + "occurred_at_ms": "1775121700000", + }, + }).Result() + require.NoError(t, err) + + running := runIntentConsumer(t, fixture.consumer) + defer running.stop(t) + + require.Eventually(t, func() bool { + payload, err := fixture.client.Get(context.Background(), redisstate.Keyspace{}.MalformedIntent(messageID)).Bytes() + if err != nil { + return false + } + entry, err := redisstate.UnmarshalMalformedIntent(payload) + if err != nil { + return false + } + return entry.FailureCode == malformedintent.FailureCodeInvalidPayload && + entry.StreamEntryID == messageID + }, time.Second, 10*time.Millisecond) + + offset, found, err := fixture.offsetStore.Load(context.Background(), fixture.stream) + require.NoError(t, err) + require.True(t, found) + require.Equal(t, messageID, offset) + + _, found, err = fixture.acceptanceStore.GetNotification(context.Background(), messageID) + require.NoError(t, err) + require.False(t, found) +} + +func TestIntentConsumerRecordsTelemetryForOutcomesAndMalformedIntents(t *testing.T) { + t.Parallel() + + fixture := newIntentConsumerFixture(t, stubUserDirectory{ + records: map[string]acceptintent.UserRecord{ + "user-1": {Email: "pilot@example.com", PreferredLanguage: "en"}, + }, + }) + addValidIntent(t, fixture.client, fixture.stream, `{"turn_number":54,"game_name":"Nebula Clash","game_id":"game-123"}`) + addValidIntent(t, fixture.client, fixture.stream, `{"turn_number":54,"game_name":"Nebula Clash","game_id":"game-123"}`) + conflictID := addValidIntent(t, fixture.client, 
fixture.stream, `{"turn_number":55,"game_name":"Nebula Clash","game_id":"game-123"}`) + + running := runIntentConsumer(t, fixture.consumer) + defer running.stop(t) + + require.Eventually(t, func() bool { + payload, err := fixture.client.Get(context.Background(), redisstate.Keyspace{}.MalformedIntent(conflictID)).Bytes() + if err != nil { + return false + } + entry, err := redisstate.UnmarshalMalformedIntent(payload) + if err != nil { + return false + } + return entry.FailureCode == malformedintent.FailureCodeIdempotencyConflict + }, time.Second, 10*time.Millisecond) + + require.Eventually(t, func() bool { + return fixture.telemetry.hasIntentOutcome("accepted") && + fixture.telemetry.hasIntentOutcome("duplicate") && + fixture.telemetry.hasMalformedIntent("idempotency_conflict") + }, time.Second, 10*time.Millisecond) +} + +func TestIntentConsumerStopsWithoutAdvancingOffsetWhenUserDirectoryIsUnavailable(t *testing.T) { + t.Parallel() + + fixture := newIntentConsumerFixture(t, stubUserDirectory{ + err: errors.New("user service unavailable"), + }) + messageID := addValidIntent(t, fixture.client, fixture.stream, `{"turn_number":54,"game_name":"Nebula Clash","game_id":"game-123"}`) + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + resultCh := make(chan error, 1) + go func() { + resultCh <- fixture.consumer.Run(ctx) + }() + + var runErr error + require.Eventually(t, func() bool { + select { + case runErr = <-resultCh: + return true + default: + return false + } + }, time.Second, 10*time.Millisecond) + + require.Error(t, runErr) + require.ErrorContains(t, runErr, "user service unavailable") + + _, found, err := fixture.offsetStore.Load(context.Background(), fixture.stream) + require.NoError(t, err) + require.False(t, found) + + _, found, err = fixture.acceptanceStore.GetNotification(context.Background(), messageID) + require.NoError(t, err) + require.False(t, found) +} + +type intentConsumerFixture struct { + client *redis.Client + stream 
string + acceptanceStore *redisstate.AcceptanceStore + offsetStore *redisstate.StreamOffsetStore + consumer *IntentConsumer + telemetry *recordingWorkerTelemetry +} + +func newIntentConsumerFixture(t *testing.T, userDirectory acceptintent.UserDirectory) intentConsumerFixture { + t.Helper() + + server := miniredis.RunT(t) + client := redis.NewClient(&redis.Options{ + Addr: server.Addr(), + Protocol: 2, + DisableIdentity: true, + }) + t.Cleanup(func() { + assert.NoError(t, client.Close()) + }) + + acceptanceStore, err := redisstate.NewAcceptanceStore(client, redisstate.AcceptanceConfig{ + RecordTTL: 24 * time.Hour, + DeadLetterTTL: 72 * time.Hour, + IdempotencyTTL: 7 * 24 * time.Hour, + }) + require.NoError(t, err) + malformedStore, err := redisstate.NewMalformedIntentStore(client, 72*time.Hour) + require.NoError(t, err) + offsetStore, err := redisstate.NewStreamOffsetStore(client) + require.NoError(t, err) + telemetry := &recordingWorkerTelemetry{} + service, err := acceptintent.New(acceptintent.Config{ + Store: acceptanceStore, + UserDirectory: userDirectory, + Clock: fixedClock{now: time.UnixMilli(1775121700000).UTC()}, + Logger: slog.New(slog.NewTextHandler(io.Discard, nil)), + Telemetry: telemetry, + PushMaxAttempts: 3, + EmailMaxAttempts: 7, + IdempotencyTTL: 7 * 24 * time.Hour, + AdminRouting: config.AdminRoutingConfig{}, + }) + require.NoError(t, err) + consumer, err := NewIntentConsumer(IntentConsumerConfig{ + Client: client, + Stream: "notification:intents", + BlockTimeout: 25 * time.Millisecond, + Acceptor: service, + MalformedRecorder: malformedStore, + OffsetStore: offsetStore, + Telemetry: telemetry, + Clock: fixedClock{now: time.UnixMilli(1775121700001).UTC()}, + }, slog.New(slog.NewTextHandler(io.Discard, nil))) + require.NoError(t, err) + + return intentConsumerFixture{ + client: client, + stream: "notification:intents", + acceptanceStore: acceptanceStore, + offsetStore: offsetStore, + consumer: consumer, + telemetry: telemetry, + } +} + +func 
addValidIntent(t *testing.T, client *redis.Client, stream string, payloadJSON string) string { + t.Helper() + + messageID, err := client.XAdd(context.Background(), &redis.XAddArgs{ + Stream: stream, + Values: map[string]any{ + "notification_type": "game.turn.ready", + "producer": "game_master", + "audience_kind": "user", + "recipient_user_ids_json": `["user-1"]`, + "idempotency_key": "game-123:turn-ready", + "occurred_at_ms": "1775121700000", + "payload_json": payloadJSON, + }, + }).Result() + require.NoError(t, err) + + return messageID +} + +type runningIntentConsumer struct { + cancel context.CancelFunc + resultCh chan error +} + +func runIntentConsumer(t *testing.T, consumer *IntentConsumer) runningIntentConsumer { + t.Helper() + + ctx, cancel := context.WithCancel(context.Background()) + resultCh := make(chan error, 1) + go func() { + resultCh <- consumer.Run(ctx) + }() + + time.Sleep(50 * time.Millisecond) + + return runningIntentConsumer{ + cancel: cancel, + resultCh: resultCh, + } +} + +func (r runningIntentConsumer) stop(t *testing.T) { + t.Helper() + + r.cancel() + + select { + case err := <-r.resultCh: + require.ErrorIs(t, err, context.Canceled) + case <-time.After(time.Second): + require.FailNow(t, "intent consumer did not stop") + } +} + +type fixedClock struct { + now time.Time +} + +func (clock fixedClock) Now() time.Time { + return clock.now +} + +type stubUserDirectory struct { + records map[string]acceptintent.UserRecord + err error +} + +func (directory stubUserDirectory) GetUserByID(_ context.Context, userID string) (acceptintent.UserRecord, error) { + if directory.err != nil { + return acceptintent.UserRecord{}, directory.err + } + if record, ok := directory.records[userID]; ok { + return record, nil + } + + return acceptintent.UserRecord{}, acceptintent.ErrRecipientNotFound +} diff --git a/notification/internal/worker/push_publisher.go b/notification/internal/worker/push_publisher.go new file mode 100644 index 0000000..e6fec58 --- /dev/null 
+++ b/notification/internal/worker/push_publisher.go @@ -0,0 +1,499 @@ +package worker + +import ( + "context" + "crypto/rand" + "encoding/hex" + "errors" + "fmt" + "log/slog" + "strings" + "time" + + "galaxy/notification/internal/adapters/redisstate" + "galaxy/notification/internal/api/intentstream" + "galaxy/notification/internal/logging" + "galaxy/notification/internal/service/acceptintent" + "galaxy/notification/internal/service/publishpush" +) + +const ( + defaultPushPublisherPollInterval = 100 * time.Millisecond + defaultPushPublisherBatchSize = 64 + + pushFailureClassificationPayloadEncoding = "payload_encoding_failed" + pushFailureClassificationGatewayStreamWrite = "gateway_stream_publish_failed" +) + +// PushRouteStateStore describes the durable route-state operations required by +// PushPublisher. +type PushRouteStateStore interface { + // ListDueRoutes loads due scheduled routes. + ListDueRoutes(context.Context, time.Time, int64) ([]redisstate.ScheduledRoute, error) + + // TryAcquireRouteLease attempts to acquire one temporary route lease. + TryAcquireRouteLease(context.Context, string, string, string, time.Duration) (bool, error) + + // ReleaseRouteLease best-effort releases one temporary route lease. + ReleaseRouteLease(context.Context, string, string, string) error + + // GetNotification loads one accepted notification. + GetNotification(context.Context, string) (acceptintent.NotificationRecord, bool, error) + + // GetRoute loads one accepted notification route. + GetRoute(context.Context, string, string) (acceptintent.NotificationRoute, bool, error) + + // CompleteRoutePublished records one successful publication. + CompleteRoutePublished(context.Context, redisstate.CompleteRoutePublishedInput) error + + // CompleteRouteFailed records one retryable publication failure. + CompleteRouteFailed(context.Context, redisstate.CompleteRouteFailedInput) error + + // CompleteRouteDeadLetter records one exhausted publication failure. 
+ CompleteRouteDeadLetter(context.Context, redisstate.CompleteRouteDeadLetterInput) error +} + +// PushEventEncoder encodes one push-capable notification route into a +// Gateway-compatible client event. +type PushEventEncoder interface { + // Encode converts notification plus route to one outbound event. + Encode(acceptintent.NotificationRecord, acceptintent.NotificationRoute) (publishpush.Event, error) +} + +// RoutePublisherTelemetry records low-cardinality route publication outcomes. +type RoutePublisherTelemetry interface { + // RecordRoutePublishAttempt records one route publication attempt outcome. + RecordRoutePublishAttempt(context.Context, string, string, string, string) + + // RecordRouteRetry records one route retry scheduling event. + RecordRouteRetry(context.Context, string, string) + + // RecordRouteDeadLetter records one route transition to dead_letter. + RecordRouteDeadLetter(context.Context, string, string, string) +} + +// PushPublisherConfig stores the dependencies and policies used by +// PushPublisher. +type PushPublisherConfig struct { + // Store owns the durable route-state transitions. + Store PushRouteStateStore + + // GatewayStream stores the outbound Gateway client-events stream name. + GatewayStream string + + // GatewayStreamMaxLen bounds GatewayStream with approximate trimming. + GatewayStreamMaxLen int64 + + // RouteLeaseTTL stores the temporary route-lease lifetime. + RouteLeaseTTL time.Duration + + // RouteBackoffMin stores the minimum retry backoff. + RouteBackoffMin time.Duration + + // RouteBackoffMax stores the maximum retry backoff. + RouteBackoffMax time.Duration + + // PollInterval stores how long the worker waits before the next due-route + // scan when no progress was made. + PollInterval time.Duration + + // BatchSize stores the maximum number of due schedule members loaded per + // scan. + BatchSize int64 + + // Encoder stores the push payload encoder. 
+ Encoder PushEventEncoder + + // Telemetry records route publication counters. + Telemetry RoutePublisherTelemetry + + // Clock provides wall-clock timestamps. + Clock Clock +} + +// PushPublisher publishes due push routes into the Gateway client-events +// stream with retry and dead-letter handling. +type PushPublisher struct { + store PushRouteStateStore + gatewayStream string + gatewayStreamMaxLen int64 + routeLeaseTTL time.Duration + routeBackoffMin time.Duration + routeBackoffMax time.Duration + pollInterval time.Duration + batchSize int64 + encoder PushEventEncoder + telemetry RoutePublisherTelemetry + clock Clock + workerToken string + logger *slog.Logger +} + +// NewPushPublisher constructs the push publication worker. +func NewPushPublisher(cfg PushPublisherConfig, logger *slog.Logger) (*PushPublisher, error) { + switch { + case cfg.Store == nil: + return nil, errors.New("new push publisher: nil store") + case strings.TrimSpace(cfg.GatewayStream) == "": + return nil, errors.New("new push publisher: gateway stream must not be empty") + case cfg.GatewayStreamMaxLen <= 0: + return nil, errors.New("new push publisher: gateway stream max len must be positive") + case cfg.RouteLeaseTTL <= 0: + return nil, errors.New("new push publisher: route lease ttl must be positive") + case cfg.RouteBackoffMin <= 0: + return nil, errors.New("new push publisher: route backoff min must be positive") + case cfg.RouteBackoffMax <= 0: + return nil, errors.New("new push publisher: route backoff max must be positive") + case cfg.RouteBackoffMin > cfg.RouteBackoffMax: + return nil, errors.New("new push publisher: route backoff min must not exceed route backoff max") + } + if cfg.PollInterval <= 0 { + cfg.PollInterval = defaultPushPublisherPollInterval + } + if cfg.BatchSize <= 0 { + cfg.BatchSize = defaultPushPublisherBatchSize + } + if cfg.Clock == nil { + cfg.Clock = systemClock{} + } + if cfg.Encoder == nil { + cfg.Encoder = publishpush.Encoder{} + } + if logger == nil { + 
logger = slog.Default() + } + + workerToken, err := newWorkerToken() + if err != nil { + return nil, fmt.Errorf("new push publisher: %w", err) + } + + return &PushPublisher{ + store: cfg.Store, + gatewayStream: cfg.GatewayStream, + gatewayStreamMaxLen: cfg.GatewayStreamMaxLen, + routeLeaseTTL: cfg.RouteLeaseTTL, + routeBackoffMin: cfg.RouteBackoffMin, + routeBackoffMax: cfg.RouteBackoffMax, + pollInterval: cfg.PollInterval, + batchSize: cfg.BatchSize, + encoder: cfg.Encoder, + telemetry: cfg.Telemetry, + clock: cfg.Clock, + workerToken: workerToken, + logger: logger.With("component", "push_publisher", "stream", cfg.GatewayStream), + }, nil +} + +// Run starts the push publication loop and blocks until ctx is canceled or an +// unexpected publication error occurs. +func (publisher *PushPublisher) Run(ctx context.Context) error { + if ctx == nil { + return errors.New("run push publisher: nil context") + } + if err := ctx.Err(); err != nil { + return err + } + if publisher == nil { + return errors.New("run push publisher: nil publisher") + } + + publisher.logger.Info("push publisher started", + "poll_interval", publisher.pollInterval.String(), + "batch_size", publisher.batchSize, + ) + + for { + progress, err := publisher.publishDueRoutes(ctx) + switch { + case err == nil && progress: + continue + case err == nil: + if waitErr := waitWithContext(ctx, publisher.pollInterval); waitErr != nil { + publisher.logger.Info("push publisher stopped") + return waitErr + } + case ctx.Err() != nil && (errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded)): + publisher.logger.Info("push publisher stopped") + return ctx.Err() + default: + return fmt.Errorf("run push publisher: %w", err) + } + } +} + +// Shutdown stops the push publisher within ctx. The worker relies on context +// cancellation and a bounded polling interval, so it has no dedicated +// resources to release here. 
+func (publisher *PushPublisher) Shutdown(ctx context.Context) error { + if ctx == nil { + return errors.New("shutdown push publisher: nil context") + } + if publisher == nil { + return nil + } + + return nil +} + +func (publisher *PushPublisher) publishDueRoutes(ctx context.Context) (bool, error) { + now := publisher.now() + + dueRoutes, err := publisher.store.ListDueRoutes(ctx, now, publisher.batchSize) + if err != nil { + return false, err + } + + progress := false + for _, dueRoute := range dueRoutes { + if !strings.HasPrefix(dueRoute.RouteID, "push:") { + continue + } + + processed, err := publisher.publishRoute(ctx, now, dueRoute) + if err != nil { + return progress, err + } + progress = progress || processed + } + + return progress, nil +} + +func (publisher *PushPublisher) publishRoute(ctx context.Context, now time.Time, dueRoute redisstate.ScheduledRoute) (bool, error) { + acquired, err := publisher.store.TryAcquireRouteLease(ctx, dueRoute.NotificationID, dueRoute.RouteID, publisher.workerToken, publisher.routeLeaseTTL) + if err != nil { + return false, fmt.Errorf("acquire route lease %q: %w", dueRoute.RouteID, err) + } + if !acquired { + return false, nil + } + defer func() { + releaseCtx, cancel := context.WithTimeout(context.Background(), publisher.routeLeaseTTL) + defer cancel() + _ = publisher.store.ReleaseRouteLease(releaseCtx, dueRoute.NotificationID, dueRoute.RouteID, publisher.workerToken) + }() + + notification, found, err := publisher.store.GetNotification(ctx, dueRoute.NotificationID) + if err != nil { + return false, fmt.Errorf("load notification %q: %w", dueRoute.NotificationID, err) + } + if !found { + return false, fmt.Errorf("notification %q is missing for route %q", dueRoute.NotificationID, dueRoute.RouteID) + } + + route, found, err := publisher.store.GetRoute(ctx, dueRoute.NotificationID, dueRoute.RouteID) + if err != nil { + return false, fmt.Errorf("load route %q: %w", dueRoute.RouteID, err) + } + if !found { + return false, 
fmt.Errorf("route %q is missing for notification %q", dueRoute.RouteID, dueRoute.NotificationID) + } + if route.Channel != intentstream.ChannelPush { + return false, nil + } + switch route.Status { + case acceptintent.RouteStatusPending, acceptintent.RouteStatusFailed: + default: + return false, nil + } + if route.NextAttemptAt.After(now) { + return false, nil + } + + event, err := publisher.encoder.Encode(notification, route) + if err != nil { + return publisher.recordFailure(ctx, notification, route, pushFailureClassificationPayloadEncoding, err.Error()) + } + + err = publisher.store.CompleteRoutePublished(ctx, redisstate.CompleteRoutePublishedInput{ + ExpectedRoute: route, + LeaseToken: publisher.workerToken, + PublishedAt: publisher.now(), + Stream: publisher.gatewayStream, + StreamMaxLen: publisher.gatewayStreamMaxLen, + StreamValues: eventValues(event), + }) + switch { + case err == nil: + publisher.recordPublishAttempt(ctx, notification, route, "published", "") + logArgs := logging.RouteAttrs( + notification.NotificationID, + notification.NotificationType, + notification.Producer, + notification.AudienceKind, + notification.IdempotencyKey, + notification.RequestID, + notification.TraceID, + route.RouteID, + route.Channel, + ) + logArgs = append(logArgs, + "event_id", event.EventID, + "user_id", event.UserID, + ) + logArgs = append(logArgs, logging.TraceAttrsFromContext(ctx)...) + publisher.logger.Info("push route published", logArgs...) 
+ return true, nil + case errors.Is(err, redisstate.ErrConflict): + return false, nil + default: + return publisher.recordFailure(ctx, notification, route, pushFailureClassificationGatewayStreamWrite, err.Error()) + } +} + +func (publisher *PushPublisher) recordFailure( + ctx context.Context, + notification acceptintent.NotificationRecord, + route acceptintent.NotificationRoute, + classification string, + message string, +) (bool, error) { + failureAt := publisher.now() + attemptNumber := route.AttemptCount + 1 + logArgs := logging.RouteAttrs( + notification.NotificationID, + notification.NotificationType, + notification.Producer, + notification.AudienceKind, + notification.IdempotencyKey, + notification.RequestID, + notification.TraceID, + route.RouteID, + route.Channel, + ) + logArgs = append(logArgs, + "failure_classification", classification, + "failure_message", strings.TrimSpace(message), + "attempt_number", attemptNumber, + "max_attempts", route.MaxAttempts, + ) + logArgs = append(logArgs, logging.TraceAttrsFromContext(ctx)...) + + if attemptNumber >= route.MaxAttempts { + err := publisher.store.CompleteRouteDeadLetter(ctx, redisstate.CompleteRouteDeadLetterInput{ + ExpectedRoute: route, + LeaseToken: publisher.workerToken, + DeadLetteredAt: failureAt, + FailureClassification: classification, + FailureMessage: strings.TrimSpace(message), + }) + switch { + case err == nil: + publisher.recordPublishAttempt(ctx, notification, route, "dead_letter", classification) + publisher.recordRouteDeadLetter(ctx, notification, route, classification) + publisher.logger.Warn("push route dead-lettered", logArgs...) 
+ return true, nil + case errors.Is(err, redisstate.ErrConflict): + return false, nil + default: + return false, fmt.Errorf("dead-letter route %q: %w", route.RouteID, err) + } + } + + nextAttemptAt := failureAt.Add(routeBackoffDelay(attemptNumber, publisher.routeBackoffMin, publisher.routeBackoffMax)).UTC().Truncate(time.Millisecond) + err := publisher.store.CompleteRouteFailed(ctx, redisstate.CompleteRouteFailedInput{ + ExpectedRoute: route, + LeaseToken: publisher.workerToken, + FailedAt: failureAt, + NextAttemptAt: nextAttemptAt, + FailureClassification: classification, + FailureMessage: strings.TrimSpace(message), + }) + switch { + case err == nil: + publisher.recordPublishAttempt(ctx, notification, route, "retry", classification) + publisher.recordRouteRetry(ctx, notification, route) + logArgs = append(logArgs, "next_attempt_at", nextAttemptAt) + publisher.logger.Warn("push route failed and was rescheduled", logArgs...) + return true, nil + case errors.Is(err, redisstate.ErrConflict): + return false, nil + default: + return false, fmt.Errorf("reschedule route %q: %w", route.RouteID, err) + } +} + +func eventValues(event publishpush.Event) map[string]any { + values := map[string]any{ + "user_id": event.UserID, + "event_type": event.EventType, + "event_id": event.EventID, + "payload_bytes": append([]byte(nil), event.PayloadBytes...), + } + if event.RequestID != "" { + values["request_id"] = event.RequestID + } + if event.TraceID != "" { + values["trace_id"] = event.TraceID + } + + return values +} + +func routeBackoffDelay(attemptNumber int, minBackoff time.Duration, maxBackoff time.Duration) time.Duration { + delay := minBackoff + for step := 1; step < attemptNumber; step++ { + if delay >= maxBackoff/2 { + return maxBackoff + } + delay *= 2 + } + if delay < minBackoff { + return minBackoff + } + if delay > maxBackoff { + return maxBackoff + } + + return delay +} + +func waitWithContext(ctx context.Context, delay time.Duration) error { + timer := 
time.NewTimer(delay) + defer timer.Stop() + + select { + case <-ctx.Done(): + return ctx.Err() + case <-timer.C: + return nil + } +} + +func newWorkerToken() (string, error) { + buffer := make([]byte, 16) + if _, err := rand.Read(buffer); err != nil { + return "", fmt.Errorf("generate worker token: %w", err) + } + + return hex.EncodeToString(buffer), nil +} + +func (publisher *PushPublisher) now() time.Time { + return publisher.clock.Now().UTC().Truncate(time.Millisecond) +} + +func (publisher *PushPublisher) recordPublishAttempt(ctx context.Context, notification acceptintent.NotificationRecord, route acceptintent.NotificationRoute, result string, classification string) { + if publisher == nil || publisher.telemetry == nil { + return + } + + publisher.telemetry.RecordRoutePublishAttempt(ctx, string(route.Channel), string(notification.NotificationType), result, classification) +} + +func (publisher *PushPublisher) recordRouteRetry(ctx context.Context, notification acceptintent.NotificationRecord, route acceptintent.NotificationRoute) { + if publisher == nil || publisher.telemetry == nil { + return + } + + publisher.telemetry.RecordRouteRetry(ctx, string(route.Channel), string(notification.NotificationType)) +} + +func (publisher *PushPublisher) recordRouteDeadLetter(ctx context.Context, notification acceptintent.NotificationRecord, route acceptintent.NotificationRoute, classification string) { + if publisher == nil || publisher.telemetry == nil { + return + } + + publisher.telemetry.RecordRouteDeadLetter(ctx, string(route.Channel), string(notification.NotificationType), classification) +} diff --git a/notification/internal/worker/push_publisher_test.go b/notification/internal/worker/push_publisher_test.go new file mode 100644 index 0000000..b0db75b --- /dev/null +++ b/notification/internal/worker/push_publisher_test.go @@ -0,0 +1,318 @@ +package worker + +import ( + "context" + "io" + "log/slog" + "sync" + "testing" + "time" + + redisstate 
"galaxy/notification/internal/adapters/redisstate" + "galaxy/notification/internal/api/intentstream" + "galaxy/notification/internal/service/acceptintent" + + "github.com/alicebob/miniredis/v2" + "github.com/redis/go-redis/v9" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestPushPublisherPublishesDuePushRouteAndLeavesEmailRoutePending(t *testing.T) { + t.Parallel() + + fixture := newPushPublisherFixture(t) + require.NoError(t, fixture.store.CreateAcceptance(context.Background(), validPushAcceptanceInput(fixture.now))) + + running := runPushPublisher(t, fixture.publisher) + defer running.stop(t) + + require.Eventually(t, func() bool { + route, found, err := fixture.store.GetRoute(context.Background(), "1775121700000-0", "push:user:user-1") + return err == nil && found && route.Status == acceptintent.RouteStatusPublished + }, time.Second, 10*time.Millisecond) + + emailRoute, found, err := fixture.store.GetRoute(context.Background(), "1775121700000-0", "email:user:user-1") + require.NoError(t, err) + require.True(t, found) + require.Equal(t, acceptintent.RouteStatusPending, emailRoute.Status) + + messages, err := fixture.client.XRange(context.Background(), fixture.gatewayStream, "-", "+").Result() + require.NoError(t, err) + require.Len(t, messages, 1) + require.Equal(t, "user-1", messages[0].Values["user_id"]) + require.Equal(t, "game.turn.ready", messages[0].Values["event_type"]) + require.Equal(t, "1775121700000-0/push:user:user-1", messages[0].Values["event_id"]) + require.True(t, fixture.telemetry.hasRoutePublishAttempt("push", "published", "")) +} + +func TestPushPublisherRetriesGatewayStreamPublicationFailures(t *testing.T) { + t.Parallel() + + fixture := newPushPublisherFixture(t) + require.NoError(t, fixture.store.CreateAcceptance(context.Background(), validPushAcceptanceInput(fixture.now))) + require.NoError(t, fixture.client.Set(context.Background(), fixture.gatewayStream, "wrong-type", 0).Err()) + + running := 
runPushPublisher(t, fixture.publisher) + defer running.stop(t) + + require.Eventually(t, func() bool { + route, found, err := fixture.store.GetRoute(context.Background(), "1775121700000-0", "push:user:user-1") + return err == nil && found && route.Status == acceptintent.RouteStatusFailed && route.AttemptCount == 1 + }, time.Second, 10*time.Millisecond) + require.True(t, fixture.telemetry.hasRoutePublishAttempt("push", "retry", pushFailureClassificationGatewayStreamWrite)) + require.True(t, fixture.telemetry.hasRouteRetry("push")) + + require.NoError(t, fixture.client.Del(context.Background(), fixture.gatewayStream).Err()) + + require.Eventually(t, func() bool { + route, found, err := fixture.store.GetRoute(context.Background(), "1775121700000-0", "push:user:user-1") + return err == nil && found && route.Status == acceptintent.RouteStatusPublished && route.AttemptCount == 2 + }, 2*time.Second, 10*time.Millisecond) + + messages, err := fixture.client.XRange(context.Background(), fixture.gatewayStream, "-", "+").Result() + require.NoError(t, err) + require.Len(t, messages, 1) + require.True(t, fixture.telemetry.hasRoutePublishAttempt("push", "published", "")) +} + +func TestPushPublisherDeadLettersExhaustedRoute(t *testing.T) { + t.Parallel() + + fixture := newPushPublisherFixture(t) + input := validPushAcceptanceInput(fixture.now) + for index := range input.Routes { + if input.Routes[index].RouteID == "push:user:user-1" { + input.Routes[index].AttemptCount = 2 + input.Routes[index].MaxAttempts = 3 + } + } + require.NoError(t, fixture.store.CreateAcceptance(context.Background(), input)) + require.NoError(t, fixture.client.Set(context.Background(), fixture.gatewayStream, "wrong-type", 0).Err()) + + running := runPushPublisher(t, fixture.publisher) + defer running.stop(t) + + require.Eventually(t, func() bool { + route, found, err := fixture.store.GetRoute(context.Background(), "1775121700000-0", "push:user:user-1") + return err == nil && found && route.Status == 
acceptintent.RouteStatusDeadLetter && route.AttemptCount == 3 + }, time.Second, 10*time.Millisecond) + + deadLetterPayload, err := fixture.client.Get(context.Background(), redisstate.Keyspace{}.DeadLetter("1775121700000-0", "push:user:user-1")).Bytes() + require.NoError(t, err) + deadLetter, err := redisstate.UnmarshalDeadLetter(deadLetterPayload) + require.NoError(t, err) + require.Equal(t, pushFailureClassificationGatewayStreamWrite, deadLetter.FailureClassification) + require.True(t, fixture.telemetry.hasRoutePublishAttempt("push", "dead_letter", pushFailureClassificationGatewayStreamWrite)) + require.True(t, fixture.telemetry.hasRouteDeadLetter("push", pushFailureClassificationGatewayStreamWrite)) +} + +func TestPushPublisherLeasePreventsDuplicatePublicationAcrossReplicas(t *testing.T) { + t.Parallel() + + fixture := newPushPublisherFixture(t) + require.NoError(t, fixture.store.CreateAcceptance(context.Background(), validPushAcceptanceInput(fixture.now))) + + otherPublisher, err := NewPushPublisher(PushPublisherConfig{ + Store: fixture.store, + GatewayStream: fixture.gatewayStream, + GatewayStreamMaxLen: 1024, + RouteLeaseTTL: 200 * time.Millisecond, + RouteBackoffMin: 20 * time.Millisecond, + RouteBackoffMax: 20 * time.Millisecond, + PollInterval: 10 * time.Millisecond, + BatchSize: 16, + Clock: newSteppingClock(fixture.now, time.Millisecond), + }, testWorkerLogger()) + require.NoError(t, err) + + first := runPushPublisher(t, fixture.publisher) + defer first.stop(t) + second := runPushPublisher(t, otherPublisher) + defer second.stop(t) + + require.Eventually(t, func() bool { + route, found, err := fixture.store.GetRoute(context.Background(), "1775121700000-0", "push:user:user-1") + return err == nil && found && route.Status == acceptintent.RouteStatusPublished + }, time.Second, 10*time.Millisecond) + + messages, err := fixture.client.XRange(context.Background(), fixture.gatewayStream, "-", "+").Result() + require.NoError(t, err) + require.Len(t, messages, 1) 
+} + +type pushPublisherFixture struct { + client *redis.Client + store *redisstate.AcceptanceStore + publisher *PushPublisher + gatewayStream string + now time.Time + clock *steppingClock + telemetry *recordingWorkerTelemetry +} + +func newPushPublisherFixture(t *testing.T) pushPublisherFixture { + t.Helper() + + server := miniredis.RunT(t) + client := redis.NewClient(&redis.Options{ + Addr: server.Addr(), + Protocol: 2, + DisableIdentity: true, + }) + t.Cleanup(func() { + assert.NoError(t, client.Close()) + }) + + store, err := redisstate.NewAcceptanceStore(client, redisstate.AcceptanceConfig{ + RecordTTL: 24 * time.Hour, + DeadLetterTTL: 72 * time.Hour, + IdempotencyTTL: 7 * 24 * time.Hour, + }) + require.NoError(t, err) + + now := time.UnixMilli(1775121700000).UTC() + clock := newSteppingClock(now, time.Millisecond) + telemetry := &recordingWorkerTelemetry{} + publisher, err := NewPushPublisher(PushPublisherConfig{ + Store: store, + GatewayStream: "gateway:client-events", + GatewayStreamMaxLen: 1024, + RouteLeaseTTL: 200 * time.Millisecond, + RouteBackoffMin: 20 * time.Millisecond, + RouteBackoffMax: 20 * time.Millisecond, + PollInterval: 10 * time.Millisecond, + BatchSize: 16, + Telemetry: telemetry, + Clock: clock, + }, testWorkerLogger()) + require.NoError(t, err) + + return pushPublisherFixture{ + client: client, + store: store, + publisher: publisher, + gatewayStream: "gateway:client-events", + now: now, + clock: clock, + telemetry: telemetry, + } +} + +func validPushAcceptanceInput(now time.Time) acceptintent.CreateAcceptanceInput { + return acceptintent.CreateAcceptanceInput{ + Notification: acceptintent.NotificationRecord{ + NotificationID: "1775121700000-0", + NotificationType: intentstream.NotificationTypeGameTurnReady, + Producer: intentstream.ProducerGameMaster, + AudienceKind: intentstream.AudienceKindUser, + RecipientUserIDs: []string{"user-1"}, + PayloadJSON: `{"game_id":"game-123","game_name":"Nebula Clash","turn_number":54}`, + IdempotencyKey: 
"game-123:turn-54", + RequestFingerprint: "sha256:deadbeef", + RequestID: "request-1", + TraceID: "trace-1", + OccurredAt: now, + AcceptedAt: now, + UpdatedAt: now, + }, + Routes: []acceptintent.NotificationRoute{ + { + NotificationID: "1775121700000-0", + RouteID: "push:user:user-1", + Channel: intentstream.ChannelPush, + RecipientRef: "user:user-1", + Status: acceptintent.RouteStatusPending, + AttemptCount: 0, + MaxAttempts: 3, + NextAttemptAt: now, + ResolvedEmail: "pilot@example.com", + ResolvedLocale: "en", + CreatedAt: now, + UpdatedAt: now, + }, + { + NotificationID: "1775121700000-0", + RouteID: "email:user:user-1", + Channel: intentstream.ChannelEmail, + RecipientRef: "user:user-1", + Status: acceptintent.RouteStatusPending, + AttemptCount: 0, + MaxAttempts: 7, + NextAttemptAt: now, + ResolvedEmail: "pilot@example.com", + ResolvedLocale: "en", + CreatedAt: now, + UpdatedAt: now, + }, + }, + Idempotency: acceptintent.IdempotencyRecord{ + Producer: intentstream.ProducerGameMaster, + IdempotencyKey: "game-123:turn-54", + NotificationID: "1775121700000-0", + RequestFingerprint: "sha256:deadbeef", + CreatedAt: now, + ExpiresAt: now.Add(7 * 24 * time.Hour), + }, + } +} + +type runningPushPublisher struct { + cancel context.CancelFunc + resultCh chan error +} + +func runPushPublisher(t *testing.T, publisher *PushPublisher) runningPushPublisher { + t.Helper() + + ctx, cancel := context.WithCancel(context.Background()) + resultCh := make(chan error, 1) + go func() { + resultCh <- publisher.Run(ctx) + }() + + return runningPushPublisher{ + cancel: cancel, + resultCh: resultCh, + } +} + +func (r runningPushPublisher) stop(t *testing.T) { + t.Helper() + + r.cancel() + + select { + case err := <-r.resultCh: + require.ErrorIs(t, err, context.Canceled) + case <-time.After(time.Second): + require.FailNow(t, "push publisher did not stop") + } +} + +type steppingClock struct { + mu sync.Mutex + current time.Time + step time.Duration +} + +func newSteppingClock(start 
// steppingClock is a deterministic Clock for tests: every Now call returns the
// current instant and advances the clock by a fixed step.
type steppingClock struct {
	mu      sync.Mutex
	current time.Time
	step    time.Duration
}

// newSteppingClock builds a steppingClock starting at start (normalized to UTC
// millisecond precision) that advances by step per Now call.
func newSteppingClock(start time.Time, step time.Duration) *steppingClock {
	return &steppingClock{
		current: start.UTC().Truncate(time.Millisecond),
		step:    step,
	}
}

// Now returns the current instant and advances the clock by one step.
func (clock *steppingClock) Now() time.Time {
	clock.mu.Lock()
	defer clock.mu.Unlock()

	now := clock.current
	clock.current = clock.current.Add(clock.step).UTC().Truncate(time.Millisecond)

	return now
}

// testWorkerLogger returns a logger that discards all output.
func testWorkerLogger() *slog.Logger {
	return slog.New(slog.NewTextHandler(io.Discard, nil))
}

// intentOutcomeTelemetryRecord captures one RecordIntentOutcome call.
type intentOutcomeTelemetryRecord struct {
	notificationType string
	producer         string
	audienceKind     string
	outcome          string
}

// malformedIntentTelemetryRecord captures one RecordMalformedIntent call.
type malformedIntentTelemetryRecord struct {
	failureCode      string
	notificationType string
	producer         string
}

// userEnrichmentTelemetryRecord captures one RecordUserEnrichmentAttempt call.
type userEnrichmentTelemetryRecord struct {
	notificationType string
	result           string
}

// routePublishTelemetryRecord captures one RecordRoutePublishAttempt call.
type routePublishTelemetryRecord struct {
	channel               string
	notificationType      string
	result                string
	failureClassification string
}

// routeTelemetryRecord captures one RecordRouteRetry call.
type routeTelemetryRecord struct {
	channel          string
	notificationType string
}

// routeDeadLetterTelemetryRecord captures one RecordRouteDeadLetter call.
type routeDeadLetterTelemetryRecord struct {
	channel               string
	notificationType      string
	failureClassification string
}

// recordingWorkerTelemetry is an in-memory, mutex-guarded telemetry sink used
// by worker tests to assert on emitted counters.
type recordingWorkerTelemetry struct {
	mu sync.Mutex

	intentOutcomes       []intentOutcomeTelemetryRecord
	malformedIntents     []malformedIntentTelemetryRecord
	userEnrichment       []userEnrichmentTelemetryRecord
	routePublishAttempts []routePublishTelemetryRecord
	routeRetries         []routeTelemetryRecord
	routeDeadLetters     []routeDeadLetterTelemetryRecord
}

// RecordIntentOutcome appends one intent-outcome record.
func (telemetry *recordingWorkerTelemetry) RecordIntentOutcome(_ context.Context, notificationType string, producer string, audienceKind string, outcome string) {
	telemetry.mu.Lock()
	defer telemetry.mu.Unlock()

	telemetry.intentOutcomes = append(telemetry.intentOutcomes, intentOutcomeTelemetryRecord{
		notificationType: notificationType,
		producer:         producer,
		audienceKind:     audienceKind,
		outcome:          outcome,
	})
}

// RecordMalformedIntent appends one malformed-intent record.
func (telemetry *recordingWorkerTelemetry) RecordMalformedIntent(_ context.Context, failureCode string, notificationType string, producer string) {
	telemetry.mu.Lock()
	defer telemetry.mu.Unlock()

	telemetry.malformedIntents = append(telemetry.malformedIntents, malformedIntentTelemetryRecord{
		failureCode:      failureCode,
		notificationType: notificationType,
		producer:         producer,
	})
}

// RecordUserEnrichmentAttempt appends one user-enrichment record.
func (telemetry *recordingWorkerTelemetry) RecordUserEnrichmentAttempt(_ context.Context, notificationType string, result string) {
	telemetry.mu.Lock()
	defer telemetry.mu.Unlock()

	telemetry.userEnrichment = append(telemetry.userEnrichment, userEnrichmentTelemetryRecord{
		notificationType: notificationType,
		result:           result,
	})
}

// RecordRoutePublishAttempt appends one route-publish-attempt record.
func (telemetry *recordingWorkerTelemetry) RecordRoutePublishAttempt(_ context.Context, channel string, notificationType string, result string, failureClassification string) {
	telemetry.mu.Lock()
	defer telemetry.mu.Unlock()

	telemetry.routePublishAttempts = append(telemetry.routePublishAttempts, routePublishTelemetryRecord{
		channel:               channel,
		notificationType:      notificationType,
		result:                result,
		failureClassification: failureClassification,
	})
}

// RecordRouteRetry appends one route-retry record.
func (telemetry *recordingWorkerTelemetry) RecordRouteRetry(_ context.Context, channel string, notificationType string) {
	telemetry.mu.Lock()
	defer telemetry.mu.Unlock()

	telemetry.routeRetries = append(telemetry.routeRetries, routeTelemetryRecord{
		channel:          channel,
		notificationType: notificationType,
	})
}

// RecordRouteDeadLetter appends one route-dead-letter record.
func (telemetry *recordingWorkerTelemetry) RecordRouteDeadLetter(_ context.Context, channel string, notificationType string, failureClassification string) {
	telemetry.mu.Lock()
	defer telemetry.mu.Unlock()

	telemetry.routeDeadLetters = append(telemetry.routeDeadLetters, routeDeadLetterTelemetryRecord{
		channel:               channel,
		notificationType:      notificationType,
		failureClassification: failureClassification,
	})
}

// anyRecord reports whether any element of records satisfies matches.
func anyRecord[T any](records []T, matches func(T) bool) bool {
	for _, record := range records {
		if matches(record) {
			return true
		}
	}

	return false
}

// hasIntentOutcome reports whether an intent with the given outcome was
// recorded.
func (telemetry *recordingWorkerTelemetry) hasIntentOutcome(outcome string) bool {
	telemetry.mu.Lock()
	defer telemetry.mu.Unlock()

	return anyRecord(telemetry.intentOutcomes, func(record intentOutcomeTelemetryRecord) bool {
		return record.outcome == outcome
	})
}

// hasMalformedIntent reports whether a malformed intent with the given
// failure code was recorded.
func (telemetry *recordingWorkerTelemetry) hasMalformedIntent(failureCode string) bool {
	telemetry.mu.Lock()
	defer telemetry.mu.Unlock()

	return anyRecord(telemetry.malformedIntents, func(record malformedIntentTelemetryRecord) bool {
		return record.failureCode == failureCode
	})
}

// hasRoutePublishAttempt reports whether a publish attempt with the given
// channel, result, and failure classification was recorded.
func (telemetry *recordingWorkerTelemetry) hasRoutePublishAttempt(channel string, result string, failureClassification string) bool {
	telemetry.mu.Lock()
	defer telemetry.mu.Unlock()

	return anyRecord(telemetry.routePublishAttempts, func(record routePublishTelemetryRecord) bool {
		return record.channel == channel && record.result == result && record.failureClassification == failureClassification
	})
}

// hasRouteRetry reports whether a retry for the given channel was recorded.
func (telemetry *recordingWorkerTelemetry) hasRouteRetry(channel string) bool {
	telemetry.mu.Lock()
	defer telemetry.mu.Unlock()

	return anyRecord(telemetry.routeRetries, func(record routeTelemetryRecord) bool {
		return record.channel == channel
	})
}

// hasRouteDeadLetter reports whether a dead-letter for the given channel and
// failure classification was recorded.
func (telemetry *recordingWorkerTelemetry) hasRouteDeadLetter(channel string, failureClassification string) bool {
	telemetry.mu.Lock()
	defer telemetry.mu.Unlock()

	return anyRecord(telemetry.routeDeadLetters, func(record routeDeadLetterTelemetryRecord) bool {
		return record.channel == channel && record.failureClassification == failureClassification
	})
}
b/notification/mail_template_contract_test.go @@ -0,0 +1,185 @@ +package notification + +import ( + "path/filepath" + "strings" + "testing" + texttemplate "text/template" + "text/template/parse" + + "github.com/stretchr/testify/require" +) + +const expectedNotificationMailTemplateTable = `| ` + "`notification_type`" + ` | ` + "`template_id`" + ` | Required assets | +| --- | --- | --- | +| ` + "`geo.review_recommended`" + ` | ` + "`geo.review_recommended`" + ` | ` + "`en/subject.tmpl`" + `, ` + "`en/text.tmpl`" + ` | +| ` + "`game.turn.ready`" + ` | ` + "`game.turn.ready`" + ` | ` + "`en/subject.tmpl`" + `, ` + "`en/text.tmpl`" + ` | +| ` + "`game.finished`" + ` | ` + "`game.finished`" + ` | ` + "`en/subject.tmpl`" + `, ` + "`en/text.tmpl`" + ` | +| ` + "`game.generation_failed`" + ` | ` + "`game.generation_failed`" + ` | ` + "`en/subject.tmpl`" + `, ` + "`en/text.tmpl`" + ` | +| ` + "`lobby.runtime_paused_after_start`" + ` | ` + "`lobby.runtime_paused_after_start`" + ` | ` + "`en/subject.tmpl`" + `, ` + "`en/text.tmpl`" + ` | +| ` + "`lobby.application.submitted`" + ` | ` + "`lobby.application.submitted`" + ` | ` + "`en/subject.tmpl`" + `, ` + "`en/text.tmpl`" + ` | +| ` + "`lobby.membership.approved`" + ` | ` + "`lobby.membership.approved`" + ` | ` + "`en/subject.tmpl`" + `, ` + "`en/text.tmpl`" + ` | +| ` + "`lobby.membership.rejected`" + ` | ` + "`lobby.membership.rejected`" + ` | ` + "`en/subject.tmpl`" + `, ` + "`en/text.tmpl`" + ` | +| ` + "`lobby.invite.created`" + ` | ` + "`lobby.invite.created`" + ` | ` + "`en/subject.tmpl`" + `, ` + "`en/text.tmpl`" + ` | +| ` + "`lobby.invite.redeemed`" + ` | ` + "`lobby.invite.redeemed`" + ` | ` + "`en/subject.tmpl`" + `, ` + "`en/text.tmpl`" + ` | +| ` + "`lobby.invite.expired`" + ` | ` + "`lobby.invite.expired`" + ` | ` + "`en/subject.tmpl`" + `, ` + "`en/text.tmpl`" + ` |` + +var expectedNotificationMailReadmeSnippets = []string{ + "`payload_mode` is always `template`", + "`template_id` equals `notification_type`", + 
"Auth-code email remains a direct `Auth / Session Service -> Mail Service` flow and does not pass through `Notification Service`.", +} + +var expectedMailServiceReadmeSnippets = []string{ + "`Notification Service` uses only `payload_mode=template` for notification-generated mail", + "notification-owned `template_id` values are identical to the `notification_type` vocabulary", + "`auth.login_code` remains the required auth template family for the direct `Auth / Session Service -> Mail Service` flow and is not part of the notification-owned template set.", +} + +func TestNotificationMailTemplateDocsStayInSync(t *testing.T) { + t.Parallel() + + readme := loadTextFile(t, "README.md") + flowsDoc := loadTextFile(t, filepath.Join("docs", "flows.md")) + examplesDoc := loadTextFile(t, filepath.Join("docs", "examples.md")) + docsIndex := loadTextFile(t, filepath.Join("docs", "README.md")) + mailReadme := loadTextFile(t, filepath.Join("..", "mail", "README.md")) + normalizedReadme := normalizeWhitespace(readme) + normalizedFlowsDoc := normalizeWhitespace(flowsDoc) + normalizedExamplesDoc := normalizeWhitespace(examplesDoc) + normalizedMailReadme := normalizeWhitespace(mailReadme) + + require.Contains(t, docsIndex, "- [Main flows](flows.md)") + require.Contains(t, docsIndex, "- [Configuration and contract examples](examples.md)") + require.Contains(t, readme, expectedNotificationMailTemplateTable) + require.Contains(t, readme, "`auth.login_code` does not belong to the notification-owned template set.") + + require.NotContains(t, readme, "The initial required template IDs are:") + require.NotContains(t, mailReadme, "Initial non-auth notification template directories:") + + for _, snippet := range expectedNotificationMailReadmeSnippets { + require.Contains(t, normalizedReadme, normalizeWhitespace(snippet)) + } + + for _, snippet := range expectedMailServiceReadmeSnippets { + require.Contains(t, normalizedMailReadme, normalizeWhitespace(snippet)) + } + + require.Contains(t, 
normalizedFlowsDoc, normalizeWhitespace("Notification-generated mail always uses `source=notification`")) + require.Contains(t, normalizedFlowsDoc, normalizeWhitespace("`payload_mode=template`")) + require.Contains(t, normalizedFlowsDoc, normalizeWhitespace("`template_id == notification_type`")) + require.Contains(t, normalizedExamplesDoc, normalizeWhitespace("payload_mode template")) +} + +func TestNotificationMailTemplatesExistAndAreNonEmpty(t *testing.T) { + t.Parallel() + + for _, templateID := range expectedNotificationTypeCatalog { + subjectPath, textPath := notificationMailTemplatePaths(templateID) + + subject := loadTextFile(t, subjectPath) + text := loadTextFile(t, textPath) + + require.NotEmptyf(t, strings.TrimSpace(subject), "subject template %s must not be empty", subjectPath) + require.NotEmptyf(t, strings.TrimSpace(text), "text template %s must not be empty", textPath) + } +} + +func TestNotificationMailTemplateVariablesStayWithinFrozenPayloadFields(t *testing.T) { + t.Parallel() + + for _, templateID := range expectedNotificationTypeCatalog { + allowedFields := make(map[string]struct{}, len(expectedNotificationCatalog[templateID].requiredFields)) + for _, field := range expectedNotificationCatalog[templateID].requiredFields { + allowedFields[field] = struct{}{} + } + + for _, templatePath := range []string{ + filepath.Join("..", "mail", "templates", templateID, "en", "subject.tmpl"), + filepath.Join("..", "mail", "templates", templateID, "en", "text.tmpl"), + } { + for _, fieldPath := range parsedTemplateFieldPaths(t, templatePath) { + _, ok := allowedFields[fieldPath] + require.Truef( + t, + ok, + "template %s references field %q outside frozen payload contract for %s", + templatePath, + fieldPath, + templateID, + ) + } + } + } +} + +func notificationMailTemplatePaths(templateID string) (subjectPath string, textPath string) { + return filepath.Join("..", "mail", "templates", templateID, "en", "subject.tmpl"), + filepath.Join("..", "mail", 
"templates", templateID, "en", "text.tmpl") +} + +func parsedTemplateFieldPaths(t *testing.T, relativePath string) []string { + t.Helper() + + source := loadTextFile(t, relativePath) + tmpl, err := texttemplate.New(filepath.Base(relativePath)).Parse(source) + require.NoErrorf(t, err, "parse template %s", relativePath) + require.NotNil(t, tmpl.Tree) + require.NotNil(t, tmpl.Tree.Root) + + fields := make(map[string]struct{}) + collectTemplateFieldPaths(tmpl.Tree.Root, fields) + + result := make([]string, 0, len(fields)) + for field := range fields { + result = append(result, field) + } + + return result +} + +func collectTemplateFieldPaths(node parse.Node, fields map[string]struct{}) { + if node == nil { + return + } + + switch typed := node.(type) { + case *parse.ListNode: + for _, child := range typed.Nodes { + collectTemplateFieldPaths(child, fields) + } + case *parse.ActionNode: + collectTemplateFieldPaths(typed.Pipe, fields) + case *parse.IfNode: + collectTemplateFieldPaths(typed.Pipe, fields) + collectTemplateFieldPaths(typed.List, fields) + collectTemplateFieldPaths(typed.ElseList, fields) + case *parse.RangeNode: + collectTemplateFieldPaths(typed.Pipe, fields) + collectTemplateFieldPaths(typed.List, fields) + collectTemplateFieldPaths(typed.ElseList, fields) + case *parse.WithNode: + collectTemplateFieldPaths(typed.Pipe, fields) + collectTemplateFieldPaths(typed.List, fields) + collectTemplateFieldPaths(typed.ElseList, fields) + case *parse.TemplateNode: + collectTemplateFieldPaths(typed.Pipe, fields) + case *parse.PipeNode: + for _, child := range typed.Cmds { + collectTemplateFieldPaths(child, fields) + } + case *parse.CommandNode: + for _, child := range typed.Args { + collectTemplateFieldPaths(child, fields) + } + case *parse.FieldNode: + if fieldPath := strings.Join(typed.Ident, "."); fieldPath != "" { + fields[fieldPath] = struct{}{} + } + case *parse.ChainNode: + if fieldPath := strings.Join(typed.Field, "."); fieldPath != "" { + fields[fieldPath] = 
struct{}{} + } + collectTemplateFieldPaths(typed.Node, fields) + } +} diff --git a/notification/observability_recovery_contract_test.go b/notification/observability_recovery_contract_test.go new file mode 100644 index 0000000..3c94a60 --- /dev/null +++ b/notification/observability_recovery_contract_test.go @@ -0,0 +1,34 @@ +package notification + +import ( + "path/filepath" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestNotificationObservabilityAndRecoveryDocsStayInSync(t *testing.T) { + t.Parallel() + + readme := loadTextFile(t, "README.md") + runbookDoc := loadTextFile(t, filepath.Join("docs", "runbook.md")) + flowsDoc := loadTextFile(t, filepath.Join("docs", "flows.md")) + docsIndex := loadTextFile(t, filepath.Join("docs", "README.md")) + + require.Contains(t, docsIndex, "- [Operator runbook](runbook.md)") + + normalizedReadme := normalizeWhitespace(readme) + normalizedRunbookDoc := normalizeWhitespace(runbookDoc) + require.Contains(t, normalizedReadme, normalizeWhitespace("notification.intent.outcomes")) + require.Contains(t, normalizedReadme, normalizeWhitespace("notification.route_schedule.depth")) + require.Contains(t, normalizedReadme, normalizeWhitespace("notification.intent_stream.oldest_unprocessed_age_ms")) + require.Contains(t, normalizedRunbookDoc, normalizeWhitespace("notification.route_schedule.depth")) + require.Contains(t, normalizedRunbookDoc, normalizeWhitespace("notification.intent_stream.oldest_unprocessed_age_ms")) + require.Contains(t, normalizeWhitespace(readme), normalizeWhitespace("new `idempotency_key`")) + require.Contains(t, normalizeWhitespace(runbookDoc), normalizeWhitespace("new producer-owned `idempotency_key`")) + require.Contains(t, normalizeWhitespace(readme), normalizeWhitespace("there is still no `/metrics` route")) + require.Contains(t, normalizeWhitespace(runbookDoc), normalizeWhitespace("there is no `/metrics` route")) + require.Contains(t, normalizeWhitespace(readme), normalizeWhitespace("Metrics 
intentionally avoid high-cardinality attributes such as `user_id`, email address, `notification_id`, `route_id`, and `idempotency_key`")) + require.Contains(t, normalizeWhitespace(flowsDoc), normalizeWhitespace("A dead-lettered route never rolls back or invalidates a sibling route that already reached `published`")) + require.Contains(t, normalizeWhitespace(readme), normalizeWhitespace("Manual Redis mutation of an existing route record or `notification:route_schedule` is not a supported replay workflow.")) +} diff --git a/notification/openapi.yaml b/notification/openapi.yaml new file mode 100644 index 0000000..1cdd97c --- /dev/null +++ b/notification/openapi.yaml @@ -0,0 +1,106 @@ +openapi: 3.1.0 +info: + title: Notification Service Probe API + version: 1.0.0 + description: | + Private process-local probe API for Notification Service. + + This contract covers only liveness and readiness checks. It does not define + an operator API and does not expose notification ingress. + + Undefined routes use the standard `404` response. Unsupported methods on + defined probe routes use the standard `405` response. +servers: + - url: http://127.0.0.1:8092 + description: Default local internal probe listener. +tags: + - name: probes + description: Private liveness and readiness probes. +paths: + /healthz: + get: + tags: + - probes + operationId: getNotificationHealth + summary: Check process liveness. + responses: + "200": + description: The process is alive. + content: + application/json: + schema: + $ref: "#/components/schemas/HealthStatus" + examples: + ok: + value: + status: ok + "405": + $ref: "#/components/responses/MethodNotAllowed" + /readyz: + get: + tags: + - probes + operationId: getNotificationReadiness + summary: Check process readiness after successful startup. + description: | + Readiness is process-local after startup and does not perform a live + Redis ping for every request. 
+ responses: + "200": + description: The process completed startup and is ready to serve. + content: + application/json: + schema: + $ref: "#/components/schemas/ReadyStatus" + examples: + ready: + value: + status: ready + "405": + $ref: "#/components/responses/MethodNotAllowed" +components: + schemas: + HealthStatus: + type: object + additionalProperties: false + required: + - status + properties: + status: + type: string + enum: + - ok + ReadyStatus: + type: object + additionalProperties: false + required: + - status + properties: + status: + type: string + enum: + - ready + responses: + NotFound: + description: Route is not defined by the probe API. + content: + text/plain: + schema: + type: string + examples: + notFound: + value: "404 page not found\n" + MethodNotAllowed: + description: HTTP method is not allowed for the route. + headers: + Allow: + schema: + type: string + description: Methods accepted by the route. + content: + text/plain: + schema: + type: string + examples: + methodNotAllowed: + value: "Method Not Allowed\n" diff --git a/notification/producer_integration_contract_test.go b/notification/producer_integration_contract_test.go new file mode 100644 index 0000000..43f128c --- /dev/null +++ b/notification/producer_integration_contract_test.go @@ -0,0 +1,167 @@ +package notification + +import ( + "path/filepath" + "testing" + "time" + + "galaxy/notification/internal/api/intentstream" + "galaxy/notificationintent" + + "github.com/stretchr/testify/require" +) + +func TestNotificationProducerIntegrationDocsStayInSync(t *testing.T) { + t.Parallel() + + readme := loadTextFile(t, "README.md") + runtimeDoc := loadTextFile(t, filepath.Join("docs", "runtime.md")) + examplesDoc := loadTextFile(t, filepath.Join("docs", "examples.md")) + docsIndex := loadTextFile(t, filepath.Join("docs", "README.md")) + architecture := loadTextFile(t, filepath.Join("..", "ARCHITECTURE.md")) + geoProfileReadme := loadTextFile(t, filepath.Join("..", "geoprofile", "README.md")) + 
+ require.Contains(t, docsIndex, "- [Runtime and components](runtime.md)") + require.Contains(t, docsIndex, "- [Configuration and contract examples](examples.md)") + + for _, content := range []string{readme, runtimeDoc, architecture, geoProfileReadme} { + normalizedContent := normalizeWhitespace(content) + require.Contains(t, normalizedContent, normalizeWhitespace("`galaxy/notificationintent`")) + require.Contains(t, normalizedContent, normalizeWhitespace("notification degradation")) + } + require.Contains(t, normalizeWhitespace(readme), normalizeWhitespace("producer publication uses plain `XADD` without stream trimming or hidden helper retries")) + require.Contains(t, normalizeWhitespace(examplesDoc), normalizeWhitespace("redis-cli XADD notification:intents")) +} + +func TestNotificationProducerIntentsDecodeThroughServiceContract(t *testing.T) { + t.Parallel() + + for _, original := range compatibleProducerIntents(t) { + original := original + t.Run(original.NotificationType.String()+"/"+original.AudienceKind.String(), func(t *testing.T) { + t.Parallel() + + values, err := original.Values() + require.NoError(t, err) + + decoded, err := intentstream.DecodeIntent(values) + require.NoError(t, err) + require.Equal(t, original.NotificationType, decoded.NotificationType) + require.Equal(t, original.Producer, decoded.Producer) + require.Equal(t, original.AudienceKind, decoded.AudienceKind) + require.Equal(t, original.RecipientUserIDs, decoded.RecipientUserIDs) + require.Equal(t, original.IdempotencyKey, decoded.IdempotencyKey) + require.Equal(t, original.OccurredAt, decoded.OccurredAt) + require.JSONEq(t, original.PayloadJSON, decoded.PayloadJSON) + }) + } +} + +func compatibleProducerIntents(t *testing.T) []notificationintent.Intent { + t.Helper() + + metadata := notificationintent.Metadata{ + IdempotencyKey: "idempotency-1", + OccurredAt: time.UnixMilli(1775121700000), + } + + builders := []func() (notificationintent.Intent, error){ + func() 
(notificationintent.Intent, error) { + return notificationintent.NewGeoReviewRecommendedIntent(metadata, notificationintent.GeoReviewRecommendedPayload{ + UserID: "user-1", + UserEmail: "pilot@example.com", + ObservedCountry: "DE", + UsualConnectionCountry: "PL", + ReviewReason: "country_mismatch", + }) + }, + func() (notificationintent.Intent, error) { + return notificationintent.NewGameTurnReadyIntent(metadata, []string{"user-1", "user-2"}, notificationintent.GameTurnReadyPayload{ + GameID: "game-1", + GameName: "Nebula Clash", + TurnNumber: 54, + }) + }, + func() (notificationintent.Intent, error) { + return notificationintent.NewGameFinishedIntent(metadata, []string{"user-1", "user-2"}, notificationintent.GameFinishedPayload{ + GameID: "game-1", + GameName: "Nebula Clash", + FinalTurnNumber: 55, + }) + }, + func() (notificationintent.Intent, error) { + return notificationintent.NewGameGenerationFailedIntent(metadata, notificationintent.GameGenerationFailedPayload{ + GameID: "game-1", + GameName: "Nebula Clash", + FailureReason: "engine_timeout", + }) + }, + func() (notificationintent.Intent, error) { + return notificationintent.NewLobbyRuntimePausedAfterStartIntent(metadata, notificationintent.LobbyRuntimePausedAfterStartPayload{ + GameID: "game-1", + GameName: "Nebula Clash", + }) + }, + func() (notificationintent.Intent, error) { + return notificationintent.NewPrivateLobbyApplicationSubmittedIntent(metadata, "owner-1", notificationintent.LobbyApplicationSubmittedPayload{ + GameID: "game-1", + GameName: "Nebula Clash", + ApplicantUserID: "user-2", + ApplicantName: "Nova Pilot", + }) + }, + func() (notificationintent.Intent, error) { + return notificationintent.NewPublicLobbyApplicationSubmittedIntent(metadata, notificationintent.LobbyApplicationSubmittedPayload{ + GameID: "game-1", + GameName: "Nebula Clash", + ApplicantUserID: "user-2", + ApplicantName: "Nova Pilot", + }) + }, + func() (notificationintent.Intent, error) { + return 
notificationintent.NewLobbyMembershipApprovedIntent(metadata, "applicant-1", notificationintent.LobbyMembershipApprovedPayload{ + GameID: "game-1", + GameName: "Nebula Clash", + }) + }, + func() (notificationintent.Intent, error) { + return notificationintent.NewLobbyMembershipRejectedIntent(metadata, "applicant-1", notificationintent.LobbyMembershipRejectedPayload{ + GameID: "game-1", + GameName: "Nebula Clash", + }) + }, + func() (notificationintent.Intent, error) { + return notificationintent.NewLobbyInviteCreatedIntent(metadata, "invited-1", notificationintent.LobbyInviteCreatedPayload{ + GameID: "game-1", + GameName: "Nebula Clash", + InviterUserID: "owner-1", + InviterName: "Owner Pilot", + }) + }, + func() (notificationintent.Intent, error) { + return notificationintent.NewLobbyInviteRedeemedIntent(metadata, "owner-1", notificationintent.LobbyInviteRedeemedPayload{ + GameID: "game-1", + GameName: "Nebula Clash", + InviteeUserID: "invitee-1", + InviteeName: "Nova Pilot", + }) + }, + func() (notificationintent.Intent, error) { + return notificationintent.NewLobbyInviteExpiredIntent(metadata, "owner-1", notificationintent.LobbyInviteExpiredPayload{ + GameID: "game-1", + GameName: "Nebula Clash", + InviteeUserID: "invitee-1", + InviteeName: "Nova Pilot", + }) + }, + } + + intents := make([]notificationintent.Intent, 0, len(builders)) + for _, build := range builders { + intent, err := build() + require.NoError(t, err) + intents = append(intents, intent) + } + + return intents +} diff --git a/notification/push_payload_contract_test.go b/notification/push_payload_contract_test.go new file mode 100644 index 0000000..8dce98f --- /dev/null +++ b/notification/push_payload_contract_test.go @@ -0,0 +1,161 @@ +package notification + +import ( + "os" + "path/filepath" + "regexp" + "sort" + "testing" + + "github.com/stretchr/testify/require" +) + +const expectedPushPayloadMappingTable = `| ` + "`notification_type`" + ` | FlatBuffers table | Payload fields | +| --- | --- | 
--- | +| ` + "`game.turn.ready`" + ` | ` + "`notification.GameTurnReadyEvent`" + ` | ` + "`game_id`" + `, ` + "`turn_number`" + ` | +| ` + "`game.finished`" + ` | ` + "`notification.GameFinishedEvent`" + ` | ` + "`game_id`" + `, ` + "`final_turn_number`" + ` | +| ` + "`lobby.application.submitted`" + ` | ` + "`notification.LobbyApplicationSubmittedEvent`" + ` | ` + "`game_id`" + `, ` + "`applicant_user_id`" + ` | +| ` + "`lobby.membership.approved`" + ` | ` + "`notification.LobbyMembershipApprovedEvent`" + ` | ` + "`game_id`" + ` | +| ` + "`lobby.membership.rejected`" + ` | ` + "`notification.LobbyMembershipRejectedEvent`" + ` | ` + "`game_id`" + ` | +| ` + "`lobby.invite.created`" + ` | ` + "`notification.LobbyInviteCreatedEvent`" + ` | ` + "`game_id`" + `, ` + "`inviter_user_id`" + ` | +| ` + "`lobby.invite.redeemed`" + ` | ` + "`notification.LobbyInviteRedeemedEvent`" + ` | ` + "`game_id`" + `, ` + "`invitee_user_id`" + ` |` + +var expectedPushPayloadSchemaTableNames = []string{ + "GameTurnReadyEvent", + "GameFinishedEvent", + "LobbyApplicationSubmittedEvent", + "LobbyMembershipApprovedEvent", + "LobbyMembershipRejectedEvent", + "LobbyInviteCreatedEvent", + "LobbyInviteRedeemedEvent", +} + +var expectedPushPayloadSchemaFields = map[string][]string{ + "GameTurnReadyEvent": { + "game_id:string;", + "turn_number:int64;", + }, + "GameFinishedEvent": { + "game_id:string;", + "final_turn_number:int64;", + }, + "LobbyApplicationSubmittedEvent": { + "game_id:string;", + "applicant_user_id:string;", + }, + "LobbyMembershipApprovedEvent": { + "game_id:string;", + }, + "LobbyMembershipRejectedEvent": { + "game_id:string;", + }, + "LobbyInviteCreatedEvent": { + "game_id:string;", + "inviter_user_id:string;", + }, + "LobbyInviteRedeemedEvent": { + "game_id:string;", + "invitee_user_id:string;", + }, +} + +var expectedPushPayloadGeneratedFiles = []string{ + "GameFinishedEvent.go", + "GameTurnReadyEvent.go", + "LobbyApplicationSubmittedEvent.go", + 
"LobbyInviteCreatedEvent.go", + "LobbyInviteRedeemedEvent.go", + "LobbyMembershipApprovedEvent.go", + "LobbyMembershipRejectedEvent.go", +} + +var expectedPushPayloadDocumentationSnippets = []string{ + "Only the seven user-facing push notification types above are represented in `notification.fbs`.", + "`geo.review_recommended`, `game.generation_failed`, `lobby.runtime_paused_after_start`, and `lobby.invite.expired` remain outside this schema because they are email-only in v1.", + "`notification_type` alone determines the concrete FlatBuffers table.", + "No extra envelope or FlatBuffers `union` is added in v1.", + "The push payload must stay lightweight and must not attempt to mirror full game, lobby, or profile state.", + "`game_name`, human-readable user names, and other full business-state fields stay out of the push schema.", +} + +func TestNotificationPushPayloadSchemaFreezesTablesAndFields(t *testing.T) { + t.Parallel() + + schema := loadTextFile(t, filepath.Join("..", "pkg", "schema", "fbs", "notification.fbs")) + require.Contains(t, schema, "namespace notification;") + require.Contains(t, schema, "root_type GameTurnReadyEvent;") + require.NotContains(t, schema, "union ") + + tablePattern := regexp.MustCompile(`(?m)^table ([A-Za-z0-9_]+) \{$`) + matches := tablePattern.FindAllStringSubmatch(schema, -1) + actualTableNames := make([]string, 0, len(matches)) + for _, match := range matches { + actualTableNames = append(actualTableNames, match[1]) + } + + require.Equal(t, expectedPushPayloadSchemaTableNames, actualTableNames) + + for _, tableName := range expectedPushPayloadSchemaTableNames { + tableBody := extractFlatBuffersTableBody(t, schema, tableName) + for _, field := range expectedPushPayloadSchemaFields[tableName] { + require.Contains(t, tableBody, field) + } + } +} + +func TestNotificationPushPayloadGeneratedBindingsStayInSync(t *testing.T) { + t.Parallel() + + dirPath := filepath.Join(moduleRoot(t), "..", "pkg", "schema", "fbs", "notification") + 
entries, err := os.ReadDir(dirPath) + require.NoError(t, err) + + actualFiles := make([]string, 0, len(entries)) + for _, entry := range entries { + require.Falsef(t, entry.IsDir(), "unexpected directory in generated bindings: %s", entry.Name()) + actualFiles = append(actualFiles, entry.Name()) + + fileContents := loadTextFile(t, filepath.Join("..", "pkg", "schema", "fbs", "notification", entry.Name())) + require.Contains(t, fileContents, "// Code generated by the FlatBuffers compiler. DO NOT EDIT.") + require.Contains(t, fileContents, "package notification") + } + + sort.Strings(actualFiles) + require.Equal(t, expectedPushPayloadGeneratedFiles, actualFiles) +} + +func TestNotificationPushPayloadDocsStayInSync(t *testing.T) { + t.Parallel() + + readme := loadTextFile(t, "README.md") + flowsDoc := loadTextFile(t, filepath.Join("docs", "flows.md")) + examplesDoc := loadTextFile(t, filepath.Join("docs", "examples.md")) + docsIndex := loadTextFile(t, filepath.Join("docs", "README.md")) + normalizedReadme := normalizeWhitespace(readme) + normalizedFlowsDoc := normalizeWhitespace(flowsDoc) + normalizedExamplesDoc := normalizeWhitespace(examplesDoc) + + require.Contains(t, readme, expectedPushPayloadMappingTable) + require.Contains(t, docsIndex, "- [Main flows](flows.md)") + require.Contains(t, docsIndex, "- [Configuration and contract examples](examples.md)") + + for _, snippet := range expectedPushPayloadDocumentationSnippets { + normalizedSnippet := normalizeWhitespace(snippet) + require.Contains(t, normalizedReadme, normalizedSnippet) + } + + require.Contains(t, normalizedFlowsDoc, normalizeWhitespace("encode FlatBuffers notification payload")) + require.Contains(t, normalizedExamplesDoc, normalizeWhitespace("payload_bytes ''")) +} + +func extractFlatBuffersTableBody(t *testing.T, schema, tableName string) string { + t.Helper() + + pattern := regexp.MustCompile(`(?s)table ` + regexp.QuoteMeta(tableName) + ` \{(.*?)\}`) + match := pattern.FindStringSubmatch(schema) + 
if match == nil { + require.FailNowf(t, "test failed", "table %s not found in schema", tableName) + } + + return match[1] +} diff --git a/notification/redis_state_contract_test.go b/notification/redis_state_contract_test.go new file mode 100644 index 0000000..6b8948f --- /dev/null +++ b/notification/redis_state_contract_test.go @@ -0,0 +1,87 @@ +package notification + +import ( + "path/filepath" + "testing" + + "github.com/stretchr/testify/require" +) + +const expectedNotificationRedisKeyTable = `| Logical artifact | Redis key | +| --- | --- | +| ` + "`notification_record`" + ` | ` + "`notification:records:`" + ` | +| ` + "`notification_route`" + ` | ` + "`notification:routes::`" + ` | +| temporary route lease | ` + "`notification:route_leases::`" + ` | +| ` + "`notification_idempotency_record`" + ` | ` + "`notification:idempotency::`" + ` | +| ` + "`notification_dead_letter_entry`" + ` | ` + "`notification:dead_letters::`" + ` | +| malformed intent record | ` + "`notification:malformed_intents:`" + ` | +| stream offset record | ` + "`notification:stream_offsets:`" + ` | +| ingress stream | ` + "`notification:intents`" + ` | +| route schedule sorted set | ` + "`notification:route_schedule`" + ` |` + +const expectedNotificationRedisRecordFieldsTable = `| Record | Frozen fields | +| --- | --- | +| ` + "`notification_record`" + ` | ` + "`notification_id`" + `, ` + "`notification_type`" + `, ` + "`producer`" + `, ` + "`audience_kind`" + `, normalized ` + "`recipient_user_ids`" + `, normalized ` + "`payload_json`" + `, ` + "`idempotency_key`" + `, ` + "`request_fingerprint`" + `, optional ` + "`request_id`" + `, optional ` + "`trace_id`" + `, ` + "`occurred_at_ms`" + `, ` + "`accepted_at_ms`" + `, ` + "`updated_at_ms`" + ` | +| ` + "`notification_route`" + ` | ` + "`notification_id`" + `, ` + "`route_id`" + `, ` + "`channel`" + `, ` + "`recipient_ref`" + `, ` + "`status`" + `, ` + "`attempt_count`" + `, ` + "`max_attempts`" + `, ` + "`next_attempt_at_ms`" + `, optional 
` + "`resolved_email`" + `, optional ` + "`resolved_locale`" + `, optional ` + "`last_error_classification`" + `, optional ` + "`last_error_message`" + `, optional ` + "`last_error_at_ms`" + `, ` + "`created_at_ms`" + `, ` + "`updated_at_ms`" + `, optional ` + "`published_at_ms`" + `, optional ` + "`dead_lettered_at_ms`" + `, optional ` + "`skipped_at_ms`" + ` | +| ` + "`notification_idempotency_record`" + ` | ` + "`producer`" + `, ` + "`idempotency_key`" + `, ` + "`notification_id`" + `, ` + "`request_fingerprint`" + `, ` + "`created_at_ms`" + `, ` + "`expires_at_ms`" + ` | +| ` + "`notification_dead_letter_entry`" + ` | ` + "`notification_id`" + `, ` + "`route_id`" + `, ` + "`channel`" + `, ` + "`recipient_ref`" + `, ` + "`final_attempt_count`" + `, ` + "`max_attempts`" + `, ` + "`failure_classification`" + `, ` + "`failure_message`" + `, ` + "`created_at_ms`" + `, optional ` + "`recovery_hint`" + ` | +| malformed intent record | ` + "`stream_entry_id`" + `, optional ` + "`notification_type`" + `, optional ` + "`producer`" + `, optional ` + "`idempotency_key`" + `, ` + "`failure_code`" + `, ` + "`failure_message`" + `, ` + "`raw_fields_json`" + `, ` + "`recorded_at_ms`" + ` | +| stream offset record | ` + "`stream`" + `, ` + "`last_processed_entry_id`" + `, ` + "`updated_at_ms`" + ` |` + +var expectedNotificationRedisDocumentationSnippets = []string{ + "Each route represents exactly one `(channel, recipient_ref)` pair.", + "every derived `recipient_ref` receives one `push` route slot and one `email` route slot, except that an empty administrator email list materializes one synthetic `config:` recipient slot with only a skipped `email` route", + "a route slot whose channel is outside the notification type channel matrix is materialized as `skipped`", + "`recipient_ref` is `user:` for user-targeted routes", + "`recipient_ref` is `email:` for configured administrator email routes", + "synthetic recipient slot `config:` with one skipped `email` route so the 
configuration gap remains durable and operator-visible", + "`route_id` is mandatory and equals `:`", + "durable records are stored as strict JSON blobs", + "timestamps are stored in Unix milliseconds", + "dynamic Redis key segments are base64url-encoded", + "`notification:route_schedule` is one shared sorted set for both `push` and `email`", + "`notification_record.payload_json` stores the canonical normalized JSON string used for idempotency fingerprinting", + "temporary route lease keys store one opaque worker token and use `NOTIFICATION_ROUTE_LEASE_TTL`; they are service-local coordination state rather than durable records", + "score = `next_attempt_at_ms` and member = full Redis route key with encoded dynamic segments", + "`status=pending` and `next_attempt_at_ms = accepted_at_ms`", + "`failed` routes remain scheduled for retry", + "`published`, `dead_letter`, and `skipped` are absent from the schedule", + "only the current lease holder may finalize one due publication attempt", + "after failed attempt `N`, the next delay is `clamp(NOTIFICATION_ROUTE_BACKOFF_MIN * 2^(N-1), NOTIFICATION_ROUTE_BACKOFF_MIN, NOTIFICATION_ROUTE_BACKOFF_MAX)`", + "no jitter is added to the retry delay", + "creates `notification_dead_letter_entry`, and is removed from `notification:route_schedule`", + "`notification_record` and `notification_route` use `NOTIFICATION_RECORD_TTL`", + "`notification_idempotency_record` uses `NOTIFICATION_IDEMPOTENCY_TTL`", + "`notification_dead_letter_entry` and malformed intent records use `NOTIFICATION_DEAD_LETTER_TTL`", + "stream offset records do not use TTL", +} + +func TestNotificationRedisDocsStayInSync(t *testing.T) { + t.Parallel() + + readme := loadTextFile(t, "README.md") + runtimeDoc := loadTextFile(t, filepath.Join("docs", "runtime.md")) + flowsDoc := loadTextFile(t, filepath.Join("docs", "flows.md")) + runbookDoc := loadTextFile(t, filepath.Join("docs", "runbook.md")) + docsIndex := loadTextFile(t, filepath.Join("docs", "README.md")) + 
normalizedReadme := normalizeWhitespace(readme) + normalizedRuntimeDoc := normalizeWhitespace(runtimeDoc) + normalizedFlowsDoc := normalizeWhitespace(flowsDoc) + normalizedRunbookDoc := normalizeWhitespace(runbookDoc) + + require.Contains(t, docsIndex, "- [Runtime and components](runtime.md)") + require.Contains(t, docsIndex, "- [Main flows](flows.md)") + require.Contains(t, docsIndex, "- [Operator runbook](runbook.md)") + + require.Contains(t, readme, expectedNotificationRedisKeyTable) + require.Contains(t, readme, expectedNotificationRedisRecordFieldsTable) + + for _, snippet := range expectedNotificationRedisDocumentationSnippets { + normalizedSnippet := normalizeWhitespace(snippet) + require.Contains(t, normalizedReadme, normalizedSnippet) + } + + require.Contains(t, normalizedRuntimeDoc, normalizeWhitespace("Redis client with startup connectivity check")) + require.Contains(t, normalizedFlowsDoc, normalizeWhitespace("Retry and Dead Letter")) + require.Contains(t, normalizedRunbookDoc, normalizeWhitespace("Route Schedule Backlog Grows")) +} diff --git a/notification/route_publication_contract_test.go b/notification/route_publication_contract_test.go new file mode 100644 index 0000000..332bfc5 --- /dev/null +++ b/notification/route_publication_contract_test.go @@ -0,0 +1,71 @@ +package notification + +import ( + "path/filepath" + "testing" + + "github.com/stretchr/testify/require" +) + +var expectedNotificationPushPublicationDocumentationSnippets = []string{ + "one long-lived `push` route publisher", + "the `push` publisher claims only routes whose `route_id` starts with `push:`", + "`Gateway` publication uses `XADD MAXLEN ~` with `NOTIFICATION_GATEWAY_CLIENT_EVENTS_STREAM_MAX_LEN`", + "`event_id` equals `/`", + "`device_session_id` is intentionally omitted so `Gateway` fans the event out to every active stream of that user", +} + +var expectedNotificationMailPublicationDocumentationSnippets = []string{ + "one long-lived `email` route publisher", + 
"template-mode `email` publication toward `Mail Service`", + "`Mail Service` publication uses plain `XADD` with no stream trimming", + "`delivery_id` equals `/`", + "`idempotency_key` equals `notification:/`", + "`requested_at_ms` equals `accepted_at_ms`", + "`payload_json.to` contains exactly one resolved recipient email", + "`payload_json.cc`, `payload_json.bcc`, `payload_json.reply_to`, and `payload_json.attachments` are empty arrays in v1", + "`email` publication failures are classified minimally as `payload_encoding_failed` and `mail_stream_publish_failed`", +} + +func TestNotificationRoutePublicationDocsStayInSync(t *testing.T) { + t.Parallel() + + readme := loadTextFile(t, "README.md") + flowsDoc := loadTextFile(t, filepath.Join("docs", "flows.md")) + runtimeDoc := loadTextFile(t, filepath.Join("docs", "runtime.md")) + docsIndex := loadTextFile(t, filepath.Join("docs", "README.md")) + mailReadme := loadTextFile(t, filepath.Join("..", "mail", "README.md")) + mailAsyncAPI := loadTextFile(t, filepath.Join("..", "mail", "api", "delivery-commands-asyncapi.yaml")) + normalizedReadme := normalizeWhitespace(readme) + normalizedFlowsDoc := normalizeWhitespace(flowsDoc) + normalizedRuntimeDoc := normalizeWhitespace(runtimeDoc) + normalizedMailReadme := normalizeWhitespace(mailReadme) + + require.Contains(t, docsIndex, "- [Main flows](flows.md)") + require.Contains(t, docsIndex, "- [Runtime and components](runtime.md)") + + for _, snippet := range expectedNotificationPushPublicationDocumentationSnippets { + normalizedSnippet := normalizeWhitespace(snippet) + require.Contains(t, normalizedReadme, normalizedSnippet) + } + + for _, snippet := range expectedNotificationMailPublicationDocumentationSnippets { + normalizedSnippet := normalizeWhitespace(snippet) + require.Contains(t, normalizedReadme, normalizedSnippet) + } + + require.Contains(t, normalizedRuntimeDoc, normalizeWhitespace("processes only scheduled route IDs beginning with `push:`")) + require.Contains(t, 
normalizedRuntimeDoc, normalizeWhitespace("processes only scheduled route IDs beginning with `email:`")) + require.Contains(t, normalizedFlowsDoc, normalizeWhitespace("XADD MAXLEN ~ gateway client-event stream")) + require.Contains(t, normalizedFlowsDoc, normalizeWhitespace("XADD mail:delivery_commands")) + require.Contains(t, normalizedFlowsDoc, normalizeWhitespace("`payload_mode=template`")) + require.Contains(t, normalizedReadme, normalizeWhitespace("`notification:route_leases::`")) + require.Contains(t, normalizedReadme, normalizeWhitespace("`payload_encoding_failed`")) + require.Contains(t, normalizedReadme, normalizeWhitespace("`gateway_stream_publish_failed`")) + require.Contains(t, normalizedReadme, normalizeWhitespace("`mail_stream_publish_failed`")) + + require.Contains(t, normalizedMailReadme, normalizeWhitespace("- `requested_at_ms`")) + require.Contains(t, normalizedMailReadme, normalizeWhitespace("`requested_at_ms` stores the publisher-side original request timestamp")) + require.Contains(t, mailAsyncAPI, "requested_at_ms:") + require.Contains(t, mailAsyncAPI, "payload_mode:") +} diff --git a/notification/runtime_contract_test.go b/notification/runtime_contract_test.go new file mode 100644 index 0000000..41b9a72 --- /dev/null +++ b/notification/runtime_contract_test.go @@ -0,0 +1,81 @@ +package notification + +import ( + "path/filepath" + "testing" + + "github.com/stretchr/testify/require" + "gopkg.in/yaml.v3" +) + +var expectedNotificationRuntimeDocumentationSnippets = []string{ + "`GET /healthz` returns `{\"status\":\"ok\"}`", + "`GET /readyz` returns `{\"status\":\"ready\"}`", + "`readyz` is process-local after successful startup and does not perform a live Redis ping per request", + "there is no `/metrics` route", + "`NOTIFICATION_INTERNAL_HTTP_ADDR` with default `:8092`", + "`NOTIFICATION_INTERNAL_HTTP_READ_HEADER_TIMEOUT` with default `2s`", + "`NOTIFICATION_INTERNAL_HTTP_READ_TIMEOUT` with default `10s`", + 
"`NOTIFICATION_INTERNAL_HTTP_IDLE_TIMEOUT` with default `1m`", + "`NOTIFICATION_USER_SERVICE_TIMEOUT` with default `1s`", +} + +func TestNotificationRuntimeDocsStayInSync(t *testing.T) { + t.Parallel() + + readme := loadTextFile(t, "README.md") + runtimeDoc := loadTextFile(t, filepath.Join("docs", "runtime.md")) + docsIndex := loadTextFile(t, filepath.Join("docs", "README.md")) + architecture := loadTextFile(t, filepath.Join("..", "ARCHITECTURE.md")) + testingDoc := loadTextFile(t, filepath.Join("..", "TESTING.md")) + normalizedReadme := normalizeWhitespace(readme) + normalizedRuntimeDoc := normalizeWhitespace(runtimeDoc) + + require.Contains(t, docsIndex, "- [Runtime and components](runtime.md)") + require.Contains(t, architecture, "private probe HTTP listener with") + require.Contains(t, testingDoc, "* Runtime-skeleton tests:") + require.Contains(t, testingDoc, "* `GET /healthz`") + require.Contains(t, testingDoc, "* `GET /readyz`") + require.Contains(t, testingDoc, "* no `/metrics`") + require.Contains(t, runtimeDoc, "Redis client with startup connectivity check") + require.Contains(t, runtimeDoc, "OpenTelemetry traces and metrics exporters") + + for _, snippet := range expectedNotificationRuntimeDocumentationSnippets { + normalizedSnippet := normalizeWhitespace(snippet) + require.Contains(t, normalizedReadme, normalizedSnippet) + require.Contains(t, normalizedRuntimeDoc, normalizedSnippet) + } +} + +func TestNotificationProbeOpenAPIContractDocumentsImplementedSurface(t *testing.T) { + t.Parallel() + + specText := loadTextFile(t, "openapi.yaml") + + var spec map[string]any + err := yaml.Unmarshal([]byte(specText), &spec) + require.NoError(t, err) + + require.Equal(t, "3.1.0", getStringValue(t, spec, "openapi")) + require.Equal(t, "Notification Service Probe API", getStringValue(t, getMapValue(t, spec, "info"), "title")) + + paths := getMapValue(t, spec, "paths") + require.ElementsMatch(t, []string{"/healthz", "/readyz"}, mapKeys(paths)) + require.NotContains(t, 
paths, "/metrics") + + healthz := getMapValue(t, paths, "/healthz") + readyz := getMapValue(t, paths, "/readyz") + for _, path := range []map[string]any{healthz, readyz} { + require.Contains(t, path, "get") + require.NotContains(t, path, "post") + + responses := getMapValue(t, getMapValue(t, path, "get"), "responses") + require.Contains(t, responses, "200") + require.Equal(t, "#/components/responses/MethodNotAllowed", getStringValue(t, getMapValue(t, responses, "405"), "$ref")) + } + + components := getMapValue(t, spec, "components") + responses := getMapValue(t, components, "responses") + require.Contains(t, responses, "NotFound") + require.Contains(t, responses, "MethodNotAllowed") +} diff --git a/notification/user_enrichment_contract_test.go b/notification/user_enrichment_contract_test.go new file mode 100644 index 0000000..ef31545 --- /dev/null +++ b/notification/user_enrichment_contract_test.go @@ -0,0 +1,43 @@ +package notification + +import ( + "path/filepath" + "testing" + + "github.com/stretchr/testify/require" +) + +var expectedNotificationUserEnrichmentDocumentationSnippets = []string{ + "one trusted `User Service` HTTP enrichment client", + "user-targeted route enrichment during intent acceptance before durable write", + "`404 subject_not_found` from `User Service` is recorded under malformed-intent storage with `failure_code=recipient_not_found`", + "temporary `User Service` lookup failures stop the consumer before stream-offset advance", + "current implemented support is exactly one locale: `en`", + "no intermediate locale reduction is used in v1", +} + +func TestNotificationUserEnrichmentDocsStayInSync(t *testing.T) { + t.Parallel() + + readme := loadTextFile(t, "README.md") + flowsDoc := loadTextFile(t, filepath.Join("docs", "flows.md")) + docsIndex := loadTextFile(t, filepath.Join("docs", "README.md")) + architecture := loadTextFile(t, filepath.Join("..", "ARCHITECTURE.md")) + normalizedReadme := normalizeWhitespace(readme) + normalizedFlowsDoc := 
normalizeWhitespace(flowsDoc) + normalizedArchitecture := normalizeWhitespace(architecture) + + require.Contains(t, docsIndex, "- [Main flows](flows.md)") + require.Contains(t, normalizedArchitecture, normalizeWhitespace("Acceptance of a user-targeted notification intent is complete only after every")) + require.Contains(t, normalizedArchitecture, normalizeWhitespace("unresolved user ids are treated as producer input defects")) + + for _, snippet := range expectedNotificationUserEnrichmentDocumentationSnippets { + normalizedSnippet := normalizeWhitespace(snippet) + require.Contains(t, normalizedReadme, normalizedSnippet) + } + + require.Contains(t, normalizedFlowsDoc, normalizeWhitespace("User-targeted routes are enriched before durable route write")) + require.Contains(t, normalizedFlowsDoc, normalizeWhitespace("supported resolved locale is exactly `en`")) + require.Contains(t, normalizedFlowsDoc, normalizeWhitespace("record malformed intent recipient_not_found")) + require.Contains(t, normalizedFlowsDoc, normalizeWhitespace("stop before stream-offset advance")) +} diff --git a/pkg/notificationintent/go.mod b/pkg/notificationintent/go.mod new file mode 100644 index 0000000..1661aae --- /dev/null +++ b/pkg/notificationintent/go.mod @@ -0,0 +1,24 @@ +module galaxy/notificationintent + +go 1.26.1 + +require ( + github.com/alicebob/miniredis/v2 v2.37.0 + github.com/redis/go-redis/v9 v9.18.0 + github.com/stretchr/testify v1.11.1 +) + +require ( + github.com/cespare/xxhash/v2 v2.3.0 // indirect + github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect + github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect + github.com/klauspost/cpuid/v2 v2.3.0 // indirect + github.com/kr/pretty v0.3.1 // indirect + github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect + github.com/rogpeppe/go-internal v1.14.1 // indirect + github.com/yuin/gopher-lua v1.1.1 // indirect + go.uber.org/atomic v1.11.0 // indirect + 
golang.org/x/sys v0.42.0 // indirect + gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect +) diff --git a/pkg/notificationintent/go.sum b/pkg/notificationintent/go.sum new file mode 100644 index 0000000..11f0086 --- /dev/null +++ b/pkg/notificationintent/go.sum @@ -0,0 +1,31 @@ +github.com/alicebob/miniredis/v2 v2.37.0 h1:RheObYW32G1aiJIj81XVt78ZHJpHonHLHW7OLIshq68= +github.com/alicebob/miniredis/v2 v2.37.0/go.mod h1:TcL7YfarKPGDAthEtl5NBeHZfeUQj6OXMm/+iu5cLMM= +github.com/bsm/ginkgo/v2 v2.12.0 h1:Ny8MWAHyOepLGlLKYmXG4IEkioBysk6GpaRTLC8zwWs= +github.com/bsm/ginkgo/v2 v2.12.0/go.mod h1:SwYbGRRDovPVboqFv0tPTcG1sN61LM1Z4ARdbAV9g4c= +github.com/bsm/gomega v1.27.10 h1:yeMWxP2pV2fG3FgAODIY8EiRE3dy0aeFYt4l7wh6yKA= +github.com/bsm/gomega v1.27.10/go.mod h1:JyEr/xRbxbtgWNi8tIEVPUYZ5Dzef52k01W3YH0H+O0= +github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= +github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= +github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f h1:lO4WD4F/rVNCu3HqELle0jiPLLBs70cWOduZpkS1E78= +github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f/go.mod h1:cuUVRXasLTGF7a8hSLbxyZXjz+1KgoB3wDUb6vlszIc= +github.com/klauspost/cpuid/v2 v2.3.0 h1:S4CRMLnYUhGeDFDqkGriYKdfoFlDnMtqTiI/sFzhA9Y= +github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U= +github.com/redis/go-redis/v9 v9.18.0 h1:pMkxYPkEbMPwRdenAzUNyFNrDgHx9U+DrBabWNfSRQs= +github.com/redis/go-redis/v9 v9.18.0/go.mod h1:k3ufPphLU5YXwNTUcCRXGxUoF1fqxnhFQmscfkCoDA0= +github.com/rogpeppe/go-internal v1.14.1 
h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ= +github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= +github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= +github.com/yuin/gopher-lua v1.1.1 h1:kYKnWBjvbNP4XLT3+bPEwAXJx262OhaHDWDVOPjL46M= +github.com/yuin/gopher-lua v1.1.1/go.mod h1:GBR0iDaNXjAgGg9zfCvksxSRnQx76gclCIb7kdAd1Pw= +github.com/zeebo/xxh3 v1.0.2 h1:xZmwmqxHZA8AI603jOQ0tMqmBr9lPeFwGg6d+xy9DC0= +github.com/zeebo/xxh3 v1.0.2/go.mod h1:5NWz9Sef7zIDm2JHfFlcQvNekmcEl9ekUZQQKCYaDcA= +go.uber.org/atomic v1.11.0 h1:ZvwS0R+56ePWxUNi+Atn9dWONBPp/AUETXlHW0DxSjE= +go.uber.org/atomic v1.11.0/go.mod h1:LUxbIzbOniOlMKjJjyPfpl4v+PKK2cNJn91OQbhoJI0= +golang.org/x/sys v0.42.0 h1:omrd2nAlyT5ESRdCLYdm3+fMfNFE/+Rf4bDIQImRJeo= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/pkg/notificationintent/intent.go b/pkg/notificationintent/intent.go new file mode 100644 index 0000000..7110dd5 --- /dev/null +++ b/pkg/notificationintent/intent.go @@ -0,0 +1,872 @@ +// Package notificationintent defines the shared producer-facing contract for +// publishing normalized notification intents into Notification Service. 
+package notificationintent + +import ( + "bytes" + "encoding/json" + "errors" + "fmt" + "io" + "sort" + "strconv" + "strings" + "time" +) + +const ( + fieldNotificationType = "notification_type" + fieldProducer = "producer" + fieldAudienceKind = "audience_kind" + fieldRecipientUserIDs = "recipient_user_ids_json" + fieldIdempotencyKey = "idempotency_key" + fieldOccurredAtMS = "occurred_at_ms" + fieldRequestID = "request_id" + fieldTraceID = "trace_id" + fieldPayloadJSON = "payload_json" + + // DefaultIntentsStream stores the frozen Redis Stream name consumed by + // Notification Service. + DefaultIntentsStream = "notification:intents" +) + +var ( + requiredFieldNames = map[string]struct{}{ + fieldNotificationType: {}, + fieldProducer: {}, + fieldAudienceKind: {}, + fieldIdempotencyKey: {}, + fieldOccurredAtMS: {}, + fieldPayloadJSON: {}, + } + optionalFieldNames = map[string]struct{}{ + fieldRecipientUserIDs: {}, + fieldRequestID: {}, + fieldTraceID: {}, + } +) + +// NotificationType identifies one supported normalized notification type. +type NotificationType string + +const ( + // NotificationTypeGeoReviewRecommended identifies the + // `geo.review_recommended` notification. + NotificationTypeGeoReviewRecommended NotificationType = "geo.review_recommended" + + // NotificationTypeGameTurnReady identifies the `game.turn.ready` + // notification. + NotificationTypeGameTurnReady NotificationType = "game.turn.ready" + + // NotificationTypeGameFinished identifies the `game.finished` + // notification. + NotificationTypeGameFinished NotificationType = "game.finished" + + // NotificationTypeGameGenerationFailed identifies the + // `game.generation_failed` notification. + NotificationTypeGameGenerationFailed NotificationType = "game.generation_failed" + + // NotificationTypeLobbyRuntimePausedAfterStart identifies the + // `lobby.runtime_paused_after_start` notification. 
+ NotificationTypeLobbyRuntimePausedAfterStart NotificationType = "lobby.runtime_paused_after_start" + + // NotificationTypeLobbyApplicationSubmitted identifies the + // `lobby.application.submitted` notification. + NotificationTypeLobbyApplicationSubmitted NotificationType = "lobby.application.submitted" + + // NotificationTypeLobbyMembershipApproved identifies the + // `lobby.membership.approved` notification. + NotificationTypeLobbyMembershipApproved NotificationType = "lobby.membership.approved" + + // NotificationTypeLobbyMembershipRejected identifies the + // `lobby.membership.rejected` notification. + NotificationTypeLobbyMembershipRejected NotificationType = "lobby.membership.rejected" + + // NotificationTypeLobbyInviteCreated identifies the + // `lobby.invite.created` notification. + NotificationTypeLobbyInviteCreated NotificationType = "lobby.invite.created" + + // NotificationTypeLobbyInviteRedeemed identifies the + // `lobby.invite.redeemed` notification. + NotificationTypeLobbyInviteRedeemed NotificationType = "lobby.invite.redeemed" + + // NotificationTypeLobbyInviteExpired identifies the + // `lobby.invite.expired` notification. + NotificationTypeLobbyInviteExpired NotificationType = "lobby.invite.expired" +) + +// String returns the wire value for notificationType. +func (notificationType NotificationType) String() string { + return string(notificationType) +} + +// IsKnown reports whether notificationType belongs to the frozen catalog. 
+func (notificationType NotificationType) IsKnown() bool { + switch notificationType { + case NotificationTypeGeoReviewRecommended, + NotificationTypeGameTurnReady, + NotificationTypeGameFinished, + NotificationTypeGameGenerationFailed, + NotificationTypeLobbyRuntimePausedAfterStart, + NotificationTypeLobbyApplicationSubmitted, + NotificationTypeLobbyMembershipApproved, + NotificationTypeLobbyMembershipRejected, + NotificationTypeLobbyInviteCreated, + NotificationTypeLobbyInviteRedeemed, + NotificationTypeLobbyInviteExpired: + return true + default: + return false + } +} + +// ExpectedProducer returns the frozen producer for notificationType. +func (notificationType NotificationType) ExpectedProducer() Producer { + switch notificationType { + case NotificationTypeGeoReviewRecommended: + return ProducerGeoProfile + case NotificationTypeGameTurnReady, + NotificationTypeGameFinished, + NotificationTypeGameGenerationFailed: + return ProducerGameMaster + case NotificationTypeLobbyRuntimePausedAfterStart, + NotificationTypeLobbyApplicationSubmitted, + NotificationTypeLobbyMembershipApproved, + NotificationTypeLobbyMembershipRejected, + NotificationTypeLobbyInviteCreated, + NotificationTypeLobbyInviteRedeemed, + NotificationTypeLobbyInviteExpired: + return ProducerGameLobby + default: + return "" + } +} + +// SupportsAudience reports whether notificationType supports audienceKind. +func (notificationType NotificationType) SupportsAudience(audienceKind AudienceKind) bool { + switch notificationType { + case NotificationTypeGeoReviewRecommended, + NotificationTypeGameGenerationFailed, + NotificationTypeLobbyRuntimePausedAfterStart: + return audienceKind == AudienceKindAdminEmail + case NotificationTypeLobbyApplicationSubmitted: + return audienceKind == AudienceKindUser || audienceKind == AudienceKindAdminEmail + default: + return audienceKind == AudienceKindUser + } +} + +// SupportsChannel reports whether notificationType uses channel for +// audienceKind. 
+func (notificationType NotificationType) SupportsChannel(audienceKind AudienceKind, channel Channel) bool { + switch notificationType { + case NotificationTypeGeoReviewRecommended, + NotificationTypeGameGenerationFailed, + NotificationTypeLobbyRuntimePausedAfterStart: + return audienceKind == AudienceKindAdminEmail && channel == ChannelEmail + case NotificationTypeLobbyApplicationSubmitted: + if audienceKind == AudienceKindAdminEmail { + return channel == ChannelEmail + } + return channel == ChannelPush || channel == ChannelEmail + case NotificationTypeLobbyInviteExpired: + return audienceKind == AudienceKindUser && channel == ChannelEmail + default: + return audienceKind == AudienceKindUser && (channel == ChannelPush || channel == ChannelEmail) + } +} + +// Producer identifies one supported upstream producer. +type Producer string + +const ( + // ProducerGeoProfile identifies Geo Profile Service. + ProducerGeoProfile Producer = "geoprofile" + + // ProducerGameMaster identifies Game Master. + ProducerGameMaster Producer = "game_master" + + // ProducerGameLobby identifies Game Lobby. + ProducerGameLobby Producer = "game_lobby" +) + +// String returns the wire value for producer. +func (producer Producer) String() string { + return string(producer) +} + +// IsKnown reports whether producer belongs to the frozen producer set. +func (producer Producer) IsKnown() bool { + switch producer { + case ProducerGeoProfile, ProducerGameMaster, ProducerGameLobby: + return true + default: + return false + } +} + +// AudienceKind identifies one supported target-audience kind. +type AudienceKind string + +const ( + // AudienceKindUser identifies user-targeted notifications. + AudienceKindUser AudienceKind = "user" + + // AudienceKindAdminEmail identifies administrator-email notifications. + AudienceKindAdminEmail AudienceKind = "admin_email" +) + +// String returns the wire value for audienceKind. 
+func (audienceKind AudienceKind) String() string { + return string(audienceKind) +} + +// IsKnown reports whether audienceKind belongs to the frozen audience set. +func (audienceKind AudienceKind) IsKnown() bool { + switch audienceKind { + case AudienceKindUser, AudienceKindAdminEmail: + return true + default: + return false + } +} + +// Channel identifies one durable notification-delivery channel slot. +type Channel string + +const ( + // ChannelPush identifies the push-delivery channel. + ChannelPush Channel = "push" + + // ChannelEmail identifies the email-delivery channel. + ChannelEmail Channel = "email" +) + +// String returns the wire value for channel. +func (channel Channel) String() string { + return string(channel) +} + +// IsKnown reports whether channel belongs to the frozen channel vocabulary. +func (channel Channel) IsKnown() bool { + switch channel { + case ChannelPush, ChannelEmail: + return true + default: + return false + } +} + +// Metadata stores producer-owned envelope fields shared by every notification +// intent. +type Metadata struct { + // IdempotencyKey stores the producer-owned idempotency key scoped together + // with the producer name. + IdempotencyKey string + + // OccurredAt stores when the producer says the underlying business event + // happened. Constructors normalize the value to UTC millisecond precision. + OccurredAt time.Time + + // RequestID stores the optional producer-side request identifier. + RequestID string + + // TraceID stores the optional producer-side trace identifier. + TraceID string +} + +// Intent stores one normalized notification intent accepted by Notification +// Service. +type Intent struct { + // NotificationType stores the frozen notification vocabulary value. + NotificationType NotificationType + + // Producer stores the frozen producer identifier. + Producer Producer + + // AudienceKind stores the normalized target audience kind. 
+ AudienceKind AudienceKind + + // RecipientUserIDs stores the normalized sorted unique user-recipient set + // when AudienceKind is AudienceKindUser. + RecipientUserIDs []string + + // IdempotencyKey stores the producer-owned idempotency key. + IdempotencyKey string + + // OccurredAt stores when the producer says the underlying business event + // happened. + OccurredAt time.Time + + // RequestID stores the optional producer-side request identifier. + RequestID string + + // TraceID stores the optional producer-side trace identifier. + TraceID string + + // PayloadJSON stores the canonical normalized payload JSON string used for + // duplicate detection. + PayloadJSON string +} + +// Validate reports whether intent contains a complete normalized intake +// request. +func (intent Intent) Validate() error { + if !intent.NotificationType.IsKnown() { + return fmt.Errorf("intent notification type %q is unsupported", intent.NotificationType) + } + if !intent.Producer.IsKnown() { + return fmt.Errorf("intent producer %q is unsupported", intent.Producer) + } + if expected := intent.NotificationType.ExpectedProducer(); intent.Producer != expected { + return fmt.Errorf( + "intent producer %q does not match notification type %q", + intent.Producer, + intent.NotificationType, + ) + } + if !intent.AudienceKind.IsKnown() { + return fmt.Errorf("intent audience kind %q is unsupported", intent.AudienceKind) + } + if !intent.NotificationType.SupportsAudience(intent.AudienceKind) { + return fmt.Errorf( + "intent notification type %q does not support audience kind %q", + intent.NotificationType, + intent.AudienceKind, + ) + } + if strings.TrimSpace(intent.IdempotencyKey) == "" { + return errors.New("intent idempotency key must not be empty") + } + if err := validateTimestamp("intent occurred at", intent.OccurredAt); err != nil { + return err + } + if strings.TrimSpace(intent.PayloadJSON) == "" { + return errors.New("intent payload json must not be empty") + } + + switch 
intent.AudienceKind { + case AudienceKindUser: + if len(intent.RecipientUserIDs) == 0 { + return errors.New("intent recipient user ids must not be empty for audience kind user") + } + for index, userID := range intent.RecipientUserIDs { + if userID == "" { + return fmt.Errorf("intent recipient user ids[%d] must not be empty", index) + } + if index > 0 && intent.RecipientUserIDs[index-1] >= userID { + return errors.New("intent recipient user ids must be sorted strictly ascending") + } + } + case AudienceKindAdminEmail: + if len(intent.RecipientUserIDs) > 0 { + return errors.New("intent recipient user ids must be empty for audience kind admin_email") + } + } + + return nil +} + +// Values returns Redis Stream field values for intent. It validates and +// normalizes the recipient set, event timestamp, and payload before building +// the field map. +func (intent Intent) Values() (map[string]any, error) { + normalized, err := normalizeIntent(intent) + if err != nil { + return nil, err + } + + values := map[string]any{ + fieldNotificationType: normalized.NotificationType.String(), + fieldProducer: normalized.Producer.String(), + fieldAudienceKind: normalized.AudienceKind.String(), + fieldIdempotencyKey: normalized.IdempotencyKey, + fieldOccurredAtMS: strconv.FormatInt(normalized.OccurredAt.UnixMilli(), 10), + fieldPayloadJSON: normalized.PayloadJSON, + } + if normalized.AudienceKind == AudienceKindUser { + recipientUserIDs, err := json.Marshal(normalized.RecipientUserIDs) + if err != nil { + return nil, fmt.Errorf("marshal recipient_user_ids_json: %w", err) + } + values[fieldRecipientUserIDs] = string(recipientUserIDs) + } + if normalized.RequestID != "" { + values[fieldRequestID] = normalized.RequestID + } + if normalized.TraceID != "" { + values[fieldTraceID] = normalized.TraceID + } + + return values, nil +} + +// DecodeIntent validates one raw Redis Stream entry and returns the normalized +// notification intent frozen by the producer contract. 
+func DecodeIntent(fields map[string]any) (Intent, error) { + if fields == nil { + return Intent{}, errors.New("intent fields must not be nil") + } + + if err := validateFieldSet(fields); err != nil { + return Intent{}, err + } + + notificationTypeValue, err := requiredString(fields, fieldNotificationType) + if err != nil { + return Intent{}, err + } + producerValue, err := requiredString(fields, fieldProducer) + if err != nil { + return Intent{}, err + } + audienceKindValue, err := requiredString(fields, fieldAudienceKind) + if err != nil { + return Intent{}, err + } + idempotencyKeyValue, err := requiredString(fields, fieldIdempotencyKey) + if err != nil { + return Intent{}, err + } + occurredAtValue, err := requiredString(fields, fieldOccurredAtMS) + if err != nil { + return Intent{}, err + } + payloadJSONValue, err := requiredString(fields, fieldPayloadJSON) + if err != nil { + return Intent{}, err + } + + intent := Intent{ + NotificationType: NotificationType(notificationTypeValue), + Producer: Producer(producerValue), + AudienceKind: AudienceKind(audienceKindValue), + IdempotencyKey: idempotencyKeyValue, + } + + if requestIDValue, ok, err := optionalString(fields, fieldRequestID); err != nil { + return Intent{}, err + } else if ok { + intent.RequestID = requestIDValue + } + if traceIDValue, ok, err := optionalString(fields, fieldTraceID); err != nil { + return Intent{}, err + } else if ok { + intent.TraceID = traceIDValue + } + + occurredAt, err := parseUnixMilliseconds(occurredAtValue) + if err != nil { + return Intent{}, err + } + intent.OccurredAt = occurredAt + + if !intent.NotificationType.IsKnown() { + return Intent{}, fmt.Errorf("stream field %q value %q is unsupported", fieldNotificationType, notificationTypeValue) + } + if !intent.Producer.IsKnown() { + return Intent{}, fmt.Errorf("stream field %q value %q is unsupported", fieldProducer, producerValue) + } + if !intent.AudienceKind.IsKnown() { + return Intent{}, fmt.Errorf("stream field %q value %q 
is unsupported", fieldAudienceKind, audienceKindValue) + } + if intent.NotificationType.ExpectedProducer() != intent.Producer { + return Intent{}, fmt.Errorf( + "stream field %q value %q does not match notification type %q", + fieldProducer, + producerValue, + intent.NotificationType, + ) + } + if !intent.NotificationType.SupportsAudience(intent.AudienceKind) { + return Intent{}, fmt.Errorf( + "stream field %q value %q is unsupported for notification type %q", + fieldAudienceKind, + audienceKindValue, + intent.NotificationType, + ) + } + + switch intent.AudienceKind { + case AudienceKindUser: + recipientUserIDsValue, err := requiredString(fields, fieldRecipientUserIDs) + if err != nil { + return Intent{}, err + } + recipientUserIDs, err := normalizeRecipientUserIDs(recipientUserIDsValue) + if err != nil { + return Intent{}, err + } + intent.RecipientUserIDs = recipientUserIDs + case AudienceKindAdminEmail: + if _, found := fields[fieldRecipientUserIDs]; found { + return Intent{}, fmt.Errorf("stream field %q must not be present for audience kind %q", fieldRecipientUserIDs, intent.AudienceKind) + } + } + + canonicalPayloadJSON, err := validateAndNormalizePayload(intent.NotificationType, payloadJSONValue) + if err != nil { + return Intent{}, err + } + intent.PayloadJSON = canonicalPayloadJSON + + if err := intent.Validate(); err != nil { + return Intent{}, err + } + + return intent, nil +} + +func newIntent( + notificationType NotificationType, + producer Producer, + audienceKind AudienceKind, + recipientUserIDs []string, + metadata Metadata, + payload any, +) (Intent, error) { + payloadJSON, err := json.Marshal(payload) + if err != nil { + return Intent{}, fmt.Errorf("marshal payload_json: %w", err) + } + + return normalizeIntent(Intent{ + NotificationType: notificationType, + Producer: producer, + AudienceKind: audienceKind, + RecipientUserIDs: append([]string(nil), recipientUserIDs...), + IdempotencyKey: metadata.IdempotencyKey, + OccurredAt: 
normalizeTimestamp(metadata.OccurredAt), + RequestID: metadata.RequestID, + TraceID: metadata.TraceID, + PayloadJSON: string(payloadJSON), + }) +} + +func normalizeIntent(intent Intent) (Intent, error) { + normalized := intent + normalized.OccurredAt = normalizeTimestamp(intent.OccurredAt) + + switch normalized.AudienceKind { + case AudienceKindUser: + recipientUserIDs, err := normalizeRecipientUserIDValues(normalized.RecipientUserIDs) + if err != nil { + return Intent{}, err + } + normalized.RecipientUserIDs = recipientUserIDs + case AudienceKindAdminEmail: + if len(normalized.RecipientUserIDs) > 0 { + return Intent{}, errors.New("intent recipient user ids must be empty for audience kind admin_email") + } + default: + if len(normalized.RecipientUserIDs) > 0 { + recipientUserIDs, err := normalizeRecipientUserIDValues(normalized.RecipientUserIDs) + if err != nil { + return Intent{}, err + } + normalized.RecipientUserIDs = recipientUserIDs + } + } + + canonicalPayloadJSON, err := validateAndNormalizePayload(normalized.NotificationType, normalized.PayloadJSON) + if err != nil { + return Intent{}, err + } + normalized.PayloadJSON = canonicalPayloadJSON + + if err := normalized.Validate(); err != nil { + return Intent{}, err + } + + return normalized, nil +} + +func normalizeTimestamp(value time.Time) time.Time { + if value.IsZero() { + return value + } + + return value.UTC().Truncate(time.Millisecond) +} + +func validateFieldSet(fields map[string]any) error { + missing := make([]string, 0, len(requiredFieldNames)) + for name := range requiredFieldNames { + if _, ok := fields[name]; !ok { + missing = append(missing, name) + } + } + sort.Strings(missing) + if len(missing) > 0 { + return fmt.Errorf("intent is missing required fields: %s", strings.Join(missing, ", ")) + } + + unexpected := make([]string, 0) + for name := range fields { + if _, ok := requiredFieldNames[name]; ok { + continue + } + if _, ok := optionalFieldNames[name]; ok { + continue + } + unexpected = 
append(unexpected, name) + } + sort.Strings(unexpected) + if len(unexpected) > 0 { + return fmt.Errorf("intent contains unsupported fields: %s", strings.Join(unexpected, ", ")) + } + + return nil +} + +func requiredString(fields map[string]any, name string) (string, error) { + value, ok := fields[name] + if !ok { + return "", fmt.Errorf("stream field %q is required", name) + } + + result, ok := rawString(value) + if !ok { + return "", fmt.Errorf("stream field %q must be a string", name) + } + + return result, nil +} + +func optionalString(fields map[string]any, name string) (string, bool, error) { + value, ok := fields[name] + if !ok { + return "", false, nil + } + + result, ok := rawString(value) + if !ok { + return "", false, fmt.Errorf("stream field %q must be a string", name) + } + + return result, true, nil +} + +func rawString(value any) (string, bool) { + switch typed := value.(type) { + case string: + return typed, true + case []byte: + return string(typed), true + default: + return "", false + } +} + +func parseUnixMilliseconds(raw string) (time.Time, error) { + if raw == "" { + return time.Time{}, fmt.Errorf("stream field %q must be a base-10 Unix milliseconds string", fieldOccurredAtMS) + } + for _, r := range raw { + if r < '0' || r > '9' { + return time.Time{}, fmt.Errorf("stream field %q must be a base-10 Unix milliseconds string", fieldOccurredAtMS) + } + } + + value, err := strconv.ParseInt(raw, 10, 64) + if err != nil { + return time.Time{}, fmt.Errorf("stream field %q must be a base-10 Unix milliseconds string", fieldOccurredAtMS) + } + + return time.UnixMilli(value).UTC(), nil +} + +func normalizeRecipientUserIDs(raw string) ([]string, error) { + var values []string + if err := decodeStrictJSON("decode recipient_user_ids_json", raw, &values, false); err != nil { + return nil, err + } + + return normalizeRecipientUserIDValues(values) +} + +func normalizeRecipientUserIDValues(values []string) ([]string, error) { + if len(values) == 0 { + return 
nil, errors.New("recipient_user_ids_json must contain at least one user id") + } + + seen := make(map[string]struct{}, len(values)) + normalized := make([]string, 0, len(values)) + for index, value := range values { + if value == "" { + return nil, fmt.Errorf("recipient_user_ids_json[%d] must not be empty", index) + } + if _, ok := seen[value]; ok { + return nil, fmt.Errorf("recipient_user_ids_json[%d] duplicates user id %q", index, value) + } + seen[value] = struct{}{} + normalized = append(normalized, value) + } + + sort.Strings(normalized) + + return normalized, nil +} + +func validateAndNormalizePayload(notificationType NotificationType, raw string) (string, error) { + payloadObject, err := decodeJSONObjectRaw("decode payload_json", raw) + if err != nil { + return "", err + } + if err := validatePayloadObject(notificationType, payloadObject); err != nil { + return "", err + } + + normalizedValue, err := decodeNormalizedJSONValue("decode payload_json", raw) + if err != nil { + return "", err + } + + normalizedPayload, err := json.Marshal(normalizedValue) + if err != nil { + return "", fmt.Errorf("normalize payload_json: %w", err) + } + + return string(normalizedPayload), nil +} + +func validatePayloadObject(notificationType NotificationType, payload map[string]json.RawMessage) error { + switch notificationType { + case NotificationTypeGeoReviewRecommended: + return validateStringFields(payload, "user_id", "user_email", "observed_country", "usual_connection_country", "review_reason") + case NotificationTypeGameTurnReady: + if err := validateStringFields(payload, "game_id", "game_name"); err != nil { + return err + } + return validatePositiveIntFields(payload, "turn_number") + case NotificationTypeGameFinished: + if err := validateStringFields(payload, "game_id", "game_name"); err != nil { + return err + } + return validatePositiveIntFields(payload, "final_turn_number") + case NotificationTypeGameGenerationFailed: + return validateStringFields(payload, "game_id", 
"game_name", "failure_reason") + case NotificationTypeLobbyRuntimePausedAfterStart: + return validateStringFields(payload, "game_id", "game_name") + case NotificationTypeLobbyApplicationSubmitted: + return validateStringFields(payload, "game_id", "game_name", "applicant_user_id", "applicant_name") + case NotificationTypeLobbyMembershipApproved, NotificationTypeLobbyMembershipRejected: + return validateStringFields(payload, "game_id", "game_name") + case NotificationTypeLobbyInviteCreated: + return validateStringFields(payload, "game_id", "game_name", "inviter_user_id", "inviter_name") + case NotificationTypeLobbyInviteRedeemed, NotificationTypeLobbyInviteExpired: + return validateStringFields(payload, "game_id", "game_name", "invitee_user_id", "invitee_name") + default: + return fmt.Errorf("payload_json notification type %q is unsupported", notificationType) + } +} + +func validateStringFields(payload map[string]json.RawMessage, names ...string) error { + for _, name := range names { + var value string + if err := decodeRequiredJSONField(payload, name, &value); err != nil { + return err + } + if value == "" { + return fmt.Errorf("payload_json.%s must not be empty", name) + } + } + + return nil +} + +func validatePositiveIntFields(payload map[string]json.RawMessage, names ...string) error { + for _, name := range names { + var value int64 + if err := decodeRequiredJSONField(payload, name, &value); err != nil { + return err + } + if value < 1 { + return fmt.Errorf("payload_json.%s must be at least 1", name) + } + } + + return nil +} + +func decodeRequiredJSONField(payload map[string]json.RawMessage, name string, target any) error { + raw, ok := payload[name] + if !ok { + return fmt.Errorf("payload_json.%s is required", name) + } + + if err := decodeStrictJSON("decode payload_json."+name, string(raw), target, false); err != nil { + return err + } + + return nil +} + +func decodeJSONObjectRaw(label string, raw string) (map[string]json.RawMessage, error) { + var value 
map[string]json.RawMessage + if err := decodeStrictJSON(label, raw, &value, false); err != nil { + return nil, err + } + if value == nil { + return nil, errors.New("payload_json must be a JSON object") + } + + return value, nil +} + +func decodeNormalizedJSONValue(label string, raw string) (any, error) { + decoder := json.NewDecoder(bytes.NewBufferString(raw)) + decoder.UseNumber() + + var value any + if err := decoder.Decode(&value); err != nil { + return nil, fmt.Errorf("%s: %w", label, err) + } + if err := decoder.Decode(&struct{}{}); err != io.EOF { + if err == nil { + return nil, fmt.Errorf("%s: unexpected trailing JSON input", label) + } + return nil, fmt.Errorf("%s: %w", label, err) + } + + object, ok := value.(map[string]any) + if !ok || object == nil { + return nil, errors.New("payload_json must be a JSON object") + } + + return value, nil +} + +func decodeStrictJSON(label string, raw string, target any, useNumber bool) error { + decoder := json.NewDecoder(bytes.NewBufferString(raw)) + if useNumber { + decoder.UseNumber() + } + + if err := decoder.Decode(target); err != nil { + return fmt.Errorf("%s: %w", label, err) + } + if err := decoder.Decode(&struct{}{}); err != io.EOF { + if err == nil { + return fmt.Errorf("%s: unexpected trailing JSON input", label) + } + + return fmt.Errorf("%s: %w", label, err) + } + + return nil +} + +func validateTimestamp(name string, value time.Time) error { + if value.IsZero() { + return fmt.Errorf("%s must not be zero", name) + } + if !value.Equal(value.UTC()) { + return fmt.Errorf("%s must be UTC", name) + } + if !value.Equal(value.Truncate(time.Millisecond)) { + return fmt.Errorf("%s must use millisecond precision", name) + } + + return nil +} diff --git a/pkg/notificationintent/intent_test.go b/pkg/notificationintent/intent_test.go new file mode 100644 index 0000000..caf792a --- /dev/null +++ b/pkg/notificationintent/intent_test.go @@ -0,0 +1,298 @@ +package notificationintent + +import ( + "encoding/json" + "testing" + 
"time" + + "github.com/stretchr/testify/require" +) + +func TestConstructorsBuildExpectedIntentValues(t *testing.T) { + t.Parallel() + + metadata := Metadata{ + IdempotencyKey: "idempotency-1", + OccurredAt: time.UnixMilli(1775121700000).Add(123 * time.Nanosecond), + RequestID: "request-1", + TraceID: "trace-1", + } + + tests := []struct { + name string + build func() (Intent, error) + notificationType NotificationType + producer Producer + audienceKind AudienceKind + recipientUserIDs []string + payloadJSON string + }{ + { + name: "geo review recommended", + build: func() (Intent, error) { + return NewGeoReviewRecommendedIntent(metadata, GeoReviewRecommendedPayload{ + UserID: "user-1", + UserEmail: "pilot@example.com", + ObservedCountry: "DE", + UsualConnectionCountry: "PL", + ReviewReason: "country_mismatch", + }) + }, + notificationType: NotificationTypeGeoReviewRecommended, + producer: ProducerGeoProfile, + audienceKind: AudienceKindAdminEmail, + payloadJSON: `{"user_id":"user-1","user_email":"pilot@example.com","observed_country":"DE","usual_connection_country":"PL","review_reason":"country_mismatch"}`, + }, + { + name: "game turn ready", + build: func() (Intent, error) { + return NewGameTurnReadyIntent(metadata, []string{"user-2", "user-1"}, GameTurnReadyPayload{ + GameID: "game-1", + GameName: "Nebula Clash", + TurnNumber: 54, + }) + }, + notificationType: NotificationTypeGameTurnReady, + producer: ProducerGameMaster, + audienceKind: AudienceKindUser, + recipientUserIDs: []string{"user-1", "user-2"}, + payloadJSON: `{"game_id":"game-1","game_name":"Nebula Clash","turn_number":54}`, + }, + { + name: "game finished", + build: func() (Intent, error) { + return NewGameFinishedIntent(metadata, []string{"user-1", "user-2"}, GameFinishedPayload{ + GameID: "game-1", + GameName: "Nebula Clash", + FinalTurnNumber: 55, + }) + }, + notificationType: NotificationTypeGameFinished, + producer: ProducerGameMaster, + audienceKind: AudienceKindUser, + recipientUserIDs: 
[]string{"user-1", "user-2"}, + payloadJSON: `{"game_id":"game-1","game_name":"Nebula Clash","final_turn_number":55}`, + }, + { + name: "game generation failed", + build: func() (Intent, error) { + return NewGameGenerationFailedIntent(metadata, GameGenerationFailedPayload{ + GameID: "game-1", + GameName: "Nebula Clash", + FailureReason: "engine_timeout", + }) + }, + notificationType: NotificationTypeGameGenerationFailed, + producer: ProducerGameMaster, + audienceKind: AudienceKindAdminEmail, + payloadJSON: `{"game_id":"game-1","game_name":"Nebula Clash","failure_reason":"engine_timeout"}`, + }, + { + name: "lobby runtime paused after start", + build: func() (Intent, error) { + return NewLobbyRuntimePausedAfterStartIntent(metadata, LobbyRuntimePausedAfterStartPayload{ + GameID: "game-1", + GameName: "Nebula Clash", + }) + }, + notificationType: NotificationTypeLobbyRuntimePausedAfterStart, + producer: ProducerGameLobby, + audienceKind: AudienceKindAdminEmail, + payloadJSON: `{"game_id":"game-1","game_name":"Nebula Clash"}`, + }, + { + name: "private lobby application submitted", + build: func() (Intent, error) { + return NewPrivateLobbyApplicationSubmittedIntent(metadata, "owner-1", LobbyApplicationSubmittedPayload{ + GameID: "game-1", + GameName: "Nebula Clash", + ApplicantUserID: "user-2", + ApplicantName: "Nova Pilot", + }) + }, + notificationType: NotificationTypeLobbyApplicationSubmitted, + producer: ProducerGameLobby, + audienceKind: AudienceKindUser, + recipientUserIDs: []string{"owner-1"}, + payloadJSON: `{"game_id":"game-1","game_name":"Nebula Clash","applicant_user_id":"user-2","applicant_name":"Nova Pilot"}`, + }, + { + name: "public lobby application submitted", + build: func() (Intent, error) { + return NewPublicLobbyApplicationSubmittedIntent(metadata, LobbyApplicationSubmittedPayload{ + GameID: "game-1", + GameName: "Nebula Clash", + ApplicantUserID: "user-2", + ApplicantName: "Nova Pilot", + }) + }, + notificationType: 
NotificationTypeLobbyApplicationSubmitted, + producer: ProducerGameLobby, + audienceKind: AudienceKindAdminEmail, + payloadJSON: `{"game_id":"game-1","game_name":"Nebula Clash","applicant_user_id":"user-2","applicant_name":"Nova Pilot"}`, + }, + { + name: "lobby membership approved", + build: func() (Intent, error) { + return NewLobbyMembershipApprovedIntent(metadata, "applicant-1", LobbyMembershipApprovedPayload{ + GameID: "game-1", + GameName: "Nebula Clash", + }) + }, + notificationType: NotificationTypeLobbyMembershipApproved, + producer: ProducerGameLobby, + audienceKind: AudienceKindUser, + recipientUserIDs: []string{"applicant-1"}, + payloadJSON: `{"game_id":"game-1","game_name":"Nebula Clash"}`, + }, + { + name: "lobby membership rejected", + build: func() (Intent, error) { + return NewLobbyMembershipRejectedIntent(metadata, "applicant-1", LobbyMembershipRejectedPayload{ + GameID: "game-1", + GameName: "Nebula Clash", + }) + }, + notificationType: NotificationTypeLobbyMembershipRejected, + producer: ProducerGameLobby, + audienceKind: AudienceKindUser, + recipientUserIDs: []string{"applicant-1"}, + payloadJSON: `{"game_id":"game-1","game_name":"Nebula Clash"}`, + }, + { + name: "lobby invite created", + build: func() (Intent, error) { + return NewLobbyInviteCreatedIntent(metadata, "invited-1", LobbyInviteCreatedPayload{ + GameID: "game-1", + GameName: "Nebula Clash", + InviterUserID: "owner-1", + InviterName: "Owner Pilot", + }) + }, + notificationType: NotificationTypeLobbyInviteCreated, + producer: ProducerGameLobby, + audienceKind: AudienceKindUser, + recipientUserIDs: []string{"invited-1"}, + payloadJSON: `{"game_id":"game-1","game_name":"Nebula Clash","inviter_user_id":"owner-1","inviter_name":"Owner Pilot"}`, + }, + { + name: "lobby invite redeemed", + build: func() (Intent, error) { + return NewLobbyInviteRedeemedIntent(metadata, "owner-1", LobbyInviteRedeemedPayload{ + GameID: "game-1", + GameName: "Nebula Clash", + InviteeUserID: "invitee-1", + 
InviteeName: "Nova Pilot", + }) + }, + notificationType: NotificationTypeLobbyInviteRedeemed, + producer: ProducerGameLobby, + audienceKind: AudienceKindUser, + recipientUserIDs: []string{"owner-1"}, + payloadJSON: `{"game_id":"game-1","game_name":"Nebula Clash","invitee_user_id":"invitee-1","invitee_name":"Nova Pilot"}`, + }, + { + name: "lobby invite expired", + build: func() (Intent, error) { + return NewLobbyInviteExpiredIntent(metadata, "owner-1", LobbyInviteExpiredPayload{ + GameID: "game-1", + GameName: "Nebula Clash", + InviteeUserID: "invitee-1", + InviteeName: "Nova Pilot", + }) + }, + notificationType: NotificationTypeLobbyInviteExpired, + producer: ProducerGameLobby, + audienceKind: AudienceKindUser, + recipientUserIDs: []string{"owner-1"}, + payloadJSON: `{"game_id":"game-1","game_name":"Nebula Clash","invitee_user_id":"invitee-1","invitee_name":"Nova Pilot"}`, + }, + } + + for _, tt := range tests { + tt := tt + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + intent, err := tt.build() + require.NoError(t, err) + require.Equal(t, tt.notificationType, intent.NotificationType) + require.Equal(t, tt.producer, intent.Producer) + require.Equal(t, tt.audienceKind, intent.AudienceKind) + require.Equal(t, tt.recipientUserIDs, intent.RecipientUserIDs) + + values, err := intent.Values() + require.NoError(t, err) + require.Equal(t, tt.notificationType.String(), values[fieldNotificationType]) + require.Equal(t, tt.producer.String(), values[fieldProducer]) + require.Equal(t, tt.audienceKind.String(), values[fieldAudienceKind]) + require.Equal(t, metadata.IdempotencyKey, values[fieldIdempotencyKey]) + require.Equal(t, "1775121700000", values[fieldOccurredAtMS]) + require.Equal(t, metadata.RequestID, values[fieldRequestID]) + require.Equal(t, metadata.TraceID, values[fieldTraceID]) + require.JSONEq(t, tt.payloadJSON, values[fieldPayloadJSON].(string)) + + if len(tt.recipientUserIDs) == 0 { + require.NotContains(t, values, fieldRecipientUserIDs) + return + } + 
+ var recipientUserIDs []string + require.NoError(t, json.Unmarshal([]byte(values[fieldRecipientUserIDs].(string)), &recipientUserIDs)) + require.Equal(t, tt.recipientUserIDs, recipientUserIDs) + }) + } +} + +func TestUserRecipientConstructorsRejectDuplicates(t *testing.T) { + t.Parallel() + + _, err := NewGameTurnReadyIntent(defaultMetadata(), []string{"user-1", "user-1"}, GameTurnReadyPayload{ + GameID: "game-1", + GameName: "Nebula Clash", + TurnNumber: 54, + }) + require.Error(t, err) + require.Contains(t, err.Error(), "duplicates user id") +} + +func TestConstructorsRejectInvalidPayloads(t *testing.T) { + t.Parallel() + + _, err := NewGameTurnReadyIntent(defaultMetadata(), []string{"user-1"}, GameTurnReadyPayload{ + GameName: "Nebula Clash", + TurnNumber: 54, + }) + require.Error(t, err) + require.Contains(t, err.Error(), "payload_json.game_id must not be empty") + + _, err = NewGameTurnReadyIntent(defaultMetadata(), []string{"user-1"}, GameTurnReadyPayload{ + GameID: "game-1", + GameName: "Nebula Clash", + TurnNumber: 0, + }) + require.Error(t, err) + require.Contains(t, err.Error(), "payload_json.turn_number must be at least 1") +} + +func TestDecodeIntentRejectsMissingRequiredTopLevelField(t *testing.T) { + t.Parallel() + + _, err := DecodeIntent(map[string]any{ + fieldNotificationType: NotificationTypeGameTurnReady.String(), + fieldProducer: ProducerGameMaster.String(), + fieldAudienceKind: AudienceKindUser.String(), + fieldRecipientUserIDs: `["user-1"]`, + fieldIdempotencyKey: "game-1:turn-54", + fieldOccurredAtMS: "1775121700000", + }) + require.Error(t, err) + require.Contains(t, err.Error(), fieldPayloadJSON) +} + +func defaultMetadata() Metadata { + return Metadata{ + IdempotencyKey: "idempotency-1", + OccurredAt: time.UnixMilli(1775121700000), + } +} diff --git a/pkg/notificationintent/payloads.go b/pkg/notificationintent/payloads.go new file mode 100644 index 0000000..9b72f0d --- /dev/null +++ b/pkg/notificationintent/payloads.go @@ -0,0 +1,162 @@ 
+package notificationintent
+
+// GeoReviewRecommendedPayload stores the normalized payload for
+// `geo.review_recommended`. Every field is required: the intent payload
+// validator rejects empty string values.
+type GeoReviewRecommendedPayload struct {
+	UserID                 string `json:"user_id"`
+	UserEmail              string `json:"user_email"`
+	ObservedCountry        string `json:"observed_country"`
+	UsualConnectionCountry string `json:"usual_connection_country"`
+	ReviewReason           string `json:"review_reason"`
+}
+
+// GameTurnReadyPayload stores the normalized payload for `game.turn.ready`.
+// TurnNumber must be at least 1; the intent payload validator enforces this.
+type GameTurnReadyPayload struct {
+	GameID     string `json:"game_id"`
+	GameName   string `json:"game_name"`
+	TurnNumber int64  `json:"turn_number"`
+}
+
+// GameFinishedPayload stores the normalized payload for `game.finished`.
+// FinalTurnNumber must be at least 1; the intent payload validator enforces
+// this.
+type GameFinishedPayload struct {
+	GameID          string `json:"game_id"`
+	GameName        string `json:"game_name"`
+	FinalTurnNumber int64  `json:"final_turn_number"`
+}
+
+// GameGenerationFailedPayload stores the normalized payload for
+// `game.generation_failed`. Every field is required and must be non-empty.
+type GameGenerationFailedPayload struct {
+	GameID        string `json:"game_id"`
+	GameName      string `json:"game_name"`
+	FailureReason string `json:"failure_reason"`
+}
+
+// LobbyRuntimePausedAfterStartPayload stores the normalized payload for
+// `lobby.runtime_paused_after_start`.
+type LobbyRuntimePausedAfterStartPayload struct {
+	GameID   string `json:"game_id"`
+	GameName string `json:"game_name"`
+}
+
+// LobbyApplicationSubmittedPayload stores the normalized payload for
+// `lobby.application.submitted`. It is shared by the private (owner-targeted)
+// and public (admin-email) constructor variants.
+type LobbyApplicationSubmittedPayload struct {
+	GameID          string `json:"game_id"`
+	GameName        string `json:"game_name"`
+	ApplicantUserID string `json:"applicant_user_id"`
+	ApplicantName   string `json:"applicant_name"`
+}
+
+// LobbyMembershipApprovedPayload stores the normalized payload for
+// `lobby.membership.approved`.
+type LobbyMembershipApprovedPayload struct { + GameID string `json:"game_id"` + GameName string `json:"game_name"` +} + +// LobbyMembershipRejectedPayload stores the normalized payload for +// `lobby.membership.rejected`. +type LobbyMembershipRejectedPayload struct { + GameID string `json:"game_id"` + GameName string `json:"game_name"` +} + +// LobbyInviteCreatedPayload stores the normalized payload for +// `lobby.invite.created`. +type LobbyInviteCreatedPayload struct { + GameID string `json:"game_id"` + GameName string `json:"game_name"` + InviterUserID string `json:"inviter_user_id"` + InviterName string `json:"inviter_name"` +} + +// LobbyInviteRedeemedPayload stores the normalized payload for +// `lobby.invite.redeemed`. +type LobbyInviteRedeemedPayload struct { + GameID string `json:"game_id"` + GameName string `json:"game_name"` + InviteeUserID string `json:"invitee_user_id"` + InviteeName string `json:"invitee_name"` +} + +// LobbyInviteExpiredPayload stores the normalized payload for +// `lobby.invite.expired`. +type LobbyInviteExpiredPayload struct { + GameID string `json:"game_id"` + GameName string `json:"game_name"` + InviteeUserID string `json:"invitee_user_id"` + InviteeName string `json:"invitee_name"` +} + +// NewGeoReviewRecommendedIntent builds the admin-email intent published by Geo +// Profile Service when a user becomes review-worthy. +func NewGeoReviewRecommendedIntent(metadata Metadata, payload GeoReviewRecommendedPayload) (Intent, error) { + return newIntent(NotificationTypeGeoReviewRecommended, ProducerGeoProfile, AudienceKindAdminEmail, nil, metadata, payload) +} + +// NewGameTurnReadyIntent builds the user-targeted intent published by Game +// Master when a new turn is ready for active accepted participants. 
+func NewGameTurnReadyIntent(metadata Metadata, recipientUserIDs []string, payload GameTurnReadyPayload) (Intent, error) {
+	return newIntent(
+		NotificationTypeGameTurnReady,
+		ProducerGameMaster,
+		AudienceKindUser,
+		recipientUserIDs,
+		metadata,
+		payload,
+	)
+}
+
+// NewGameFinishedIntent builds the user-targeted intent that Game Master
+// publishes when a running game finishes.
+func NewGameFinishedIntent(metadata Metadata, recipientUserIDs []string, payload GameFinishedPayload) (Intent, error) {
+	return newIntent(
+		NotificationTypeGameFinished,
+		ProducerGameMaster,
+		AudienceKindUser,
+		recipientUserIDs,
+		metadata,
+		payload,
+	)
+}
+
+// NewGameGenerationFailedIntent builds the admin-email intent that Game
+// Master publishes when turn generation fails.
+func NewGameGenerationFailedIntent(metadata Metadata, payload GameGenerationFailedPayload) (Intent, error) {
+	return newIntent(
+		NotificationTypeGameGenerationFailed,
+		ProducerGameMaster,
+		AudienceKindAdminEmail,
+		nil,
+		metadata,
+		payload,
+	)
+}
+
+// NewLobbyRuntimePausedAfterStartIntent builds the admin-email intent that
+// Game Lobby publishes when a game is paused after runtime startup.
+func NewLobbyRuntimePausedAfterStartIntent(metadata Metadata, payload LobbyRuntimePausedAfterStartPayload) (Intent, error) {
+	return newIntent(
+		NotificationTypeLobbyRuntimePausedAfterStart,
+		ProducerGameLobby,
+		AudienceKindAdminEmail,
+		nil,
+		metadata,
+		payload,
+	)
+}
+
+// NewPrivateLobbyApplicationSubmittedIntent builds the private-game owner
+// intent that Game Lobby publishes when an application is submitted. The
+// single recipient is the game owner.
+func NewPrivateLobbyApplicationSubmittedIntent(metadata Metadata, ownerUserID string, payload LobbyApplicationSubmittedPayload) (Intent, error) {
+	return newIntent(
+		NotificationTypeLobbyApplicationSubmitted,
+		ProducerGameLobby,
+		AudienceKindUser,
+		[]string{ownerUserID},
+		metadata,
+		payload,
+	)
+}
+
+// NewPublicLobbyApplicationSubmittedIntent builds the public-game admin-email
+// intent published by Game Lobby when an application is submitted.
+func NewPublicLobbyApplicationSubmittedIntent(metadata Metadata, payload LobbyApplicationSubmittedPayload) (Intent, error) {
+	return newIntent(
+		NotificationTypeLobbyApplicationSubmitted,
+		ProducerGameLobby,
+		AudienceKindAdminEmail,
+		nil,
+		metadata,
+		payload,
+	)
+}
+
+// NewLobbyMembershipApprovedIntent builds the applicant-user intent that Game
+// Lobby publishes when membership is approved.
+func NewLobbyMembershipApprovedIntent(metadata Metadata, applicantUserID string, payload LobbyMembershipApprovedPayload) (Intent, error) {
+	return newIntent(
+		NotificationTypeLobbyMembershipApproved,
+		ProducerGameLobby,
+		AudienceKindUser,
+		[]string{applicantUserID},
+		metadata,
+		payload,
+	)
+}
+
+// NewLobbyMembershipRejectedIntent builds the applicant-user intent that Game
+// Lobby publishes when membership is rejected.
+func NewLobbyMembershipRejectedIntent(metadata Metadata, applicantUserID string, payload LobbyMembershipRejectedPayload) (Intent, error) {
+	return newIntent(
+		NotificationTypeLobbyMembershipRejected,
+		ProducerGameLobby,
+		AudienceKindUser,
+		[]string{applicantUserID},
+		metadata,
+		payload,
+	)
+}
+
+// NewLobbyInviteCreatedIntent builds the invited-user intent that Game Lobby
+// publishes when a private-game invite is created.
+func NewLobbyInviteCreatedIntent(metadata Metadata, invitedUserID string, payload LobbyInviteCreatedPayload) (Intent, error) {
+	return newIntent(
+		NotificationTypeLobbyInviteCreated,
+		ProducerGameLobby,
+		AudienceKindUser,
+		[]string{invitedUserID},
+		metadata,
+		payload,
+	)
+}
+
+// NewLobbyInviteRedeemedIntent builds the private-game owner intent published
+// by Game Lobby when an invite is redeemed.
+func NewLobbyInviteRedeemedIntent(metadata Metadata, ownerUserID string, payload LobbyInviteRedeemedPayload) (Intent, error) { + return newIntent(NotificationTypeLobbyInviteRedeemed, ProducerGameLobby, AudienceKindUser, []string{ownerUserID}, metadata, payload) +} + +// NewLobbyInviteExpiredIntent builds the private-game owner intent published +// by Game Lobby when an invite expires. +func NewLobbyInviteExpiredIntent(metadata Metadata, ownerUserID string, payload LobbyInviteExpiredPayload) (Intent, error) { + return newIntent(NotificationTypeLobbyInviteExpired, ProducerGameLobby, AudienceKindUser, []string{ownerUserID}, metadata, payload) +} diff --git a/pkg/notificationintent/publisher.go b/pkg/notificationintent/publisher.go new file mode 100644 index 0000000..0c0924d --- /dev/null +++ b/pkg/notificationintent/publisher.go @@ -0,0 +1,73 @@ +package notificationintent + +import ( + "context" + "errors" + "fmt" + + "github.com/redis/go-redis/v9" +) + +// RedisClient stores the minimal Redis command surface required by Publisher. +type RedisClient interface { + // XAdd appends one entry to a Redis Stream. + XAdd(context.Context, *redis.XAddArgs) *redis.StringCmd +} + +// PublisherConfig stores the dependencies and stream name used by Publisher. +type PublisherConfig struct { + // Client appends normalized intents to Redis Streams. + Client RedisClient + + // Stream stores the Redis Stream name. When empty, DefaultIntentsStream is + // used. + Stream string +} + +// Publisher publishes normalized notification intents into the Notification +// Service ingress stream. +type Publisher struct { + client RedisClient + stream string +} + +// NewPublisher constructs a Publisher from cfg. 
+func NewPublisher(cfg PublisherConfig) (*Publisher, error) { + if cfg.Client == nil { + return nil, errors.New("new notification intent publisher: nil redis client") + } + if cfg.Stream == "" { + cfg.Stream = DefaultIntentsStream + } + + return &Publisher{ + client: cfg.Client, + stream: cfg.Stream, + }, nil +} + +// Publish validates intent and appends it with plain XADD. It does not trim +// the stream and does not perform hidden retries. +func (publisher *Publisher) Publish(ctx context.Context, intent Intent) (string, error) { + if ctx == nil { + return "", errors.New("publish notification intent: nil context") + } + if publisher == nil || publisher.client == nil { + return "", errors.New("publish notification intent: nil publisher") + } + + values, err := intent.Values() + if err != nil { + return "", fmt.Errorf("publish notification intent: %w", err) + } + + entryID, err := publisher.client.XAdd(ctx, &redis.XAddArgs{ + Stream: publisher.stream, + Values: values, + }).Result() + if err != nil { + return "", fmt.Errorf("publish notification intent: xadd: %w", err) + } + + return entryID, nil +} diff --git a/pkg/notificationintent/publisher_test.go b/pkg/notificationintent/publisher_test.go new file mode 100644 index 0000000..096d306 --- /dev/null +++ b/pkg/notificationintent/publisher_test.go @@ -0,0 +1,44 @@ +package notificationintent + +import ( + "context" + "testing" + + "github.com/alicebob/miniredis/v2" + "github.com/redis/go-redis/v9" + "github.com/stretchr/testify/require" +) + +func TestPublisherPublishAppendsIntentToDefaultStream(t *testing.T) { + t.Parallel() + + redisServer := miniredis.RunT(t) + redisClient := redis.NewClient(&redis.Options{Addr: redisServer.Addr()}) + t.Cleanup(func() { + require.NoError(t, redisClient.Close()) + }) + + publisher, err := NewPublisher(PublisherConfig{Client: redisClient}) + require.NoError(t, err) + + intent, err := NewGameTurnReadyIntent(defaultMetadata(), []string{"user-1"}, GameTurnReadyPayload{ + GameID: 
"game-1", + GameName: "Nebula Clash", + TurnNumber: 54, + }) + require.NoError(t, err) + + entryID, err := publisher.Publish(context.Background(), intent) + require.NoError(t, err) + require.NotEmpty(t, entryID) + + messages, err := redisClient.XRange(context.Background(), DefaultIntentsStream, "-", "+").Result() + require.NoError(t, err) + require.Len(t, messages, 1) + require.Equal(t, entryID, messages[0].ID) + require.Equal(t, NotificationTypeGameTurnReady.String(), messages[0].Values[fieldNotificationType]) + require.Equal(t, ProducerGameMaster.String(), messages[0].Values[fieldProducer]) + require.Equal(t, AudienceKindUser.String(), messages[0].Values[fieldAudienceKind]) + require.Equal(t, `["user-1"]`, messages[0].Values[fieldRecipientUserIDs]) + require.Equal(t, `{"game_id":"game-1","game_name":"Nebula Clash","turn_number":54}`, messages[0].Values[fieldPayloadJSON]) +} diff --git a/pkg/schema/fbs/notification.fbs b/pkg/schema/fbs/notification.fbs new file mode 100644 index 0000000..8938065 --- /dev/null +++ b/pkg/schema/fbs/notification.fbs @@ -0,0 +1,38 @@ +// notification contains shared FlatBuffers payloads published by +// Notification Service toward the gateway client event stream. 
+namespace notification; + +table GameTurnReadyEvent { + game_id:string; + turn_number:int64; +} + +table GameFinishedEvent { + game_id:string; + final_turn_number:int64; +} + +table LobbyApplicationSubmittedEvent { + game_id:string; + applicant_user_id:string; +} + +table LobbyMembershipApprovedEvent { + game_id:string; +} + +table LobbyMembershipRejectedEvent { + game_id:string; +} + +table LobbyInviteCreatedEvent { + game_id:string; + inviter_user_id:string; +} + +table LobbyInviteRedeemedEvent { + game_id:string; + invitee_user_id:string; +} + +root_type GameTurnReadyEvent; diff --git a/pkg/schema/fbs/notification/GameFinishedEvent.go b/pkg/schema/fbs/notification/GameFinishedEvent.go new file mode 100644 index 0000000..7e53458 --- /dev/null +++ b/pkg/schema/fbs/notification/GameFinishedEvent.go @@ -0,0 +1,75 @@ +// Code generated by the FlatBuffers compiler. DO NOT EDIT. + +package notification + +import ( + flatbuffers "github.com/google/flatbuffers/go" +) + +type GameFinishedEvent struct { + _tab flatbuffers.Table +} + +func GetRootAsGameFinishedEvent(buf []byte, offset flatbuffers.UOffsetT) *GameFinishedEvent { + n := flatbuffers.GetUOffsetT(buf[offset:]) + x := &GameFinishedEvent{} + x.Init(buf, n+offset) + return x +} + +func FinishGameFinishedEventBuffer(builder *flatbuffers.Builder, offset flatbuffers.UOffsetT) { + builder.Finish(offset) +} + +func GetSizePrefixedRootAsGameFinishedEvent(buf []byte, offset flatbuffers.UOffsetT) *GameFinishedEvent { + n := flatbuffers.GetUOffsetT(buf[offset+flatbuffers.SizeUint32:]) + x := &GameFinishedEvent{} + x.Init(buf, n+offset+flatbuffers.SizeUint32) + return x +} + +func FinishSizePrefixedGameFinishedEventBuffer(builder *flatbuffers.Builder, offset flatbuffers.UOffsetT) { + builder.FinishSizePrefixed(offset) +} + +func (rcv *GameFinishedEvent) Init(buf []byte, i flatbuffers.UOffsetT) { + rcv._tab.Bytes = buf + rcv._tab.Pos = i +} + +func (rcv *GameFinishedEvent) Table() flatbuffers.Table { + return rcv._tab +} + 
+func (rcv *GameFinishedEvent) GameId() []byte { + o := flatbuffers.UOffsetT(rcv._tab.Offset(4)) + if o != 0 { + return rcv._tab.ByteVector(o + rcv._tab.Pos) + } + return nil +} + +func (rcv *GameFinishedEvent) FinalTurnNumber() int64 { + o := flatbuffers.UOffsetT(rcv._tab.Offset(6)) + if o != 0 { + return rcv._tab.GetInt64(o + rcv._tab.Pos) + } + return 0 +} + +func (rcv *GameFinishedEvent) MutateFinalTurnNumber(n int64) bool { + return rcv._tab.MutateInt64Slot(6, n) +} + +func GameFinishedEventStart(builder *flatbuffers.Builder) { + builder.StartObject(2) +} +func GameFinishedEventAddGameId(builder *flatbuffers.Builder, gameId flatbuffers.UOffsetT) { + builder.PrependUOffsetTSlot(0, flatbuffers.UOffsetT(gameId), 0) +} +func GameFinishedEventAddFinalTurnNumber(builder *flatbuffers.Builder, finalTurnNumber int64) { + builder.PrependInt64Slot(1, finalTurnNumber, 0) +} +func GameFinishedEventEnd(builder *flatbuffers.Builder) flatbuffers.UOffsetT { + return builder.EndObject() +} diff --git a/pkg/schema/fbs/notification/GameTurnReadyEvent.go b/pkg/schema/fbs/notification/GameTurnReadyEvent.go new file mode 100644 index 0000000..710167b --- /dev/null +++ b/pkg/schema/fbs/notification/GameTurnReadyEvent.go @@ -0,0 +1,75 @@ +// Code generated by the FlatBuffers compiler. DO NOT EDIT. 
+ +package notification + +import ( + flatbuffers "github.com/google/flatbuffers/go" +) + +type GameTurnReadyEvent struct { + _tab flatbuffers.Table +} + +func GetRootAsGameTurnReadyEvent(buf []byte, offset flatbuffers.UOffsetT) *GameTurnReadyEvent { + n := flatbuffers.GetUOffsetT(buf[offset:]) + x := &GameTurnReadyEvent{} + x.Init(buf, n+offset) + return x +} + +func FinishGameTurnReadyEventBuffer(builder *flatbuffers.Builder, offset flatbuffers.UOffsetT) { + builder.Finish(offset) +} + +func GetSizePrefixedRootAsGameTurnReadyEvent(buf []byte, offset flatbuffers.UOffsetT) *GameTurnReadyEvent { + n := flatbuffers.GetUOffsetT(buf[offset+flatbuffers.SizeUint32:]) + x := &GameTurnReadyEvent{} + x.Init(buf, n+offset+flatbuffers.SizeUint32) + return x +} + +func FinishSizePrefixedGameTurnReadyEventBuffer(builder *flatbuffers.Builder, offset flatbuffers.UOffsetT) { + builder.FinishSizePrefixed(offset) +} + +func (rcv *GameTurnReadyEvent) Init(buf []byte, i flatbuffers.UOffsetT) { + rcv._tab.Bytes = buf + rcv._tab.Pos = i +} + +func (rcv *GameTurnReadyEvent) Table() flatbuffers.Table { + return rcv._tab +} + +func (rcv *GameTurnReadyEvent) GameId() []byte { + o := flatbuffers.UOffsetT(rcv._tab.Offset(4)) + if o != 0 { + return rcv._tab.ByteVector(o + rcv._tab.Pos) + } + return nil +} + +func (rcv *GameTurnReadyEvent) TurnNumber() int64 { + o := flatbuffers.UOffsetT(rcv._tab.Offset(6)) + if o != 0 { + return rcv._tab.GetInt64(o + rcv._tab.Pos) + } + return 0 +} + +func (rcv *GameTurnReadyEvent) MutateTurnNumber(n int64) bool { + return rcv._tab.MutateInt64Slot(6, n) +} + +func GameTurnReadyEventStart(builder *flatbuffers.Builder) { + builder.StartObject(2) +} +func GameTurnReadyEventAddGameId(builder *flatbuffers.Builder, gameId flatbuffers.UOffsetT) { + builder.PrependUOffsetTSlot(0, flatbuffers.UOffsetT(gameId), 0) +} +func GameTurnReadyEventAddTurnNumber(builder *flatbuffers.Builder, turnNumber int64) { + builder.PrependInt64Slot(1, turnNumber, 0) +} +func 
GameTurnReadyEventEnd(builder *flatbuffers.Builder) flatbuffers.UOffsetT { + return builder.EndObject() +} diff --git a/pkg/schema/fbs/notification/LobbyApplicationSubmittedEvent.go b/pkg/schema/fbs/notification/LobbyApplicationSubmittedEvent.go new file mode 100644 index 0000000..2fee385 --- /dev/null +++ b/pkg/schema/fbs/notification/LobbyApplicationSubmittedEvent.go @@ -0,0 +1,71 @@ +// Code generated by the FlatBuffers compiler. DO NOT EDIT. + +package notification + +import ( + flatbuffers "github.com/google/flatbuffers/go" +) + +type LobbyApplicationSubmittedEvent struct { + _tab flatbuffers.Table +} + +func GetRootAsLobbyApplicationSubmittedEvent(buf []byte, offset flatbuffers.UOffsetT) *LobbyApplicationSubmittedEvent { + n := flatbuffers.GetUOffsetT(buf[offset:]) + x := &LobbyApplicationSubmittedEvent{} + x.Init(buf, n+offset) + return x +} + +func FinishLobbyApplicationSubmittedEventBuffer(builder *flatbuffers.Builder, offset flatbuffers.UOffsetT) { + builder.Finish(offset) +} + +func GetSizePrefixedRootAsLobbyApplicationSubmittedEvent(buf []byte, offset flatbuffers.UOffsetT) *LobbyApplicationSubmittedEvent { + n := flatbuffers.GetUOffsetT(buf[offset+flatbuffers.SizeUint32:]) + x := &LobbyApplicationSubmittedEvent{} + x.Init(buf, n+offset+flatbuffers.SizeUint32) + return x +} + +func FinishSizePrefixedLobbyApplicationSubmittedEventBuffer(builder *flatbuffers.Builder, offset flatbuffers.UOffsetT) { + builder.FinishSizePrefixed(offset) +} + +func (rcv *LobbyApplicationSubmittedEvent) Init(buf []byte, i flatbuffers.UOffsetT) { + rcv._tab.Bytes = buf + rcv._tab.Pos = i +} + +func (rcv *LobbyApplicationSubmittedEvent) Table() flatbuffers.Table { + return rcv._tab +} + +func (rcv *LobbyApplicationSubmittedEvent) GameId() []byte { + o := flatbuffers.UOffsetT(rcv._tab.Offset(4)) + if o != 0 { + return rcv._tab.ByteVector(o + rcv._tab.Pos) + } + return nil +} + +func (rcv *LobbyApplicationSubmittedEvent) ApplicantUserId() []byte { + o := 
flatbuffers.UOffsetT(rcv._tab.Offset(6)) + if o != 0 { + return rcv._tab.ByteVector(o + rcv._tab.Pos) + } + return nil +} + +func LobbyApplicationSubmittedEventStart(builder *flatbuffers.Builder) { + builder.StartObject(2) +} +func LobbyApplicationSubmittedEventAddGameId(builder *flatbuffers.Builder, gameId flatbuffers.UOffsetT) { + builder.PrependUOffsetTSlot(0, flatbuffers.UOffsetT(gameId), 0) +} +func LobbyApplicationSubmittedEventAddApplicantUserId(builder *flatbuffers.Builder, applicantUserId flatbuffers.UOffsetT) { + builder.PrependUOffsetTSlot(1, flatbuffers.UOffsetT(applicantUserId), 0) +} +func LobbyApplicationSubmittedEventEnd(builder *flatbuffers.Builder) flatbuffers.UOffsetT { + return builder.EndObject() +} diff --git a/pkg/schema/fbs/notification/LobbyInviteCreatedEvent.go b/pkg/schema/fbs/notification/LobbyInviteCreatedEvent.go new file mode 100644 index 0000000..f0bcca0 --- /dev/null +++ b/pkg/schema/fbs/notification/LobbyInviteCreatedEvent.go @@ -0,0 +1,71 @@ +// Code generated by the FlatBuffers compiler. DO NOT EDIT. 
+ +package notification + +import ( + flatbuffers "github.com/google/flatbuffers/go" +) + +type LobbyInviteCreatedEvent struct { + _tab flatbuffers.Table +} + +func GetRootAsLobbyInviteCreatedEvent(buf []byte, offset flatbuffers.UOffsetT) *LobbyInviteCreatedEvent { + n := flatbuffers.GetUOffsetT(buf[offset:]) + x := &LobbyInviteCreatedEvent{} + x.Init(buf, n+offset) + return x +} + +func FinishLobbyInviteCreatedEventBuffer(builder *flatbuffers.Builder, offset flatbuffers.UOffsetT) { + builder.Finish(offset) +} + +func GetSizePrefixedRootAsLobbyInviteCreatedEvent(buf []byte, offset flatbuffers.UOffsetT) *LobbyInviteCreatedEvent { + n := flatbuffers.GetUOffsetT(buf[offset+flatbuffers.SizeUint32:]) + x := &LobbyInviteCreatedEvent{} + x.Init(buf, n+offset+flatbuffers.SizeUint32) + return x +} + +func FinishSizePrefixedLobbyInviteCreatedEventBuffer(builder *flatbuffers.Builder, offset flatbuffers.UOffsetT) { + builder.FinishSizePrefixed(offset) +} + +func (rcv *LobbyInviteCreatedEvent) Init(buf []byte, i flatbuffers.UOffsetT) { + rcv._tab.Bytes = buf + rcv._tab.Pos = i +} + +func (rcv *LobbyInviteCreatedEvent) Table() flatbuffers.Table { + return rcv._tab +} + +func (rcv *LobbyInviteCreatedEvent) GameId() []byte { + o := flatbuffers.UOffsetT(rcv._tab.Offset(4)) + if o != 0 { + return rcv._tab.ByteVector(o + rcv._tab.Pos) + } + return nil +} + +func (rcv *LobbyInviteCreatedEvent) InviterUserId() []byte { + o := flatbuffers.UOffsetT(rcv._tab.Offset(6)) + if o != 0 { + return rcv._tab.ByteVector(o + rcv._tab.Pos) + } + return nil +} + +func LobbyInviteCreatedEventStart(builder *flatbuffers.Builder) { + builder.StartObject(2) +} +func LobbyInviteCreatedEventAddGameId(builder *flatbuffers.Builder, gameId flatbuffers.UOffsetT) { + builder.PrependUOffsetTSlot(0, flatbuffers.UOffsetT(gameId), 0) +} +func LobbyInviteCreatedEventAddInviterUserId(builder *flatbuffers.Builder, inviterUserId flatbuffers.UOffsetT) { + builder.PrependUOffsetTSlot(1, 
flatbuffers.UOffsetT(inviterUserId), 0) +} +func LobbyInviteCreatedEventEnd(builder *flatbuffers.Builder) flatbuffers.UOffsetT { + return builder.EndObject() +} diff --git a/pkg/schema/fbs/notification/LobbyInviteRedeemedEvent.go b/pkg/schema/fbs/notification/LobbyInviteRedeemedEvent.go new file mode 100644 index 0000000..5abb91e --- /dev/null +++ b/pkg/schema/fbs/notification/LobbyInviteRedeemedEvent.go @@ -0,0 +1,71 @@ +// Code generated by the FlatBuffers compiler. DO NOT EDIT. + +package notification + +import ( + flatbuffers "github.com/google/flatbuffers/go" +) + +type LobbyInviteRedeemedEvent struct { + _tab flatbuffers.Table +} + +func GetRootAsLobbyInviteRedeemedEvent(buf []byte, offset flatbuffers.UOffsetT) *LobbyInviteRedeemedEvent { + n := flatbuffers.GetUOffsetT(buf[offset:]) + x := &LobbyInviteRedeemedEvent{} + x.Init(buf, n+offset) + return x +} + +func FinishLobbyInviteRedeemedEventBuffer(builder *flatbuffers.Builder, offset flatbuffers.UOffsetT) { + builder.Finish(offset) +} + +func GetSizePrefixedRootAsLobbyInviteRedeemedEvent(buf []byte, offset flatbuffers.UOffsetT) *LobbyInviteRedeemedEvent { + n := flatbuffers.GetUOffsetT(buf[offset+flatbuffers.SizeUint32:]) + x := &LobbyInviteRedeemedEvent{} + x.Init(buf, n+offset+flatbuffers.SizeUint32) + return x +} + +func FinishSizePrefixedLobbyInviteRedeemedEventBuffer(builder *flatbuffers.Builder, offset flatbuffers.UOffsetT) { + builder.FinishSizePrefixed(offset) +} + +func (rcv *LobbyInviteRedeemedEvent) Init(buf []byte, i flatbuffers.UOffsetT) { + rcv._tab.Bytes = buf + rcv._tab.Pos = i +} + +func (rcv *LobbyInviteRedeemedEvent) Table() flatbuffers.Table { + return rcv._tab +} + +func (rcv *LobbyInviteRedeemedEvent) GameId() []byte { + o := flatbuffers.UOffsetT(rcv._tab.Offset(4)) + if o != 0 { + return rcv._tab.ByteVector(o + rcv._tab.Pos) + } + return nil +} + +func (rcv *LobbyInviteRedeemedEvent) InviteeUserId() []byte { + o := flatbuffers.UOffsetT(rcv._tab.Offset(6)) + if o != 0 { + return 
rcv._tab.ByteVector(o + rcv._tab.Pos) + } + return nil +} + +func LobbyInviteRedeemedEventStart(builder *flatbuffers.Builder) { + builder.StartObject(2) +} +func LobbyInviteRedeemedEventAddGameId(builder *flatbuffers.Builder, gameId flatbuffers.UOffsetT) { + builder.PrependUOffsetTSlot(0, flatbuffers.UOffsetT(gameId), 0) +} +func LobbyInviteRedeemedEventAddInviteeUserId(builder *flatbuffers.Builder, inviteeUserId flatbuffers.UOffsetT) { + builder.PrependUOffsetTSlot(1, flatbuffers.UOffsetT(inviteeUserId), 0) +} +func LobbyInviteRedeemedEventEnd(builder *flatbuffers.Builder) flatbuffers.UOffsetT { + return builder.EndObject() +} diff --git a/pkg/schema/fbs/notification/LobbyMembershipApprovedEvent.go b/pkg/schema/fbs/notification/LobbyMembershipApprovedEvent.go new file mode 100644 index 0000000..fa19b25 --- /dev/null +++ b/pkg/schema/fbs/notification/LobbyMembershipApprovedEvent.go @@ -0,0 +1,60 @@ +// Code generated by the FlatBuffers compiler. DO NOT EDIT. + +package notification + +import ( + flatbuffers "github.com/google/flatbuffers/go" +) + +type LobbyMembershipApprovedEvent struct { + _tab flatbuffers.Table +} + +func GetRootAsLobbyMembershipApprovedEvent(buf []byte, offset flatbuffers.UOffsetT) *LobbyMembershipApprovedEvent { + n := flatbuffers.GetUOffsetT(buf[offset:]) + x := &LobbyMembershipApprovedEvent{} + x.Init(buf, n+offset) + return x +} + +func FinishLobbyMembershipApprovedEventBuffer(builder *flatbuffers.Builder, offset flatbuffers.UOffsetT) { + builder.Finish(offset) +} + +func GetSizePrefixedRootAsLobbyMembershipApprovedEvent(buf []byte, offset flatbuffers.UOffsetT) *LobbyMembershipApprovedEvent { + n := flatbuffers.GetUOffsetT(buf[offset+flatbuffers.SizeUint32:]) + x := &LobbyMembershipApprovedEvent{} + x.Init(buf, n+offset+flatbuffers.SizeUint32) + return x +} + +func FinishSizePrefixedLobbyMembershipApprovedEventBuffer(builder *flatbuffers.Builder, offset flatbuffers.UOffsetT) { + builder.FinishSizePrefixed(offset) +} + +func (rcv 
*LobbyMembershipApprovedEvent) Init(buf []byte, i flatbuffers.UOffsetT) { + rcv._tab.Bytes = buf + rcv._tab.Pos = i +} + +func (rcv *LobbyMembershipApprovedEvent) Table() flatbuffers.Table { + return rcv._tab +} + +func (rcv *LobbyMembershipApprovedEvent) GameId() []byte { + o := flatbuffers.UOffsetT(rcv._tab.Offset(4)) + if o != 0 { + return rcv._tab.ByteVector(o + rcv._tab.Pos) + } + return nil +} + +func LobbyMembershipApprovedEventStart(builder *flatbuffers.Builder) { + builder.StartObject(1) +} +func LobbyMembershipApprovedEventAddGameId(builder *flatbuffers.Builder, gameId flatbuffers.UOffsetT) { + builder.PrependUOffsetTSlot(0, flatbuffers.UOffsetT(gameId), 0) +} +func LobbyMembershipApprovedEventEnd(builder *flatbuffers.Builder) flatbuffers.UOffsetT { + return builder.EndObject() +} diff --git a/pkg/schema/fbs/notification/LobbyMembershipRejectedEvent.go b/pkg/schema/fbs/notification/LobbyMembershipRejectedEvent.go new file mode 100644 index 0000000..a769c91 --- /dev/null +++ b/pkg/schema/fbs/notification/LobbyMembershipRejectedEvent.go @@ -0,0 +1,60 @@ +// Code generated by the FlatBuffers compiler. DO NOT EDIT. 
+ +package notification + +import ( + flatbuffers "github.com/google/flatbuffers/go" +) + +type LobbyMembershipRejectedEvent struct { + _tab flatbuffers.Table +} + +func GetRootAsLobbyMembershipRejectedEvent(buf []byte, offset flatbuffers.UOffsetT) *LobbyMembershipRejectedEvent { + n := flatbuffers.GetUOffsetT(buf[offset:]) + x := &LobbyMembershipRejectedEvent{} + x.Init(buf, n+offset) + return x +} + +func FinishLobbyMembershipRejectedEventBuffer(builder *flatbuffers.Builder, offset flatbuffers.UOffsetT) { + builder.Finish(offset) +} + +func GetSizePrefixedRootAsLobbyMembershipRejectedEvent(buf []byte, offset flatbuffers.UOffsetT) *LobbyMembershipRejectedEvent { + n := flatbuffers.GetUOffsetT(buf[offset+flatbuffers.SizeUint32:]) + x := &LobbyMembershipRejectedEvent{} + x.Init(buf, n+offset+flatbuffers.SizeUint32) + return x +} + +func FinishSizePrefixedLobbyMembershipRejectedEventBuffer(builder *flatbuffers.Builder, offset flatbuffers.UOffsetT) { + builder.FinishSizePrefixed(offset) +} + +func (rcv *LobbyMembershipRejectedEvent) Init(buf []byte, i flatbuffers.UOffsetT) { + rcv._tab.Bytes = buf + rcv._tab.Pos = i +} + +func (rcv *LobbyMembershipRejectedEvent) Table() flatbuffers.Table { + return rcv._tab +} + +func (rcv *LobbyMembershipRejectedEvent) GameId() []byte { + o := flatbuffers.UOffsetT(rcv._tab.Offset(4)) + if o != 0 { + return rcv._tab.ByteVector(o + rcv._tab.Pos) + } + return nil +} + +func LobbyMembershipRejectedEventStart(builder *flatbuffers.Builder) { + builder.StartObject(1) +} +func LobbyMembershipRejectedEventAddGameId(builder *flatbuffers.Builder, gameId flatbuffers.UOffsetT) { + builder.PrependUOffsetTSlot(0, flatbuffers.UOffsetT(gameId), 0) +} +func LobbyMembershipRejectedEventEnd(builder *flatbuffers.Builder) flatbuffers.UOffsetT { + return builder.EndObject() +} diff --git a/pkg/transcoder/notification.go b/pkg/transcoder/notification.go new file mode 100644 index 0000000..43ab5b6 --- /dev/null +++ b/pkg/transcoder/notification.go @@ -0,0 
+1,390 @@ +package transcoder + +import ( + "errors" + "fmt" + + notificationfbs "galaxy/schema/fbs/notification" + + flatbuffers "github.com/google/flatbuffers/go" +) + +// GameTurnReadyEvent is the independent Go representation of +// `notification.GameTurnReadyEvent`. +type GameTurnReadyEvent struct { + GameID string + TurnNumber int64 +} + +// GameFinishedEvent is the independent Go representation of +// `notification.GameFinishedEvent`. +type GameFinishedEvent struct { + GameID string + FinalTurnNumber int64 +} + +// LobbyApplicationSubmittedEvent is the independent Go representation of +// `notification.LobbyApplicationSubmittedEvent`. +type LobbyApplicationSubmittedEvent struct { + GameID string + ApplicantUserID string +} + +// LobbyMembershipApprovedEvent is the independent Go representation of +// `notification.LobbyMembershipApprovedEvent`. +type LobbyMembershipApprovedEvent struct { + GameID string +} + +// LobbyMembershipRejectedEvent is the independent Go representation of +// `notification.LobbyMembershipRejectedEvent`. +type LobbyMembershipRejectedEvent struct { + GameID string +} + +// LobbyInviteCreatedEvent is the independent Go representation of +// `notification.LobbyInviteCreatedEvent`. +type LobbyInviteCreatedEvent struct { + GameID string + InviterUserID string +} + +// LobbyInviteRedeemedEvent is the independent Go representation of +// `notification.LobbyInviteRedeemedEvent`. +type LobbyInviteRedeemedEvent struct { + GameID string + InviteeUserID string +} + +// GameTurnReadyEventToPayload converts GameTurnReadyEvent to FlatBuffers bytes +// suitable for the authenticated gateway push transport. 
+func GameTurnReadyEventToPayload(event *GameTurnReadyEvent) ([]byte, error) { + if event == nil { + return nil, errors.New("encode game turn ready payload: event is nil") + } + if event.GameID == "" { + return nil, errors.New("encode game turn ready payload: game_id is empty") + } + + builder := flatbuffers.NewBuilder(64) + gameID := builder.CreateString(event.GameID) + + notificationfbs.GameTurnReadyEventStart(builder) + notificationfbs.GameTurnReadyEventAddGameId(builder, gameID) + notificationfbs.GameTurnReadyEventAddTurnNumber(builder, event.TurnNumber) + offset := notificationfbs.GameTurnReadyEventEnd(builder) + notificationfbs.FinishGameTurnReadyEventBuffer(builder, offset) + + return builder.FinishedBytes(), nil +} + +// PayloadToGameTurnReadyEvent converts FlatBuffers payload bytes into +// GameTurnReadyEvent. +func PayloadToGameTurnReadyEvent(data []byte) (result *GameTurnReadyEvent, err error) { + if len(data) == 0 { + return nil, errors.New("decode game turn ready payload: data is empty") + } + + defer recoverNotificationDecodePanic("decode game turn ready payload", &result, &err) + + event := notificationfbs.GetRootAsGameTurnReadyEvent(data, 0) + gameID, err := requiredNotificationString(event.GameId(), "game_id") + if err != nil { + return nil, fmt.Errorf("decode game turn ready payload: %w", err) + } + + return &GameTurnReadyEvent{ + GameID: gameID, + TurnNumber: event.TurnNumber(), + }, nil +} + +// GameFinishedEventToPayload converts GameFinishedEvent to FlatBuffers bytes +// suitable for the authenticated gateway push transport. 
+func GameFinishedEventToPayload(event *GameFinishedEvent) ([]byte, error) { + if event == nil { + return nil, errors.New("encode game finished payload: event is nil") + } + if event.GameID == "" { + return nil, errors.New("encode game finished payload: game_id is empty") + } + + builder := flatbuffers.NewBuilder(64) + gameID := builder.CreateString(event.GameID) + + notificationfbs.GameFinishedEventStart(builder) + notificationfbs.GameFinishedEventAddGameId(builder, gameID) + notificationfbs.GameFinishedEventAddFinalTurnNumber(builder, event.FinalTurnNumber) + offset := notificationfbs.GameFinishedEventEnd(builder) + notificationfbs.FinishGameFinishedEventBuffer(builder, offset) + + return builder.FinishedBytes(), nil +} + +// PayloadToGameFinishedEvent converts FlatBuffers payload bytes into +// GameFinishedEvent. +func PayloadToGameFinishedEvent(data []byte) (result *GameFinishedEvent, err error) { + if len(data) == 0 { + return nil, errors.New("decode game finished payload: data is empty") + } + + defer recoverNotificationDecodePanic("decode game finished payload", &result, &err) + + event := notificationfbs.GetRootAsGameFinishedEvent(data, 0) + gameID, err := requiredNotificationString(event.GameId(), "game_id") + if err != nil { + return nil, fmt.Errorf("decode game finished payload: %w", err) + } + + return &GameFinishedEvent{ + GameID: gameID, + FinalTurnNumber: event.FinalTurnNumber(), + }, nil +} + +// LobbyApplicationSubmittedEventToPayload converts +// LobbyApplicationSubmittedEvent to FlatBuffers bytes suitable for the +// authenticated gateway push transport. 
+func LobbyApplicationSubmittedEventToPayload(event *LobbyApplicationSubmittedEvent) ([]byte, error) { + if event == nil { + return nil, errors.New("encode lobby application submitted payload: event is nil") + } + if event.GameID == "" { + return nil, errors.New("encode lobby application submitted payload: game_id is empty") + } + if event.ApplicantUserID == "" { + return nil, errors.New("encode lobby application submitted payload: applicant_user_id is empty") + } + + builder := flatbuffers.NewBuilder(96) + gameID := builder.CreateString(event.GameID) + applicantUserID := builder.CreateString(event.ApplicantUserID) + + notificationfbs.LobbyApplicationSubmittedEventStart(builder) + notificationfbs.LobbyApplicationSubmittedEventAddGameId(builder, gameID) + notificationfbs.LobbyApplicationSubmittedEventAddApplicantUserId(builder, applicantUserID) + offset := notificationfbs.LobbyApplicationSubmittedEventEnd(builder) + notificationfbs.FinishLobbyApplicationSubmittedEventBuffer(builder, offset) + + return builder.FinishedBytes(), nil +} + +// PayloadToLobbyApplicationSubmittedEvent converts FlatBuffers payload bytes +// into LobbyApplicationSubmittedEvent. 
+func PayloadToLobbyApplicationSubmittedEvent(data []byte) (result *LobbyApplicationSubmittedEvent, err error) { + if len(data) == 0 { + return nil, errors.New("decode lobby application submitted payload: data is empty") + } + + defer recoverNotificationDecodePanic("decode lobby application submitted payload", &result, &err) + + event := notificationfbs.GetRootAsLobbyApplicationSubmittedEvent(data, 0) + gameID, err := requiredNotificationString(event.GameId(), "game_id") + if err != nil { + return nil, fmt.Errorf("decode lobby application submitted payload: %w", err) + } + applicantUserID, err := requiredNotificationString(event.ApplicantUserId(), "applicant_user_id") + if err != nil { + return nil, fmt.Errorf("decode lobby application submitted payload: %w", err) + } + + return &LobbyApplicationSubmittedEvent{ + GameID: gameID, + ApplicantUserID: applicantUserID, + }, nil +} + +// LobbyMembershipApprovedEventToPayload converts LobbyMembershipApprovedEvent +// to FlatBuffers bytes suitable for the authenticated gateway push transport. +func LobbyMembershipApprovedEventToPayload(event *LobbyMembershipApprovedEvent) ([]byte, error) { + if event == nil { + return nil, errors.New("encode lobby membership approved payload: event is nil") + } + if event.GameID == "" { + return nil, errors.New("encode lobby membership approved payload: game_id is empty") + } + + builder := flatbuffers.NewBuilder(48) + gameID := builder.CreateString(event.GameID) + + notificationfbs.LobbyMembershipApprovedEventStart(builder) + notificationfbs.LobbyMembershipApprovedEventAddGameId(builder, gameID) + offset := notificationfbs.LobbyMembershipApprovedEventEnd(builder) + notificationfbs.FinishLobbyMembershipApprovedEventBuffer(builder, offset) + + return builder.FinishedBytes(), nil +} + +// PayloadToLobbyMembershipApprovedEvent converts FlatBuffers payload bytes +// into LobbyMembershipApprovedEvent. 
+func PayloadToLobbyMembershipApprovedEvent(data []byte) (result *LobbyMembershipApprovedEvent, err error) { + if len(data) == 0 { + return nil, errors.New("decode lobby membership approved payload: data is empty") + } + + defer recoverNotificationDecodePanic("decode lobby membership approved payload", &result, &err) + + event := notificationfbs.GetRootAsLobbyMembershipApprovedEvent(data, 0) + gameID, err := requiredNotificationString(event.GameId(), "game_id") + if err != nil { + return nil, fmt.Errorf("decode lobby membership approved payload: %w", err) + } + + return &LobbyMembershipApprovedEvent{GameID: gameID}, nil +} + +// LobbyMembershipRejectedEventToPayload converts LobbyMembershipRejectedEvent +// to FlatBuffers bytes suitable for the authenticated gateway push transport. +func LobbyMembershipRejectedEventToPayload(event *LobbyMembershipRejectedEvent) ([]byte, error) { + if event == nil { + return nil, errors.New("encode lobby membership rejected payload: event is nil") + } + if event.GameID == "" { + return nil, errors.New("encode lobby membership rejected payload: game_id is empty") + } + + builder := flatbuffers.NewBuilder(48) + gameID := builder.CreateString(event.GameID) + + notificationfbs.LobbyMembershipRejectedEventStart(builder) + notificationfbs.LobbyMembershipRejectedEventAddGameId(builder, gameID) + offset := notificationfbs.LobbyMembershipRejectedEventEnd(builder) + notificationfbs.FinishLobbyMembershipRejectedEventBuffer(builder, offset) + + return builder.FinishedBytes(), nil +} + +// PayloadToLobbyMembershipRejectedEvent converts FlatBuffers payload bytes +// into LobbyMembershipRejectedEvent. 
+func PayloadToLobbyMembershipRejectedEvent(data []byte) (result *LobbyMembershipRejectedEvent, err error) { + if len(data) == 0 { + return nil, errors.New("decode lobby membership rejected payload: data is empty") + } + + defer recoverNotificationDecodePanic("decode lobby membership rejected payload", &result, &err) + + event := notificationfbs.GetRootAsLobbyMembershipRejectedEvent(data, 0) + gameID, err := requiredNotificationString(event.GameId(), "game_id") + if err != nil { + return nil, fmt.Errorf("decode lobby membership rejected payload: %w", err) + } + + return &LobbyMembershipRejectedEvent{GameID: gameID}, nil +} + +// LobbyInviteCreatedEventToPayload converts LobbyInviteCreatedEvent to +// FlatBuffers bytes suitable for the authenticated gateway push transport. +func LobbyInviteCreatedEventToPayload(event *LobbyInviteCreatedEvent) ([]byte, error) { + if event == nil { + return nil, errors.New("encode lobby invite created payload: event is nil") + } + if event.GameID == "" { + return nil, errors.New("encode lobby invite created payload: game_id is empty") + } + if event.InviterUserID == "" { + return nil, errors.New("encode lobby invite created payload: inviter_user_id is empty") + } + + builder := flatbuffers.NewBuilder(96) + gameID := builder.CreateString(event.GameID) + inviterUserID := builder.CreateString(event.InviterUserID) + + notificationfbs.LobbyInviteCreatedEventStart(builder) + notificationfbs.LobbyInviteCreatedEventAddGameId(builder, gameID) + notificationfbs.LobbyInviteCreatedEventAddInviterUserId(builder, inviterUserID) + offset := notificationfbs.LobbyInviteCreatedEventEnd(builder) + notificationfbs.FinishLobbyInviteCreatedEventBuffer(builder, offset) + + return builder.FinishedBytes(), nil +} + +// PayloadToLobbyInviteCreatedEvent converts FlatBuffers payload bytes into +// LobbyInviteCreatedEvent. 
+func PayloadToLobbyInviteCreatedEvent(data []byte) (result *LobbyInviteCreatedEvent, err error) { + if len(data) == 0 { + return nil, errors.New("decode lobby invite created payload: data is empty") + } + + defer recoverNotificationDecodePanic("decode lobby invite created payload", &result, &err) + + event := notificationfbs.GetRootAsLobbyInviteCreatedEvent(data, 0) + gameID, err := requiredNotificationString(event.GameId(), "game_id") + if err != nil { + return nil, fmt.Errorf("decode lobby invite created payload: %w", err) + } + inviterUserID, err := requiredNotificationString(event.InviterUserId(), "inviter_user_id") + if err != nil { + return nil, fmt.Errorf("decode lobby invite created payload: %w", err) + } + + return &LobbyInviteCreatedEvent{ + GameID: gameID, + InviterUserID: inviterUserID, + }, nil +} + +// LobbyInviteRedeemedEventToPayload converts LobbyInviteRedeemedEvent to +// FlatBuffers bytes suitable for the authenticated gateway push transport. +func LobbyInviteRedeemedEventToPayload(event *LobbyInviteRedeemedEvent) ([]byte, error) { + if event == nil { + return nil, errors.New("encode lobby invite redeemed payload: event is nil") + } + if event.GameID == "" { + return nil, errors.New("encode lobby invite redeemed payload: game_id is empty") + } + if event.InviteeUserID == "" { + return nil, errors.New("encode lobby invite redeemed payload: invitee_user_id is empty") + } + + builder := flatbuffers.NewBuilder(96) + gameID := builder.CreateString(event.GameID) + inviteeUserID := builder.CreateString(event.InviteeUserID) + + notificationfbs.LobbyInviteRedeemedEventStart(builder) + notificationfbs.LobbyInviteRedeemedEventAddGameId(builder, gameID) + notificationfbs.LobbyInviteRedeemedEventAddInviteeUserId(builder, inviteeUserID) + offset := notificationfbs.LobbyInviteRedeemedEventEnd(builder) + notificationfbs.FinishLobbyInviteRedeemedEventBuffer(builder, offset) + + return builder.FinishedBytes(), nil +} + +// PayloadToLobbyInviteRedeemedEvent 
converts FlatBuffers payload bytes into +// LobbyInviteRedeemedEvent. +func PayloadToLobbyInviteRedeemedEvent(data []byte) (result *LobbyInviteRedeemedEvent, err error) { + if len(data) == 0 { + return nil, errors.New("decode lobby invite redeemed payload: data is empty") + } + + defer recoverNotificationDecodePanic("decode lobby invite redeemed payload", &result, &err) + + event := notificationfbs.GetRootAsLobbyInviteRedeemedEvent(data, 0) + gameID, err := requiredNotificationString(event.GameId(), "game_id") + if err != nil { + return nil, fmt.Errorf("decode lobby invite redeemed payload: %w", err) + } + inviteeUserID, err := requiredNotificationString(event.InviteeUserId(), "invitee_user_id") + if err != nil { + return nil, fmt.Errorf("decode lobby invite redeemed payload: %w", err) + } + + return &LobbyInviteRedeemedEvent{ + GameID: gameID, + InviteeUserID: inviteeUserID, + }, nil +} + +func requiredNotificationString(value []byte, field string) (string, error) { + if len(value) == 0 { + return "", fmt.Errorf("%s is missing", field) + } + + return string(value), nil +} + +func recoverNotificationDecodePanic[T any](message string, result **T, err *error) { + if recovered := recover(); recovered != nil { + *result = nil + *err = fmt.Errorf("%s: panic recovered: %v", message, recovered) + } +} diff --git a/pkg/transcoder/notification_test.go b/pkg/transcoder/notification_test.go new file mode 100644 index 0000000..396243c --- /dev/null +++ b/pkg/transcoder/notification_test.go @@ -0,0 +1,387 @@ +package transcoder + +import ( + "reflect" + "strings" + "testing" + + notificationfbs "galaxy/schema/fbs/notification" + + flatbuffers "github.com/google/flatbuffers/go" +) + +func TestNotificationPayloadRoundTrips(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + source any + encode func(any) ([]byte, error) + decode func([]byte) (any, error) + }{ + { + name: "game turn ready", + source: &GameTurnReadyEvent{GameID: "game-1", TurnNumber: 54}, + 
encode: func(value any) ([]byte, error) { return GameTurnReadyEventToPayload(value.(*GameTurnReadyEvent)) }, + decode: func(data []byte) (any, error) { return PayloadToGameTurnReadyEvent(data) }, + }, + { + name: "game finished", + source: &GameFinishedEvent{GameID: "game-2", FinalTurnNumber: 99}, + encode: func(value any) ([]byte, error) { return GameFinishedEventToPayload(value.(*GameFinishedEvent)) }, + decode: func(data []byte) (any, error) { return PayloadToGameFinishedEvent(data) }, + }, + { + name: "lobby application submitted", + source: &LobbyApplicationSubmittedEvent{GameID: "game-3", ApplicantUserID: "user-7"}, + encode: func(value any) ([]byte, error) { + return LobbyApplicationSubmittedEventToPayload(value.(*LobbyApplicationSubmittedEvent)) + }, + decode: func(data []byte) (any, error) { return PayloadToLobbyApplicationSubmittedEvent(data) }, + }, + { + name: "lobby membership approved", + source: &LobbyMembershipApprovedEvent{GameID: "game-4"}, + encode: func(value any) ([]byte, error) { + return LobbyMembershipApprovedEventToPayload(value.(*LobbyMembershipApprovedEvent)) + }, + decode: func(data []byte) (any, error) { return PayloadToLobbyMembershipApprovedEvent(data) }, + }, + { + name: "lobby membership rejected", + source: &LobbyMembershipRejectedEvent{GameID: "game-5"}, + encode: func(value any) ([]byte, error) { + return LobbyMembershipRejectedEventToPayload(value.(*LobbyMembershipRejectedEvent)) + }, + decode: func(data []byte) (any, error) { return PayloadToLobbyMembershipRejectedEvent(data) }, + }, + { + name: "lobby invite created", + source: &LobbyInviteCreatedEvent{GameID: "game-6", InviterUserID: "user-8"}, + encode: func(value any) ([]byte, error) { + return LobbyInviteCreatedEventToPayload(value.(*LobbyInviteCreatedEvent)) + }, + decode: func(data []byte) (any, error) { return PayloadToLobbyInviteCreatedEvent(data) }, + }, + { + name: "lobby invite redeemed", + source: &LobbyInviteRedeemedEvent{GameID: "game-7", InviteeUserID: 
"user-9"}, + encode: func(value any) ([]byte, error) { + return LobbyInviteRedeemedEventToPayload(value.(*LobbyInviteRedeemedEvent)) + }, + decode: func(data []byte) (any, error) { return PayloadToLobbyInviteRedeemedEvent(data) }, + }, + } + + for _, tt := range tests { + tt := tt + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + payload, err := tt.encode(tt.source) + if err != nil { + t.Fatalf("encode payload: %v", err) + } + + decoded, err := tt.decode(payload) + if err != nil { + t.Fatalf("decode payload: %v", err) + } + + if !reflect.DeepEqual(tt.source, decoded) { + t.Fatalf("round-trip mismatch\nsource: %#v\ndecoded:%#v", tt.source, decoded) + } + }) + } +} + +func TestNotificationPayloadEncodersRejectNilInputs(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + call func() error + }{ + { + name: "game turn ready", + call: func() error { + _, err := GameTurnReadyEventToPayload(nil) + return err + }, + }, + { + name: "game finished", + call: func() error { + _, err := GameFinishedEventToPayload(nil) + return err + }, + }, + { + name: "lobby application submitted", + call: func() error { + _, err := LobbyApplicationSubmittedEventToPayload(nil) + return err + }, + }, + { + name: "lobby membership approved", + call: func() error { + _, err := LobbyMembershipApprovedEventToPayload(nil) + return err + }, + }, + { + name: "lobby membership rejected", + call: func() error { + _, err := LobbyMembershipRejectedEventToPayload(nil) + return err + }, + }, + { + name: "lobby invite created", + call: func() error { + _, err := LobbyInviteCreatedEventToPayload(nil) + return err + }, + }, + { + name: "lobby invite redeemed", + call: func() error { + _, err := LobbyInviteRedeemedEventToPayload(nil) + return err + }, + }, + } + + for _, tt := range tests { + tt := tt + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + if err := tt.call(); err == nil { + t.Fatal("expected error") + } + }) + } +} + +func 
TestNotificationPayloadDecodersRejectEmptyPayloads(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + call func() error + }{ + { + name: "game turn ready", + call: func() error { + _, err := PayloadToGameTurnReadyEvent(nil) + return err + }, + }, + { + name: "game finished", + call: func() error { + _, err := PayloadToGameFinishedEvent(nil) + return err + }, + }, + { + name: "lobby application submitted", + call: func() error { + _, err := PayloadToLobbyApplicationSubmittedEvent(nil) + return err + }, + }, + { + name: "lobby membership approved", + call: func() error { + _, err := PayloadToLobbyMembershipApprovedEvent(nil) + return err + }, + }, + { + name: "lobby membership rejected", + call: func() error { + _, err := PayloadToLobbyMembershipRejectedEvent(nil) + return err + }, + }, + { + name: "lobby invite created", + call: func() error { + _, err := PayloadToLobbyInviteCreatedEvent(nil) + return err + }, + }, + { + name: "lobby invite redeemed", + call: func() error { + _, err := PayloadToLobbyInviteRedeemedEvent(nil) + return err + }, + }, + } + + for _, tt := range tests { + tt := tt + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + if err := tt.call(); err == nil { + t.Fatal("expected error") + } + }) + } +} + +func TestNotificationPayloadEncodersRejectMissingRequiredStrings(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + call func() error + want string + }{ + { + name: "game turn ready", + call: func() error { + _, err := GameTurnReadyEventToPayload(&GameTurnReadyEvent{}) + return err + }, + want: "game_id is empty", + }, + { + name: "lobby application submitted", + call: func() error { + _, err := LobbyApplicationSubmittedEventToPayload(&LobbyApplicationSubmittedEvent{GameID: "game-1"}) + return err + }, + want: "applicant_user_id is empty", + }, + { + name: "lobby invite created", + call: func() error { + _, err := LobbyInviteCreatedEventToPayload(&LobbyInviteCreatedEvent{GameID: "game-1"}) + return err + 
}, + want: "inviter_user_id is empty", + }, + { + name: "lobby invite redeemed", + call: func() error { + _, err := LobbyInviteRedeemedEventToPayload(&LobbyInviteRedeemedEvent{GameID: "game-1"}) + return err + }, + want: "invitee_user_id is empty", + }, + } + + for _, tt := range tests { + tt := tt + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + err := tt.call() + if err == nil { + t.Fatal("expected error") + } + if !strings.Contains(err.Error(), tt.want) { + t.Fatalf("unexpected error: %v", err) + } + }) + } +} + +func TestNotificationPayloadDecodersRejectMissingRequiredStrings(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + payload func() []byte + decode func([]byte) error + want string + }{ + { + name: "game turn ready", + payload: func() []byte { + builder := flatbuffers.NewBuilder(32) + notificationfbs.GameTurnReadyEventStart(builder) + offset := notificationfbs.GameTurnReadyEventEnd(builder) + notificationfbs.FinishGameTurnReadyEventBuffer(builder, offset) + return builder.FinishedBytes() + }, + decode: func(data []byte) error { + _, err := PayloadToGameTurnReadyEvent(data) + return err + }, + want: "game_id is missing", + }, + { + name: "lobby application submitted", + payload: func() []byte { + builder := flatbuffers.NewBuilder(32) + gameID := builder.CreateString("game-1") + notificationfbs.LobbyApplicationSubmittedEventStart(builder) + notificationfbs.LobbyApplicationSubmittedEventAddGameId(builder, gameID) + offset := notificationfbs.LobbyApplicationSubmittedEventEnd(builder) + notificationfbs.FinishLobbyApplicationSubmittedEventBuffer(builder, offset) + return builder.FinishedBytes() + }, + decode: func(data []byte) error { + _, err := PayloadToLobbyApplicationSubmittedEvent(data) + return err + }, + want: "applicant_user_id is missing", + }, + { + name: "lobby invite created", + payload: func() []byte { + builder := flatbuffers.NewBuilder(32) + gameID := builder.CreateString("game-1") + 
notificationfbs.LobbyInviteCreatedEventStart(builder) + notificationfbs.LobbyInviteCreatedEventAddGameId(builder, gameID) + offset := notificationfbs.LobbyInviteCreatedEventEnd(builder) + notificationfbs.FinishLobbyInviteCreatedEventBuffer(builder, offset) + return builder.FinishedBytes() + }, + decode: func(data []byte) error { + _, err := PayloadToLobbyInviteCreatedEvent(data) + return err + }, + want: "inviter_user_id is missing", + }, + { + name: "lobby invite redeemed", + payload: func() []byte { + builder := flatbuffers.NewBuilder(32) + gameID := builder.CreateString("game-1") + notificationfbs.LobbyInviteRedeemedEventStart(builder) + notificationfbs.LobbyInviteRedeemedEventAddGameId(builder, gameID) + offset := notificationfbs.LobbyInviteRedeemedEventEnd(builder) + notificationfbs.FinishLobbyInviteRedeemedEventBuffer(builder, offset) + return builder.FinishedBytes() + }, + decode: func(data []byte) error { + _, err := PayloadToLobbyInviteRedeemedEvent(data) + return err + }, + want: "invitee_user_id is missing", + }, + } + + for _, tt := range tests { + tt := tt + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + err := tt.decode(tt.payload()) + if err == nil { + t.Fatal("expected error") + } + if !strings.Contains(err.Error(), tt.want) { + t.Fatalf("unexpected error: %v", err) + } + }) + } +}