outbox for gateways

This commit is contained in:
Stephan D
2026-02-18 01:35:28 +01:00
parent 974caf286c
commit 69531cee73
221 changed files with 12172 additions and 782 deletions

View File

@@ -0,0 +1,33 @@
package outbox
import (
"time"
"github.com/tech/sendico/pkg/db/storable"
)
// Collection is the name of the collection that stores outbox events; it is
// passed to the repository constructor and returned by (*Event).Collection.
const Collection = "outbox"
// Status is the dispatch state recorded on an outbox event.
type Status string
const (
// StatusPending marks an event that is still waiting to be dispatched; it is
// the only status matched by the pending-events query.
StatusPending Status = "pending"
// StatusSent marks an event whose dispatch has been recorded via MarkSent.
StatusSent Status = "sent"
// StatusFailed marks an event that has been recorded as failed via MarkFailed.
StatusFailed Status = "failed"
)
// Event represents an outbox message pending dispatch to the broker.
type Event struct {
// Base is the shared storable record, inlined into the document.
storable.Base `bson:",inline" json:",inline"`
// EventID is the message identifier; the mongo store enforces a unique index on it.
EventID string `bson:"eventId" json:"eventId"`
// Subject is the subject carried over from the outbox message.
Subject string `bson:"subject" json:"subject"`
// Payload is the raw message body.
Payload []byte `bson:"payload" json:"payload"`
// Status is the current dispatch state of the event.
Status Status `bson:"status" json:"status"`
// Attempts counts dispatch attempts; stores bump it through IncrementAttempts.
Attempts int `bson:"attempts" json:"attempts"`
// SentAt is set when the event is marked as sent and omitted otherwise.
SentAt *time.Time `bson:"sentAt,omitempty" json:"sentAt,omitempty"`
}
// Collection returns the name of the collection outbox events are stored in.
func (event *Event) Collection() string { return Collection }

View File

@@ -0,0 +1,123 @@
package outbox
import (
"context"
"time"
"github.com/tech/sendico/pkg/db/repository"
ri "github.com/tech/sendico/pkg/db/repository/index"
"github.com/tech/sendico/pkg/merrors"
"github.com/tech/sendico/pkg/mlogger"
"go.mongodb.org/mongo-driver/v2/bson"
"go.mongodb.org/mongo-driver/v2/mongo"
"go.uber.org/zap"
)
// mongoStore is the Store implementation built by NewMongoStore on top of a
// repository bound to the outbox collection.
type mongoStore struct {
// logger is the collection-named child logger used by the store methods.
logger mlogger.Logger
// repo is the repository created for the outbox collection.
repo repository.Repository
}
// NewMongoStore builds a Store over the outbox collection of db.
//
// It rejects a nil database, substitutes a no-op logger when logger is nil,
// and ensures two indexes before returning: a compound (status, createdAt)
// index and a unique index on eventId. The first index failure is logged and
// returned as-is.
func NewMongoStore(logger mlogger.Logger, db *mongo.Database) (Store, error) {
	if db == nil {
		return nil, merrors.InvalidArgument("mongo database is nil")
	}
	if logger == nil {
		logger = zap.NewNop()
	}

	repo := repository.CreateMongoRepository(db, Collection)

	// Indexes are ensured in declaration order; each carries its own log message.
	required := []struct {
		definition *ri.Definition
		failure    string
	}{
		{
			definition: &ri.Definition{
				Keys: []ri.Key{{Field: "status", Sort: ri.Asc}, {Field: "createdAt", Sort: ri.Asc}},
			},
			failure: "Failed to ensure outbox status index",
		},
		{
			definition: &ri.Definition{
				Keys:   []ri.Key{{Field: "eventId", Sort: ri.Asc}},
				Unique: true,
			},
			failure: "Failed to ensure outbox eventId index",
		},
	}
	for _, index := range required {
		if err := repo.CreateIndex(index.definition); err != nil {
			logger.Error(index.failure, zap.Error(err))
			return nil, err
		}
	}

	store := &mongoStore{logger: logger.Named(Collection), repo: repo}
	store.logger.Debug("Outbox store initialised", zap.String("collection", Collection))
	return store, nil
}
// Create inserts event into the outbox collection.
//
// A nil event is rejected with an invalid-argument error. A duplicate-key
// failure from the insert is reported as a data conflict; any other insert
// error is logged and returned unchanged.
func (o *mongoStore) Create(ctx context.Context, event *Event) error {
	if event == nil {
		o.logger.Warn("Attempt to create nil outbox event")
		return merrors.InvalidArgument("outbox: nil event")
	}

	insertErr := o.repo.Insert(ctx, event, nil)
	switch {
	case insertErr == nil:
		o.logger.Debug("Outbox event created", zap.String("event_id", event.EventID), zap.String("subject", event.Subject))
		return nil
	case mongo.IsDuplicateKeyError(insertErr):
		o.logger.Warn("Duplicate outbox event id", zap.String("event_id", event.EventID))
		return merrors.DataConflict("outbox event with this id already exists")
	default:
		o.logger.Warn("Failed to create outbox event", zap.Error(insertErr))
		return insertErr
	}
}
// ListPending queries events whose status is pending, sorted on createdAt and
// capped by limit, and decodes each cursor document into an Event.
//
// The returned slice is non-nil even when nothing matches. A query or decode
// error is logged and returned with a nil slice.
// NOTE(review): the `true` passed to Sort presumably selects ascending order,
// and limit is forwarded to the query builder unvalidated — confirm how the
// repository treats zero or negative limits.
func (o *mongoStore) ListPending(ctx context.Context, limit int) ([]*Event, error) {
	maxResults := int64(limit)
	pendingQuery := repository.Query().
		Filter(repository.Field("status"), StatusPending).
		Limit(&maxResults).
		Sort(repository.Field("createdAt"), true)

	pending := []*Event{}
	collect := func(cur *mongo.Cursor) error {
		var decoded Event
		if err := cur.Decode(&decoded); err != nil {
			return err
		}
		pending = append(pending, &decoded)
		return nil
	}

	if err := o.repo.FindManyByFilter(ctx, pendingQuery, collect); err != nil {
		o.logger.Warn("Failed to list pending outbox events", zap.Error(err))
		return nil, err
	}
	return pending, nil
}
// MarkSent patches the event identified by eventRef, setting its status to
// sent and its sentAt field to the supplied time. A zero eventRef is rejected
// with an invalid-argument error; the patch error, if any, is returned as-is.
func (o *mongoStore) MarkSent(ctx context.Context, eventRef bson.ObjectID, sentAt time.Time) error {
	if eventRef.IsZero() {
		return merrors.InvalidArgument("outbox: zero event id")
	}
	return o.repo.Patch(
		ctx,
		eventRef,
		repository.Patch().
			Set(repository.Field("status"), StatusSent).
			Set(repository.Field("sentAt"), sentAt),
	)
}
// MarkFailed patches the event identified by eventRef, setting its status to
// failed. A zero eventRef is rejected with an invalid-argument error.
func (o *mongoStore) MarkFailed(ctx context.Context, eventRef bson.ObjectID) error {
	if eventRef.IsZero() {
		return merrors.InvalidArgument("outbox: zero event id")
	}
	return o.repo.Patch(
		ctx,
		eventRef,
		repository.Patch().Set(repository.Field("status"), StatusFailed),
	)
}
// IncrementAttempts patches the event identified by eventRef, incrementing
// its attempts field by one. A zero eventRef is rejected with an
// invalid-argument error.
func (o *mongoStore) IncrementAttempts(ctx context.Context, eventRef bson.ObjectID) error {
	if eventRef.IsZero() {
		return merrors.InvalidArgument("outbox: zero event id")
	}
	return o.repo.Patch(
		ctx,
		eventRef,
		repository.Patch().Inc(repository.Field("attempts"), 1),
	)
}

View File

@@ -0,0 +1,108 @@
package outbox
import (
"context"
"strings"
"time"
pmessaging "github.com/tech/sendico/pkg/messaging"
pmessagingreliable "github.com/tech/sendico/pkg/messaging/reliable"
"github.com/tech/sendico/pkg/mlogger"
cfgmodel "github.com/tech/sendico/pkg/model"
"go.mongodb.org/mongo-driver/v2/bson"
)
// reliableStoreAdapter wraps a Store so it can be handed to the reliable
// producer: it converts between OutboxMessage values and Event records, and
// between string references and bson.ObjectID.
type reliableStoreAdapter struct {
// store is the wrapped outbox store; the adapter methods treat a nil store as a no-op.
store Store
}
// NewReliableProducer builds a reliable producer backed by store.
//
// With a nil store no producer is created: the result is a nil producer, the
// default reliable settings and a nil error. Otherwise the store is wrapped in
// an adapter and construction is delegated to
// pmessagingreliable.NewReliableProducerFromConfig; on failure the error is
// returned together with a nil producer and zero-valued settings.
func NewReliableProducer(logger mlogger.Logger, direct pmessaging.Producer, store Store, messagingSettings cfgmodel.SettingsT, opts ...pmessagingreliable.Option) (*pmessagingreliable.ReliableProducer, pmessagingreliable.Settings, error) {
	if store == nil {
		return nil, pmessagingreliable.DefaultSettings(), nil
	}

	adapter := &reliableStoreAdapter{store: store}
	reliable, effective, err := pmessagingreliable.NewReliableProducerFromConfig(logger, direct, adapter, messagingSettings, opts...)
	if err != nil {
		return nil, pmessagingreliable.Settings{}, err
	}
	return reliable, effective, nil
}
// Enqueue stores msg as a pending Event in the wrapped store.
//
// The event id and subject are whitespace-trimmed, the payload is copied so
// the stored event does not alias the caller's slice, and the attempt count
// is carried over. A nil adapter or nil store makes this a no-op returning nil.
func (a *reliableStoreAdapter) Enqueue(ctx context.Context, msg pmessagingreliable.OutboxMessage) error {
	if a == nil || a.store == nil {
		return nil
	}

	pending := &Event{Status: StatusPending, Attempts: msg.Attempts}
	pending.EventID = strings.TrimSpace(msg.EventID)
	pending.Subject = strings.TrimSpace(msg.Subject)
	pending.Payload = append([]byte(nil), msg.Payload...)
	return a.store.Create(ctx, pending)
}
// ListPending fetches up to limit pending events from the wrapped store and
// converts them into outbox messages.
//
// Nil events are skipped. Each message carries the hex form of the event's
// object id as Reference (empty when the id is missing or zero), trimmed
// event id and subject, and a copy of the payload. A nil adapter or nil store
// yields a nil slice and nil error.
func (a *reliableStoreAdapter) ListPending(ctx context.Context, limit int) ([]pmessagingreliable.OutboxMessage, error) {
	if a == nil || a.store == nil {
		return nil, nil
	}

	stored, err := a.store.ListPending(ctx, limit)
	if err != nil {
		return nil, err
	}

	messages := make([]pmessagingreliable.OutboxMessage, 0, len(stored))
	for i := range stored {
		record := stored[i]
		if record == nil {
			continue
		}

		var reference string
		if id := record.GetID(); id != nil && !id.IsZero() {
			reference = id.Hex()
		}

		message := pmessagingreliable.OutboxMessage{
			Reference: reference,
			EventID:   strings.TrimSpace(record.EventID),
			Subject:   strings.TrimSpace(record.Subject),
			Payload:   append([]byte(nil), record.Payload...),
			Attempts:  record.Attempts,
			CreatedAt: record.CreatedAt,
		}
		messages = append(messages, message)
	}
	return messages, nil
}
// MarkSent parses the trimmed reference as an object id and marks that event
// as sent at sentAt in the wrapped store. A parse failure is returned
// unchanged. A nil adapter or nil store makes this a no-op returning nil.
func (a *reliableStoreAdapter) MarkSent(ctx context.Context, reference string, sentAt time.Time) error {
	if a == nil || a.store == nil {
		return nil
	}

	trimmed := strings.TrimSpace(reference)
	id, parseErr := parseObjectID(trimmed)
	if parseErr != nil {
		return parseErr
	}
	return a.store.MarkSent(ctx, id, sentAt)
}
// MarkFailed parses the trimmed reference as an object id and marks that
// event as failed in the wrapped store. A parse failure is returned
// unchanged. A nil adapter or nil store makes this a no-op returning nil.
func (a *reliableStoreAdapter) MarkFailed(ctx context.Context, reference string) error {
	if a == nil || a.store == nil {
		return nil
	}

	trimmed := strings.TrimSpace(reference)
	id, parseErr := parseObjectID(trimmed)
	if parseErr != nil {
		return parseErr
	}
	return a.store.MarkFailed(ctx, id)
}
// IncrementAttempts parses the trimmed reference as an object id and bumps
// that event's attempt counter in the wrapped store. A parse failure is
// returned unchanged. A nil adapter or nil store makes this a no-op returning nil.
func (a *reliableStoreAdapter) IncrementAttempts(ctx context.Context, reference string) error {
	if a == nil || a.store == nil {
		return nil
	}

	trimmed := strings.TrimSpace(reference)
	id, parseErr := parseObjectID(trimmed)
	if parseErr != nil {
		return parseErr
	}
	return a.store.IncrementAttempts(ctx, id)
}
// parseObjectID converts the hex string raw into a bson.ObjectID, returning
// whatever error bson.ObjectIDFromHex reports for malformed input.
func parseObjectID(raw string) (bson.ObjectID, error) {
	id, err := bson.ObjectIDFromHex(raw)
	return id, err
}

View File

@@ -0,0 +1,330 @@
package outbox
import (
"context"
"errors"
"sort"
"strings"
"sync"
"testing"
"time"
me "github.com/tech/sendico/pkg/messaging/envelope"
pmessagingreliable "github.com/tech/sendico/pkg/messaging/reliable"
domainmodel "github.com/tech/sendico/pkg/model"
notification "github.com/tech/sendico/pkg/model/notification"
"github.com/tech/sendico/pkg/mservice"
"go.mongodb.org/mongo-driver/v2/bson"
"go.uber.org/zap"
)
// TestGatewayReliableProducerPersistsAndRetriesOnBrokerFailure checks that an
// envelope sent through the outbox is persisted as pending, stays pending with
// one recorded attempt after a single broker failure, and is marked sent on
// the next dispatch.
func TestGatewayReliableProducerPersistsAndRetriesOnBrokerFailure(t *testing.T) {
	ctx := context.Background()
	memStore := newMemoryOutboxStore()
	flaky := &flakyDirectProducer{failuresRemaining: 1}

	reliable, _, err := NewReliableProducer(
		zap.NewNop(),
		flaky,
		memStore,
		nil,
		pmessagingreliable.WithBatchSize(1),
		pmessagingreliable.WithMaxAttempts(3),
	)
	if err != nil {
		t.Fatalf("failed to create reliable producer: %v", err)
	}

	envelope := newTestEnvelope(t, []byte(`{"transferRef":"tx-1","status":"pending"}`))
	if err := reliable.SendWithOutbox(ctx, envelope); err != nil {
		t.Fatalf("failed to enqueue envelope into outbox: %v", err)
	}
	eventID := envelope.GetMessageId().String()

	// State right after enqueue: stored, pending, untouched by the dispatcher.
	enqueued := memStore.EventByID(eventID)
	switch {
	case enqueued == nil:
		t.Fatalf("expected outbox event %s to be persisted", eventID)
	case enqueued.Status != StatusPending:
		t.Fatalf("expected pending status after enqueue, got %q", enqueued.Status)
	case enqueued.Attempts != 0:
		t.Fatalf("expected zero attempts after enqueue, got %d", enqueued.Attempts)
	}

	// First dispatch hits the single injected broker failure.
	count, err := reliable.DispatchPending(ctx)
	switch {
	case err != nil:
		t.Fatalf("first dispatch failed: %v", err)
	case count != 1:
		t.Fatalf("expected first dispatch to process 1 event, got %d", count)
	}

	failedOnce := memStore.EventByID(eventID)
	switch {
	case failedOnce == nil:
		t.Fatalf("expected outbox event %s to exist after broker failure", eventID)
	case failedOnce.Status != StatusPending:
		t.Fatalf("expected event to stay pending after transient broker error, got %q", failedOnce.Status)
	case failedOnce.Attempts != 1:
		t.Fatalf("expected attempts to increment to 1 after failure, got %d", failedOnce.Attempts)
	case failedOnce.SentAt != nil:
		t.Fatal("expected sentAt to be empty after failed publish")
	}

	// Second dispatch succeeds because the broker has no failures left.
	count, err = reliable.DispatchPending(ctx)
	switch {
	case err != nil:
		t.Fatalf("second dispatch failed: %v", err)
	case count != 1:
		t.Fatalf("expected second dispatch to process 1 event, got %d", count)
	}

	delivered := memStore.EventByID(eventID)
	switch {
	case delivered == nil:
		t.Fatalf("expected outbox event %s to exist after retry", eventID)
	case delivered.Status != StatusSent:
		t.Fatalf("expected event to be sent after retry, got %q", delivered.Status)
	case delivered.Attempts != 1:
		t.Fatalf("expected attempts to remain 1 after successful retry, got %d", delivered.Attempts)
	case delivered.SentAt == nil:
		t.Fatal("expected sentAt to be set after successful publish")
	}

	if calls := flaky.Attempts(); calls != 2 {
		t.Fatalf("expected two broker attempts (fail then success), got %d", calls)
	}
}
// TestGatewayReliableProducerMarksFailedAfterMaxAttempts checks that, with a
// broker that keeps failing and a maximum of two attempts, the event is
// processed on the first two dispatches, is absent from the third, and ends
// up marked failed with two attempts and no sentAt.
func TestGatewayReliableProducerMarksFailedAfterMaxAttempts(t *testing.T) {
	ctx := context.Background()
	memStore := newMemoryOutboxStore()
	flaky := &flakyDirectProducer{failuresRemaining: 10}

	reliable, _, err := NewReliableProducer(
		zap.NewNop(),
		flaky,
		memStore,
		nil,
		pmessagingreliable.WithBatchSize(1),
		pmessagingreliable.WithMaxAttempts(2),
	)
	if err != nil {
		t.Fatalf("failed to create reliable producer: %v", err)
	}

	envelope := newTestEnvelope(t, []byte(`{"transferRef":"tx-2","status":"pending"}`))
	if err := reliable.SendWithOutbox(ctx, envelope); err != nil {
		t.Fatalf("failed to enqueue envelope into outbox: %v", err)
	}
	eventID := envelope.GetMessageId().String()

	count, err := reliable.DispatchPending(ctx)
	switch {
	case err != nil:
		t.Fatalf("first dispatch failed: %v", err)
	case count != 1:
		t.Fatalf("expected first dispatch to process 1 event, got %d", count)
	}

	count, err = reliable.DispatchPending(ctx)
	switch {
	case err != nil:
		t.Fatalf("second dispatch failed: %v", err)
	case count != 1:
		t.Fatalf("expected second dispatch to process 1 event, got %d", count)
	}

	// After the attempt budget is spent the event must no longer be pending.
	count, err = reliable.DispatchPending(ctx)
	switch {
	case err != nil:
		t.Fatalf("third dispatch failed: %v", err)
	case count != 0:
		t.Fatalf("expected failed event to be excluded from pending queue, got processed=%d", count)
	}

	outcome := memStore.EventByID(eventID)
	switch {
	case outcome == nil:
		t.Fatalf("expected outbox event %s to exist", eventID)
	case outcome.Status != StatusFailed:
		t.Fatalf("expected event to be marked failed after max attempts, got %q", outcome.Status)
	case outcome.Attempts != 2:
		t.Fatalf("expected attempts to equal max attempts (2), got %d", outcome.Attempts)
	case outcome.SentAt != nil:
		t.Fatal("expected sentAt to remain empty for failed event")
	}
}
// newTestEnvelope creates an envelope for the test sender with a
// chain-gateway "updated" notification signature and wraps payload into it,
// failing the test immediately if wrapping fails.
func newTestEnvelope(t *testing.T, payload []byte) me.Envelope {
	t.Helper()

	signature := domainmodel.NewNotification(mservice.ChainGateway, notification.NAUpdated)
	envelope := me.CreateEnvelope("gateway.common.outbox.test", signature)
	_, wrapErr := envelope.Wrap(payload)
	if wrapErr != nil {
		t.Fatalf("failed to wrap test payload: %v", wrapErr)
	}
	return envelope
}
// memoryOutboxStore is an in-memory Store used by the tests in this file.
type memoryOutboxStore struct {
// mu guards both maps below; every method locks it.
mu sync.Mutex
// eventsByRef holds the stored events keyed by their generated object id.
eventsByRef map[bson.ObjectID]*Event
// refByEvent maps a trimmed event id to the object id of its stored event.
refByEvent map[string]bson.ObjectID
}
// newMemoryOutboxStore returns an empty in-memory store with both lookup
// maps initialised.
func newMemoryOutboxStore() *memoryOutboxStore {
	store := new(memoryOutboxStore)
	store.eventsByRef = map[bson.ObjectID]*Event{}
	store.refByEvent = map[string]bson.ObjectID{}
	return store
}
// Create stores a copy of event under a freshly generated object id.
//
// It rejects a nil event, an event id that is empty after trimming, and an
// event id that is already stored. A stored event with an empty status
// defaults to pending.
func (s *memoryOutboxStore) Create(_ context.Context, event *Event) error {
	if event == nil {
		return errors.New("event is nil")
	}

	s.mu.Lock()
	defer s.mu.Unlock()

	key := strings.TrimSpace(event.EventID)
	if key == "" {
		return errors.New("event id is required")
	}
	if _, taken := s.refByEvent[key]; taken {
		return errors.New("duplicate event id")
	}

	record := cloneEvent(event)
	record.SetID(bson.NewObjectID())
	if record.Status == "" {
		record.Status = StatusPending
	}

	id := *record.GetID()
	s.eventsByRef[id] = record
	s.refByEvent[key] = id
	return nil
}
// ListPending returns copies of the pending events ordered by CreatedAt,
// truncated to limit when limit is positive.
//
// Events are gathered from a map, whose iteration order is random, and
// sort.Slice is not stable, so ordering by CreatedAt alone would return events
// with equal timestamps in a different order from call to call. Ties are
// therefore broken by the hex form of the event's object id, which makes the
// result deterministic.
// NOTE(review): ids generated in one process are expected to sort roughly in
// creation order — confirm against the ObjectID layout if strict FIFO matters.
func (s *memoryOutboxStore) ListPending(_ context.Context, limit int) ([]*Event, error) {
	s.mu.Lock()
	defer s.mu.Unlock()

	pending := make([]*Event, 0, len(s.eventsByRef))
	for _, event := range s.eventsByRef {
		if event.Status == StatusPending {
			pending = append(pending, cloneEvent(event))
		}
	}

	// refHex tolerates a missing id so the comparator can never dereference nil.
	refHex := func(event *Event) string {
		if id := event.GetID(); id != nil {
			return id.Hex()
		}
		return ""
	}
	sort.Slice(pending, func(i, j int) bool {
		left, right := pending[i], pending[j]
		if left.CreatedAt.Before(right.CreatedAt) {
			return true
		}
		if right.CreatedAt.Before(left.CreatedAt) {
			return false
		}
		return refHex(left) < refHex(right)
	})

	if limit > 0 && len(pending) > limit {
		pending = pending[:limit]
	}
	return pending, nil
}
// MarkSent sets the stored event's status to sent and records sentAt
// (converted to UTC), then calls Update on the event. An unknown eventRef
// yields an error.
func (s *memoryOutboxStore) MarkSent(_ context.Context, eventRef bson.ObjectID, sentAt time.Time) error {
	s.mu.Lock()
	defer s.mu.Unlock()

	stored, found := s.eventsByRef[eventRef]
	if !found {
		return errors.New("event not found")
	}

	utc := sentAt.UTC()
	stored.Status, stored.SentAt = StatusSent, &utc
	stored.Update()
	return nil
}
// MarkFailed sets the stored event's status to failed and calls Update on
// it. An unknown eventRef yields an error.
func (s *memoryOutboxStore) MarkFailed(_ context.Context, eventRef bson.ObjectID) error {
	s.mu.Lock()
	defer s.mu.Unlock()

	stored, found := s.eventsByRef[eventRef]
	if !found {
		return errors.New("event not found")
	}

	stored.Status = StatusFailed
	stored.Update()
	return nil
}
// IncrementAttempts adds one to the stored event's attempt counter and calls
// Update on it. An unknown eventRef yields an error.
func (s *memoryOutboxStore) IncrementAttempts(_ context.Context, eventRef bson.ObjectID) error {
	s.mu.Lock()
	defer s.mu.Unlock()

	stored, found := s.eventsByRef[eventRef]
	if !found {
		return errors.New("event not found")
	}

	stored.Attempts++
	stored.Update()
	return nil
}
// EventByID returns a copy of the event stored under eventID, or nil when no
// such event exists. Returning a copy keeps test assertions from aliasing the
// store's internal state.
func (s *memoryOutboxStore) EventByID(eventID string) *Event {
	s.mu.Lock()
	defer s.mu.Unlock()

	if ref, known := s.refByEvent[eventID]; known {
		if stored, present := s.eventsByRef[ref]; present {
			return cloneEvent(stored)
		}
	}
	return nil
}
// cloneEvent returns a copy of event whose Payload slice and SentAt pointer
// do not alias the original's. A nil event yields nil.
func cloneEvent(event *Event) *Event {
	if event == nil {
		return nil
	}

	duplicate := new(Event)
	*duplicate = *event
	duplicate.Payload = append([]byte(nil), event.Payload...)
	if original := event.SentAt; original != nil {
		when := *original
		duplicate.SentAt = &when
	}
	return duplicate
}
// flakyDirectProducer is a test producer whose SendMessage fails a configured
// number of times before it starts succeeding.
type flakyDirectProducer struct {
// mu guards the counters below.
mu sync.Mutex
// failuresRemaining is how many more SendMessage calls will return an error.
failuresRemaining int
// attempts counts every SendMessage call, failed or not.
attempts int
}
// SendMessage counts the call and returns an error while injected failures
// remain, consuming one per call; once they are used up it returns nil. The
// envelope itself is ignored.
func (p *flakyDirectProducer) SendMessage(_ me.Envelope) error {
	p.mu.Lock()
	defer p.mu.Unlock()

	p.attempts++
	if p.failuresRemaining <= 0 {
		return nil
	}
	p.failuresRemaining--
	return errors.New("broker unavailable")
}
// Attempts reports how many times SendMessage has been called so far.
func (p *flakyDirectProducer) Attempts() int {
	p.mu.Lock()
	count := p.attempts
	p.mu.Unlock()
	return count
}

View File

@@ -0,0 +1,72 @@
package outbox
import (
"context"
"sync"
"github.com/tech/sendico/pkg/merrors"
pmessaging "github.com/tech/sendico/pkg/messaging"
me "github.com/tech/sendico/pkg/messaging/envelope"
pmessagingreliable "github.com/tech/sendico/pkg/messaging/reliable"
"github.com/tech/sendico/pkg/mlogger"
cfgmodel "github.com/tech/sendico/pkg/model"
"go.uber.org/zap"
)
// ReliableRuntime owns a reliable producer lifecycle for gateway outbox dispatch.
type ReliableRuntime struct {
// once ensures the initialisation in Start runs a single time.
once sync.Once
// cancel stops the context handed to the producer's Run loop; nil until Start launches it.
cancel context.CancelFunc
// producer is the reliable producer built by Start; nil when none was created.
producer *pmessagingreliable.ReliableProducer
// settings are the settings returned alongside the producer.
settings pmessagingreliable.Settings
// initErr is the construction error from the first Start call, returned by every Start call.
initErr error
}
// Start builds the reliable producer and, when both the producer and the
// direct producer are available, launches its Run loop in a goroutine.
//
// Initialisation happens once per runtime: later calls do no work and return
// the error recorded by the first call. The Run loop receives a context
// derived from context.Background, so it is ended only through Stop. A nil
// receiver is a no-op returning nil; a nil logger is replaced by a no-op
// logger.
func (r *ReliableRuntime) Start(logger mlogger.Logger, direct pmessaging.Producer, store Store, messagingSettings cfgmodel.SettingsT, opts ...pmessagingreliable.Option) error {
	if r == nil {
		return nil
	}
	if logger == nil {
		logger = zap.NewNop()
	}
	scoped := logger.Named("outbox_reliable")

	r.once.Do(func() {
		built, effective, buildErr := NewReliableProducer(scoped, direct, store, messagingSettings, opts...)
		if buildErr != nil {
			r.initErr = buildErr
			return
		}
		r.producer, r.settings = built, effective

		// Without a producer or a direct producer no dispatch loop is started.
		if r.producer == nil || direct == nil {
			scoped.Info("Outbox reliable publisher disabled", zap.Bool("enabled", effective.Enabled))
			return
		}

		scoped.Info("Outbox reliable publisher configured",
			zap.Bool("enabled", effective.Enabled),
			zap.Int("batch_size", effective.BatchSize),
			zap.Int("poll_interval_seconds", effective.PollIntervalSeconds),
			zap.Int("max_attempts", effective.MaxAttempts))

		runCtx, stop := context.WithCancel(context.Background())
		r.cancel = stop
		go r.producer.Run(runCtx)
	})
	return r.initErr
}
// Send hands envelope to the reliable producer's SendWithOutbox. It returns
// an internal error when the runtime is nil or has no producer.
func (r *ReliableRuntime) Send(ctx context.Context, envelope me.Envelope) error {
	if r != nil && r.producer != nil {
		return r.producer.SendWithOutbox(ctx, envelope)
	}
	return merrors.Internal("reliable outbox producer is not configured")
}
// Stop cancels the context given to the producer's Run loop. It does nothing
// on a nil runtime or when no loop was started, and it does not wait for the
// loop to exit.
func (r *ReliableRuntime) Stop() {
	if r != nil && r.cancel != nil {
		r.cancel()
	}
}

View File

@@ -0,0 +1,17 @@
package outbox
import (
"context"
"time"
"go.mongodb.org/mongo-driver/v2/bson"
)
// Store persists gateway outbox events.
type Store interface {
// Create persists a new outbox event.
Create(ctx context.Context, event *Event) error
// ListPending returns events whose status is pending, bounded by limit.
ListPending(ctx context.Context, limit int) ([]*Event, error)
// MarkSent records the event identified by eventRef as sent at sentAt.
MarkSent(ctx context.Context, eventRef bson.ObjectID, sentAt time.Time) error
// MarkFailed records the event identified by eventRef as failed.
MarkFailed(ctx context.Context, eventRef bson.ObjectID) error
// IncrementAttempts adds one to the attempt counter of the event identified by eventRef.
IncrementAttempts(ctx context.Context, eventRef bson.ObjectID) error
}