package delivery import ( "bytes" "context" "errors" "io" "net/http" "strconv" "sync" "time" "github.com/tech/sendico/edge/callbacks/internal/signing" "github.com/tech/sendico/edge/callbacks/internal/storage" "github.com/tech/sendico/pkg/merrors" "github.com/tech/sendico/pkg/mlogger" "go.uber.org/zap" ) const responseDrainLimit = 64 * 1024 type service struct { logger mlogger.Logger cfg Config tasks storage.TaskRepo retry interface { NextAttempt(attempt int, now time.Time, minDelay, maxDelay time.Duration, jitterRatio float64) time.Time } security interface { ValidateURL(ctx context.Context, target string) error } signer signing.Signer obs Observer client *http.Client cancel context.CancelFunc once sync.Once stop sync.Once wg sync.WaitGroup } func newService(deps Dependencies) (Service, error) { if deps.Tasks == nil { return nil, merrors.InvalidArgument("delivery: task repo is required", "tasks") } if deps.Retry == nil { return nil, merrors.InvalidArgument("delivery: retry policy is required", "retry") } if deps.Security == nil { return nil, merrors.InvalidArgument("delivery: security validator is required", "security") } if deps.Signer == nil { return nil, merrors.InvalidArgument("delivery: signer is required", "signer") } logger := deps.Logger if logger == nil { logger = zap.NewNop() } cfg := deps.Config if cfg.WorkerConcurrency <= 0 { cfg.WorkerConcurrency = 1 } if cfg.WorkerPoll <= 0 { cfg.WorkerPoll = 200 * time.Millisecond } if cfg.LockTTL <= 0 { cfg.LockTTL = 30 * time.Second } if cfg.RequestTimeout <= 0 { cfg.RequestTimeout = 10 * time.Second } transport := &http.Transport{ Proxy: http.ProxyFromEnvironment, MaxIdleConns: 200, MaxIdleConnsPerHost: 32, IdleConnTimeout: 90 * time.Second, TLSHandshakeTimeout: 5 * time.Second, ExpectContinueTimeout: time.Second, } client := &http.Client{ Transport: transport, CheckRedirect: func(*http.Request, []*http.Request) error { return http.ErrUseLastResponse }, } return &service{ logger: logger.Named("delivery"), cfg: cfg, tasks: deps.Tasks, retry: deps.Retry, security: deps.Security, signer: deps.Signer, obs: deps.Observer, client: client, }, nil } func (s *service) Start(ctx context.Context) { s.once.Do(func() { runCtx := ctx if runCtx == nil { runCtx = context.Background() } runCtx, s.cancel = context.WithCancel(runCtx) for i := 0; i < s.cfg.WorkerConcurrency; i++ { workerID := "worker-" + strconv.Itoa(i+1) s.wg.Add(1) go func(id string) { defer s.wg.Done() s.runWorker(runCtx, id) }(workerID) } s.logger.Info("Delivery workers started", zap.Int("workers", s.cfg.WorkerConcurrency)) }) } func (s *service) Stop() { s.stop.Do(func() { if s.cancel != nil { s.cancel() } s.wg.Wait() s.logger.Info("Delivery workers stopped") }) } func (s *service) runWorker(ctx context.Context, workerID string) { for { select { case <-ctx.Done(): return default: } now := time.Now().UTC() task, err := s.tasks.LockNextTask(ctx, now, workerID, s.cfg.LockTTL) if err != nil { s.logger.Warn("Failed to lock next task", zap.String("worker_id", workerID), zap.Error(err)) time.Sleep(s.cfg.WorkerPoll) continue } if task == nil { time.Sleep(s.cfg.WorkerPoll) continue } s.handleTask(ctx, workerID, task) } } func (s *service) handleTask(ctx context.Context, workerID string, task *storage.Task) { started := time.Now() statusCode := 0 result := "failed" attempt := task.Attempt + 1 defer func() { if s.obs != nil { s.obs.ObserveDelivery(result, statusCode, time.Since(started)) } }() if err := s.security.ValidateURL(ctx, task.EndpointURL); err != nil { result = "blocked" _ = s.tasks.MarkFailed(ctx, task.ID, attempt, err.Error(), statusCode, time.Now().UTC()) return } timeout := task.RequestTimeout if timeout <= 0 { timeout = s.cfg.RequestTimeout } signed, err := s.signer.Sign(ctx, task.SigningMode, task.SecretRef, task.Payload, time.Now().UTC()) if err != nil { result = "sign_error" _ = s.tasks.MarkFailed(ctx, task.ID, attempt, err.Error(), statusCode, time.Now().UTC()) return } reqCtx, cancel := context.WithTimeout(ctx, timeout) defer cancel() req, err := http.NewRequestWithContext(reqCtx, http.MethodPost, task.EndpointURL, bytes.NewReader(signed.Body)) if err != nil { result = "request_error" _ = s.tasks.MarkFailed(ctx, task.ID, attempt, err.Error(), statusCode, time.Now().UTC()) return } req.Header.Set("Content-Type", "application/json") for key, val := range task.Headers { req.Header.Set(key, val) } for key, val := range signed.Headers { req.Header.Set(key, val) } resp, reqErr := s.client.Do(req) if resp != nil { statusCode = resp.StatusCode _, _ = io.Copy(io.Discard, io.LimitReader(resp.Body, responseDrainLimit)) _ = resp.Body.Close() } out := classify(statusCode, reqErr) now := time.Now().UTC() switch out { case outcomeDelivered: result = string(outcomeDelivered) if err := s.tasks.MarkDelivered(ctx, task.ID, statusCode, time.Since(started), now); err != nil { s.logger.Warn("Failed to mark task delivered", zap.String("worker_id", workerID), zap.String("task_id", task.ID.Hex()), zap.Error(err)) } case outcomeRetry: if attempt < task.MaxAttempts { next := s.retry.NextAttempt(attempt, now, task.MinDelay, task.MaxDelay, s.cfg.JitterRatio) result = string(outcomeRetry) lastErr := stringifyErr(reqErr) if reqErr == nil && statusCode > 0 { lastErr = "upstream returned retryable status" } if err := s.tasks.MarkRetry(ctx, task.ID, attempt, next, lastErr, statusCode, now); err != nil { s.logger.Warn("Failed to mark task retry", zap.String("worker_id", workerID), zap.String("task_id", task.ID.Hex()), zap.Error(err)) } } else { result = string(outcomeFailed) lastErr := stringifyErr(reqErr) if reqErr == nil && statusCode > 0 { lastErr = "upstream returned retryable status but max attempts reached" } if err := s.tasks.MarkFailed(ctx, task.ID, attempt, lastErr, statusCode, now); err != nil { s.logger.Warn("Failed to mark task failed", zap.String("worker_id", workerID), zap.String("task_id", task.ID.Hex()), zap.Error(err)) } } default: result = string(outcomeFailed) lastErr := stringifyErr(reqErr) if reqErr == nil && statusCode > 0 { lastErr = "upstream returned non-retryable status" } if err := s.tasks.MarkFailed(ctx, task.ID, attempt, lastErr, statusCode, now); err != nil { s.logger.Warn("Failed to mark task failed", zap.String("worker_id", workerID), zap.String("task_id", task.ID.Hex()), zap.Error(err)) } } } func stringifyErr(err error) string { if err == nil { return "" } if errors.Is(err, context.Canceled) { return "request canceled" } if errors.Is(err, context.DeadlineExceeded) { return "request timeout" } return err.Error() }