external_api_ratelimit.json 2.5 KB

  1. {
  2. "incident_id": "INC-003",
  3. "service": "payment-service",
  4. "severity": "P2",
  5. "alert": {
  6. "metric": "payment_success_rate",
  7. "value": 23.4,
  8. "threshold": 95.0,
  9. "unit": "percent",
  10. "timestamp": "2024-01-17T16:22:10Z",
  11. "description": "Payment success rate critically below SLO"
  12. },
  13. "logs": [
  14. {"timestamp": "2024-01-17T16:10:00Z", "level": "INFO", "message": "Payment processing started for order_id=98765"},
  15. {"timestamp": "2024-01-17T16:15:00Z", "level": "WARN", "message": "Stripe API slow response: 2.3s for POST /v1/charges"},
  16. {"timestamp": "2024-01-17T16:18:00Z", "level": "ERROR", "message": "Stripe API 429 Too Many Requests: retry_after=60s"},
  17. {"timestamp": "2024-01-17T16:18:01Z", "level": "ERROR", "message": "Retry 1/3 for order_id=99001 (no backoff configured — immediate retry)"},
  18. {"timestamp": "2024-01-17T16:18:02Z", "level": "ERROR", "message": "Stripe API 429 Too Many Requests (retry 1 failed immediately)"},
  19. {"timestamp": "2024-01-17T16:18:02Z", "level": "ERROR", "message": "Retry 2/3 for order_id=99001 (no backoff — immediate retry)"},
  20. {"timestamp": "2024-01-17T16:18:03Z", "level": "ERROR", "message": "Stripe API 429 Too Many Requests (retry 2 failed)"},
  21. {"timestamp": "2024-01-17T16:18:03Z", "level": "ERROR", "message": "Retry 3/3 for order_id=99001 (final retry, no backoff)"},
  22. {"timestamp": "2024-01-17T16:18:04Z", "level": "ERROR", "message": "All retries exhausted for order_id=99001 — payment FAILED"},
  23. {"timestamp": "2024-01-17T16:20:00Z", "level": "CRITICAL", "message": "Stripe API rate limit cascade: 347 failed payments in 2 minutes, amplified by retry storm"},
  24. {"timestamp": "2024-01-17T16:22:10Z", "level": "ALERT", "message": "P2 FIRED: payment_success_rate=23.4% below threshold=95.0%"}
  25. ],
  26. "metrics": {
  27. "payment_success_rate": {"16:10": 99.1, "16:15": 88.3, "16:18": 45.2, "16:20": 27.8, "16:22": 23.4},
  28. "stripe_api_calls_per_min": {"16:10": 45, "16:15": 280, "16:18": 890, "16:20": 1240, "16:22": 1380},
  29. "stripe_api_error_rate": {"16:10": 0.0, "16:15": 3.2, "16:18": 62.4, "16:20": 88.1, "16:22": 94.6},
  30. "failed_payments_count": {"16:10": 0, "16:15": 12, "16:18": 89, "16:20": 347, "16:22": 521}
  31. },
  32. "root_cause": "A promotional flash sale caused a 30x payment request spike. The retry logic has no exponential backoff and ignores the retry_after=60s hint returned with each 429 — every 429 triggers immediate retries, creating a retry storm that amplifies the rate limit violation.",
  33. "affected_users": 521
  34. }