Low-Level Design: Payment Processing System (Idempotency, Auth-Capture, Refunds)

Low-Level Design: Payment Processing System

A payment processing system handles payment initiation, authorization, capture, refund, and failure recovery. Correctness and idempotency are paramount — double charges or missing refunds are business-critical bugs. Asked at Stripe, Coinbase, DoorDash, and Airbnb.

Core Entities


from dataclasses import dataclass, field
from enum import Enum
from datetime import datetime
from typing import Optional
import uuid

class PaymentStatus(Enum):
    """Lifecycle states of a payment.

    Used as the ``status`` of both ``PaymentIntent`` and ``Refund`` records.
    """
    PENDING = "pending"         # default state of a newly created record
    AUTHORIZED = "authorized"   # funds held, not captured
    CAPTURED = "captured"       # funds transferred
    FAILED = "failed"           # authorization was declined by the gateway
    REFUNDED = "refunded"       # refunded total equals the full amount
    PARTIALLY_REFUNDED = "partially_refunded"  # some, but not all, refunded
    CANCELLED = "cancelled"     # presumably an authorization released without capture — TODO confirm

class PaymentMethod(Enum):
    """Kinds of payment instrument a ``PaymentIntent`` can be paid with."""
    CREDIT_CARD = "credit_card"
    DEBIT_CARD = "debit_card"
    BANK_TRANSFER = "bank_transfer"
    WALLET = "wallet"

@dataclass
class PaymentIntent:
    """One payment attempt: who pays whom, how much, and its current state.

    Amounts are kept as integer cents. ``status`` starts at ``PENDING`` and
    ``refunded_amount_cents`` starts at 0.
    """
    intent_id: str
    amount_cents: int       # always integer cents
    currency: str           # ISO 4217 e.g. "USD"
    customer_id: str
    merchant_id: str
    payment_method: PaymentMethod
    status: PaymentStatus = PaymentStatus.PENDING
    # Key used to deduplicate repeated create requests; empty by default.
    idempotency_key: str = ""
    # Charge identifier returned by the gateway; None until one is assigned.
    gateway_charge_id: Optional[str] = None
    # Text describing why the payment failed; None when it has not failed.
    failure_reason: Optional[str] = None
    # Naive timestamp taken from datetime.utcnow() at construction time.
    created_at: datetime = field(default_factory=datetime.utcnow)
    captured_at: Optional[datetime] = None
    # Running total of everything refunded against this intent, in cents.
    refunded_amount_cents: int = 0

    @property
    def refundable_amount(self) -> int:
        """Return the amount, in cents, that has not been refunded yet."""
        return self.amount_cents - self.refunded_amount_cents

@dataclass
class Refund:
    """Record of a single refund issued against a payment intent.

    Each refund carries its own identifier and amount, so several refunds
    can reference the same ``intent_id``.
    """
    refund_id: str
    intent_id: str          # the PaymentIntent this refund applies to
    amount_cents: int       # refunded amount, in integer cents
    reason: str
    status: PaymentStatus = PaymentStatus.PENDING
    # Refund identifier returned by the gateway; None until one is assigned.
    gateway_refund_id: Optional[str] = None
    # Naive timestamp taken from datetime.utcnow() at construction time.
    created_at: datetime = field(default_factory=datetime.utcnow)

Payment Service with Idempotency


class PaymentService:
    """Drives payment intents through create, authorize, capture and refund.

    The service delegates money movement to an external gateway, persists
    intents through ``intent_store`` and uses ``idempotency_store`` to make
    intent creation safe to retry.

    ``GatewayDeclineError`` is referenced but not defined in this file's
    visible portion; it is expected to come from the gateway integration.
    """

    # Allowed status changes. States mapped to an empty set are terminal.
    VALID_TRANSITIONS = {
        PaymentStatus.PENDING:              {PaymentStatus.AUTHORIZED, PaymentStatus.FAILED},
        PaymentStatus.AUTHORIZED:           {PaymentStatus.CAPTURED, PaymentStatus.CANCELLED},
        PaymentStatus.CAPTURED:             {PaymentStatus.REFUNDED, PaymentStatus.PARTIALLY_REFUNDED},
        PaymentStatus.PARTIALLY_REFUNDED:   {PaymentStatus.REFUNDED},
        PaymentStatus.FAILED:               set(),
        PaymentStatus.REFUNDED:             set(),
        PaymentStatus.CANCELLED:            set(),
    }

    def __init__(self, gateway, intent_store, idempotency_store):
        """Wire up the collaborators.

        Args:
            gateway: external payment gateway (Stripe, Braintree); must offer
                ``authorize``, ``capture`` and ``refund``.
            intent_store: persistence for intents; must offer ``get(id)`` and
                ``save(intent)``.
            idempotency_store: key/value store for idempotency keys; must
                offer ``get(key)``.
        """
        self.gateway = gateway
        self.intents = intent_store
        self.idempotency = idempotency_store

    def create_payment_intent(self, amount_cents: int, currency: str,
                               customer_id: str, merchant_id: str,
                               payment_method: PaymentMethod,
                               idempotency_key: str) -> PaymentIntent:
        """Create and persist a new PENDING intent, once per idempotency key.

        Returns:
            The intent previously stored under ``idempotency_key`` if there is
            one, otherwise the newly created intent.

        Raises:
            ValueError: if ``amount_cents`` is not positive.
        """
        # Idempotency: return existing result for duplicate requests
        existing = self.idempotency.get(idempotency_key)
        if existing:
            return existing

        # NOTE(review): everything from this check down to the ``authorize``
        # signature was lost from the original text (the span between "<" and
        # ">" was stripped) and has been reconstructed — confirm against the
        # intended implementation.
        if amount_cents <= 0:
            raise ValueError("Amount must be positive")

        intent = PaymentIntent(
            intent_id=str(uuid.uuid4()),
            amount_cents=amount_cents,
            currency=currency,
            customer_id=customer_id,
            merchant_id=merchant_id,
            payment_method=payment_method,
            idempotency_key=idempotency_key,
        )
        self.intents.save(intent)
        # NOTE(review): assumes the idempotency store exposes ``set(key, value)``;
        # only ``get`` is visible in this file — adjust to the real store API
        # (and pass a TTL if the store supports one).
        self.idempotency.set(idempotency_key, intent)
        return intent

    def authorize(self, intent_id: str) -> PaymentIntent:
        """Ask the gateway to place a hold for the intent's amount.

        On success the gateway's charge id is recorded and the intent becomes
        AUTHORIZED. A gateway decline is not re-raised: the intent becomes
        FAILED with the decline message as ``failure_reason``. The intent is
        saved in both cases.

        Raises:
            ValueError: if the intent does not exist or is not in a state
                that may move to AUTHORIZED.
        """
        intent = self._get_intent(intent_id)
        self._validate_transition(intent, PaymentStatus.AUTHORIZED)
        try:
            charge_id = self.gateway.authorize(
                amount=intent.amount_cents,
                currency=intent.currency,
                customer_id=intent.customer_id,
            )
        except GatewayDeclineError as e:
            intent.status = PaymentStatus.FAILED
            intent.failure_reason = str(e)
        else:
            intent.gateway_charge_id = charge_id
            intent.status = PaymentStatus.AUTHORIZED
        self.intents.save(intent)
        return intent

    def capture(self, intent_id: str) -> PaymentIntent:
        """Capture a previously authorized intent and stamp ``captured_at``.

        Gateway errors propagate to the caller; in that case the intent is
        left unchanged and is not saved.

        Raises:
            ValueError: if the intent does not exist or is not in a state
                that may move to CAPTURED.
        """
        intent = self._get_intent(intent_id)
        self._validate_transition(intent, PaymentStatus.CAPTURED)
        self.gateway.capture(intent.gateway_charge_id)
        intent.status = PaymentStatus.CAPTURED
        intent.captured_at = datetime.utcnow()
        self.intents.save(intent)
        return intent

    def refund(self, intent_id: str, amount_cents: int, reason: str) -> Refund:
        """Refund part or all of a captured intent.

        The intent's running refund total is increased and its status becomes
        REFUNDED once the total equals the original amount, otherwise
        PARTIALLY_REFUNDED. The updated intent is saved; the returned
        ``Refund`` record is not persisted by this method.

        Raises:
            ValueError: if the intent does not exist, is not CAPTURED or
                PARTIALLY_REFUNDED, or if ``amount_cents`` is not positive or
                exceeds the remaining refundable amount.
        """
        intent = self._get_intent(intent_id)
        if intent.status not in (PaymentStatus.CAPTURED,
                                  PaymentStatus.PARTIALLY_REFUNDED):
            raise ValueError(f"Cannot refund intent in status {intent.status}")
        # A zero or negative refund would lower refunded_amount_cents and so
        # raise the refundable amount above what was actually captured.
        if amount_cents <= 0:
            raise ValueError("Refund amount must be positive")
        if amount_cents > intent.refundable_amount:
            raise ValueError(f"Refund {amount_cents} exceeds refundable "
                             f"{intent.refundable_amount} cents")

        refund_id = str(uuid.uuid4())
        gateway_refund_id = self.gateway.refund(
            charge_id=intent.gateway_charge_id,
            amount=amount_cents,
        )
        refund = Refund(
            refund_id=refund_id,
            intent_id=intent_id,
            amount_cents=amount_cents,
            reason=reason,
            status=PaymentStatus.REFUNDED,
            gateway_refund_id=gateway_refund_id,
        )
        intent.refunded_amount_cents += amount_cents
        intent.status = (PaymentStatus.REFUNDED
                          if intent.refunded_amount_cents == intent.amount_cents
                          else PaymentStatus.PARTIALLY_REFUNDED)
        self.intents.save(intent)
        return refund

    def _get_intent(self, intent_id: str) -> PaymentIntent:
        """Load an intent from the store, raising ValueError if it is missing."""
        intent = self.intents.get(intent_id)
        if not intent:
            raise ValueError(f"PaymentIntent {intent_id} not found")
        return intent

    def _validate_transition(self, intent: PaymentIntent,
                               new_status: PaymentStatus) -> None:
        """Raise ValueError unless ``intent`` may move to ``new_status``."""
        allowed = self.VALID_TRANSITIONS.get(intent.status, set())
        if new_status not in allowed:
            raise ValueError(f"Cannot transition {intent.status} -> {new_status}")

Retry with Exponential Backoff


import time
import random

class ResilientGateway:
    """Gateway wrapper that retries ``authorize`` when the gateway times out.

    Timeouts are retried with exponential backoff plus random jitter; hard
    declines are re-raised immediately. ``GatewayTimeoutError`` and
    ``GatewayDeclineError`` are not defined in this file's visible portion
    and are expected to come from the gateway integration.
    """

    def __init__(self, gateway, max_retries: int = 3):
        """Wrap ``gateway``.

        Args:
            gateway: object exposing ``authorize(**kwargs)``.
            max_retries: total number of ``authorize`` attempts. Values below
                1 are treated as 1 so that a call is always attempted.
        """
        self.gateway = gateway
        self.max_retries = max_retries

    def authorize(self, **kwargs):
        """Call ``gateway.authorize(**kwargs)``, retrying on timeouts.

        Returns:
            Whatever the wrapped gateway's ``authorize`` returns.

        Raises:
            GatewayDeclineError: immediately, without retrying.
            GatewayTimeoutError: the last timeout, once attempts run out.
        """
        # Always try at least once; otherwise a non-positive max_retries
        # would fall through with nothing to return and nothing to raise.
        attempts = max(1, self.max_retries)
        for attempt in range(attempts):
            try:
                return self.gateway.authorize(**kwargs)
            except GatewayTimeoutError:
                if attempt == attempts - 1:
                    # Out of attempts: surface the timeout right away rather
                    # than sleeping through a backoff that nothing follows.
                    raise
                backoff = (2 ** attempt) + random.uniform(0, 1)
                time.sleep(backoff)
            except GatewayDeclineError:
                raise  # Don't retry hard declines (NSF, stolen card)

Design Decisions

| Decision | Choice | Rationale |
|---|---|---|
| Amount storage | Integer cents | Avoids float arithmetic errors |
| Idempotency | Client-provided key + TTL store | Network retries safe; prevents double-charges |
| Two-phase (auth+capture) | Separate authorize/capture | Hold funds at order; capture at fulfillment |
| Retry policy | Backoff + jitter, no retry on declines | Timeouts may recover; declines won’t |
| Refund tracking | refunded_amount_cents on intent | Enables partial refunds and full-refund detection |

{
"@context": "https://schema.org",
"@type": "FAQPage",
"mainEntity": [
{
"@type": "Question",
"name": "What is an idempotency key and why is it critical in payment systems?",
"acceptedAnswer": {
"@type": "Answer",
"text": "An idempotency key is a unique identifier (UUID) generated by the client for each payment request. The server stores the key and its result. On retry (network timeout, client crash), the client sends the same key — the server returns the cached result without re-executing the payment. Without idempotency, a network timeout after authorization but before the response arrives causes the client to retry, potentially charging the customer twice. Idempotency keys must have a TTL (e.g., 24 hours) to prevent the store from growing unboundedly."
}
},
{
"@type": "Question",
"name": "What is the difference between authorize and capture in payment processing?",
"acceptedAnswer": {
"@type": "Answer",
"text": "Authorize (pre-auth): the payment gateway contacts the card network and places a hold on the customer’s funds. Funds are not transferred but are reserved. Capture: funds are actually moved from the customer to the merchant. Two-phase enables: place a hold at order creation, capture only at shipment (preventing charging before delivery). Cancel the auth if the order is cancelled — no charge occurs. Most card networks allow holds for 5-7 days before they auto-expire. Stripe, Braintree, and Adyen all support separate authorize and capture calls."
}
},
{
"@type": "Question",
"name": "How do you handle gateway timeouts in a payment system?",
"acceptedAnswer": {
"@type": "Answer",
"text": "Gateway timeouts are ambiguous — the charge may or may not have occurred. Strategy: (1) Retry with exponential backoff for timeout errors, but NOT for hard declines (NSF, invalid card number). (2) Use idempotency keys on all gateway calls — the gateway deduplicates retries. (3) If retries exhaust and state is unknown, query the gateway for the charge status by your idempotency key before marking failed. (4) Store the intent in PENDING state; a background reconciliation job checks PENDING intents older than 5 minutes against the gateway."
}
},
{
"@type": "Question",
"name": "How do you implement partial refunds?",
"acceptedAnswer": {
"@type": "Answer",
"text": "Track refunded_amount_cents on the PaymentIntent. On each refund request: validate refund_amount <= (captured_amount - already_refunded). Call the gateway refund API with the specific amount. Update refunded_amount_cents += refund_amount. Set status to PARTIALLY_REFUNDED if still has remaining balance, or REFUNDED if fully refunded. Store each Refund as a separate record with its own gateway refund ID. This enables: multiple partial refunds summing to the original amount, audit trail of each refund action, and accurate accounting."
}
},
{
"@type": "Question",
"name": "How would you scale a payment processing system to handle millions of transactions per day?",
"acceptedAnswer": {
"@type": "Answer",
"text": "Payments are write-heavy and require strong consistency. Shard the payment intents table by customer_id or merchant_id (most queries are merchant-scoped). Use optimistic locking (version column) for concurrent capture/refund operations on the same intent. Store idempotency keys in Redis (fast TTL-based expiry). Use a message queue (Kafka) for async operations: post-payment webhooks, analytics events, fraud signals. For reconciliation, run a daily batch job comparing your DB state against gateway settlement reports. Use exactly-once Kafka semantics for downstream payment events."
}
}
]
}

Asked at: Stripe Interview Guide

Asked at: Coinbase Interview Guide

Asked at: Airbnb Interview Guide

Asked at: DoorDash Interview Guide

Scroll to Top