Low-Level Design: Notification Service — Push, Email, SMS, Templates, and Deduplication

Requirements

Functional: send notifications via multiple channels (push, email, SMS, in-app), support notification templates with variable substitution, allow users to set preferences (opt-in/opt-out per channel per category), batch notifications (digest), deduplicate (don’t send the same alert twice), track delivery status.

Non-functional: at-least-once delivery, async (never block the triggering action), channel fallback (if push fails, try email), scalable to millions of notifications/day.

Core Entities

import re
from dataclasses import dataclass, field
from datetime import datetime
from enum import Enum
from typing import Dict, List, Optional

class Channel(Enum):
    """Delivery channels through which a notification can be sent."""
    PUSH  = "PUSH"    # delivered by PushHandler via FCM
    EMAIL = "EMAIL"   # delivered by EmailHandler via SendGrid
    SMS   = "SMS"     # NOTE(review): no handler or routing for SMS is visible in this file
    INAPP = "INAPP"   # used as the fallback when no other channel is selected

class NotificationType(Enum):
    """Categories of events that can trigger a notification."""
    ORDER_CONFIRMED   = "ORDER_CONFIRMED"
    PAYMENT_FAILED    = "PAYMENT_FAILED"
    MESSAGE_RECEIVED  = "MESSAGE_RECEIVED"
    PRICE_DROP        = "PRICE_DROP"         # listed in DigestService.DIGEST_TYPES
    ACCOUNT_SECURITY  = "ACCOUNT_SECURITY"   # always routed to PUSH and EMAIL; user preferences are not consulted

@dataclass
class NotificationTemplate:
    """Message template for one (notification type, channel) pair.

    NotificationService fetches a template with
    ``db.get_template(notification_type, channel)`` and renders ``body``
    with the variables carried by the request.
    """
    template_id: str
    notification_type: NotificationType
    channel: Channel
    subject: str           # for email
    body: str              # supports {{variable}} placeholders
    priority: int          # 1=critical, 2=high, 3=normal, 4=low

@dataclass
class UserPreferences:
    """Per-user notification settings: channel subscriptions and a do-not-disturb window."""
    user_id: str
    # channel -> list of types the user wants on that channel
    subscriptions: Dict[Channel, List[NotificationType]] = field(default_factory=dict)
    dnd_start: Optional[int] = None   # hour 0-23: do not disturb window start
    dnd_end:   Optional[int] = None   # presumably hour 0-23, window end — TODO confirm
    # NOTE(review): dnd_start/dnd_end are not read by any code visible in this file.

@dataclass
class NotificationRequest:
    """A request to notify one user about one event; input to NotificationService.send."""
    request_id: str
    user_id: str
    notification_type: NotificationType
    variables: Dict[str, str]          # template variable values
    idempotency_key: str               # prevent duplicate sends
    priority: int = 3                  # presumably the same 1-4 scale as NotificationTemplate.priority — TODO confirm
    scheduled_at: Optional[datetime] = None   # NOTE(review): not consulted by the send path shown in this file

@dataclass
class NotificationRecord:
    """Delivery record for one request on one channel.

    NotificationService.send creates one record per selected channel with
    status 'PENDING'; the channel handlers then update status, error and
    external_id and persist the record.
    """
    record_id: str
    request_id: str
    user_id: str
    channel: Channel
    notification_type: NotificationType
    status: str      # 'PENDING' | 'SENT' | 'DELIVERED' | 'FAILED' | 'SKIPPED'
    sent_at: Optional[datetime] = None   # NOTE(review): never set by the handlers shown in this file
    error: Optional[str] = None          # reason for a FAILED / SKIPPED status
    external_id: Optional[str] = None   # FCM message ID, SendGrid ID, etc.

Notification Service Architecture

class NotificationService:
    """Fan a notification request out to the channels selected for the user.

    For every selected channel the matching template is rendered, a
    'PENDING' NotificationRecord is persisted and the message is handed to
    ``self._enqueue`` for asynchronous delivery.

    NOTE(review): ``_enqueue`` is not defined in this class as shown; it is
    expected to be provided elsewhere (subclass / mixin).
    """

    # Matches a ``{{name}}`` placeholder. Braces are excluded from the name so
    # that ``{{{x}}}`` still resolves the inner ``{{x}}``, exactly as plain
    # string replacement would.
    _PLACEHOLDER = re.compile(r"\{\{([^{}]+)\}\}")

    def send(self, request: "NotificationRequest") -> List["NotificationRecord"]:
        """Create and enqueue one notification record per selected channel.

        Args:
            request: the notification to deliver.

        Returns:
            The records created for this request, or the previously stored
            records when the idempotency key has been seen before. Channels
            without a template are silently skipped.
        """
        # Idempotency check: a replayed request returns the original records
        # instead of sending again.
        if db.notification_exists(request.idempotency_key):
            return db.get_records_by_idempotency(request.idempotency_key)

        prefs = db.get_user_preferences(request.user_id)
        records = []
        for channel in self._select_channels(request, prefs):
            template = db.get_template(request.notification_type, channel)
            if not template:
                continue
            body = self._render(template.body, request.variables)
            # Subjects may carry placeholders too (e.g. an order number), so
            # they go through the same rendering as the body.
            subject = template.subject
            if subject:
                subject = self._render(subject, request.variables)
            record = NotificationRecord(
                record_id=generate_id(),
                request_id=request.request_id,
                user_id=request.user_id,
                channel=channel,
                notification_type=request.notification_type,
                status='PENDING',
            )
            db.save(record)
            # Dispatch to channel-specific queue
            self._enqueue(channel, record, body, subject)
            records.append(record)
        return records

    def _select_channels(self, request, prefs: Optional["UserPreferences"]) -> List["Channel"]:
        """Return the channels on which ``request`` should be delivered.

        ACCOUNT_SECURITY always goes to PUSH and EMAIL regardless of
        preferences. Otherwise PUSH, EMAIL and INAPP are kept when the user
        subscribed the notification type on that channel; if nothing matches
        (or the user has no stored preferences) the result is in-app only.
        SMS is not part of preference-driven routing here.
        """
        # ACCOUNT_SECURITY is always sent (cannot opt out)
        if request.notification_type == NotificationType.ACCOUNT_SECURITY:
            return [Channel.PUSH, Channel.EMAIL]

        # A user without stored preferences is treated as having no subscriptions.
        subscriptions = prefs.subscriptions if prefs is not None else {}
        candidates = [Channel.PUSH, Channel.EMAIL, Channel.INAPP]
        selected = [
            ch for ch in candidates
            if request.notification_type in subscriptions.get(ch, [])
        ]
        return selected or [Channel.INAPP]   # always at least in-app

    def _render(self, template: str, variables: dict) -> str:
        """Substitute ``{{key}}`` placeholders in ``template``.

        The template is scanned once, so text inserted for one placeholder is
        never re-interpreted as another placeholder. Values are converted
        with ``str()``; placeholders without a matching key are left as-is.
        """
        if not template or not variables:
            return template

        def substitute(match):
            key = match.group(1)
            if key in variables:
                return str(variables[key])
            return match.group(0)

        return self._PLACEHOLDER.sub(substitute, template)

Channel Handlers

class PushHandler:
    """Deliver a push notification to every registered device of a user via FCM."""

    def send(self, user_id: str, body: str, record: NotificationRecord):
        """Send ``body`` to each of the user's device tokens and persist the outcome.

        The record ends up:
          * 'SKIPPED' when the user has no device tokens;
          * 'SENT' when at least one device accepted the message
            (``external_id`` holds the id of the last accepted message);
          * 'FAILED' when every device failed (``error`` lists all failures).

        Tokens whose failure mentions 'UNREGISTERED' are removed.
        """
        device_tokens = db.get_device_tokens(user_id)
        if not device_tokens:
            record.status = 'SKIPPED'
            record.error = 'No device tokens'
            db.save(record)
            return

        # Aggregate per-token results so that the last token in the list does
        # not decide the status of the whole record.
        delivered = False
        errors = []
        for token in device_tokens:
            try:
                response = fcm_client.send({
                    'token': token,
                    'notification': {'title': 'Notification', 'body': body},
                })
                record.external_id = response.message_id
                delivered = True
            except Exception as e:
                # Broad on purpose: any client failure for one device is
                # recorded and must not prevent the remaining devices.
                if 'UNREGISTERED' in str(e):
                    db.remove_device_token(token)   # token expired
                errors.append(str(e))

        if delivered:
            record.status = 'SENT'
            record.error = None
        else:
            record.status = 'FAILED'
            record.error = '; '.join(errors)
        db.save(record)

class EmailHandler:
    """Deliver an email notification through SendGrid."""

    def send(self, user_id: str, subject: str, body: str, record: NotificationRecord):
        """Send the email and persist the outcome on ``record``.

        The record ends up 'SKIPPED' (with a reason in ``error``) when the
        user is unknown or the address is not verified, 'SENT' on success,
        and 'FAILED' when the provider call raises.

        Raises:
            Whatever ``sendgrid_client.send`` raises; the failure is stored on
            the record first and then re-raised so callers still see it.
        """
        user = db.get_user(user_id)
        if user is None or not user.email_verified:
            record.status = 'SKIPPED'
            record.error = 'User not found' if user is None else 'Email not verified'
            db.save(record)
            return

        try:
            response = sendgrid_client.send(
                to=user.email, subject=subject, html_body=body
            )
        except Exception as e:
            # Without this the record would stay 'PENDING' forever.
            record.status = 'FAILED'
            record.error = str(e)
            db.save(record)
            raise

        record.status = 'SENT'
        record.external_id = response.headers.get('X-Message-Id')
        db.save(record)

Digest / Batching

class DigestService:
    """Collect low-priority notifications and deliver them as one daily digest email."""

    # Notification types that are eligible for digesting.
    DIGEST_TYPES = {NotificationType.PRICE_DROP}

    def queue_for_digest(self, request: NotificationRequest):
        """Park ``request`` in the user's digest queue instead of sending it now."""
        db.add_to_digest_queue(request.user_id, request)

    def send_daily_digest(self, user_id: str):
        """Email everything queued for ``user_id`` as a single digest, then clear the queue.

        Does nothing when the queue is empty. Queued requests are grouped by
        the string value of their notification type before rendering.
        """
        queued = db.get_digest_queue(user_id)
        if queued:
            by_type = {}
            for item in queued:
                type_name = item.notification_type.value
                if type_name not in by_type:
                    by_type[type_name] = []
                by_type[type_name].append(item)
            digest_body = self._render_digest(by_type)
            self._send_email(user_id, 'Your Daily Digest', digest_body)
            # Cleared only after the send call returned.
            db.clear_digest_queue(user_id)

Deduplication

# Length of the deduplication window: one hour, in seconds.
DEDUP_WINDOW_SECONDS = 3600


def is_duplicate(user_id: str, notification_type: str, dedup_key: str) -> bool:
    """Report whether this notification was already seen inside the dedup window.

    Note that the check also claims the key: the first call for a given
    (user, type, dedup_key) stores a marker that expires after
    DEDUP_WINDOW_SECONDS and returns False; calls made while the marker
    exists return True.
    """
    marker_key = "notif_dedup:{}:{}:{}".format(user_id, notification_type, dedup_key)
    # SET with nx=True only writes when the key is absent; the code treats a
    # None reply as "key already existed".
    was_set = r.set(marker_key, 1, ex=DEDUP_WINDOW_SECONDS, nx=True)
    if was_set is None:
        return True
    return False

Interview Questions

Q: How do you handle a user who has push notifications but is currently offline?

FCM (Firebase Cloud Messaging) queues push notifications when a device is offline and delivers them when the device reconnects (up to 4 weeks). The notification service sends to FCM regardless of online status — FCM handles the delivery. FCM returns a success even if the device is offline (it accepts the message). The device receives it when it comes online. For time-sensitive notifications (flash sale ends in 1 hour), set a TTL on the FCM message — if undelivered within TTL, discard rather than showing stale content.

Q: How would you scale this to 100 million notifications per day?

Use Kafka as the message bus — one topic per channel (push-notifications, email-notifications, sms-notifications). Producers (the notification service) publish to Kafka. Consumers (channel handlers) are horizontally scaled worker pools. Each channel has different throughput: push = millions/minute (FCM handles batching), email = rate-limited by the ESP (e.g., SendGrid = 100K/hour on basic plan → use multiple API keys or a premium tier), SMS = expensive and rate-limited (use only for critical notifications). Priority: Kafka has no built-in priority between partitions, so give critical notifications (ACCOUNT_SECURITY) a dedicated high-priority topic with its own consumer pool so they are never queued behind bulk traffic.

{
"@context": "https://schema.org",
"@type": "FAQPage",
"mainEntity": [
{
"@type": "Question",
"name": "How do you design a notification service that supports push, email, and SMS?",
"acceptedAnswer": {
"@type": "Answer",
"text": "Use a channel abstraction: define a NotificationChannel interface with a send(user_id, template, data) method. Implement PushChannel (FCM/APNs), EmailChannel (SendGrid/SES), SMSChannel (Twilio). A NotificationRouter maps notification_type to the list of channels to use. User preferences table stores per-user opt-in/opt-out per channel and notification type. The entry point is a Notification event on Kafka. The Notification Service consumes the event, loads user preferences, filters to opted-in channels, renders the template for each channel, and dispatches to each channel handler. Channel handlers are independent — push failure does not block email delivery."
}
},
{
"@type": "Question",
"name": "How do you prevent duplicate notifications from being sent?",
"acceptedAnswer": {
"@type": "Answer",
"text": "Use a deduplication key: hash(user_id + notification_type + reference_id + channel). Before sending, SET NX (Redis) with this key and a TTL matching your deduplication window (e.g., 24 hours for transactional, 1 hour for digest). If SET NX returns nil (the key already exists), skip this notification — it was already sent. If it returns OK, proceed with delivery. Store the deduplication key in the notifications_sent table for audit trails. For idempotent retry on delivery failure (FCM returned 5xx), delete the key and re-enqueue with the same deduplication key, or only mark dedup AFTER confirmed delivery — either way, the next attempt finds the key only if a send actually succeeded."
}
},
{
"@type": "Question",
"name": "How do you implement notification templates with variable substitution?",
"acceptedAnswer": {
"@type": "Answer",
"text": "Store templates in a database keyed by (notification_type, channel, locale). Template body uses {{variable}} placeholders: Hello {{first_name}}, your order {{order_id}} has shipped. Template rendering: replace all {{key}} with values from the data payload using a regex or string replacement. Use Handlebars for richer templates (conditionals, loops). Versioning: templates have a version field; the notification event can specify which version to use (useful for A/B testing). Cache rendered templates in Redis for identical payloads (same type + same data hash). Store the rendered content on the notification record for debugging and auditing."
}
},
{
"@type": "Question",
"name": "How do you implement digest notifications to avoid alert fatigue?",
"acceptedAnswer": {
"@type": "Answer",
"text": "Instead of sending an email per event, aggregate events into a digest. Store events in a pending_notifications table with (user_id, notification_type, payload, created_at). A scheduled job runs every 15 minutes: SELECT all pending notifications per user, group by type, render a digest template with all items, send one email/push, mark rows as sent. Digest windows: some notifications are immediate (security alerts, OTP), others are digest-able (weekly summary, comment notifications). Store delivery_mode=IMMEDIATE or DIGEST on the notification type config. Users can configure their digest frequency (real-time, hourly, daily) in preferences."
}
},
{
"@type": "Question",
"name": "How do you scale a notification service to send 10 million notifications per hour?",
"acceptedAnswer": {
"@type": "Answer",
"text": "10M/hour ≈ 2,800/second. Fan-out the work: Kafka partitioned by user_id (keeps ordering per user). Multiple consumer groups — one per channel. Each channel’s consumer pool auto-scales independently: push volume >> email volume. For FCM push: use batch send API (up to 500 tokens per request), reducing API calls by 500x. For email: use SendGrid batch API (1000 per request). Rate limit per channel: FCM allows 600K messages/minute per project. Separate high-priority queue (OTP, payment alerts) from low-priority (marketing). Cache user device tokens and preferences in Redis — avoid DB read per notification. Dead letter queue for failed deliveries with exponential backoff retry."
}
}
]
}

Asked at: Snap Interview Guide

Asked at: DoorDash Interview Guide

Asked at: Shopify Interview Guide

Asked at: Stripe Interview Guide

Scroll to Top