Low-Level Design: Customer Support Ticketing System (SLA, Routing, State Machine)

Low-Level Design: Customer Support Ticketing System

A ticketing system manages customer support requests from creation through resolution. It involves ticket routing, priority queuing, SLA tracking, and agent assignment. This question has been asked at Atlassian, Zendesk, and Stripe.

Core Entities


from dataclasses import dataclass, field
from enum import Enum
from datetime import datetime, timedelta
from typing import Optional
import uuid

class Priority(Enum):
    """Ticket urgency level; a larger value means a more urgent ticket."""

    LOW = 1      # least urgent
    MEDIUM = 2
    HIGH = 3
    URGENT = 4   # most urgent

class TicketStatus(Enum):
    """Lifecycle states a ticket can be in; values are the string form of each state."""

    OPEN = "open"                          # created, no agent yet
    ASSIGNED = "assigned"                  # an agent has been attached
    IN_PROGRESS = "in_progress"
    PENDING_CUSTOMER = "pending_customer"
    RESOLVED = "resolved"
    CLOSED = "closed"

class Category(Enum):
    """Topic area of a ticket; also used to describe which topics an agent can handle."""

    BILLING = "billing"
    TECHNICAL = "technical"
    ACCOUNT = "account"
    GENERAL = "general"

# SLA targets keyed by ticket priority. Both are durations that get added to
# a ticket's creation time to obtain the corresponding deadline.

# Maximum time allowed until the first response.
SLA_RESPONSE: dict[Priority, timedelta] = {
    Priority.URGENT: timedelta(hours=1),
    Priority.HIGH: timedelta(hours=4),
    Priority.MEDIUM: timedelta(hours=8),
    Priority.LOW: timedelta(hours=24),
}

# Maximum time allowed until resolution.
SLA_RESOLUTION: dict[Priority, timedelta] = {
    Priority.URGENT: timedelta(hours=4),
    Priority.HIGH: timedelta(hours=24),
    Priority.MEDIUM: timedelta(days=3),
    Priority.LOW: timedelta(days=7),
}

@dataclass
class Customer:
    """A customer who can open support tickets."""

    customer_id: str
    name: str
    email: str
    # Service tier: one of "standard", "premium", "enterprise".
    tier: str = "standard"

@dataclass
class Agent:
    """A support agent who handles tickets in one or more categories."""

    agent_id: str
    name: str
    email: str
    skills: list[Category]
    current_load: int = 0   # tickets currently counted against this agent
    max_load: int = 10      # capacity limit used by is_available

    @property
    def is_available(self) -> bool:
        """Return True while the agent is below their load capacity."""
        return self.current_load < self.max_load


# NOTE(review): the Comment and Ticket definitions were lost in the source
# (the text between `self.current_load` and `-> datetime:` was stripped).
# They are reconstructed below from how TicketService builds and reads them;
# confirm field names, order and defaults against the original.

@dataclass
class Comment:
    """A single message attached to a ticket."""

    comment_id: str
    author_id: str
    author_type: str           # TicketService compares this against "agent"
    body: str
    is_internal: bool = False  # internal notes do not count as a first response


@dataclass
class Ticket:
    """A support request plus the timestamps needed for SLA tracking."""

    ticket_id: str
    customer_id: str
    subject: str
    description: str
    priority: Priority
    category: Category
    status: TicketStatus = TicketStatus.OPEN
    assigned_agent_id: Optional[str] = None
    # Naive UTC, to match the datetime.utcnow() comparisons below.
    created_at: datetime = field(default_factory=datetime.utcnow)
    first_response_at: Optional[datetime] = None
    resolved_at: Optional[datetime] = None
    comments: list[Comment] = field(default_factory=list)

    @property
    def response_due_at(self) -> datetime:
        """Deadline for the first response: creation time plus the priority's SLA."""
        return self.created_at + SLA_RESPONSE[self.priority]

    @property
    def resolution_due_at(self) -> datetime:
        """Deadline for resolution: creation time plus the priority's SLA."""
        return self.created_at + SLA_RESOLUTION[self.priority]

    @property
    def is_response_breached(self) -> bool:
        """Return True if no response has been recorded and the deadline has passed."""
        if self.first_response_at:
            return False  # already responded
        return datetime.utcnow() > self.response_due_at

    @property
    def is_resolution_breached(self) -> bool:
        """Return True if the ticket is unresolved and the deadline has passed."""
        if self.resolved_at:
            return False
        return datetime.utcnow() > self.resolution_due_at

Ticket Service and Routing


class TicketService:
    """In-memory ticket lifecycle manager: creation, routing, status changes, comments.

    Agent load accounting: an agent's ``current_load`` counts the tickets
    assigned to them that are not resolved. It goes up on assignment and on
    reopen, and down on resolution and on un-assignment.
    """

    # Allowed next states for each status. CLOSED has no outgoing transitions.
    VALID_TRANSITIONS = {
        TicketStatus.OPEN:             {TicketStatus.ASSIGNED},
        TicketStatus.ASSIGNED:         {TicketStatus.IN_PROGRESS, TicketStatus.OPEN},
        TicketStatus.IN_PROGRESS:      {TicketStatus.PENDING_CUSTOMER, TicketStatus.RESOLVED},
        TicketStatus.PENDING_CUSTOMER: {TicketStatus.IN_PROGRESS, TicketStatus.RESOLVED},
        TicketStatus.RESOLVED:         {TicketStatus.CLOSED, TicketStatus.IN_PROGRESS},
        TicketStatus.CLOSED:           set(),
    }

    def __init__(self, agent_store, notifier):
        """Create a service with no tickets.

        Args:
            agent_store: mapping of agent_id -> Agent. It is used as-is (not
                copied), and the agents' ``current_load`` is updated in place.
            notifier: object providing ``notify_customer(customer_id, message)``
                and ``notify_agent(agent_id, message)``.
        """
        self._tickets: dict[str, Ticket] = {}
        self._agents: dict[str, Agent] = agent_store
        self.notifier = notifier

    def create_ticket(self, customer_id: str, subject: str, description: str,
                       category: Category, priority: Priority = Priority.MEDIUM) -> Ticket:
        """Create and store a ticket, try to auto-assign it, and notify the customer.

        Returns:
            The new ticket. It is already ASSIGNED if a suitable agent was found.
        """
        ticket = Ticket(
            ticket_id=str(uuid.uuid4()),
            customer_id=customer_id,
            subject=subject,
            description=description,
            priority=priority,
            category=category,
        )
        self._tickets[ticket.ticket_id] = ticket
        self._auto_assign(ticket)
        self.notifier.notify_customer(customer_id, f"Ticket #{ticket.ticket_id} created")
        return ticket

    def _auto_assign(self, ticket: Ticket) -> None:
        """Assign to least-loaded available agent skilled in the ticket's category."""
        candidates = [
            a for a in self._agents.values()
            if a.is_available and ticket.category in a.skills
        ]
        if not candidates:
            return  # ticket stays OPEN, will be manually assigned or retried
        best = min(candidates, key=lambda a: a.current_load)
        self._assign(ticket, best.agent_id)

    def _assign(self, ticket: Ticket, agent_id: str) -> None:
        """Attach *agent_id* to the ticket, mark it ASSIGNED, and notify the agent.

        Raises:
            KeyError: if *agent_id* is not in the agent store.
        """
        agent = self._agents[agent_id]
        ticket.assigned_agent_id = agent_id
        ticket.status = TicketStatus.ASSIGNED
        agent.current_load += 1
        self.notifier.notify_agent(agent_id, f"Ticket #{ticket.ticket_id} assigned to you")

    def _adjust_load(self, agent_id: str, delta: int) -> None:
        """Add *delta* to an agent's load, clamping at zero."""
        agent = self._agents[agent_id]
        agent.current_load = max(0, agent.current_load + delta)

    def transition(self, ticket_id: str, new_status: TicketStatus,
                    actor_id: str) -> Ticket:
        """Move a ticket to *new_status* if VALID_TRANSITIONS allows it.

        Side effects on top of the status change:
          * -> RESOLVED: stamps ``resolved_at`` and frees one slot of the
            assigned agent's load.
          * RESOLVED -> IN_PROGRESS (reopen): clears ``resolved_at`` and counts
            the ticket against the assigned agent again, so that a later
            resolve does not decrement the load a second time.
          * ASSIGNED -> OPEN (un-assign): frees the agent's slot and clears
            ``assigned_agent_id``.

        Args:
            ticket_id: id of the ticket to change.
            new_status: requested status.
            actor_id: id of whoever requested the change (currently unused).

        Returns:
            The updated ticket.

        Raises:
            ValueError: if the ticket does not exist or the transition is not allowed.
        """
        ticket = self._tickets.get(ticket_id)
        if ticket is None:
            raise ValueError(f"Ticket {ticket_id} not found")
        if new_status not in self.VALID_TRANSITIONS[ticket.status]:
            raise ValueError(f"Cannot transition {ticket.status} -> {new_status}")

        old_status = ticket.status
        agent_id = ticket.assigned_agent_id
        ticket.status = new_status

        if new_status == TicketStatus.RESOLVED:
            ticket.resolved_at = datetime.utcnow()
            if agent_id:
                self._adjust_load(agent_id, -1)
        elif old_status == TicketStatus.RESOLVED and new_status == TicketStatus.IN_PROGRESS:
            # Reopened: the ticket is active work again, so undo the resolve.
            # This may put the agent above max_load; the work is already theirs.
            ticket.resolved_at = None
            if agent_id:
                self._adjust_load(agent_id, +1)
        elif new_status == TicketStatus.OPEN:
            # Un-assigned: give the slot back and detach the agent.
            if agent_id:
                self._adjust_load(agent_id, -1)
            ticket.assigned_agent_id = None
        return ticket

    def add_comment(self, ticket_id: str, author_id: str, author_type: str,
                     body: str, is_internal: bool = False) -> Comment:
        """Append a comment to a ticket and record the first response time.

        The first non-internal comment whose ``author_type`` is ``"agent"``
        sets ``first_response_at``.

        Returns:
            The created comment.

        Raises:
            ValueError: if the ticket does not exist.
        """
        ticket = self._tickets.get(ticket_id)
        if ticket is None:
            raise ValueError(f"Ticket {ticket_id} not found")
        comment = Comment(
            comment_id=str(uuid.uuid4()),
            author_id=author_id,
            author_type=author_type,
            body=body,
            is_internal=is_internal,
        )
        ticket.comments.append(comment)
        if author_type == "agent" and not ticket.first_response_at and not is_internal:
            ticket.first_response_at = datetime.utcnow()
        return comment

    def get_sla_breached_tickets(self) -> list[Ticket]:
        """Return every stored ticket whose response or resolution SLA is currently breached."""
        return [t for t in self._tickets.values()
                if t.is_response_breached or t.is_resolution_breached]

Priority Queue for Unassigned Tickets


import heapq

class TicketQueue:
    """Priority queue weighting urgent tickets from premium customers highest.

    Tickets come out in this order: highest score first (priority value * 10
    plus a customer-tier bonus), then earliest ``created_at``, then insertion
    order.
    """

    # Score bonus per customer tier; tiers not listed here get no bonus.
    _TIER_BONUS = {"enterprise": 100, "premium": 50, "standard": 0}

    def __init__(self, customer_store):
        """
        Args:
            customer_store: object with ``get(customer_id)`` returning a
                customer (with a ``tier`` attribute) or None.
        """
        self._heap = []  # entries: (priority_score, created_at, seq, ticket)
        # Insertion counter used as the last tie-breaker, so two entries with
        # the same score and timestamp never fall through to comparing Ticket
        # objects (which define no ordering and would raise TypeError).
        self._seq = 0
        self.customers = customer_store

    def _score(self, ticket: "Ticket") -> int:
        """Return the heap key for *ticket*; negated so the min-heap pops the highest score."""
        customer = self.customers.get(ticket.customer_id)
        tier = customer.tier if customer else "standard"
        return -(ticket.priority.value * 10 + self._TIER_BONUS.get(tier, 0))

    def push(self, ticket: "Ticket") -> None:
        """Add *ticket* to the queue."""
        entry = (self._score(ticket), ticket.created_at, self._seq, ticket)
        self._seq += 1
        heapq.heappush(self._heap, entry)

    def pop(self) -> "Ticket":
        """Remove and return the highest-ranked ticket.

        Raises:
            IndexError: if the queue is empty.
        """
        if not self._heap:
            raise IndexError("Queue empty")
        *_, ticket = heapq.heappop(self._heap)
        return ticket

Design Decisions

| Decision | Choice | Rationale |
| --- | --- | --- |
| Status transitions | State machine with `VALID_TRANSITIONS` dict | Prevents illegal status changes at class level |
| SLA tracking | Computed properties on `Ticket` | No background job needed; checked lazily |
| Agent load balancing | Min `current_load` among skilled agents | Simple, fair; extensible with skill rating |
| Internal comments | `is_internal` flag on `Comment` | Agents share notes without exposing to customer |

Asked at: Atlassian Interview Guide

Asked at: Shopify Interview Guide

Asked at: Stripe Interview Guide

Asked at: Airbnb Interview Guide

Scroll to Top