Low-Level Design: Social Media Feed (Twitter/Instagram)
A social media feed system manages posts, follows, likes, and generates a personalized home timeline. It is a common LLD question at Meta, Twitter, LinkedIn, and Snap.
Core Entities
from dataclasses import dataclass, field
from enum import Enum
from datetime import datetime
from typing import Optional
import uuid
class PostType(Enum):
TEXT = "text"
IMAGE = "image"
VIDEO = "video"
REPOST = "repost"
@dataclass
class User:
user_id: str
username: str
display_name: str
bio: str = ""
follower_count: int = 0
following_count: int = 0
is_verified: bool = False
@dataclass
class Post:
post_id: str
author_id: str
content: str
post_type: PostType = PostType.TEXT
media_urls: list[str] = field(default_factory=list)
like_count: int = 0
repost_count: int = 0
reply_count: int = 0
parent_post_id: Optional[str] = None # for replies
created_at: datetime = field(default_factory=datetime.utcnow)
is_deleted: bool = False
@dataclass
class Follow:
follower_id: str
followee_id: str
created_at: datetime = field(default_factory=datetime.utcnow)
Follow/Unfollow Service
class FollowService:
def __init__(self):
# followee_id -> set of follower_ids
self._followers: dict[str, set] = {}
# follower_id -> set of followee_ids
self._following: dict[str, set] = {}
self._users: dict[str, User] = {}
def follow(self, follower_id: str, followee_id: str) -> None:
if follower_id == followee_id:
raise ValueError("Cannot follow yourself")
if followee_id in self._following.get(follower_id, set()):
return # already following
self._following.setdefault(follower_id, set()).add(followee_id)
self._followers.setdefault(followee_id, set()).add(follower_id)
# Update counts
if follower_id in self._users:
self._users[follower_id].following_count += 1
if followee_id in self._users:
self._users[followee_id].follower_count += 1
def unfollow(self, follower_id: str, followee_id: str) -> None:
if followee_id not in self._following.get(follower_id, set()):
return # not following
self._following[follower_id].discard(followee_id)
self._followers[followee_id].discard(follower_id)
if follower_id in self._users:
self._users[follower_id].following_count -= 1
if followee_id in self._users:
self._users[followee_id].follower_count -= 1
def get_followers(self, user_id: str) -> set[str]:
return self._followers.get(user_id, set())
def get_following(self, user_id: str) -> set[str]:
return self._following.get(user_id, set())
def is_following(self, follower_id: str, followee_id: str) -> bool:
return followee_id in self._following.get(follower_id, set())
Feed Service — Fan-out on Write (Push Model)
import heapq
class FeedService:
"""
Push model: on post creation, write to all followers' feeds immediately.
Pros: fast reads (O(1) feed lookup).
Cons: expensive writes for high-follower accounts (celebrities with 10M+ followers).
Solution: hybrid — push for regular users, pull for celebrities.
"""
CELEBRITY_THRESHOLD = 1_000_000 # followers
def __init__(self, follow_svc, post_store):
self.follow_svc = follow_svc
self.posts = post_store
# user_id -> list of (timestamp, post_id) for their home feed
self._feeds: dict[str, list] = {}
def publish_post(self, post: Post, user: User) -> None:
"""Fan-out post to all followers' feeds (push model)."""
followers = self.follow_svc.get_followers(post.author_id)
entry = (-post.created_at.timestamp(), post.post_id)
# Also add to author's own feed
heapq.heappush(self._feeds.setdefault(post.author_id, []), entry)
if user.follower_count >= self.CELEBRITY_THRESHOLD:
return # skip fan-out for celebrities; use pull on read
for follower_id in followers:
heapq.heappush(self._feeds.setdefault(follower_id, []), entry)
def get_feed(self, user_id: str, limit: int = 20) -> list[str]:
"""Return top-limit post IDs for user's home feed."""
heap = self._feeds.get(user_id, [])
# Pull celebrity posts (not pre-pushed)
following = self.follow_svc.get_following(user_id)
celebrity_posts = []
for followee_id in following:
followee = self.posts.get_user(followee_id)
if followee and followee.follower_count >= self.CELEBRITY_THRESHOLD:
recent = self.posts.get_recent_posts(followee_id, limit=5)
for p in recent:
celebrity_posts.append((-p.created_at.timestamp(), p.post_id))
# Merge heap + celebrity posts
all_posts = list(heap) + celebrity_posts
all_posts.sort()
seen = set()
result = []
for _, post_id in all_posts:
if post_id not in seen and len(result) < limit:
seen.add(post_id)
result.append(post_id)
return result
Like Service with Idempotency
class LikeService:
def __init__(self, post_store):
self.posts = post_store
self._likes: dict[str, set] = {} # post_id -> set of user_ids
def like(self, user_id: str, post_id: str) -> bool:
liked_by = self._likes.setdefault(post_id, set())
if user_id in liked_by:
return False # already liked, idempotent
liked_by.add(user_id)
post = self.posts.get(post_id)
if post:
post.like_count += 1
return True
def unlike(self, user_id: str, post_id: str) -> bool:
liked_by = self._likes.get(post_id, set())
if user_id not in liked_by:
return False # not liked
liked_by.discard(user_id)
post = self.posts.get(post_id)
if post:
post.like_count = max(0, post.like_count - 1)
return True
def is_liked(self, user_id: str, post_id: str) -> bool:
return user_id in self._likes.get(post_id, set())
def like_count(self, post_id: str) -> int:
return len(self._likes.get(post_id, set()))
Design Trade-offs
| Decision | Choice | Trade-off |
|---|---|---|
| Feed generation | Hybrid push/pull | Fast reads for regular users; celebrities use pull to avoid 10M fan-out writes |
| Like storage | Set per post | O(1) toggle and lookup; in production use Redis SET or DB with unique constraint |
| Follow graph | In-memory sets | For LLD; production uses Graph DB (Neo4j) or adjacency list in Cassandra |
| Feed ordering | Reverse chronological (min-heap) | Simple; algorithmic ranking adds engagement signals |
{
“@context”: “https://schema.org”,
“@type”: “FAQPage”,
“mainEntity”: [
{
“@type”: “Question”,
“name”: “What is the difference between fan-out on write and fan-out on read?”,
“acceptedAnswer”: {
“@type”: “Answer”,
“text”: “Fan-out on write (push): when a user posts, immediately push the post ID to all followers’ feed caches. Feed reads are O(1) u2014 just read the pre-built list. Write is expensive: O(followers) writes per post. Problematic for celebrities (10M followers = 10M writes). Fan-out on read (pull): feed is assembled at read time by fetching posts from all followed users. Read is expensive: O(following) queries merged and sorted. Push is cheap. Hybrid: push for regular users, pull for celebrities (above a follower threshold, e.g., 1M).”
}
},
{
“@type”: “Question”,
“name”: “How do you handle the celebrity problem in a social feed system?”,
“acceptedAnswer”: {
“@type”: “Answer”,
“text”: “Define a “celebrity” threshold (e.g., >1M followers). For celebrity accounts, skip fan-out on post creation. On feed generation, for each celebrity the user follows, query the celebrity’s recent posts directly (they are not in the pre-built feed cache). Merge celebrity posts with the pre-built feed using a min-heap sorted by timestamp, deduplicate, and return top-k. The cost: a small number of additional queries per feed load (users typically follow few celebrities). Celebrities’ posts are cached separately by post ID for fast random access.”
}
},
{
“@type”: “Question”,
“name”: “How do you ensure likes are idempotent?”,
“acceptedAnswer”: {
“@type”: “Answer”,
“text”: “Store a set of user_ids per post_id (in Redis: SADD post:{id}:likes user_id returns 0 if already member, 1 if added). Like is a no-op if the user is already in the set. Unlike removes from the set (SREM). Like count is SCARD of the set. This approach handles concurrent requests safely u2014 two simultaneous likes from the same user result in exactly one like. For the database-backed version: use a unique constraint on (post_id, user_id) in the likes table; duplicate insert raises IntegrityError which the application handles as “already liked”.”
}
},
{
“@type”: “Question”,
“name”: “How would you add an algorithmic ranking layer to the feed?”,
“acceptedAnswer”: {
“@type”: “Answer”,
“text”: “After generating the reverse-chronological candidate set (top-200 posts), score each post with a ranking model: score = recency_weight * time_decay + engagement_weight * (likes + reposts + comments) + relationship_weight * closeness_score + relevance_weight * topic_affinity. Return the top-k by score. Features come from fast stores: post engagement from Redis counters, user relationship score from a graph feature store, topic affinity from the user’s recent interaction history. The ranking model runs in milliseconds (lightweight ML model or heuristic).”
}
},
{
“@type”: “Question”,
“name”: “How would you scale follows and the social graph to billions of users?”,
“acceptedAnswer”: {
“@type”: “Answer”,
“text”: “Store the follow graph in a distributed adjacency list: Cassandra with partition key = user_id, clustering key = followee_id (for following list) and a reverse table with partition key = followee_id (for followers list). Each row is one edge u2014 compact, O(1) follow/unfollow. For fan-out, use a message queue (Kafka): post creation emits an event, a fan-out worker reads followers from Cassandra in batches and writes to Redis feed caches. This decouples post creation latency from fan-out time. Follower counts are cached in Redis counters (INCR/DECR on follow/unfollow).”
}
}
]
}
Asked at: Snap Interview Guide
Asked at: Twitter/X Interview Guide
Asked at: LinkedIn Interview Guide
Asked at: Airbnb Interview Guide