When to Use a Heap
A heap (priority queue) is the right data structure when you need: the minimum or maximum element in O(log N); the top K elements from a stream; efficient repeated extraction of the smallest/largest. Recognize heap problems by keywords: “top K,” “K closest,” “Kth largest/smallest,” “median of stream,” “merge K sorted,” “minimum cost,” “most frequent.” Python’s heapq is a min-heap; negate values for max-heap behavior.
Top K Elements
import heapq
from collections import Counter
# Pattern 1: K largest elements from an array
# Use a min-heap of size K: if new element > heap[0], replace
def top_k_largest(nums: list[int], k: int) -> list[int]:
# Min-heap of size K: heap[0] = smallest of the top-K
heap = nums[:k]
heapq.heapify(heap)
for n in nums[k:]:
if n > heap[0]:
heapq.heapreplace(heap, n) # pop min, push n (faster than pushpop)
return heap
# Time: O(N log K), Space: O(K)
# Better than sorting O(N log N) when K < list[int]:
count = Counter(nums)
# Min-heap by frequency: evict least frequent when heap exceeds K
heap = []
for num, freq in count.items():
heapq.heappush(heap, (freq, num))
if len(heap) > k:
heapq.heappop(heap) # remove least frequent
return [num for freq, num in heap]
# Time: O(N log K), Space: O(N + K)
# Pattern 3: K closest points to origin (LeetCode 973)
def k_closest(points: list[list[int]], k: int) -> list[list[int]]:
# Max-heap of size K: evict farthest when heap exceeds K
heap = [] # store (-dist, [x, y]) for max-heap simulation
for x, y in points:
dist = x*x + y*y
heapq.heappush(heap, (-dist, [x, y]))
if len(heap) > k:
heapq.heappop(heap) # removes the point with largest (-dist) = farthest
return [point for _, point in heap]
Median of a Data Stream
# Classic two-heap pattern: maintain two halves of the data
# Max-heap (left half): contains all elements = median
# Invariant: len(max_heap) == len(min_heap) OR len(max_heap) == len(min_heap) + 1
class MedianFinder:
def __init__(self):
self.lo = [] # max-heap (negate values)
self.hi = [] # min-heap
def addNum(self, num: int) -> None:
# Always push to lo (max-heap)
heapq.heappush(self.lo, -num)
# Balance: ensure all lo elements self.hi[0]:
heapq.heappush(self.hi, -heapq.heappop(self.lo))
# Balance sizes: lo can have at most 1 more element than hi
if len(self.lo) > len(self.hi) + 1:
heapq.heappush(self.hi, -heapq.heappop(self.lo))
elif len(self.hi) > len(self.lo):
heapq.heappush(self.lo, -heapq.heappop(self.hi))
def findMedian(self) -> float:
if len(self.lo) > len(self.hi):
return -self.lo[0] # odd total: median is top of lo
return (-self.lo[0] + self.hi[0]) / 2 # even total: average of two middles
# Time: O(log N) per add, O(1) per findMedian
# Space: O(N)
# Related: Sliding Window Median (LeetCode 480)
# Maintain two heaps over a sliding window of size K
# When window slides: remove outgoing element (lazy deletion with a hash set)
Merge K Sorted Lists
# Pattern: use a min-heap to efficiently find the next smallest element
# across K sorted lists
from typing import Optional
class ListNode:
def __init__(self, val=0, next=None):
self.val = val
self.next = next
def mergeKLists(lists: list[Optional[ListNode]]) -> Optional[ListNode]:
# Push (value, list_index, node) for each non-empty list's head
heap = []
for i, node in enumerate(lists):
if node:
heapq.heappush(heap, (node.val, i, node))
dummy = ListNode(0)
current = dummy
while heap:
val, i, node = heapq.heappop(heap)
current.next = node
current = current.next
if node.next:
heapq.heappush(heap, (node.next.val, i, node.next))
return dummy.next
# Time: O(N log K) where N = total nodes, K = number of lists
# Space: O(K) for the heap
# Same pattern: merge K sorted arrays, merge K sorted streams
Task Scheduler (Cooldown Problem)
# LeetCode 621: Given tasks and cooldown n,
# find minimum time to execute all tasks
# Between two same-type tasks, must wait n intervals
def leastInterval(tasks: list[str], n: int) -> int:
count = Counter(tasks)
# Max-heap by frequency (negate for Python min-heap)
heap = [-c for c in count.values()]
heapq.heapify(heap)
time = 0
# Queue of (remaining_count, available_at_time) for cooling tasks
from collections import deque
cooldown_queue = deque()
while heap or cooldown_queue:
time += 1
if heap:
# Execute the most frequent available task
freq = heapq.heappop(heap) + 1 # +1 because negated
if freq < 0: # still has remaining count
cooldown_queue.append((freq, time + n))
# Check if any cooling task is ready
if cooldown_queue and cooldown_queue[0][1] == time:
heapq.heappush(heap, cooldown_queue.popleft()[0])
return time
# Time: O(N log N), Space: O(1) (26 possible tasks)
Sliding Window Maximum
# Finding the maximum in each sliding window of size K
# Naive: O(NK) — recompute max for each window
# Optimal: O(N) using a deque (monotonic decreasing queue)
# (Not a heap, but often grouped with heap problems in interviews)
from collections import deque
def maxSlidingWindow(nums: list[int], k: int) -> list[int]:
dq = deque() # stores indices; front = index of max in current window
result = []
for i, n in enumerate(nums):
# Remove elements outside current window
while dq and dq[0] < i - k + 1:
dq.popleft()
# Maintain decreasing order: remove smaller elements from back
while dq and nums[dq[-1]] = k - 1:
result.append(nums[dq[0]])
return result
# Time: O(N), Space: O(K)
Dijkstra’s Algorithm with a Heap
# Shortest path in a weighted graph = repeated extraction of minimum-distance node
# Heap-based Dijkstra: O((V + E) log V)
def dijkstra(graph: dict[int, list[tuple[int,int]]], start: int) -> dict[int, int]:
# graph[u] = [(v, weight), ...]
dist = {start: 0}
heap = [(0, start)] # (distance, node)
while heap:
d, u = heapq.heappop(heap)
if d > dist.get(u, float("inf")):
continue # stale entry in heap
for v, weight in graph[u]:
new_dist = d + weight
if new_dist < dist.get(v, float("inf")):
dist[v] = new_dist
heapq.heappush(heap, (new_dist, v))
return dist
# Heap used: min-heap on (distance, node)
# Lazy deletion: don't remove stale entries; skip when popped (stale check)
Pattern Recognition
- Top K from static array: sort in O(N log N) OR use heap in O(N log K) — heap wins when K << N
- Top K from stream: maintain a K-element min-heap; new element beats the minimum → replace
- Median of stream: two heaps (max-heap for lower half, min-heap for upper half)
- Merge K sorted: heap with one element per list; pop and push next from same list
- Minimum cost to combine: repeatedly combine the two smallest → Huffman coding pattern
- Scheduling with constraints: heap + cooldown queue → task scheduler pattern