I believe the reason for the the slowness of heap_sort_custom is mostly in the number of iterations it takes to visit all nodes.
Below is a plot of array size vs iteration count for heap_sort_custom and heapsort.
As seen in the plot, heap_sort_custom makes atleast twice as many iterations as heapsort. The key to optimizing heap_sort_custom is in optimizing heap traversal.
I have modified heap_sort_custom and heapsort to count and return the total number of iterations to sort heap. Note that in heapsort, the iteration count does not include those during heap build.
import heapq
import random
import matplotlib.pyplot as plt
def heap_sort_custom(lst):
"""
Performs a custom heap sort iterative algorithm without changing the size of the heap. Unlike in standard heap sort, no extractions are performed
Args:
lst (list): The input list to be sorted.
Returns:
list: A sorted list containing elements of the input list in ascending order.
Approach:
- Convert the input list into a min-heap
- Traverse and process the heap iteratively:
- Maintain a set to track indices that have already been processed.
- Replace the value at the current node with the value of either the left child, right child or parent, depending on specific conditions.
- Accumulate results in a separate list as each node is processed.
"""
def left_child(i):
"""
Returns the value of the left child of the node at index i, or infinity if out of bounds.
"""
return float("inf") if 2 * i + 1 >= len(lst) else lst[2 * i + 1]
def right_child(i):
"""
Returns the value of the right child of the node at index i, or infinity if out of bounds.
"""
return float("inf") if 2 * i + 2 >= len(lst) else lst[2 * i + 2]
def parent(i):
"""
Returns the value of parent of the node at index i, or infinity if the node is the root.
"""
return lst[(i - 1) // 2] if i > 0 else float("inf")
heapq.heapify(lst) # Build a min-heap from input list
# A set to keep track of visited indices
visited_indices = set()
# List to store the sorted result
sorted_result = []
# Start traversal from the root of the heap
current_index = 0
#total number of while loop iterations
iteration_count = 0
while len(sorted_result) < len(lst):
iteration_count += 1
if not current_index in visited_indices:
# Add the current node's value to the result and mark it as visited
sorted_result.append(lst[current_index])
visited_indices.add(current_index)
# Replace the current node value with value of either left, right or parent node
if parent(current_index) < min(left_child(current_index), right_child(current_index)):
lst[current_index] = min(left_child(current_index), right_child(current_index))
current_index = (current_index - 1) // 2 # Move to the parent node
elif left_child(current_index) < right_child(current_index):
lst[current_index] = min(right_child(current_index), parent(current_index))
current_index = 2 * current_index + 1 # Move to the left child
else:
lst[current_index] = min(left_child(current_index), parent(current_index))
current_index = 2 * current_index + 2 # Move to the right child
return iteration_count
def heapsort(arr):
def sift_down(arr, n, i, build = True):
elem = arr[i]
nonlocal iteration_count
while True:
if not build:
iteration_count += 1
l = 2 * i + 1
if l >= n:
arr[i] = elem
return
r = 2 * i + 2
c = l
if r < n and arr[l] < arr[r]:
c = r
if elem >= arr[c]:
arr[i] = elem
return
arr[i] = arr[c]
i = c
# total number of sift_down while loop iterations during sorting phase
# doesn't include iterations during heap build
iteration_count = 0
n = len(arr)
for i in range(n // 2, -1, -1):
sift_down(arr, n, i)
for i in range(n - 1, 0, -1):
t = arr[i]
arr[i] = arr[0]
arr[0] = t
sift_down(arr, i, 0, build = False)
return iteration_count
def makedata(n):
res = list(range(n))
random.seed(a=n)
random.shuffle(res)
return res
def sample1(n):
data = makedata(n)
return heap_sort_custom(data[:])
def sample2(n):
data = makedata(n)
return heapsort(data[:])
a = [pow(2, i) for i in range(1,18)]
fig = plt.figure()
ax = fig.add_subplot()
ax.plot(a, [sample1(n) for n in a], color='blue', label='heap_sort_custom')
ax.plot(a, [sample2(n) for n in a], color='red', label='heapsort')
ax.set_xlabel('Array Size')
ax.set_ylabel('Iteration Count')
ax.set_title('Comparison of Iteration Counts')
plt.legend()
plt.show()