Skip to main content
added 17 characters in body
Source Link
toolic
  • 15.8k
  • 6
  • 29
  • 217

Below is a plot of array size vs iteration count for heap_sort_custom and heapsort. As seen in the plot, heap_sort_customheap_sort_custom makes atleastat least twice as many iterations as heapsortheapsort. The key to optimizing heap_sort_customheap_sort_custom is in optimizing heap traversal.   

enter image description here I

I have modified heap_sort_customheap_sort_custom and heapsortheapsort to count and return the total number of iterations to sort heap. Note that in heapsortheapsort, the iteration count does not include those during heap build.

Below is a plot of array size vs iteration count for heap_sort_custom and heapsort. As seen in the plot, heap_sort_custom makes atleast twice as many iterations as heapsort. The key to optimizing heap_sort_custom is in optimizing heap traversal.  enter image description here I have modified heap_sort_custom and heapsort to count and return the total number of iterations to sort heap. Note that in heapsort, the iteration count does not include those during heap build.

Below is a plot of array size vs iteration count for heap_sort_custom and heapsort. As seen in the plot, heap_sort_custom makes at least twice as many iterations as heapsort. The key to optimizing heap_sort_custom is in optimizing heap traversal. 

enter image description here

I have modified heap_sort_custom and heapsort to count and return the total number of iterations to sort heap. Note that in heapsort, the iteration count does not include those during heap build.

Source Link

I believe the reason for the the slowness of heap_sort_custom is mostly in the number of iterations it takes to visit all nodes.

Below is a plot of array size vs iteration count for heap_sort_custom and heapsort. As seen in the plot, heap_sort_custom makes atleast twice as many iterations as heapsort. The key to optimizing heap_sort_custom is in optimizing heap traversal. enter image description here I have modified heap_sort_custom and heapsort to count and return the total number of iterations to sort heap. Note that in heapsort, the iteration count does not include those during heap build.

import heapq
import random
import matplotlib.pyplot as plt
    
def heap_sort_custom(lst):
    """
    Performs a custom heap sort iterative algorithm without changing the size of the heap. Unlike in standard heap sort, no extractions are performed

    Args:
        lst (list): The input list to be sorted.
    Returns:
        list: A sorted list containing elements of the input list in ascending order.

    Approach:
    - Convert the input list into a min-heap
    - Traverse and process the heap iteratively:
        - Maintain a set to track indices that have already been processed.
        - Replace the value at the current node with the value of either the left child, right child or parent, depending on specific conditions.
    - Accumulate results in a separate list as each node is processed.
    """
  
    def left_child(i):
        """
        Returns the value of the left child of the node at index i, or infinity if out of bounds.
        """
        return float("inf") if 2 * i + 1 >= len(lst) else lst[2 * i + 1]

    def right_child(i):
        """
        Returns the value of the right child of the node at index i, or infinity if out of bounds.
        """
        return float("inf") if 2 * i + 2 >= len(lst) else lst[2 * i + 2]

    def parent(i):
        """
        Returns the value of parent of the node at index i, or infinity if the node is the root.
        """
        return lst[(i - 1) // 2] if i > 0 else float("inf")

    heapq.heapify(lst)  # Build a min-heap from input list

    # A set to keep track of visited indices
    visited_indices = set()

    # List to store the sorted result
    sorted_result = []

    # Start traversal from the root of the heap
    current_index = 0

    #total number of while loop iterations  
    iteration_count = 0

    while len(sorted_result) < len(lst):
        iteration_count += 1
        if not current_index in  visited_indices:
            # Add the current node's value to the result and mark it as visited
            sorted_result.append(lst[current_index])
            visited_indices.add(current_index)
        # Replace the current node value with value of either left, right or parent node
        if parent(current_index) < min(left_child(current_index), right_child(current_index)):
            lst[current_index] = min(left_child(current_index), right_child(current_index))
            current_index = (current_index - 1) // 2  # Move to the parent node
        elif left_child(current_index) < right_child(current_index):
            lst[current_index] = min(right_child(current_index), parent(current_index))
            current_index = 2 * current_index + 1  # Move to the left child
        else:
            lst[current_index] = min(left_child(current_index), parent(current_index))
            current_index = 2 * current_index + 2  # Move to the right child

    return iteration_count
    

def heapsort(arr):
    def sift_down(arr, n, i, build = True):
        elem = arr[i]
        nonlocal iteration_count
        while True:
            if not build:
              iteration_count += 1
            l = 2 * i + 1
            if l >= n:
                arr[i] = elem
                return
            r = 2 * i + 2
            c = l
            if r < n and arr[l] < arr[r]:
                c = r
            if elem >= arr[c]:
                arr[i] = elem
                return
            arr[i] = arr[c]
            i = c

    # total number of sift_down while loop iterations during sorting phase
    # doesn't include iterations during heap build
    iteration_count = 0 

    n = len(arr)
  
    for i in range(n // 2, -1, -1):
        sift_down(arr, n, i)
    for i in range(n - 1, 0, -1):
        t = arr[i]
        arr[i] = arr[0]
        arr[0] = t
        sift_down(arr, i, 0, build = False)
    return iteration_count

def makedata(n):
    res = list(range(n))
    random.seed(a=n)
    random.shuffle(res)
    return res

def sample1(n):
    data = makedata(n)
    return heap_sort_custom(data[:])
def sample2(n):
    data = makedata(n)
    return heapsort(data[:])

a = [pow(2, i) for i in range(1,18)]
fig = plt.figure()
ax = fig.add_subplot()
ax.plot(a, [sample1(n) for n in a], color='blue', label='heap_sort_custom')
ax.plot(a, [sample2(n) for n in a], color='red', label='heapsort')
ax.set_xlabel('Array Size')
ax.set_ylabel('Iteration Count') 
ax.set_title('Comparison of Iteration Counts')
plt.legend()
plt.show()