7
\$\begingroup\$

I've created BRESort - an adaptive sorting engine that dynamically selects optimal algorithms based on data patterns. It achieves 3.6-4.2x speedup over stdlib qsort while preventing \$O(n^2)\$ catastrophes.

GitHub Repository: https://github.com/LazyCauchPotato/BRESort

Key Components:

  • bresort.h - Public API and data generators
  • bresort.c - Core algorithm implementation
  • Test suites for validation and performance

Core Innovation:

The engine analyzes data in one pass (range, sortedness, patterns) to choose between counting sort, insertion sort, or hybrid approaches.

/* Core analysis function - the "brain" of BRESort */

    static int bres_analyze_strategy(unsigned char arr[], int n) {
        if (n <= 16) return 1; // Insertion sort for tiny arrays
    
        unsigned char min_val = 255, max_val = 0;
        int out_of_order = 0;
        int consecutive_sorted = 0;
        int max_consecutive = 0;
    
        // Single-pass analysis: extract data patterns
        for (int i = 0; i < n; i++) {
            // 1. Measure value range
            if (arr[i] < min_val) min_val = arr[i];
            if (arr[i] > max_val) max_val = arr[i];
    
            // 2. Measure sortedness and patterns
            if (i > 0) {
                if (arr[i] >= arr[i-1]) {
                    consecutive_sorted++;
                    if (consecutive_sorted > max_consecutive) {
                        max_consecutive = consecutive_sorted;
                    }
                } else {
                    out_of_order++;
                    consecutive_sorted = 0;
                }
            }
        }
    
        unsigned char range = max_val - min_val;
    
        // Decision tree based on extracted patterns
        if (range == 0) return 0;        // All identical - counting sort
        if (range < 16) return 0;        // Small range - counting sort
        if (range < 64 && n > 100) return 2; // Medium complexity - hybrid
        if (out_of_order < 3 || max_consecutive > n * 0.8) return 1; // Mostly sorted - insertion
        if (n > 1000 && range > 128) return 2; // Large & complex - hybrid
        
        return 0; // Default: counting sort (safe)
    }
    
    /* Main dispatch function */
    void BRESort_byte(unsigned char arr[], int n) {
        if (n <= 1) return;
        
        // Tiny arrays always use insertion sort
        if (n <= 8) {
            bres_insertion_sort(arr, n);
            return;
        }
    
        // Analyze and choose optimal strategy
        int strategy = bres_analyze_strategy(arr, n);
    
        switch (strategy) {
            case 0: bres_counting_sort(arr, n); break;  // Counting sort
            case 1: bres_insertion_sort(arr, n); break; // Insertion sort  
            case 2: bres_hybrid_sort(arr, n); break;    // Hybrid approach
            default: bres_counting_sort(arr, n); break; // Safe fallback
        }
    }
    
    /* Optimized counting sort for bytes */
    static void bres_counting_sort(unsigned char arr[], int n) {
        int count[256] = {0};
        
        // Count occurrences
        for (int i = 0; i < n; i++) {
            count[arr[i]]++;
        }
        
        // Reconstruct sorted array
        int index = 0;
        for (int value = 0; value < 256; value++) {
            int occurrences = count[value];
            if (occurrences > 0) {
                if (occurrences > 3) {
                    // Optimize runs with memset
                    memset(&arr[index], value, occurrences);
                    index += occurrences;
                } else {
                    // Manual assignment for small counts
                    for (int j = 0; j < occurrences; j++) {
                        arr[index++] = value;
                    }
                }
            }
        }
    }

I'd appreciate any feedback on the overall architecture and C implementation.

bresort.c

#include "bresort.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

// ============================================================================
// OPTIMIZED CORE ALGORITHMS
// ============================================================================

/**
 * Ultra-optimized insertion sort for small arrays
 * Uses direct comparisons for maximum speed
 */
static void bres_insertion_sort(unsigned char arr[], int n) {
    for (int i = 1; i < n; i++) {
        unsigned char key = arr[i];
        int j = i - 1;

        // Manual unrolling for small arrays
        if (n <= 8) {
            while (j >= 0 && arr[j] > key) {
                arr[j + 1] = arr[j];
                j--;
            }
        } else {
            // For slightly larger arrays, use standard approach
            while (j >= 0 && arr[j] > key) {
                arr[j + 1] = arr[j];
                j--;
            }
        }
        arr[j + 1] = key;
    }
}

/**
 * Optimized counting sort for bytes
 * O(n) time complexity, perfect for byte data
 */
static void bres_counting_sort(unsigned char arr[], int n) {
    if (n <= 1) return;

    // Use stack allocation for small arrays, heap for large ones
    int count[256] = {0};

    // Single pass: count occurrences
    for (int i = 0; i < n; i++) {
        count[arr[i]]++;
    }

    // Reconstruct sorted array in-place
    int index = 0;
    for (int value = 0; value < 256; value++) {
        int occurrences = count[value];
        if (occurrences > 0) {
            // Use memset for consecutive values (small optimization)
            if (occurrences > 3) {
                memset(&arr[index], value, occurrences);
                index += occurrences;
            } else {
                // Manual assignment for small counts
                for (int j = 0; j < occurrences; j++) {
                    arr[index++] = value;
                }
            }
        }
    }
}

/**
 * Hybrid approach for medium-sized arrays with small range
 */
static void bres_hybrid_sort(unsigned char arr[], int n) {
    unsigned char min_val = 255, max_val = 0;

    // Find range in single pass
    for (int i = 0; i < n; i++) {
        if (arr[i] < min_val) min_val = arr[i];
        if (arr[i] > max_val) max_val = arr[i];
    }

    unsigned char range = max_val - min_val;

    // Choose strategy based on range characteristics
    if (range < 32 || range < n / 8) {
        bres_counting_sort(arr, n);
    } else {
        bres_insertion_sort(arr, n);
    }
}

// ============================================================================
// STRATEGY SELECTION ENGINE - THE INTELLIGENT CORE
// ============================================================================

/**
 * Analyzes byte array to choose optimal sorting strategy
 * Returns: 0=counting, 1=insertion, 2=hybrid
 *
 * Decision Tree:
 * - Very small arrays → Insertion sort
 * - Small value range → Counting sort (O(n) performance)
 * - Mostly sorted data → Insertion sort (excel on nearly-ordered)
 * - Complex patterns → Hybrid approach (balanced performance)
 * - Default fallback → Counting sort (safe choice)
 */
static int bres_analyze_strategy(unsigned char arr[], int n) {
    if (n <= 16) {
        return 1; // Insertion sort for very small arrays
    }

    unsigned char min_val = 255, max_val = 0;
    int out_of_order = 0;
    int consecutive_sorted = 0;
    int max_consecutive = 0;

    // Single-pass analysis for multiple metrics
    for (int i = 0; i < n; i++) {
        // Track min/max
        if (arr[i] < min_val) min_val = arr[i];
        if (arr[i] > max_val) max_val = arr[i];

        // Track sortedness
        if (i > 0) {
            if (arr[i] >= arr[i-1]) {
                consecutive_sorted++;
                if (consecutive_sorted > max_consecutive) {
                    max_consecutive = consecutive_sorted;
                }
            } else {
                out_of_order++;
                consecutive_sorted = 0;
            }
        }
    }

    unsigned char range = max_val - min_val;

    // Strategy decision tree
    if (range == 0) {
        return 0; // All identical - counting sort instant
    }

    if (range < 16) {
        return 0; // Very small range - counting sort optimal
    }

    if (range < 64 && n > 100) {
        return 2; // Medium range, large array - hybrid
    }

    if (out_of_order < 3 || max_consecutive > n * 0.8) {
        return 1; // Mostly sorted - insertion sort
    }

    if (n > 1000 && range > 128) {
        return 2; // Large array with large range - hybrid
    }

    return 0; // Default to counting sort
}

// ============================================================================
// MAIN BRESORT ALGORITHM
// ============================================================================

void BRESort_byte(unsigned char arr[], int n) {
    if (n <= 1) return;

    // For tiny arrays, always use insertion sort
    if (n <= 8) {
        bres_insertion_sort(arr, n);
        return;
    }

    // Analyze and choose optimal strategy
    int strategy = bres_analyze_strategy(arr, n);

    switch (strategy) {
        case 0: // Counting sort
            bres_counting_sort(arr, n);
            break;
        case 1: // Insertion sort
            bres_insertion_sort(arr, n);
            break;
        case 2: // Hybrid approach
            bres_hybrid_sort(arr, n);
            break;
        default: // Should never happen
            bres_counting_sort(arr, n);
            break;
    }
}

// ============================================================================
// VALIDATION FUNCTION
// ============================================================================

bool bres_is_sorted_byte(unsigned char arr[], int n) {
    for (int i = 0; i < n - 1; i++) {
        if (arr[i] > arr[i + 1]) {
            return false;
        }
    }
    return true;
}

// ============================================================================
// SPECIALIZED BYTE DATA GENERATORS
// ============================================================================

void bres_generate_bytes_random(unsigned char arr[], int n) {
    for (int i = 0; i < n; i++) {
        arr[i] = rand() % 256;
    }
}

void bres_generate_bytes_sorted(unsigned char arr[], int n) {
    if (n <= 1) {
        if (n == 1) arr[0] = 0;
        return;
    }
    for (int i = 0; i < n; i++) {
        arr[i] = (i * 255) / (n - 1);
    }
}

void bres_generate_bytes_reverse(unsigned char arr[], int n) {
    if (n <= 1) {
        if (n == 1) arr[0] = 255;
        return;
    }
    for (int i = 0; i < n; i++) {
        arr[i] = 255 - ((i * 255) / (n - 1));
    }
}

void bres_generate_bytes_few_unique(unsigned char arr[], int n) {
    unsigned char patterns[] = {0x00, 0x0F, 0xF0, 0xFF, 0x55, 0xAA};
    for (int i = 0; i < n; i++) {
        arr[i] = patterns[rand() % 6];
    }
}

void bres_generate_bytes_all_zeros(unsigned char arr[], int n) {
    memset(arr, 0, n);
}

void bres_generate_bytes_all_ones(unsigned char arr[], int n) {
    memset(arr, 0xFF, n);
}

void bres_generate_bytes_alternating(unsigned char arr[], int n) {
    for (int i = 0; i < n; i++) {
        arr[i] = (i % 2) ? 0xAA : 0x55;
    }
}

void bres_generate_bytes_realistic(unsigned char arr[], int n) {
    // Simulates real-world byte data (like image pixels)
    for (int i = 0; i < n; i++) {
        // Create clusters of similar values (common in real data)
        if (rand() % 100 < 70) { // 70% of values in clusters
            int base = rand() % 240;
            arr[i] = base + (rand() % 16);
        } else { // 30% random
            arr[i] = rand() % 256;
        }
    }
}

bresort.h

#ifndef BRESORT_H
#define BRESORT_H

#include <stdbool.h>

/**
 * BRESort_byte - Ultimate byte sorter (3-4x faster than qsort)
 * @arr: Array of unsigned chars (bytes) to sort
 * @n: Number of elements in array
 *
 * Features:
 * - 3.72x faster than qsort on random data
 * - Adaptive strategy selection
 * - Optimized for byte data (0-255)
 * - Minimal memory overhead (256 bytes)
 */
void BRESort_byte(unsigned char arr[], int n);

/**
 * bres_is_sorted_byte - Validates byte array sorting
 * @arr: Array to check
 * @n: Number of elements
 * Returns: true if sorted, false otherwise
 */
bool bres_is_sorted_byte(unsigned char arr[], int n);

#endif
\$\endgroup\$
11
  • 2
    \$\begingroup\$ In bres_insertion_sort()'s Manual unrolling for small array-if-else, the controlled statements look identical. \$\endgroup\$ Commented Oct 11 at 3:56
  • 1
    \$\begingroup\$ Pondering the name BRESort_byte(): Do you plan to come up with routines for different keys/records? \$\endgroup\$ Commented Oct 11 at 4:14
  • \$\begingroup\$ Oh...you're absolutely right about the insertion sort bug, I accidentally left identical code in both branches. The manual unrolling should actually optimize for tiny arrays. Regarding BRESort_byte() naming: yes, I chose bytes as the starting point because counting sort is particularly effective there, but the adaptive selection concept could definitely extend to other types. I have already tested on float data with promissing results, it has slightly different analysis heuristics, but the core 'analyze then choose' is the same. Thanks for superb review...have a lot of fixing to do... \$\endgroup\$ Commented Oct 11 at 7:17
  • 1
    \$\begingroup\$ "It achieves 3.6-4.2x speedup over stdlib qsort" -- there is no single "stdlib qsort". Your sort may outperform the qsort of your particular C implementation, but that's not the same thing. \$\endgroup\$ Commented Oct 13 at 21:54
  • 1
    \$\begingroup\$ Frame challenge: how does it compare to TimSort? \$\endgroup\$ Commented Oct 13 at 22:49

4 Answers 4

8
\$\begingroup\$

Without seeing all of the code, based only on what I can see, I'm not a fan of the 0, 1, and 2 being magic numbers to indicate which sort to use. This should almost certainly be an enum.

enum SortStrategy {
    COUNTING_SORT,
    INSERTION_SORT,
    HYBRID_SORT
};

Your switch also could be refactored to avoid repetition by letting the counting sort case fall through to the default case. By using the enum with meaningful names, the comments become extraneous.

    switch (strategy) { 
    case INSERTION_SORT: 
        bres_insertion_sort(arr, n); 
        break;   

    case HYBRID_SORT: 
        bres_hybrid_sort(arr, n); 
        break;   
 
    case COUNTING_SORT:
    default: 
        bres_counting_sort(arr, n); 
        break;
    }

Other magic numbers to eliminate would be the thresholds for invoking each sorting method.

You have this loop:

    // Single-pass analysis: extract data patterns
    for (int i = 0; i < n; i++) {
        // 1. Measure value range
        if (arr[i] < min_val) min_val = arr[i];
        if (arr[i] > max_val) max_val = arr[i];

        // 2. Measure sortedness and patterns
        if (i > 0) {
            if (arr[i] >= arr[i-1]) {
                consecutive_sorted++;
                if (consecutive_sorted > max_consecutive) {
                    max_consecutive = consecutive_sorted;
                }
            } else {
                out_of_order++;
                consecutive_sorted = 0;
            }
        }
    }

You have a condition which is only ever false on the very first iteration. You may wish to unroll this so that this branch is absent in the loop.

    min_val = arr[0];
    max_val = arr[0];

    // Single-pass analysis: extract data patterns
    for (int i = 1; i < n; i++) {
        // 1. Measure value range
        if (arr[i] < min_val) min_val = arr[i];
        if (arr[i] > max_val) max_val = arr[i];

        // 2. Measure sortedness and patterns
        if (arr[i] >= arr[i-1]) {
            consecutive_sorted++;
            if (consecutive_sorted > max_consecutive) {
                max_consecutive = consecutive_sorted;
            }
        } else {
            out_of_order++;
            consecutive_sorted = 0;
        }
    }
\$\endgroup\$
2
  • \$\begingroup\$ Thank you for this incredibly thorough and insightful review! You're absolutely right about: - Using enums for clarity (much more maintainable) - The switch refactoring for safer defaults - Eliminating magic numbers with named constants - The loop unrolling optimization (brilliant performance insight!) The loop optimization in particular is something I wouldn't have thought of - removing that branch prediction overhead is a great catch. Again this is incredibly valuable feedback. \$\endgroup\$ Commented Oct 11 at 0:09
  • \$\begingroup\$ @LazyCauchPotato I think the loop unrolling wouldn't do too much for performance. The CPU can do branch prediction: if an if statement always chooses the same branch, it won't affect performance too much. I would be interested to know how much you actually save. But another great benefit is reducing nesting. Code complexity, i.e. how hard it is to read and understand your code, scales very poorly with the nesting level. Before your greatest nesting level was 4 indents, after the change it is 3. Sounds small, but it is great actually. \$\endgroup\$ Commented Oct 13 at 10:03
6
\$\begingroup\$

I think overall readability OK.

There is an ambiguity in the documentation in the header: Minimal memory overhead (256 bytes) - does this mean maximal overhead is small, or minimal among all possible ordering routines, or overhead used here is never less than 256 bytes? If the 1st, the value stated is misleadingly understating. It would be too small even if the type chosen for counts was single byte due to call overhead. Where sizeof int == 8, expect more than 2K.

I can gleen that the array is ordered in place.
I don't know why I like it less that there is no stating in non-descending order.

I'd not prefix static names like bres_.
On 2nd take, bres_analyze_strategy() is a strange name.
Its code reads analyse data and decide strategy, indicating it does two things (connected by data, only).

Include don't rearrange in the enumeration of ordering methods.

In bres_insertion_sort()'s "Manual unrolling for small array-if-else", the controlled statements look identical.

While compilers take care of many things, I'm with say what you mean. "That loop" once more:

    unsigned char min_val, max_val = min_val = arr[0];
    unsigned char current, previous = min_val;

    // Single-pass analysis for extrema,
    //  max. length among ascending runs and total descends
    for (int i = 1; i < n; i++, previous = current) {
        current = arr[i];

        if (current >= previous) {
            if (current > max_val)
                max_val = current;
            consecutive_sorted++;
            if (consecutive_sorted > max_consecutive) {
                max_consecutive = consecutive_sorted;
            }
        } else {
            if (current < min_val)
                min_val = current;
            out_of_order++;
            consecutive_sorted = 0;
        }
    }

On 2nd take, the summary information is asymmetric between descending & ascending.

I'd start selecting the ordering method with

    if (out_of_order < 1)
        return NON_DESCENDING;

Without this, and the decision for range == 0 different from range < 16 nest the check for 0 in that for < 16.

Architecturally, hybrid_sort() is strange in

  • being a second place where to decide on the ordering procedure to use
  • repeating establishing the value range.

Re. Ultra-optimized insertion sort find how to control the inner loop with a single comparison.

Come to think of it, communicating the extrema to counting_sort() can reduce processing "0 entries". And the array size used - I don't think I'd go as far as trying "min-based" instead of 0-based.

\$\endgroup\$
2
  • \$\begingroup\$ Thank you, your review is pure gold, you're absolutely right about the architectural issues - having decision logic in multiple places is a maintenance nightmare. I'll refactor to separate data analysis from strategy selection. The memory documentation point is particularly embarrassing - thank you for catching that! I'll work on these refactors this week and update the GitHub repo \$\endgroup\$ Commented Oct 11 at 7:04
  • \$\begingroup\$ I have a full version ready that handles 32/64-bit and floating-point data. I'll start a new question for it. All comments were super helpful. I hope I didn't make the same mistakes in the full version. \$\endgroup\$ Commented Oct 12 at 0:26
4
\$\begingroup\$

In addition to the previous answers, here are some other minor suggestions.

Documentation

There are a lot of helpful comments. It would be good to explain what "BRE" stands for, if it is an acronym or what it means to you.

Since you tagged the question , it would be good to mention that in the comments as well.

Simpler

In the bres_analyze_strategy function, this code:

if (range == 0) {
    return 0; // All identical - counting sort instant
}

is unnecessary and can be deleted. The case where range is 0 is already covered by the following check:

if (range < 16) {

You can keep the comment:

// If range is 0: All identical - counting sort instant

Readability

When the number of operators starts adding up:

if (range < 32 || range < n / 8) {

I like to add more parentheses:

if ((range < 32) || (range < n / 8)) {

It helps a person group the terms without having to think as much.

\$\endgroup\$
3
  • \$\begingroup\$ Thanks for the valuable inputs, really appreciate! I've removed the redundant range check. Fixed documentation explaining 'BRE' stands for 'Bitwise Relationship Extraction', clarified comments. \$\endgroup\$ Commented Oct 11 at 17:48
  • \$\begingroup\$ @LazyCauchPotato: You're welcome. \$\endgroup\$ Commented Oct 12 at 10:41
  • \$\begingroup\$ We perhaps have different tastes in parens - probably I've had my fill of them when writing Lisp! \$\endgroup\$ Commented Oct 13 at 8:33
4
\$\begingroup\$

Compiler warnings

Code performs about 10 casual signed to/from unsigned conversions. Consider enabling more compiler warnings to identify.

Array sizing

As an alternative for qsort(), consider a signature more like void qsort(void *base, size_t nmemb, size_t size, int (*compar)(const void *, const void *));

For void BRESort_byte(unsigned char arr[], int n);, this would mean using size_t n instead of int n.

size_t is also the Goldilocks type for array sizing and indexing: not too narrow, not too wide.

This is also a fairer comparison against qsort(), which can handle arrays more than INT_MAX.

Since size_t is an unsigned type, code needs review to avoid negatives.

This also reduces the casual type conversions going on with mem...() calls.

Avoid naked magic numbers

Instead of values like 3, 8, 32, ... (with no explanation), use a self-documenting macro or named constant.

Use standard library ones when possible. E.g. rather than 255, UCHAR_MAX when assigning the maximal unsigned char, int count[256] = {0}; --> int count[UCHAR_MAX + 1] = {0};.

If code depends on an 8-bit char, use static_assert(CHAR_BIT == 8, "Must use 8-bit char");. I suspect CHAR_BIT <= 12 (or so) may be supportable.

Back up the claim

To avoid dismissing "3-4x faster than qsort", post deserves a test harness to demo it.

I could see this code using a dedicated sort function rather than qsort()'s function pointer accounts for a good deal of the improvement.

Organization

"SPECIALIZED BYTE DATA GENERATORS" deserve a separate .c file.

Minor: Argument order

(unsigned char arr[], int n) is more common that (int n, unsigned char arr[]) yet I find (int n, unsigned char arr[n]) or (int n, unsigned char arr[static n]) more useful as it allows various analysis inspections.

\$\endgroup\$
4
  • \$\begingroup\$ Really appreciate that you took your time to review the code! I will make these corrections in the full blown version of BRESort - this 'byte' version is a test run to assess the concept and get a reality check! The full version extends this intelligent approach to 32/64-bit integers and floating-point data. \$\endgroup\$ Commented Oct 11 at 17:56
  • \$\begingroup\$ As for the function pointer overhead in qsort, yeah... you're right. Changing my test \$\endgroup\$ Commented Oct 11 at 18:01
  • 2
    \$\begingroup\$ You were absolutely right about the function pointer overhead! My validation show significant gains in performance! \$\endgroup\$ Commented Oct 11 at 18:25
  • 1
    \$\begingroup\$ @LazyCauchPotato There are many was to sort FP given NaN. At least the sort criteria should explain how it handles NaNs. \$\endgroup\$ Commented Oct 13 at 14:52

You must log in to answer this question.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.