I've created BRESort - an adaptive sorting engine that dynamically selects
optimal algorithms based on data patterns. It achieves 3.6-4.2x speedup over stdlib qsort while preventing \$O(n^2)\$ catastrophes.
GitHub Repository: https://github.com/LazyCauchPotato/BRESort
Key Components:
bresort.h- Public API and data generatorsbresort.c- Core algorithm implementation- Test suites for validation and performance
Core Innovation:
The engine analyzes data in one pass (range, sortedness, patterns) to choose between counting sort, insertion sort, or hybrid approaches.
/* Core analysis function - the "brain" of BRESort */
static int bres_analyze_strategy(unsigned char arr[], int n) {
if (n <= 16) return 1; // Insertion sort for tiny arrays
unsigned char min_val = 255, max_val = 0;
int out_of_order = 0;
int consecutive_sorted = 0;
int max_consecutive = 0;
// Single-pass analysis: extract data patterns
for (int i = 0; i < n; i++) {
// 1. Measure value range
if (arr[i] < min_val) min_val = arr[i];
if (arr[i] > max_val) max_val = arr[i];
// 2. Measure sortedness and patterns
if (i > 0) {
if (arr[i] >= arr[i-1]) {
consecutive_sorted++;
if (consecutive_sorted > max_consecutive) {
max_consecutive = consecutive_sorted;
}
} else {
out_of_order++;
consecutive_sorted = 0;
}
}
}
unsigned char range = max_val - min_val;
// Decision tree based on extracted patterns
if (range == 0) return 0; // All identical - counting sort
if (range < 16) return 0; // Small range - counting sort
if (range < 64 && n > 100) return 2; // Medium complexity - hybrid
if (out_of_order < 3 || max_consecutive > n * 0.8) return 1; // Mostly sorted - insertion
if (n > 1000 && range > 128) return 2; // Large & complex - hybrid
return 0; // Default: counting sort (safe)
}
/* Main dispatch function */
void BRESort_byte(unsigned char arr[], int n) {
if (n <= 1) return;
// Tiny arrays always use insertion sort
if (n <= 8) {
bres_insertion_sort(arr, n);
return;
}
// Analyze and choose optimal strategy
int strategy = bres_analyze_strategy(arr, n);
switch (strategy) {
case 0: bres_counting_sort(arr, n); break; // Counting sort
case 1: bres_insertion_sort(arr, n); break; // Insertion sort
case 2: bres_hybrid_sort(arr, n); break; // Hybrid approach
default: bres_counting_sort(arr, n); break; // Safe fallback
}
}
/* Optimized counting sort for bytes */
static void bres_counting_sort(unsigned char arr[], int n) {
int count[256] = {0};
// Count occurrences
for (int i = 0; i < n; i++) {
count[arr[i]]++;
}
// Reconstruct sorted array
int index = 0;
for (int value = 0; value < 256; value++) {
int occurrences = count[value];
if (occurrences > 0) {
if (occurrences > 3) {
// Optimize runs with memset
memset(&arr[index], value, occurrences);
index += occurrences;
} else {
// Manual assignment for small counts
for (int j = 0; j < occurrences; j++) {
arr[index++] = value;
}
}
}
}
}
I'd appreciate any feedback on the overall architecture and C implementation.
bresort.c
#include "bresort.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
// ============================================================================
// OPTIMIZED CORE ALGORITHMS
// ============================================================================
/**
* Ultra-optimized insertion sort for small arrays
* Uses direct comparisons for maximum speed
*/
static void bres_insertion_sort(unsigned char arr[], int n) {
for (int i = 1; i < n; i++) {
unsigned char key = arr[i];
int j = i - 1;
// Manual unrolling for small arrays
if (n <= 8) {
while (j >= 0 && arr[j] > key) {
arr[j + 1] = arr[j];
j--;
}
} else {
// For slightly larger arrays, use standard approach
while (j >= 0 && arr[j] > key) {
arr[j + 1] = arr[j];
j--;
}
}
arr[j + 1] = key;
}
}
/**
* Optimized counting sort for bytes
* O(n) time complexity, perfect for byte data
*/
static void bres_counting_sort(unsigned char arr[], int n) {
if (n <= 1) return;
// Use stack allocation for small arrays, heap for large ones
int count[256] = {0};
// Single pass: count occurrences
for (int i = 0; i < n; i++) {
count[arr[i]]++;
}
// Reconstruct sorted array in-place
int index = 0;
for (int value = 0; value < 256; value++) {
int occurrences = count[value];
if (occurrences > 0) {
// Use memset for consecutive values (small optimization)
if (occurrences > 3) {
memset(&arr[index], value, occurrences);
index += occurrences;
} else {
// Manual assignment for small counts
for (int j = 0; j < occurrences; j++) {
arr[index++] = value;
}
}
}
}
}
/**
* Hybrid approach for medium-sized arrays with small range
*/
static void bres_hybrid_sort(unsigned char arr[], int n) {
unsigned char min_val = 255, max_val = 0;
// Find range in single pass
for (int i = 0; i < n; i++) {
if (arr[i] < min_val) min_val = arr[i];
if (arr[i] > max_val) max_val = arr[i];
}
unsigned char range = max_val - min_val;
// Choose strategy based on range characteristics
if (range < 32 || range < n / 8) {
bres_counting_sort(arr, n);
} else {
bres_insertion_sort(arr, n);
}
}
// ============================================================================
// STRATEGY SELECTION ENGINE - THE INTELLIGENT CORE
// ============================================================================
/**
* Analyzes byte array to choose optimal sorting strategy
* Returns: 0=counting, 1=insertion, 2=hybrid
*
* Decision Tree:
* - Very small arrays → Insertion sort
* - Small value range → Counting sort (O(n) performance)
* - Mostly sorted data → Insertion sort (excel on nearly-ordered)
* - Complex patterns → Hybrid approach (balanced performance)
* - Default fallback → Counting sort (safe choice)
*/
static int bres_analyze_strategy(unsigned char arr[], int n) {
if (n <= 16) {
return 1; // Insertion sort for very small arrays
}
unsigned char min_val = 255, max_val = 0;
int out_of_order = 0;
int consecutive_sorted = 0;
int max_consecutive = 0;
// Single-pass analysis for multiple metrics
for (int i = 0; i < n; i++) {
// Track min/max
if (arr[i] < min_val) min_val = arr[i];
if (arr[i] > max_val) max_val = arr[i];
// Track sortedness
if (i > 0) {
if (arr[i] >= arr[i-1]) {
consecutive_sorted++;
if (consecutive_sorted > max_consecutive) {
max_consecutive = consecutive_sorted;
}
} else {
out_of_order++;
consecutive_sorted = 0;
}
}
}
unsigned char range = max_val - min_val;
// Strategy decision tree
if (range == 0) {
return 0; // All identical - counting sort instant
}
if (range < 16) {
return 0; // Very small range - counting sort optimal
}
if (range < 64 && n > 100) {
return 2; // Medium range, large array - hybrid
}
if (out_of_order < 3 || max_consecutive > n * 0.8) {
return 1; // Mostly sorted - insertion sort
}
if (n > 1000 && range > 128) {
return 2; // Large array with large range - hybrid
}
return 0; // Default to counting sort
}
// ============================================================================
// MAIN BRESORT ALGORITHM
// ============================================================================
void BRESort_byte(unsigned char arr[], int n) {
if (n <= 1) return;
// For tiny arrays, always use insertion sort
if (n <= 8) {
bres_insertion_sort(arr, n);
return;
}
// Analyze and choose optimal strategy
int strategy = bres_analyze_strategy(arr, n);
switch (strategy) {
case 0: // Counting sort
bres_counting_sort(arr, n);
break;
case 1: // Insertion sort
bres_insertion_sort(arr, n);
break;
case 2: // Hybrid approach
bres_hybrid_sort(arr, n);
break;
default: // Should never happen
bres_counting_sort(arr, n);
break;
}
}
// ============================================================================
// VALIDATION FUNCTION
// ============================================================================
bool bres_is_sorted_byte(unsigned char arr[], int n) {
for (int i = 0; i < n - 1; i++) {
if (arr[i] > arr[i + 1]) {
return false;
}
}
return true;
}
// ============================================================================
// SPECIALIZED BYTE DATA GENERATORS
// ============================================================================
void bres_generate_bytes_random(unsigned char arr[], int n) {
for (int i = 0; i < n; i++) {
arr[i] = rand() % 256;
}
}
void bres_generate_bytes_sorted(unsigned char arr[], int n) {
if (n <= 1) {
if (n == 1) arr[0] = 0;
return;
}
for (int i = 0; i < n; i++) {
arr[i] = (i * 255) / (n - 1);
}
}
void bres_generate_bytes_reverse(unsigned char arr[], int n) {
if (n <= 1) {
if (n == 1) arr[0] = 255;
return;
}
for (int i = 0; i < n; i++) {
arr[i] = 255 - ((i * 255) / (n - 1));
}
}
void bres_generate_bytes_few_unique(unsigned char arr[], int n) {
unsigned char patterns[] = {0x00, 0x0F, 0xF0, 0xFF, 0x55, 0xAA};
for (int i = 0; i < n; i++) {
arr[i] = patterns[rand() % 6];
}
}
void bres_generate_bytes_all_zeros(unsigned char arr[], int n) {
memset(arr, 0, n);
}
void bres_generate_bytes_all_ones(unsigned char arr[], int n) {
memset(arr, 0xFF, n);
}
void bres_generate_bytes_alternating(unsigned char arr[], int n) {
for (int i = 0; i < n; i++) {
arr[i] = (i % 2) ? 0xAA : 0x55;
}
}
void bres_generate_bytes_realistic(unsigned char arr[], int n) {
// Simulates real-world byte data (like image pixels)
for (int i = 0; i < n; i++) {
// Create clusters of similar values (common in real data)
if (rand() % 100 < 70) { // 70% of values in clusters
int base = rand() % 240;
arr[i] = base + (rand() % 16);
} else { // 30% random
arr[i] = rand() % 256;
}
}
}
bresort.h
#ifndef BRESORT_H
#define BRESORT_H
#include <stdbool.h>
/**
* BRESort_byte - Ultimate byte sorter (3-4x faster than qsort)
* @arr: Array of unsigned chars (bytes) to sort
* @n: Number of elements in array
*
* Features:
* - 3.72x faster than qsort on random data
* - Adaptive strategy selection
* - Optimized for byte data (0-255)
* - Minimal memory overhead (256 bytes)
*/
void BRESort_byte(unsigned char arr[], int n);
/**
* bres_is_sorted_byte - Validates byte array sorting
* @arr: Array to check
* @n: Number of elements
* Returns: true if sorted, false otherwise
*/
bool bres_is_sorted_byte(unsigned char arr[], int n);
#endif
bres_insertion_sort()'s Manual unrolling for small array-if-else, the controlled statements look identical. \$\endgroup\$BRESort_byte(): Do you plan to come up with routines for different keys/records? \$\endgroup\$stdlibqsort" -- there is no single "stdlibqsort". Your sort may outperform theqsortof your particular C implementation, but that's not the same thing. \$\endgroup\$