This C program implements a so-called natural merge sort that identifies existing runs in the list and exploits them in order to sort in sub-linearithmic time whenever possible. The running time of this sort is \$\Theta(n \log m)\$, where \$n\$ is the length of the list and \$m\$ is the number of ordered runs (ascending or descending sublists). Since \$m\$ is at most \$\lceil n / 2 \rceil\$, this runs in worst-case linearithmic time. Here we go:
linked_list.h
#ifndef LINKED_LIST_H
#define LINKED_LIST_H
#include <stdlib.h>
typedef struct linked_list_node_t {
int value;
struct linked_list_node_t* next;
} linked_list_node_t;
typedef struct {
linked_list_node_t* head;
linked_list_node_t* tail;
size_t size;
} linked_list_t;
void linked_list_init(linked_list_t* list);
void linked_list_append(linked_list_t* list, int value);
void linked_list_sort(linked_list_t* list);
int linked_list_is_sorted(linked_list_t* list);
void linked_list_display(linked_list_t* list);
#endif /* LINKED_LIST_H */
linked_list.c
#include "linked_list.h"
#include <stdlib.h>
#include <stdio.h>
void linked_list_init(linked_list_t* list)
{
list->head = NULL;
list->tail = NULL;
}
void linked_list_append(linked_list_t* list, int value)
{
linked_list_node_t* node = malloc(sizeof *node);
node->value = value;
node->next = NULL;
if (list->head)
{
list->tail->next = node;
list->tail = node;
}
else
{
list->head = node;
list->tail = node;
}
list->size++;
}
int linked_list_is_sorted(linked_list_t* list)
{
linked_list_node_t* node1;
linked_list_node_t* node2;
if (list->size < 2)
{
return 1;
}
node1 = list->head;
node2 = node1->next;
while (node2)
{
if (node1->value > node2->value)
{
return 0;
}
node1 = node2;
node2 = node2->next;
}
return 1;
}
void linked_list_display(linked_list_t* list)
{
char* separator = "";
for (linked_list_node_t* node = list->head; node; node = node->next)
{
printf("%s%d", separator, node->value);
separator = ", ";
}
}
static linked_list_node_t* reverse(linked_list_node_t* head)
{
linked_list_node_t* new_head = head;
linked_list_node_t* tmp_head;
tmp_head = head;
head = head->next;
tmp_head->next = NULL;
while (head)
{
tmp_head = head;
head = head->next;
tmp_head->next = new_head;
new_head = tmp_head;
}
return new_head;
}
static linked_list_node_t* merge(linked_list_node_t* left_list,
linked_list_node_t* right_list)
{
linked_list_node_t* merged_head = NULL;
linked_list_node_t* merged_tail = NULL;
linked_list_node_t* tmp_node;
if (left_list->value < right_list->value)
{
merged_head = left_list;
merged_tail = left_list;
left_list = left_list->next;
}
else
{
merged_head = right_list;
merged_tail = right_list;
right_list = right_list->next;
}
while (left_list && right_list)
{
if (left_list->value < right_list->value)
{
tmp_node = left_list;
left_list = left_list->next;
merged_tail->next = tmp_node;
merged_tail = tmp_node;
}
else
{
tmp_node = right_list;
right_list = right_list->next;
merged_tail->next = tmp_node;
merged_tail = tmp_node;
}
}
// Add the rest to the merged list:
if (left_list)
{
merged_tail->next = left_list;
}
else
{
merged_tail->next = right_list;
}
return merged_head;
}
typedef struct {
linked_list_node_t** run_length_array;
size_t head_index;
size_t tail_index;
size_t size;
size_t capacity;
} run_list_t;
static void run_list_enqueue(run_list_t* run_list, linked_list_node_t* run_head)
{
run_list->run_length_array[run_list->tail_index] = run_head;
run_list->tail_index = (run_list->tail_index + 1) % run_list->capacity;
run_list->size++;
}
static linked_list_node_t*
scan_ascending_run(run_list_t* run_list,
linked_list_node_t* run_start_node)
{
linked_list_node_t* node1 = run_start_node;
linked_list_node_t* probe;
linked_list_node_t* before_probe = NULL;
int last_read_integer = run_start_node->value;
probe = node1->next;
while (probe && last_read_integer <= probe->value)
{
last_read_integer = probe->value;
before_probe = probe;
probe = probe->next;
}
if (probe)
{
if (before_probe)
{
before_probe->next = NULL;
}
}
run_list_enqueue(run_list, run_start_node);
return probe;
}
static linked_list_node_t*
scan_descending_run(run_list_t* run_list,
linked_list_node_t* run_start_node)
{
linked_list_node_t* node1 = run_start_node;
linked_list_node_t* probe;
linked_list_node_t* before_probe = NULL;
int last_read_integer = run_start_node->value;
probe = node1->next;
while (probe && last_read_integer >= probe->value)
{
last_read_integer = probe->value;
before_probe = probe;
probe = probe->next;
}
if (probe)
{
if (before_probe)
{
before_probe->next = NULL;
}
}
run_start_node = reverse(run_start_node);
run_list_enqueue(run_list, run_start_node);
return probe;
}
static void run_list_build(run_list_t* run_list, linked_list_t* list)
{
linked_list_node_t* node1;
linked_list_node_t* node2;
linked_list_node_t* tmp_node;
run_list->capacity = list->size / 2 + 1;
run_list->size = 0;
run_list->run_length_array =
malloc(run_list->capacity * sizeof(linked_list_node_t*));
run_list->head_index = 0;
run_list->tail_index = 0;
node1 = list->head;
node2 = node1->next;
while (node1)
{
node2 = node1->next;
if (!node2)
{
run_list_enqueue(run_list, node1);
return;
}
// Get run direction (ascending 1,2,3... or descending 3,2,1):
if (node1->value <= node2->value)
{
node1 = scan_ascending_run(run_list, node1);
}
else
{
node1 = scan_descending_run(run_list, node1);
}
}
}
static linked_list_node_t* run_list_dequeue(run_list_t* run_list)
{
linked_list_node_t* ret = run_list->run_length_array[run_list->head_index];
run_list->head_index = (run_list->head_index + 1) % run_list->capacity;
run_list->size--;
return ret;
}
static size_t run_list_size(run_list_t* run_list)
{
return run_list->size;
}
void linked_list_sort(linked_list_t* list)
{
run_list_t run_list;
linked_list_node_t* left_run;
linked_list_node_t* right_run;
linked_list_node_t* merged_run;
if (!list || list->size < 2)
{
// Trivially sorted or non-existent list once here.
return;
}
run_list_build(&run_list, list);
while (run_list_size(&run_list) != 1)
{
left_run = run_list_dequeue(&run_list);
right_run = run_list_dequeue(&run_list);
merged_run = merge(left_run, right_run);
run_list_enqueue(&run_list, merged_run);
}
list->head = run_list_dequeue(&run_list);
}
main.c
#include "linked_list.h"
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <sys/time.h>
void load_linked_list(linked_list_t* list, size_t len)
{
size_t i;
int value;
srand((unsigned int) time(NULL));
for (i = 0; i < len; ++i)
{
value = rand();
linked_list_append(list, value);
}
}
void load_presorted_list(linked_list_t* list, size_t len)
{
int i;
size_t chunk_length = len / 4;
for (i = 0; i < len; ++i)
{
linked_list_append(list, i % chunk_length);
}
}
static size_t milliseconds()
{
struct timeval t;
gettimeofday(&t, NULL);
return t.tv_sec * 1000 + t.tv_usec / 1000;
}
int main(int argc, const char * argv[]) {
size_t ta;
size_t tb;
linked_list_t list;
linked_list_t large_list;
linked_list_t large_presorted_list;
linked_list_init(&list);
linked_list_init(&large_list);
linked_list_init(&large_presorted_list);
linked_list_append(&list, 5);
linked_list_append(&list, 1);
linked_list_append(&list, 2);
linked_list_append(&list, 9);
linked_list_append(&list, 6);
linked_list_append(&list, 7);
linked_list_append(&list, 10);
linked_list_append(&list, 8);
linked_list_append(&list, 4);
linked_list_append(&list, 3);
// Small demo:
linked_list_display(&list);
puts("");
linked_list_sort(&list);
linked_list_display(&list);
puts("");
// Large benchmark:
load_linked_list(&large_list, 1000 * 1000);
ta = milliseconds();
linked_list_sort(&large_list);
tb = milliseconds();
printf("Sorted large array in %zu milliseconds. Sorted: %d\n",
tb - ta,
linked_list_is_sorted(&large_list));
// Large presorted benchmark:
load_presorted_list(&large_presorted_list, 1000 * 1000);
ta = milliseconds();
linked_list_sort(&large_presorted_list);
tb = milliseconds();
printf("Sorted large presorted array in %zu milliseconds. Sorted: %d\n",
tb - ta,
linked_list_is_sorted(&large_presorted_list));
return 0;
}
Critique request
Please tell me how can I improve my C programming routine.