I've been reimplementing the fgrep utility in C for fun & study, and so had to implement the Aho-Corasick algorithm.
I'm trying to improve my C skills in general, so any and all feedback would be greatly appreciated.
The project is compiled using gcc and gnu17.
Thanks in advance :]
aho_corasick.h/c
/*
 * FILE: aho_corasick.h
 * ABOUT: Definitions for the aho-corasick string matching algorithm.
 *        See https://en.wikipedia.org/wiki/Aho%E2%80%93Corasick_algorithm
 */
#ifndef AHO_CORASIC_H
#define AHO_CORASIC_H
#include <stdlib.h>
#include <sys/queue.h>
struct ac_node;
typedef struct ac_node ac_node_t;
typedef struct ac_automaton {
    ac_node_t *root;
} ac_automaton_t;
/**
 * Creates a new automaton used in string matching.
 * @param words Dictionary of words to be matched against.
 * @param wc Number of words in words.
 * @return An automaton build from the dictionary.
 */
ac_automaton_t new_automaton(const char **words, size_t wc);
/**
 * Frees the memory allocated by the automaton.
 * @param automaton Automaton to be deleted.
 */
void delete_automaton(ac_automaton_t automaton);
typedef struct match
{
    size_t start, length;
    LIST_ENTRY(match) entries;
} match_t;
typedef LIST_HEAD(match_list_head, match) match_list_head_t;
/**
 * Frees the memory allocated by the match-list.
 * @param head Head of the match-list to be deleted.
 */
void delete_match_list(match_list_head_t *head);
/**
 * Finds all substring matching an of the automatons dictionary.
 * @param str String to be searched.
 * @param automaton Automaton used in matching.
 * @return A list of matches in order of appearence in str.
 */
match_list_head_t *match(const char *str, const ac_automaton_t automaton);
#endif
/*
 * FILE: aho_corasick.c
 * ABOUT: Implementation of the aho-corasick string matching algorithm defined in aho_corasick.h.
 *        See https://en.wikipedia.org/wiki/Aho%E2%80%93Corasick_algorithm
 */
#include <stdlib.h>
#include <sys/queue.h>
#include <stdbool.h>
#include "aho_corasick.h"
#define ASCII_CHAR_COUNT 256 // No. of valid (extended) ASCII characters
struct ac_node
{
    char val;
    bool is_final;
    size_t length;
    struct ac_node *parent;
    struct ac_node *children[ASCII_CHAR_COUNT];
    struct ac_node *suffix_link, *dict_suffix_link;
};
ac_node_t *new_node(ac_node_t *parent, char val)
{
    ac_node_t *node = calloc(1, sizeof(ac_node_t));
    node->parent = parent;
    node->val = val;
    node->length = parent == NULL
        ? 0
        : parent->length + 1;
    
    return node;
}
typedef struct node_q_entry
{
    ac_node_t *node;
    STAILQ_ENTRY(node_q_entry) entries;
} node_q_entry_t;
typedef STAILQ_HEAD(node_q_head, node_q_entry) node_q_head_t;
static void enqueue(node_q_head_t *head, ac_node_t *node)
{
    node_q_entry_t *entry = malloc(sizeof(node_q_entry_t));
    entry->node = node;
    STAILQ_INSERT_TAIL(head, entry, entries);
}
static ac_node_t *dequeue(node_q_head_t *head)
{
    node_q_entry_t *entry = STAILQ_FIRST(head);
    STAILQ_REMOVE_HEAD(head, entries);
    ac_node_t *node = entry->node;
    free(entry);
    return node;
}
static void insert_word(ac_node_t *root, const char *word)
{
    ac_node_t *node = root;
    while (*word != '\0')
    {
        int c = *word;
        if (node->children[c] == NULL)
            node->children[c] = new_node(node, c);
        node = node->children[c];
        ++word;
    }
    node->is_final = true;
}
static bool is_root(const ac_node_t *node)
{
    return node->parent == NULL;
}
static ac_node_t *find_suffix_link(ac_node_t *node)
{
    if (is_root(node)) return NULL;
    if (is_root(node->parent)) return node->parent;
    int val = node->val;
    node = node->parent->suffix_link;
    while (node->children[val] == NULL)
    {
        if (is_root(node)) return node;
        node = node->parent;
    }
    return node->children[val];
}
static ac_node_t *find_dict_suffix_link(ac_node_t *node)
{
    node = node->suffix_link;
    while (node != NULL)
    {
        if (node->is_final) return node;
        node = node->suffix_link;
    }
    
    return NULL;
}
ac_automaton_t new_automaton(const char **words, size_t wc)
{
    ac_node_t *root = new_node(NULL, '\0');
    // Insert words
    for (size_t i = 0; i < wc; ++i)
    {
        const char *w = words[i];
        insert_word(root, w);
    }
    
    // Add (dict) suffix links using breadth-first search
    node_q_head_t head;
    STAILQ_INIT(&head);
    enqueue(&head, root);
    while (!STAILQ_EMPTY(&head))
    {
        ac_node_t *node = dequeue(&head);
        node->suffix_link = find_suffix_link(node);
        node->dict_suffix_link = find_dict_suffix_link(node);
        for (int i = 0; i < ASCII_CHAR_COUNT; ++i)
        {
            ac_node_t *child = node->children[i];
            if (child != NULL) enqueue(&head, child);
        }       
    }
    return (ac_automaton_t) { .root = root };
}
void delete_automaton(ac_automaton_t automaton)
{
    node_q_head_t head;
    STAILQ_INIT(&head);
    enqueue(&head, automaton.root);
    while (!STAILQ_EMPTY(&head))
    {
        ac_node_t *node = dequeue(&head);
        for (int i = 0; i < ASCII_CHAR_COUNT; ++i)
        {
            ac_node_t *child = node->children[i];
            if (child != NULL) enqueue(&head, child);
        }       
        free(node);
    }
}
void delete_match_list(match_list_head_t *head)
{
    match_t *m1 = LIST_FIRST(head);
    while (m1 != NULL) {
        match_t *m2 = LIST_NEXT(m1, entries);
        free(m1);
        m1 = m2;
    }
    LIST_INIT(head);
}
static void add_match(match_list_head_t *head, size_t pos, size_t length)
{
    static match_t *last_match = NULL;
    match_t *m = malloc(sizeof(match_t));
    m->start = pos - length + 1;
    m->length = length;
    if (last_match == NULL)
        LIST_INSERT_HEAD(head, m, entries);
    else
        LIST_INSERT_AFTER(last_match, m, entries);
    last_match = m;
}
match_list_head_t *match(const char *str, const ac_automaton_t automaton)
{
    match_list_head_t *head = malloc(sizeof(match_list_head_t));
    LIST_INIT(head);
    ac_node_t *node = automaton.root;
    size_t pos = 0;
    for (int c = *str; c != '\0'; c = *(++str))
    {
        // Follow children & suffix links
        while (node->children[c] == NULL)
        {
            node = node->suffix_link;
            if (node == NULL) break;
        }
        node = node == NULL
            ? automaton.root
            : node->children[c];
        // Look for matches
        if (node->is_final)
            add_match(head, pos, node->length);
        ac_node_t *dict_node = node->dict_suffix_link;
        while (dict_node != NULL)
        {
            add_match(head, pos, dict_node->length);
            dict_node = dict_node->dict_suffix_link;
        }
        ++pos;
    }
    
    return head;
}
test.c
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <sys/queue.h>
#include "src/aho_corasick.h"
int main(void)
{
    // wikipedia examples
    const char* wiki_words[] = {
        "a",
        "ab",
        "bab",
        "bc",
        "bca",
        "c",
        "caa",
    };
    size_t wiki_wc = sizeof(wiki_words)/sizeof(wiki_words[0]);
    ac_automaton_t wiki_automaton = new_automaton(wiki_words, wiki_wc);
    match_list_head_t *head = match("abccab", wiki_automaton);
    match_t *m = LIST_FIRST(head);
    assert(m->start == 0);
    assert(m->length == 1);
    m = LIST_NEXT(m, entries);
    assert(m->start == 0);
    assert(m->length == 2);
    
    m = LIST_NEXT(m, entries);
    assert(m->start == 1);
    assert(m->length == 2);
    
    m = LIST_NEXT(m, entries);
    assert(m->start == 2);
    assert(m->length == 1);
    
    m = LIST_NEXT(m, entries);
    assert(m->start == 3);
    assert(m->length == 1);
    
    m = LIST_NEXT(m, entries);
    assert(m->start == 4);
    assert(m->length == 1);
    
    m = LIST_NEXT(m, entries);
    assert(m->start == 4);
    assert(m->length == 2);
    m = LIST_NEXT(m, entries);
    assert(m == NULL);
    
    delete_match_list(head);
    assert(LIST_EMPTY(head));
    delete_automaton(wiki_automaton);
    puts("Tests ran succesfully!");
    return EXIT_SUCCESS;
}
Makefile
SRC_DIR := ./src
OBJ_DIR := ./obj
BIN_DIR := ./bin
CC := gcc
CFLAGS := -Wall -ggdb
$(OBJ_DIR)/aho_corasick.o: $(SRC_DIR)/aho_corasick.h $(SRC_DIR)/aho_corasick.c
    @mkdir -p $(OBJ_DIR)
    $(CC) -c -o $(OBJ_DIR)/aho_corasick.o $(SRC_DIR)/aho_corasick.c $(CFLAGS)
.PHONY: test clean
test: $(OBJ_DIR)/aho_corasick.o
    @mkdir -p $(BIN_DIR)
    $(CC) -o $(BIN_DIR)/test test.c $(OBJ_DIR)/* $(CFLAGS)
    $(BIN_DIR)/test
clean:
    rm -rf $(BIN_DIR) $(OBJ_DIR)
Project Layout
.
├── Makefile
├── src
│   ├── aho_corasick.c
│   └── aho_corasick.h
└── test.c
    
calloc()/malloc()failure properly. Review later. Kindly specify whether you're using C99, C11, C17, or C23. \$\endgroup\$truewithout including<stdbool.h>. Are you perchance not compiling with warnings enabled, or compiling withstd=c2x/c23/gnu2x/gnu23, or ignoring the warnings? Can you include the contents of the Makefile? \$\endgroup\$sys/queue.halbeit, and doesn't know abouttrue,false, andboolunless you incude<stdbool.h>. The Linux man page states thatsys/queue.his part of the BSD Standard, so not even POSIX. \$\endgroup\$gcc 11.3.0which should default tognu17, sorry for the confusion 🤦).stdboolshould have been imported at the beginning ofaho_corasick.c. \$\endgroup\$