Skip to main content
2 of 4
added 23 characters in body
Madagascar
  • 10.1k
  • 1
  • 16
  • 52

A small header-only input output library

The library (inspired by stb libraries) attempts to provide some commonly used functions (reading a file into memory, determining the size of a file) that are missing from the C standard library portably (for my own use cases).

Code:

#ifndef IO_H
#define IO_H

#include <stdio.h>
#include <stdbool.h>
#include <stdint.h>

/* 
 * To use, do this:
 *   #define IO_IMPLEMENTATION
 * before you include this file in *one* C file to create the implementation.
 *
 * i.e. it should look like:
 * #include ...
 * #include ...
 *
 * #define IO_IMPLEMENTATION
 * #include "io.h"
 * ...
 * 
 * To make all functions have internal linkage, i.e. be private to the source
 * file, do this:
 *  #define `IO_STATIC` 
 * before including "io.h".
 *
 * i.e. it should look like:
 * #define IO_IMPLEMENTATION
 * #define IO_STATIC
 * #include "io.h"
 * ...
 *
 * You can #define IO_MALLOC, IO_REALLOC, and IO_FREE to avoid using malloc(),
 * realloc(), and free(). Note that all three must be defined at once, or none.
 */

#ifndef IO_DEF
#ifdef IO_STATIC
#define IO_DEF  static
#else
#define IO_DEF  extern
#endif                          /* IO_STATIC */
#endif                          /* IO_DEF */

#if defined(__GNUC__) || defined(__clang__)
#define ATTRIB_NONNULL(...)              __attribute__((nonnull (__VA_ARGS__)))
#define ATTRIB_WARN_UNUSED_RESULT        __attribute__((warn_unused_result))
#else
#define ATTRIB_NONNULL(...)              /* If only. */
#define ATTRIB_WARN_UNUSED_RESULT        /* If only. */
#endif                          /* defined(__GNUC__) || define(__clang__) */

/* 
 * Reads the file pointed to by `stream` to a buffer and returns it.
 * The returned buffer is a null-terminated string.
 * If `nbytes` is not NULL, it shall hold the size of the file.
 * 
 * Returns NULL on memory allocation failure. The caller is responsible for
 * freeing the returned pointer.
 */
IO_DEF char *io_read_file(FILE *stream,
    size_t *nbytes) ATTRIB_NONNULL(1) ATTRIB_WARN_UNUSED_RESULT;

/* 
 * Splits a string into a sequence of tokens. The `delim` argument 
 * specifies a set of bytes that delimit the tokens in the parsed string.
 * If `ntokens` is not NULL, it shall hold the amount of total tokens.
 *
 * Returns an array of pointers to the tokens, or NULL on memory allocation
 * failure. The caller is responsible for freeing the returned pointer.
 */
IO_DEF char **io_split_by_delim(char *s, const char *delim,
    size_t *ntokens) ATTRIB_NONNULL(1, 2) ATTRIB_WARN_UNUSED_RESULT;

/* 
 * Splits a string into lines.
 * A wrapper around `io_split_by_delim()`. It calls the function with "\n" as
 * the delimiter.
 *
 * Returns an array of pointers to the tokens, or NULL on memory allocation
 * failure. The caller is responsible for freeing the returned pointer.
 */
IO_DEF char **io_split_lines(char *s,
    size_t *nlines) ATTRIB_NONNULL(1) ATTRIB_WARN_UNUSED_RESULT;

/* 
 * Reads the next line from the stream pointed to by `stream`. The returned line 
 * is null-terminated and does not contain a newline, if one was found.
 *
 * The memory pointed to by `size` shall contain the length of the 
 * line (including the terminating null character). Else it shall contain 0.
 *  
 * Upon successful completion a pointer is returned and the size of the line is 
 * stored in the memory pointed to by `size`, otherwise NULL is returned and
 * `size` holds 0.
 * 
 * `fgetline()` does not distinguish between end-of-file and error; the routines
 * `feof()` and `ferror()` must be used to determine which occurred. The
 * function also returns NULL on a memory-allocation failure. 
 *
 * Although a null character is always supplied after the line, note that
 * `strlen(line)` will always be smaller than the value is `size` if the line
 * contains embedded null characters.
 */
IO_DEF char *io_read_line(FILE *stream, size_t *size) ATTRIB_NONNULL(1,
    2) ATTRIB_WARN_UNUSED_RESULT;

/*
 * `size` must be a non-null pointer. On success, the function assigns `size`
 * with the number of bytes read and returns true, or returns false elsewise.
 *
 * Note: The file can grow between io_fsize() and a subsequent read.
 */
IO_DEF bool io_fsize(FILE *stream, size_t *size) ATTRIB_NONNULL(1, 2);

/* 
 * Writes `lines` to the file pointed to by `stream`.
 *
 * On success, it returns true, or false elsewise.
 */
IO_DEF bool io_write_lines(FILE *stream, size_t nlines,
    char *lines[const static nlines]) ATTRIB_NONNULL(1, 3);

/* 
 * Writes nbytes from the buffer pointed to by `data` to the file pointed to 
 * by `stream`. 
 *
 * On success, it returns true, or false elsewise.
 */
IO_DEF bool io_write_file(FILE *stream, size_t nbytes,
    const char data[static nbytes]) ATTRIB_NONNULL(1, 3);

#endif                          /* IO_H */

#ifdef IO_IMPLEMENTATION

#if defined(IO_MALLOC) && defined(IO_REALLOC) && defined(IO_FREE)
// Ok.
#elif !defined(IO_MALLOC) && !defined(IO_REALLOC) && !defined(IO_FREE)
// Ok.
#else
#error  "Must define all or none of IO_MALLOC, IO_REALLOC, and IO_FREE."
#endif

#ifndef IO_MALLOC
#define IO_MALLOC(sz)       malloc(sz)
#define IO_REALLOC(p, sz)   realloc(p, sz)
#define IO_FREE(p)          free(p)
#endif

#include <stdlib.h>
#include <string.h>
#include <string.h>

#define CHUNK_SIZE          INT64_C(1024 * 8)
#define TOKEN_CHUNK_SIZE    INT64_C(1024 * 2)

IO_DEF char *io_read_file(FILE *stream, size_t *nbytes)
{
    static const size_t page_size = CHUNK_SIZE;
    char *content = NULL;
    size_t len = 0;

    for (size_t rcount = 1; rcount; len += rcount) {
        void *const tmp = IO_REALLOC(content, len + page_size);

        if (!tmp) {
            IO_FREE(content);
            return content = NULL;
        }
        content = tmp;
        rcount = fread(content + len, 1, page_size - 1, stream);

        if (ferror(stream)) {
            IO_FREE(content);
            return content = NULL;
        }
    }

    if (nbytes) {
        *nbytes = len;
    }
    content[len] = '\0';
    return content;
}

IO_DEF char **io_split_by_delim(char *s, const char *delim, size_t *ntokens)
{
    char **tokens = NULL;
    const size_t chunk_size = TOKEN_CHUNK_SIZE;
    size_t capacity = 0;
    size_t token_count = 0;

    while (*s) {
        if (token_count >= capacity) {
            char **const tmp = IO_REALLOC(tokens,
                sizeof *tokens * (capacity += chunk_size));

            if (!tmp) {
                IO_FREE(tokens);
                return NULL;
            }
            tokens = tmp;
        }
        tokens[token_count++] = s;
        s = strpbrk(s, delim);

        if (s) {
            *s++ = '\0';
        }
    }

    if (ntokens) {
        *ntokens = token_count;
    }
    return tokens;

}

IO_DEF char **io_split_lines(char *s, size_t *nlines)
{
    return io_split_by_delim(s, "\n", nlines);
}

IO_DEF char *io_read_line(FILE *stream, size_t *size)
{
    const size_t page_size = BUFSIZ;
    size_t count = 0;
    size_t capacity = 0;
    char *line = NULL;

    for (;;) {
        if (count >= capacity) {
            char *const tmp = realloc(line, capacity += page_size);

            if (!tmp) {
                free(line);
                return NULL;
            }

            line = tmp;
        }

        int c = getc(stream);

        if (c == EOF || c == '\n') {
            if (c == EOF) {
                if (feof(stream)) {
                    if (!count) {
                        free(line);
                        return NULL;
                    }
                    /* Return what was read. */
                    break;
                }
                free(line);
                return NULL;
            } else {
                break;
            }
        } else {
            line[count] = (char) c;
        }
        ++count;
    }

    /* Shrink line to size if possible. */
    void *tmp = realloc(line, count + 1);

    if (tmp) {
        line = tmp;
    }

    line[count] = '\0';
    *size = ++count;
    return line;
}

/* 
 * Reasons to not use `fseek()` and `ftell()` to compute the size of the file:
 * 
 * Subclause 7.12.9.2 of the C Standard [ISO/IEC 9899:2011] specifies the
 * following behavior when opening a binary file in binary mode:
 * 
 * >> A binary stream need not meaningfully support fseek calls with a whence 
 * >> value of SEEK_END.
 *
 * In addition, footnote 268 of subclause 7.21.3 says:
 *
 * >> Setting the file position indicator to end-of-file, as with 
 * >> fseek(file, 0, SEEK_END) has undefined behavior for a binary stream.
 *
 * For regular files, the file position indicator returned by ftell() is useful
 * only in calls to fseek. As such, the value returned may not be reflect the 
 * physical byte offset. 
 *
 * Reasons to not use other non-standard functions:
 * 1) fseeko()/ftello() - Not specified in the C Standard, and have the same 
 *                        problem as the method above. 
 * 
 * 2) fstat()/stat() - Not specified in the C standard. POSIX specific.
 * 3) _filelength()/_filelengthi64()/GetFileSizeEx() - Not specified in the C
 *                                                     Standard. Windows
 *                                                     specific.
 * As such, we read the file in chunks, which is the only portable way to
 * determine the size of a file.
 *
 * Yet this is highly inefficient. We could use some #ifdefs and revert to
 * platform-specific functions.
 */
IO_DEF bool io_fsize(FILE *stream, size_t *size)
{
/* TODO:
 *
 * #if defined(_WIN32)
 *   Windows also supports fileno(), struct stat, and fstat() as _fileno(),
 *   _fstat(), and struct _stat.
 *
 *   #ifdef _WIN32 
 *   #define fstat  _fstat
 *   #define fileno _fileno
 *   #define stat   _stat 
 *   #endif
 *
 *   But which version to use? See: 
 *   learn.microsoft/en-us/cpp/c-runtime-library/reference/fstat-fstat32-fstat64-fstati64-fstat32i64-fstat64i32?view=msvc-170
 * #elif defined(__unix__) || defined(__linux__) et cetera
 *      struct stat st;
 *
 *      if (fstat(fileno(stream), st) == 0) {
 *          return st.size;
 *      }
 *      return -1?
 *          
 *      }
 * #else 
 *   Fall back to the default and read it in chunks.
 * #endif
 */
    size_t rcount = 0;
    char chunk[CHUNK_SIZE];

    while ((rcount = fread(chunk, 1, CHUNK_SIZE, stream)) > 0) {
        *size += rcount;
    }

    return !ferror(stream);
}

IO_DEF bool io_write_lines(FILE *stream, size_t nlines,
    char *lines[const static nlines])
{
    for (size_t i = 0; i < nlines; ++i) {
        if (fprintf(stream, "%s\n", lines[i]) < 0) {
            return false;
        }
    }

    return true;
}

IO_DEF bool io_write_file(FILE *stream, size_t nbytes,
    const char data[static nbytes])
{
    size_t nwritten = fwrite(data, 1, nbytes, stream);

    if (nwritten != nbytes || ferror(stream)) {
        return false;
    }

    return true;
}

#undef CHUNK_SIZE
#undef TOKEN_CHUNK_SIZE
#endif                          /* IO_IMPLEMENTATION */

And here's how it can be used:

#include <stdio.h>
#include <stdlib.h>
#include <assert.h>

#define IO_IMPLEMENTATION
#define IO_STATIC
#include "io.h"

int main(int argc, char **argv)
{
    if (argc == 2) {
        FILE *fp = fopen(argv[1], "rb");
        assert(fp);
        
        size_t nbytes = 0;
        char *const fbuf = io_read_file(fp, &nbytes);
        assert(fbuf);
        assert(io_write_file(stdout, nbytes, fbuf));

        rewind(fp);

        size_t size = 0;
        bool rv = io_fsize(fp, &size);
        assert(rv);
        printf("Filesize: %zu.\n", size);

        rewind(fp);

        /* size_t nlines = 0; */
        /* char **lines = io_split_lines(fbuf, &nlines); */
        /* assert(lines); */
        /* assert(io_write_lines(stdout, nlines, lines)); */
        
        size_t ntokens = 0;
        char **tokens = io_split_by_delim(fbuf, " \f\n\r\v\t", &ntokens);
        assert(tokens);
        assert(io_write_lines(stdout, ntokens, tokens));

        free(fbuf);
        free(tokens);
        /* free(lines); */    
        fclose(fp);
    }
    
    return EXIT_SUCCESS;
}

Review Request:

Are there any bugs or undefined/implementation-defined behavior in the code?

General coding comments, style, bad practices, et cetera.

Madagascar
  • 10.1k
  • 1
  • 16
  • 52