This library (inspired by the stb libraries) attempts to portably provide, for my own use cases, some commonly used functions that are missing from the C standard library (reading a file into memory, determining the size of a file).
Code:
#ifndef IO_H
#define IO_H
#include <stdio.h>
#include <stdbool.h>
#include <stdint.h>
/*
* Single-header library: this file holds both the declarations and, when
* requested, the implementation.
*
* To use, do this:
* #define IO_IMPLEMENTATION
* before you include this file in *one* C file to create the implementation.
*
* i.e. it should look like:
* #include ...
* #include ...
*
* #define IO_IMPLEMENTATION
* #include "io.h"
* ...
*
* To make all functions have internal linkage, i.e. be private to the source
* file, do this:
* #define IO_STATIC
* before including "io.h".
*
* i.e. it should look like:
* #define IO_IMPLEMENTATION
* #define IO_STATIC
* #include "io.h"
* ...
*
* You can #define IO_MALLOC, IO_REALLOC, and IO_FREE to avoid using malloc(),
* realloc(), and free(). Note that all three must be defined at once, or none.
*/
/*
* IO_DEF is the linkage specifier placed in front of every declaration and
* definition: `static` when IO_STATIC is defined, `extern` otherwise. It is
* only defined here if the includer has not already defined it.
*/
#ifndef IO_DEF
#ifdef IO_STATIC
#define IO_DEF static
#else
#define IO_DEF extern
#endif /* IO_STATIC */
#endif /* IO_DEF */
/*
* Function attributes understood by GCC and Clang; on other compilers the
* macros expand to nothing.
*/
#if defined(__GNUC__) || defined(__clang__)
#define ATTRIB_NONNULL(...) __attribute__((nonnull (__VA_ARGS__)))
#define ATTRIB_WARN_UNUSED_RESULT __attribute__((warn_unused_result))
#else
#define ATTRIB_NONNULL(...) /* If only. */
#define ATTRIB_WARN_UNUSED_RESULT /* If only. */
#endif /* defined(__GNUC__) || defined(__clang__) */
/*
* Reads from `stream` until end-of-file into a newly allocated buffer and
* returns it.
* The returned buffer is a null-terminated string.
* If `nbytes` is not NULL, it shall hold the number of bytes read (the
* terminating null character is not counted). `nbytes` is not written when
* the function fails.
*
* Returns NULL on memory allocation failure or if a read error occurs on
* `stream`. The caller is responsible for freeing the returned pointer.
*/
IO_DEF char *io_read_file(FILE *stream,
size_t *nbytes) ATTRIB_NONNULL(1) ATTRIB_WARN_UNUSED_RESULT;
/*
* Splits a string into a sequence of tokens. The `delim` argument
* specifies a set of bytes that delimit the tokens in the parsed string.
* If `ntokens` is not NULL, it shall hold the amount of total tokens.
*
* The string `s` is modified in place: the delimiter byte that ends a token is
* overwritten with a null character, and the returned pointers point into
* `s`. `s` must therefore be writable and must outlive the returned array.
* Adjacent delimiters are not merged; they produce empty tokens.
*
* Returns an array of pointers to the tokens, or NULL on memory allocation
* failure. NULL is also returned when `s` is the empty string, in which case
* `ntokens` (if not NULL) holds 0. `ntokens` is not written on memory
* allocation failure. The caller is responsible for freeing the returned
* pointer.
*/
IO_DEF char **io_split_by_delim(char *s, const char *delim,
size_t *ntokens) ATTRIB_NONNULL(1, 2) ATTRIB_WARN_UNUSED_RESULT;
/*
* Splits a string into lines.
* A wrapper around `io_split_by_delim()`. It calls the function with "\n" as
* the delimiter, so `s` is modified in place in the same way and a carriage
* return preceding a newline is left in the line.
* If `nlines` is not NULL, it shall hold the number of lines.
*
* Returns an array of pointers to the tokens, or NULL on memory allocation
* failure. The caller is responsible for freeing the returned pointer.
*/
IO_DEF char **io_split_lines(char *s,
size_t *nlines) ATTRIB_NONNULL(1) ATTRIB_WARN_UNUSED_RESULT;
/*
* Reads the next line from the stream pointed to by `stream`. The returned line
* is null-terminated and does not contain a newline, if one was found.
*
* The memory pointed to by `size` shall contain the length of the
* line (including the terminating null character). Else it shall contain 0.
*
* Upon successful completion a pointer is returned and the size of the line is
* stored in the memory pointed to by `size`, otherwise NULL is returned and
* `size` holds 0.
*
* `io_read_line()` does not distinguish between end-of-file and error; the
* routines `feof()` and `ferror()` must be used to determine which occurred.
* The function also returns NULL on a memory-allocation failure.
*
* Although a null character is always supplied after the line, note that
* `strlen(line)` will always be smaller than the value in `size` if the line
* contains embedded null characters.
*
* The returned line is heap-allocated. The caller is responsible for freeing
* the returned pointer.
*/
IO_DEF char *io_read_line(FILE *stream, size_t *size) ATTRIB_NONNULL(1,
2) ATTRIB_WARN_UNUSED_RESULT;
/*
* `size` must be a non-null pointer. On success, the function assigns `size`
* with the number of bytes read and returns true, or returns false otherwise.
*
* The bytes are counted by reading `stream` from its current position up to
* end-of-file, so the stream is positioned at end-of-file afterwards.
*
* Note: The file can grow between io_fsize() and a subsequent read.
*/
IO_DEF bool io_fsize(FILE *stream, size_t *size) ATTRIB_NONNULL(1, 2);
/*
* Writes the `nlines` strings in `lines` to the file pointed to by `stream`,
* each one followed by a newline character.
*
* On success, it returns true. It returns false as soon as one of the writes
* fails; the remaining lines are not written.
*/
IO_DEF bool io_write_lines(FILE *stream, size_t nlines,
char *lines[const static nlines]) ATTRIB_NONNULL(1, 3);
/*
* Writes nbytes from the buffer pointed to by `data` to the file pointed to
* by `stream`.
*
* On success, it returns true. It returns false if fewer than `nbytes` bytes
* were written or if the error indicator of `stream` is set.
*/
IO_DEF bool io_write_file(FILE *stream, size_t nbytes,
const char data[static nbytes]) ATTRIB_NONNULL(1, 3);
#endif /* IO_H */
#ifdef IO_IMPLEMENTATION

/*
 * The allocator hooks must be overridden together: mixing a user-supplied
 * IO_REALLOC with the default free(), for instance, would hand memory to the
 * wrong allocator.
 */
#if defined(IO_MALLOC) && defined(IO_REALLOC) && defined(IO_FREE)
/* Ok: all three hooks were supplied by the user. */
#elif !defined(IO_MALLOC) && !defined(IO_REALLOC) && !defined(IO_FREE)
/* Ok: none supplied, the C library allocator is used below. */
#else
#error "Must define all or none of IO_MALLOC, IO_REALLOC, and IO_FREE."
#endif

#include <stdlib.h>
#include <string.h>

#ifndef IO_MALLOC
#define IO_MALLOC(sz) malloc(sz)
#define IO_REALLOC(p, sz) realloc(p, sz)
#define IO_FREE(p) free(p)
#endif

/*
 * Buffer growth steps, in bytes (CHUNK_SIZE) and in token slots
 * (TOKEN_CHUNK_SIZE). They are written as plain size_t constant expressions:
 * INT64_C() only accepts a single unsuffixed integer constant, not an
 * expression such as `1024 * 8`. Both macros are #undef'd at the end of the
 * implementation.
 */
#define CHUNK_SIZE ((size_t)1024 * 8)
#define TOKEN_CHUNK_SIZE ((size_t)1024 * 2)
/*
 * Reads `stream` up to end-of-file into a heap buffer that grows by
 * CHUNK_SIZE bytes per pass, null-terminates it and returns it.
 *
 * If `nbytes` is not NULL it receives the number of bytes read. Returns NULL
 * (with the buffer released and `nbytes` untouched) when the buffer cannot be
 * grown or when the stream's error indicator is set after a read.
 */
IO_DEF char *io_read_file(FILE *stream, size_t *nbytes)
{
    static const size_t step = CHUNK_SIZE;
    char *buf = NULL;
    size_t total = 0;
    size_t got;

    do {
        /* Make room for one more chunk behind what has been read so far. */
        void *const grown = IO_REALLOC(buf, total + step);

        if (grown == NULL) {
            IO_FREE(buf);
            return NULL;
        }
        buf = grown;

        /* One byte per chunk stays unused so the terminator always fits. */
        got = fread(buf + total, 1, step - 1, stream);
        if (ferror(stream)) {
            IO_FREE(buf);
            return NULL;
        }
        total += got;
    } while (got != 0);

    if (nbytes != NULL) {
        *nbytes = total;
    }
    buf[total] = '\0';
    return buf;
}
/*
 * Splits `s` in place into tokens separated by any byte found in `delim`.
 *
 * Each delimiter that ends a token is overwritten with '\0' and the returned
 * array holds pointers into `s`. Adjacent delimiters yield empty tokens. If
 * `ntokens` is not NULL it receives the number of tokens.
 *
 * Returns the token array, which the caller must free. Returns NULL on
 * allocation failure (`ntokens` is then left untouched), and also when `s`
 * is empty (`ntokens` is then set to 0).
 */
IO_DEF char **io_split_by_delim(char *s, const char *delim, size_t *ntokens)
{
    const size_t chunk_size = TOKEN_CHUNK_SIZE;
    char **tokens = NULL;
    size_t capacity = 0;
    size_t token_count = 0;

    /*
     * strpbrk() returns NULL once no delimiter is left, i.e. after the last
     * token of an input that does not end in a delimiter. `s` must therefore
     * be tested for NULL before it is dereferenced.
     */
    while (s != NULL && *s != '\0') {
        if (token_count >= capacity) {
            char **const tmp = IO_REALLOC(tokens,
                sizeof *tokens * (capacity + chunk_size));

            if (!tmp) {
                IO_FREE(tokens);
                return NULL;
            }
            tokens = tmp;
            capacity += chunk_size;
        }

        tokens[token_count++] = s;

        /* Terminate the token and continue right after the delimiter. */
        s = strpbrk(s, delim);
        if (s != NULL) {
            *s++ = '\0';
        }
    }

    if (ntokens) {
        *ntokens = token_count;
    }
    return tokens;
}
/*
 * Splits `s` into lines by delegating to io_split_by_delim() with "\n" as
 * the only delimiter. The result and `nlines` are passed through unchanged.
 */
IO_DEF char **io_split_lines(char *s, size_t *nlines)
{
    static const char newline[] = "\n";
    char **const lines = io_split_by_delim(s, newline, nlines);

    return lines;
}
/*
 * Reads one line from `stream` into a heap buffer and returns it without the
 * trailing newline. The buffer is always null-terminated.
 *
 * On success `*size` receives the line length including the terminating null
 * character. On failure NULL is returned and `*size` holds 0. Failure means:
 * end-of-file before any byte was read, a read error, or an allocation
 * failure; use feof()/ferror() to tell the first two apart.
 *
 * All memory is obtained and released through IO_REALLOC/IO_FREE so that
 * user-supplied allocators are honoured. The caller frees the result.
 */
IO_DEF char *io_read_line(FILE *stream, size_t *size)
{
    const size_t page_size = BUFSIZ;
    size_t count = 0;
    size_t capacity = 0;
    char *line = NULL;

    /* Every failure path below leaves 0 in `*size`, as documented. */
    *size = 0;

    for (;;) {
        /*
         * Grow before reading, so that line[count] is always a valid slot,
         * both for the next byte and for the final terminator.
         */
        if (count >= capacity) {
            char *const tmp = IO_REALLOC(line, capacity + page_size);

            if (!tmp) {
                IO_FREE(line);
                return NULL;
            }
            line = tmp;
            capacity += page_size;
        }

        const int c = getc(stream);

        if (c == '\n') {
            break;
        }
        if (c == EOF) {
            /* A read error, or end-of-file with nothing read: no line. */
            if (!feof(stream) || count == 0) {
                IO_FREE(line);
                return NULL;
            }
            /* End-of-file after some bytes: return what was read. */
            break;
        }
        line[count++] = (char) c;
    }

    /* Shrink to fit if possible; the larger buffer is still valid if not. */
    void *const shrunk = IO_REALLOC(line, count + 1);

    if (shrunk) {
        line = shrunk;
    }
    line[count] = '\0';
    *size = count + 1;
    return line;
}
/*
* Reasons to not use `fseek()` and `ftell()` to compute the size of the file:
*
* Subclause 7.21.9.2 of the C Standard [ISO/IEC 9899:2011] specifies the
* following behavior for fseek() on a stream opened in binary mode:
*
* >> A binary stream need not meaningfully support fseek calls with a whence
* >> value of SEEK_END.
*
* In addition, footnote 268 of subclause 7.21.3 says:
*
* >> Setting the file position indicator to end-of-file, as with
* >> fseek(file, 0, SEEK_END) has undefined behavior for a binary stream.
*
* For text streams, the file position indicator returned by ftell() is useful
* only in calls to fseek(). As such, the value returned may not reflect the
* physical byte offset.
*
* Reasons to not use other non-standard functions:
* 1) fseeko()/ftello() - Not specified in the C Standard, and have the same
* problem as the method above.
*
* 2) fstat()/stat() - Not specified in the C standard. POSIX specific.
* 3) _filelength()/_filelengthi64()/GetFileSizeEx() - Not specified in the C
* Standard. Windows
* specific.
* As such, we read the file in chunks, which is the only portable way to
* determine the size of a file.
*
* Yet this is highly inefficient. We could use some #ifdefs and revert to
* platform-specific functions.
*/
/*
 * Counts the bytes between the current position of `stream` and end-of-file
 * by reading them in chunks, and stores the count in `*size`.
 *
 * Returns true on success. Returns false if the stream's error indicator is
 * set after reading, in which case `*size` is left untouched. The stream is
 * positioned at end-of-file afterwards.
 */
IO_DEF bool io_fsize(FILE *stream, size_t *size)
{
    /* TODO:
     *
     * #if defined(_WIN32)
     * Windows also supports fileno(), struct stat, and fstat() as _fileno(),
     * _fstat(), and struct _stat.
     *
     * #ifdef _WIN32
     * #define fstat _fstat
     * #define fileno _fileno
     * #define stat _stat
     * #endif
     *
     * But which version to use? See:
     * learn.microsoft.com/en-us/cpp/c-runtime-library/reference/fstat-fstat32-fstat64-fstati64-fstat32i64-fstat64i32?view=msvc-170
     * #elif defined(__unix__) || defined(__linux__) et cetera
     * struct stat st;
     *
     * if (fstat(fileno(stream), &st) == 0) {
     *     *size = st.st_size;
     *     return true;
     * }
     * return false;
     * #else
     * Fall back to the default and read it in chunks.
     * #endif
     */
    char chunk[CHUNK_SIZE];
    size_t total = 0;
    size_t rcount;

    /*
     * Count into a local that starts at 0: the result must not depend on
     * whatever value the caller left in `*size`.
     */
    while ((rcount = fread(chunk, 1, sizeof chunk, stream)) > 0) {
        total += rcount;
    }

    if (ferror(stream)) {
        return false;
    }
    *size = total;
    return true;
}
/*
 * Prints the `nlines` strings of `lines` to `stream`, each followed by a
 * newline. Stops and returns false at the first fprintf() failure; returns
 * true once every line has been written.
 */
IO_DEF bool io_write_lines(FILE *stream, size_t nlines,
                           char *lines[const static nlines])
{
    size_t idx = 0;

    while (idx < nlines) {
        const int printed = fprintf(stream, "%s\n", lines[idx]);

        if (printed < 0) {
            return false;
        }
        ++idx;
    }
    return true;
}
/*
 * Writes `nbytes` bytes from `data` to `stream` with a single fwrite() call.
 * Returns true only if all bytes were written and the stream's error
 * indicator is clear.
 */
IO_DEF bool io_write_file(FILE *stream, size_t nbytes,
                          const char data[static nbytes])
{
    const size_t written = fwrite(data, 1, nbytes, stream);

    /* ferror() is consulted only when the byte count already matches. */
    return written == nbytes && !ferror(stream);
}
/* Remove the implementation-only helper macros so they are not visible to the code that follows the #include. */
#undef CHUNK_SIZE
#undef TOKEN_CHUNK_SIZE
#endif /* IO_IMPLEMENTATION */
And here's how it can be used:
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#define IO_IMPLEMENTATION
#define IO_STATIC
#include "io.h"
/*
 * Demo driver. Given exactly one argument (a file path), it:
 *   1. reads the whole file and echoes it to stdout,
 *   2. prints the size reported by io_fsize(),
 *   3. splits the contents on whitespace and prints one token per line.
 * With any other argument count it does nothing and exits successfully.
 *
 * Library calls are checked with ordinary `if` statements rather than
 * assert(): assert() expands to nothing when NDEBUG is defined, which would
 * silently remove the calls (and their output) from such builds.
 *
 * io_split_lines(fbuf, &nlines) can be used in place of io_split_by_delim()
 * to split on newlines only.
 */
int main(int argc, char **argv)
{
    int status = EXIT_FAILURE;
    FILE *fp = NULL;
    char *fbuf = NULL;
    char **tokens = NULL;
    size_t nbytes = 0;
    size_t size = 0;
    size_t ntokens = 0;
    bool expect_tokens = false;

    if (argc != 2) {
        return EXIT_SUCCESS;
    }

    fp = fopen(argv[1], "rb");
    if (!fp) {
        perror(argv[1]);
        return EXIT_FAILURE;
    }

    fbuf = io_read_file(fp, &nbytes);
    if (!fbuf) {
        fprintf(stderr, "%s: could not read file\n", argv[1]);
        goto cleanup;
    }
    if (!io_write_file(stdout, nbytes, fbuf)) {
        fprintf(stderr, "could not write file contents to stdout\n");
        goto cleanup;
    }

    /* io_fsize() counts by reading, so start again from the beginning. */
    rewind(fp);
    if (!io_fsize(fp, &size)) {
        fprintf(stderr, "%s: could not determine file size\n", argv[1]);
        goto cleanup;
    }
    printf("Filesize: %zu.\n", size);

    /*
     * io_split_by_delim() returns NULL both on allocation failure and for an
     * empty string. Record beforehand (the call modifies fbuf) whether the
     * string is non-empty, so that only a real failure is reported.
     */
    expect_tokens = fbuf[0] != '\0';
    tokens = io_split_by_delim(fbuf, " \f\n\r\v\t", &ntokens);
    if (!tokens && expect_tokens) {
        fprintf(stderr, "out of memory while splitting into tokens\n");
        goto cleanup;
    }

    /* Nothing to print (and no array to pass) when there are no tokens. */
    if (ntokens > 0 && !io_write_lines(stdout, ntokens, tokens)) {
        fprintf(stderr, "could not write tokens to stdout\n");
        goto cleanup;
    }

    status = EXIT_SUCCESS;

cleanup:
    free(tokens);
    free(fbuf);
    fclose(fp);
    return status;
}
Review Request:
Are there any bugs or undefined/implementation-defined behavior in the code?
General coding comments, style, bad practices, et cetera.