I've been working on a byte-oriented SHA-256 implementation for educational purposes. I then tested it using the NIST CAVP tests found here.
All tests pass, but I would appreciate some feedback from a third party, especially regarding things I may be too involved to see, such as pointer errors or performance improvements that may have slipped my mind.
sha256.h
#ifndef SHA256_H
#define SHA256_H
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "utils.h"
// Hashes the message_length bytes starting at message and prints the result
// to stdout as 64 lower-case hexadecimal digits followed by a newline.
// The message buffer is only read, never written.
void sha256(uint8_t* message, size_t message_length);
#endif
sha256.c
#include "sha256.h"
// Per-round additive constants: sha256_compression adds round_constants[i]
// into temp1 during round i (64 rounds, one 32-bit word per round).
// NOTE(review): the values look like the K table of FIPS 180-4 section 4.2.2;
// check any edit against the standard, since a single wrong digit changes every digest.
const uint32_t round_constants[64] = {
    0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
    0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
    0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
    0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
    0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
    0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
    0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
    0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
};
// Rotates the 32-bit word x right by k bit positions and returns the result.
//
// The count is reduced modulo 32 before shifting, so k == 0 and k == 32 both
// return x unchanged. Without the masking, those counts would shift a 32-bit
// value by 32 positions, which is undefined behaviour in C.
uint32_t right_rotate(uint32_t x, size_t k) {
    const unsigned int shift = (unsigned int)(k & 31u);
    // (32 - shift) & 31 maps shift == 0 to a left shift of 0 rather than 32.
    return (x >> shift) | (x << ((32u - shift) & 31u));
}
// Bitwise selector: every result bit is taken from y where the matching bit
// of x is 1, and from z where it is 0.
uint32_t choice(uint32_t x, uint32_t y, uint32_t z) {
    const uint32_t taken_from_y = y & x;
    const uint32_t taken_from_z = z & ~x;
    // The two masks never overlap, so XOR simply merges them.
    return taken_from_y ^ taken_from_z;
}
// Bitwise majority vote: a result bit is 1 exactly when at least two of the
// corresponding bits of x, y and z are 1.
uint32_t majority(uint32_t x, uint32_t y, uint32_t z) {
    const uint32_t both_xy = x & y;
    // z casts the deciding vote wherever x and y disagree (or agrees with them).
    const uint32_t z_with_either = z & (x | y);
    return both_xy | z_with_either;
}
// XOR of x rotated right by 2, 13 and 22 bits.
// sha256_compression applies it to working variable `a` when forming temp2.
uint32_t delta0(uint32_t x) {
    const uint32_t rot2 = right_rotate(x, 2);
    const uint32_t rot13 = right_rotate(x, 13);
    const uint32_t rot22 = right_rotate(x, 22);
    return rot2 ^ rot13 ^ rot22;
}
// XOR of x rotated right by 6, 11 and 25 bits.
// sha256_compression applies it to working variable `e` when forming temp1.
uint32_t delta1(uint32_t x) {
    const uint32_t rot6 = right_rotate(x, 6);
    const uint32_t rot11 = right_rotate(x, 11);
    const uint32_t rot25 = right_rotate(x, 25);
    return rot6 ^ rot11 ^ rot25;
}
// XOR of x rotated right by 7 and by 18 bits with x shifted right by 3 bits
// (the shift discards bits instead of wrapping them around).
// Used on msg_schedule[i - 15] when the message schedule is expanded.
uint32_t sigma0(uint32_t x) {
    const uint32_t rot7 = right_rotate(x, 7);
    const uint32_t rot18 = right_rotate(x, 18);
    const uint32_t shr3 = x >> 3;
    return rot7 ^ rot18 ^ shr3;
}
// XOR of x rotated right by 17 and by 19 bits with x shifted right by 10 bits
// (the shift discards bits instead of wrapping them around).
// Used on msg_schedule[i - 2] when the message schedule is expanded.
uint32_t sigma1(uint32_t x) {
    const uint32_t rot17 = right_rotate(x, 17);
    const uint32_t rot19 = right_rotate(x, 19);
    const uint32_t shr10 = x >> 10;
    return rot17 ^ rot19 ^ shr10;
}
// Builds the padded form of `message` in a newly allocated buffer.
//
// Layout written to *buffer: the message bytes, one 0x80 byte (a single 1 bit
// followed by seven 0 bits), as many zero bytes as needed, and finally
// message_length * 8 as a big-endian 64-bit integer. The total size is always
// a multiple of 64 bytes.
//
//   message        - bytes to pad; only read. May be NULL when message_length is 0.
//   buffer         - out: receives the buffer obtained from safe_malloc();
//                    freeing it is the caller's responsibility.
//   message_length - number of bytes in message.
//   buffer_length  - out: receives the size of *buffer in bytes.
void sha256_padding(const uint8_t* message, uint8_t** buffer, size_t message_length, size_t* buffer_length) {
    // Message + 0x80 marker + 8-byte length field, rounded up to whole 64-byte
    // blocks. When message_length + 9 is already a multiple of 64 this adds nothing.
    const size_t padded_length = ((message_length + 9 + 63) / 64) * 64;
    const size_t total_zeros = padded_length - message_length - 9;
    const uint64_t bit_length = (uint64_t)message_length * 8;
    uint8_t* padded = safe_malloc(padded_length * sizeof *padded);

    // memcpy() must not be handed a null source pointer, even for a zero-length
    // copy, so an empty message skips the call entirely.
    if (message_length > 0) {
        memcpy(padded, message, message_length);
    }
    // Add the 1 bit as big-endian using the byte 0x80 = 0b10000000.
    padded[message_length] = 0x80;
    memset(padded + message_length + 1, 0x00, total_zeros);
    // Add the length of the message in bits as a big-endian 64-bit value.
    for (size_t i = 0; i < 8; i++) {
        padded[padded_length - 8 + i] = (uint8_t)(bit_length >> (56 - i * 8));
    }

    *buffer = padded;
    *buffer_length = padded_length;
}
// Mixes one 64-byte block into the running hash state.
//
//   block - exactly 64 readable bytes; interpreted as sixteen big-endian 32-bit words.
//   hash  - eight-word state; read as the starting value and updated in place.
void sha256_compression(const uint8_t* block, uint32_t* hash) {
    uint32_t a = hash[0];
    uint32_t b = hash[1];
    uint32_t c = hash[2];
    uint32_t d = hash[3];
    uint32_t e = hash[4];
    uint32_t f = hash[5];
    uint32_t g = hash[6];
    uint32_t h = hash[7];
    uint32_t msg_schedule[64];

    // The first 16 schedule words are the block itself, read big-endian.
    // Each byte is widened to uint32_t before shifting: a bare uint8_t promotes
    // to signed int, and shifting a byte >= 0x80 left by 24 would overflow that
    // int, which is undefined behaviour.
    for (size_t i = 0; i < 16; i++) {
        const uint8_t* word = block + 4 * i;
        msg_schedule[i] = ((uint32_t)word[0] << 24)
                        | ((uint32_t)word[1] << 16)
                        | ((uint32_t)word[2] << 8)
                        | (uint32_t)word[3];
    }
    // The remaining 48 words are derived from earlier ones.
    for (size_t i = 16; i < 64; i++) {
        msg_schedule[i] = sigma1(msg_schedule[i - 2]) + msg_schedule[i - 7]
                        + sigma0(msg_schedule[i - 15]) + msg_schedule[i - 16];
    }

    // 64 rounds; all additions wrap modulo 2^32 because the operands are uint32_t.
    for (size_t i = 0; i < 64; i++) {
        const uint32_t temp1 = h + delta1(e) + choice(e, f, g) + round_constants[i] + msg_schedule[i];
        const uint32_t temp2 = delta0(a) + majority(a, b, c);
        h = g;
        g = f;
        f = e;
        e = d + temp1;
        d = c;
        c = b;
        b = a;
        a = temp1 + temp2;
    }

    // Fold the working variables back into the state.
    hash[0] += a;
    hash[1] += b;
    hash[2] += c;
    hash[3] += d;
    hash[4] += e;
    hash[5] += f;
    hash[6] += g;
    hash[7] += h;
}
// Hashes `message` and prints the digest to stdout as 64 lower-case hex digits
// followed by a newline.
//
// Whole 64-byte blocks are compressed straight from the caller's buffer. Only
// the final partial block plus its padding (at most 128 bytes) is staged in a
// local array, so no heap copy of the message is made.
//
//   message        - input bytes; only read. May be NULL when message_length is 0.
//   message_length - number of input bytes.
void sha256(uint8_t* message, size_t message_length) {
    // Initial hash value (FIPS 180-4 section 5.3.3).
    uint32_t hash[8] = {
        0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19
    };
    const size_t full_blocks = message_length / 64;
    const size_t tail_length = message_length % 64;
    // The tail must also hold the 0x80 marker and the 8-byte length field; when
    // they do not fit behind the leftover bytes, padding spills into a second block.
    const size_t tail_size = (tail_length + 9 <= 64) ? 64 : 128;
    const uint64_t bit_length = (uint64_t)message_length * 8;
    uint8_t tail[128] = {0};

    // Apply the compression function on every complete block of the input.
    for (size_t i = 0; i < full_blocks; i++) {
        sha256_compression(message + i * 64, hash);
    }

    // Assemble the padded tail: leftover bytes, 0x80, zeros (already present from
    // the initializer), then the message length in bits as a big-endian 64-bit value.
    if (tail_length > 0) {
        memcpy(tail, message + full_blocks * 64, tail_length);
    }
    tail[tail_length] = 0x80;
    for (size_t i = 0; i < 8; i++) {
        tail[tail_size - 8 + i] = (uint8_t)(bit_length >> (56 - i * 8));
    }
    for (size_t offset = 0; offset < tail_size; offset += 64) {
        sha256_compression(tail + offset, hash);
    }

    // uint32_t is not guaranteed to be unsigned int, so convert explicitly to a
    // type that has a matching printf conversion and is at least 32 bits wide.
    for (size_t i = 0; i < 8; i++) {
        printf("%08lx", (unsigned long)hash[i]);
    }
    printf("\n");
}
utils.h
#ifndef UTILS_H
#define UTILS_H
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
// malloc() wrapper: returns the allocation, or prints an error message and
// terminates the program with EXIT_FAILURE if the allocation fails.
void* safe_malloc(size_t size);
// fopen() wrapper: returns the open stream, or prints an error message and
// terminates the program with EXIT_FAILURE if the file cannot be opened.
FILE* safe_fopen(const char* file_path, const char* mode);
// Reads the whole file at file_path into a newly allocated buffer.
// On return *buffer points to the data and *file_length holds its size in bytes;
// the caller must free(*buffer). Terminates the program with EXIT_FAILURE if
// the file cannot be opened, allocated for, or read completely.
void read_file_bytes(const char* file_path, uint8_t** buffer, size_t* file_length);
#endif
utils.c
#include "utils.h"
// Allocates `size` bytes and never returns NULL: on failure it reports the
// error on stderr and terminates the program with EXIT_FAILURE.
//
// A request for zero bytes is served as a one-byte allocation. malloc(0) is
// allowed to return NULL without being out of memory, which would otherwise be
// misreported as an allocation failure (for example when reading an empty file).
// The returned pointer must be released with free().
void* safe_malloc(size_t size) {
    void* ptr = malloc(size > 0 ? size : 1);
    if (ptr == NULL) {
        // Diagnostics go to stderr so they cannot be mistaken for program output.
        fprintf(stderr, "Unable to allocate enough memory!!\n");
        exit(EXIT_FAILURE);
    }
    return ptr;
}
// Opens file_path with the given fopen() mode and never returns NULL: if the
// file cannot be opened it reports the error on stderr and terminates the
// program with EXIT_FAILURE.
//
// A NULL file_path or mode is treated as an open failure instead of being
// passed on to fopen(), which requires valid strings.
// The returned stream must be closed with fclose().
FILE* safe_fopen(const char* file_path, const char* mode) {
    FILE* file_ptr = NULL;
    if (file_path != NULL && mode != NULL) {
        file_ptr = fopen(file_path, mode);
    }
    if (file_ptr == NULL) {
        // Diagnostics go to stderr so they cannot be mistaken for program output.
        fprintf(stderr, "Unable to open the file!!\n");
        exit(EXIT_FAILURE);
    }
    return file_ptr;
}
// Returns the size of an open file in bytes and leaves the stream positioned
// at the start of the file.
//
// The size is obtained by seeking to the end and calling ftell(), so it is
// limited to what a long can represent (2 GiB where long is 32 bits).
// If seeking or ftell() fails, the error is reported on stderr and the program
// terminates with EXIT_FAILURE; otherwise ftell()'s -1 would be converted into
// an enormous size_t.
size_t file_size(FILE* file_ptr) {
    long end_offset = -1L;
    if (fseek(file_ptr, 0L, SEEK_END) == 0) {
        end_offset = ftell(file_ptr);
    }
    if (end_offset < 0 || fseek(file_ptr, 0L, SEEK_SET) != 0) {
        fprintf(stderr, "Unable to determine the size of the file!!\n");
        exit(EXIT_FAILURE);
    }
    return (size_t)end_offset;
}
// Reads the whole file at file_path into a newly allocated byte array.
//
//   file_path   - path of the file to read; opened in binary mode.
//   buffer      - out: receives the allocated array holding the file contents.
//                 Freeing the buffer is the caller's responsibility.
//   file_length - out: receives the number of bytes stored in *buffer.
//
// An empty file yields *file_length == 0 and a valid (non-NULL) *buffer.
// On a short read the error is reported on stderr and the program terminates
// with EXIT_FAILURE.
void read_file_bytes(const char* file_path, uint8_t** buffer, size_t* file_length) {
    FILE* file_ptr = safe_fopen(file_path, "rb");
    const size_t length = file_size(file_ptr);
    // Always request at least one byte so that an empty file does not depend on
    // what malloc(0) returns on this platform.
    uint8_t* data = safe_malloc((length > 0 ? length : 1) * sizeof *data);
    const size_t bytes_read = fread(data, 1, length, file_ptr);

    if (bytes_read != length) {
        fprintf(stderr, "An error occurred while trying to read from the file!!\n");
        free(data);
        fclose(file_ptr);
        exit(EXIT_FAILURE);
    }
    fclose(file_ptr);

    *buffer = data;
    *file_length = length;
}
driver.c
#include "sha256.h"
// Command-line driver: hashes the file named by the first argument and prints
// its digest. Returns EXIT_FAILURE with a usage message on stderr when no file
// name is given; additional arguments are ignored.
int main(int argc, char* argv[]) {
    uint8_t* message = NULL;
    size_t message_length = 0;

    // Without this check argv[1] would be NULL and be handed on as a file path.
    if (argc < 2) {
        fprintf(stderr, "Usage: %s <file>\n", (argc > 0 && argv[0] != NULL) ? argv[0] : "sha256");
        return EXIT_FAILURE;
    }

    read_file_bytes(argv[1], &message, &message_length);
    sha256(message, message_length);
    free(message);
    return 0;
}
Hope I didn't break any rules when writing the question :)) Thanks in advance
right_rotate: "Best practices for circular shift (rotate) operations in C++" shows a version that is safe for any rotate count, including 0 or 32. Your code only uses it with counts between 1 and 31, so this is fine. Either way, compilers recognize the idiom and use a rotate instruction on hardware that supports it.