For my current project I need a circular buffer, which is able to do the following things:
- Push something to it (to the head).
 - Pop something from it (from the tail). I don't need the popped data.
 - Peak the head and tail (no popping).
 - Iterate through the currently contained elements.
 
Here is my C implementation.
circularBuffer.h:
#ifndef CIRCULAR_BUFFER_H
#define CIRCULAR_BUFFER_H
#include <stdlib.h>
#include <stdbool.h>
struct circularBuffer {
  void *data;         // Holds the buffer data.
  size_t headOffset;  // The next position the buffer will write to.
  size_t tailOffset;  // The position of the buffer tail.
  size_t elementSize; // Size of one element contained in the buffer.
  size_t numElements; // Number of elements the buffer is able to hold at once.
  bool   isEmpty;     // Flag, which holds, whether the buffer is empty. 
                      // Allows to fill the whole buffer without losing the ability 
                      // to determine, whether its empty or not.
};
struct circularBuffer *circularBuffer_create(size_t numElements, size_t elementSize);
// Push/Pop
void  circularBuffer_push(struct circularBuffer *buf, void *ptr);
int   circularBuffer_popTail(struct circularBuffer *buf);
// Get data
size_t circularBuffer_containedCount(struct circularBuffer *buf);
void *circularBuffer_peakTail(struct circularBuffer *buf);
void *circularBuffer_peakHead(struct circularBuffer *buf);
#endif /* !defined CIRCULAR_BUFFER_H */
circularBuffer.c:
#include <string.h>
#include <stdio.h>
#include "circularBuffer.h"
struct circularBuffer *circularBuffer_create(size_t numElements, size_t elementSize) {
  struct circularBuffer *tmp = calloc(1, sizeof(struct circularBuffer));
  if (!tmp) { return NULL; }
  tmp->data = malloc(numElements * elementSize);
  if (!tmp->data) {
    free(tmp);
    return NULL;
  }
  tmp->numElements = numElements;
  tmp->elementSize = elementSize;
  tmp->isEmpty = true;
  return tmp;
}
void circularBuffer_push(struct circularBuffer *buf, void *ptr) {
  if (!buf->isEmpty && buf->headOffset == buf->tailOffset) {
    buf->tailOffset = (buf->tailOffset + 1) % buf->numElements;
  }
  memcpy(buf->data + buf->headOffset*buf->elementSize, ptr, buf->elementSize);
  buf->headOffset = (buf->headOffset + 1) % buf->numElements;
  buf->isEmpty = false;
}
void *circularBuffer_peakTail(struct circularBuffer *buf) {
  return buf->isEmpty ? NULL : buf->data + buf->tailOffset * buf->elementSize;
}
void *circularBuffer_peakHead(struct circularBuffer *buf) {
  if (buf->isEmpty)                          { return NULL; }
  else if (buf->data + buf->headOffset != 0) { return buf->data + (buf->headOffset-1) * buf->elementSize; } 
  else                                       { return buf->data + (buf->numElement-1) * buf->elementSize; }
}
int circularBuffer_popTail(struct circularBuffer *buf) {
  if (buf->isEmpty) { return 0; }       // Empty buffer.
  buf->tailOffset = (buf->tailOffset + 1) % buf->numElements; 
  if (buf->tailOffset == buf->headOffset) { 
    buf->isEmpty = true; 
  }
  return 1;
}
size_t circularBuffer_containedCount(struct circularBuffer *buf) {
  if (buf->isEmpty) { return 0; }
  else if (buf->tailOffset < buf->headOffset) { return buf->headOffset - buf->tailOffset; }
  else if (buf->tailOffset == buf->headOffset) { return buf->numElements; }
  else return buf->numElements - buf->tailOffset + buf->headOffset;
}
Notes: I explicitly didn't check whether the buffer pointer is NULL in every function. The user should take care of it.
This basic implementation allows me to do everything I listed above quite nicely I think. Is there anything I could do better according performance? I'll probably use this pretty heavily.
circularBuffer_peakHead()justreturn buf->isEmpty ? NULL : buf->data + buf->headOffset * buf->elementSize;? \$\endgroup\$