Skip to main content
2 of 4
I did not include the rest of the library code since I didn't write most of it, but I realize now it's still important to understand what's being done.

Binary (de)serialization in C++

I'm trying to write a simple library for binary (de)serialization in C++. I know this can be tricky to get right, so I'd really like to have my code reviewed to see whether anything stands out or can be improved. Here is the full repo containing all the code.

binaryio/reader.h:

#include <cstddef>
#include <cstring>
#include <memory>
#include <span>
#include <stdexcept>
#include <string>
#include <type_traits>
#include <vector>

#include "binaryio/interface.h"
#include "binaryio/swap.h"

#ifndef BINARYIO_READER_H
#define BINARYIO_READER_H

namespace binaryio {

/// Sequential, bounds-checked reader over a caller-owned contiguous buffer.
///
/// The reader stores raw pointers only: it never copies or frees the buffer,
/// so the buffer must stay alive for as long as the reader is used. Values
/// are passed through swap_if_needed_in_place() with the configured byte
/// order after being copied out of the buffer.
class BinaryReader : public IBinaryIO {
public:
    /// Reads from [begin, end) using the native byte order.
    BinaryReader(void* const begin, void* const end)
        : BinaryReader(begin, end, endian::native) {}

    /// Reads from [begin, end) using the given byte order.
    BinaryReader(void* const begin, void* const end, const endian& endianness)
        : m_begin{static_cast<char*>(begin)}, m_end{static_cast<char*>(end)},
          m_current{m_begin}, m_endian{endianness} {}

    /// Reads from [begin, begin + size) using the native byte order.
    BinaryReader(void* const begin, const std::ptrdiff_t& size)
        : BinaryReader(begin, size, endian::native) {}

    /// Reads from [begin, begin + size) using the given byte order.
    BinaryReader(void* const begin, const std::ptrdiff_t& size,
                 const endian& endianness)
        : BinaryReader(begin,
                       static_cast<void*>(static_cast<char*>(begin) + size),
                       endianness) {}

    /// Moves the read position to `offset` bytes from the start of the buffer.
    /// Seeking to exactly one-past-the-end is allowed.
    /// @throws std::out_of_range if `offset` is negative or past the end.
    void seek(std::ptrdiff_t offset) override {
        // Compare offsets rather than pointers so that no out-of-range
        // pointer is ever formed.
        const std::ptrdiff_t length = m_end - m_begin;
        if (offset < 0 || offset > length)
            throw std::out_of_range("out of bounds seek");

        m_current = m_begin + offset;
    }

    /// Returns the current read position as a byte offset from the start.
    size_t tell() const override {
        return static_cast<size_t>(m_current - m_begin);
    }

    /// Reads one value of type T at the current position and advances past it.
    /// T must be trivially copyable and default constructible.
    /// @throws std::out_of_range if fewer than sizeof(T) bytes remain.
    template <typename T>
    T read() {
        static_assert(std::is_trivially_copyable_v<T>,
                      "read<T>() requires a trivially copyable type");
        if (sizeof(T) > remaining())
            throw std::out_of_range("out of bounds read");

        // memcpy instead of dereferencing a casted pointer: the buffer gives
        // no alignment guarantee for T and holds no live T object.
        T val;
        std::memcpy(&val, m_current, sizeof(T));
        swap_if_needed_in_place(val, m_endian);
        m_current += sizeof(T);
        return val;
    }

    /// Returns the characters from the current position up to (excluding)
    /// the first NUL byte, looking at no more than `max_len` bytes. A
    /// `max_len` of 0, or one larger than the remaining data, means "up to
    /// the end of the buffer".
    ///
    /// NOTE: unlike read() and read_many(), this does NOT advance the read
    /// position; callers have to seek() past the string themselves. That
    /// behaviour is kept so existing callers are not affected.
    std::string read_string(size_t max_len = 0) {
        const size_t available = remaining();
        const size_t limit =
            (max_len == 0 || max_len > available) ? available : max_len;
        if (limit == 0)
            return {};

        const void* const terminator = std::memchr(m_current, '\0', limit);
        const size_t length =
            terminator != nullptr
                ? static_cast<size_t>(static_cast<const char*>(terminator) -
                                      m_current)
                : limit;
        return std::string(m_current, length);
    }

    /// Reads `count` consecutive values of type T and advances past them.
    ///
    /// The values are returned in an owning vector. The previous return type
    /// (std::span<T>) had no storage to refer to and could not be
    /// instantiated, so no existing caller depends on it.
    /// @throws std::out_of_range if `count` is negative or the values do not
    ///         fit in the remaining data; the read position is left unchanged.
    template <typename T>
    std::vector<T> read_many(int count) {
        if (count < 0)
            throw std::out_of_range("out of bound read");

        const auto wanted = static_cast<size_t>(count);
        // Divide instead of multiplying so the check itself cannot overflow.
        if (wanted > remaining() / sizeof(T))
            throw std::out_of_range("out of bound read");

        std::vector<T> vals;
        vals.reserve(wanted);
        for (size_t i = 0; i < wanted; ++i)
            vals.push_back(read<T>());
        return vals;
    }

    /// Returns the byte order assumed for the data being read.
    endian endianness() const { return m_endian; }

    /// Sets the byte order assumed for the data being read.
    void set_endianness(endian new_endian) { m_endian = new_endian; }

    /// Switches big -> little; any other value becomes big.
    void swap_endianness() {
        if (m_endian == endian::big)
            m_endian = endian::little;
        else
            m_endian = endian::big;
    }

private:
    /// Number of bytes between the read position and the end of the buffer
    /// (0 if the range is empty or inverted).
    size_t remaining() const {
        return m_end > m_current ? static_cast<size_t>(m_end - m_current) : 0;
    }

    char* m_begin;
    char* m_end;
    char* m_current;
    endian m_endian{endian::native};
};
} // namespace binaryio
#endif

binaryio/writer.h:

#include <memory>
#include <span>
#include <vector>

#include "binaryio/align.h"
#include "binaryio/interface.h"
#include "binaryio/swap.h"

#ifndef BINARYIO_WRITER_H
#define BINARYIO_WRITER_H

namespace binaryio {
/// Growable in-memory byte sink with a seekable write position.
///
/// Values written through write() are passed through
/// swap_if_needed_in_place() with the configured byte order first.
class BinaryWriter : public IBinaryIO {
public:
    // Based on
    // https://github.com/zeldamods/oead/blob/master/src/include/oead/util/binary_reader.h
    BinaryWriter() = default;

    /// Creates a writer that emits values in the given byte order.
    BinaryWriter(endian byte_order) : m_endian{byte_order} {}

    /// Hands the accumulated bytes to the caller by moving them out of the
    /// writer. The write position is not reset, so tell() still reports the
    /// old offset afterwards.
    std::vector<uint8_t> finalize() { return std::move(m_storage); }

    /// Sets the write position. Seeking past the end does not grow the
    /// buffer by itself; the gap is zero-filled by the next write_bytes().
    /// Precondition: `offset` must not be negative (a negative value is
    /// converted to a very large unsigned offset).
    void seek(std::ptrdiff_t offset) override {
        m_offset = static_cast<size_t>(offset);
    }

    /// Returns the current write position in bytes.
    size_t tell() const override { return m_offset; }

    /// Copies `size` bytes from `data` to the current position, growing the
    /// buffer as needed, and advances the position by `size`.
    void write_bytes(const uint8_t* data, size_t size) {
        const size_t end_offset = m_offset + size;
        if (end_offset > m_storage.size())
            m_storage.resize(end_offset);

        // A zero-length write must touch neither the destination (the offset
        // may equal size(), where operator[] would be out of range) nor the
        // source (which may be null for an empty string_view).
        if (size != 0)
            std::memcpy(m_storage.data() + m_offset, data, size);
        m_offset = end_offset;
    }

    /// Writes one trivially copyable, non-pointer value in the configured
    /// byte order.
    template <typename T,
              typename std::enable_if_t<!std::is_pointer_v<T> &&
                                        std::is_trivially_copyable_v<T>>* = nullptr>
    void write(T value) {
        swap_if_needed_in_place(value, m_endian);
        write_bytes(reinterpret_cast<const uint8_t*>(&value), sizeof(value));
    }

    /// Writes the characters of `str` without a terminator.
    void write(std::string_view str) {
        write_bytes(reinterpret_cast<const uint8_t*>(str.data()), str.size());
    }

    /// Writes a single zero byte.
    void write_null() { write<uint8_t>(0); }

    /// Writes the characters of `str` followed by a zero byte.
    void write_cstr(std::string_view str) {
        write(str);
        write_null();
    }

    /// Moves the write position up to the next multiple of `n`.
    void align_up(size_t n) { seek(AlignUp(tell(), n)); }

private:
    std::vector<uint8_t> m_storage;
    size_t m_offset{0};
    endian m_endian{endian::native};
};
} // namespace binaryio

#endif

EDIT: I did not include the rest of the library code at first since I didn't write most of it, but I realize now that it's still important for understanding what's being done. I have not written tests for this code in particular, as I used it in another project of mine where I've verified that it works, but I'll be sure to provide them shortly.

binaryio/align.h

// This file is under the public domain.

#include <cstddef>
#include <type_traits>

#ifndef BINARYIO_ALIGN_H
#define BINARYIO_ALIGN_H

namespace binaryio {
/// Rounds `value` up to the nearest multiple of `size`.
/// A value that is already a multiple is returned unchanged.
/// `size` must be non-zero (it is used as a divisor).
template <typename T>
constexpr T AlignUp(T value, size_t size) {
    static_assert(std::is_unsigned<T>(), "T must be an unsigned value.");
    const auto overshoot = value % size;
    if (overshoot == 0)
        return value;
    return static_cast<T>(value + (size - overshoot));
}

/// Rounds `value` down to the nearest multiple of `size`.
/// `size` must be non-zero (it is used as a divisor).
template <typename T>
constexpr T AlignDown(T value, size_t size) {
    static_assert(std::is_unsigned<T>(), "T must be an unsigned value.");
    const auto whole_units = value / size;
    return static_cast<T>(whole_units * size);
}

} // namespace binaryio
#endif

binaryio/interface.h

#include <cstddef>
#include <cstdint>

#ifndef BINARYIO_INTERFACE_H
#define BINARYIO_INTERFACE_H

namespace binaryio {
/// Abstract positioning interface shared by the binary reader and writer.
class IBinaryIO {
public:
    IBinaryIO() = default;
    virtual ~IBinaryIO() = default;

    /// Moves the stream position to `offset`.
    virtual auto seek(std::ptrdiff_t offset) -> void = 0;

    /// Reports the current stream position.
    virtual auto tell() const -> size_t = 0;
};
}
#endif

binaryio/swap.h

// Copyright 2017 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.

#ifndef BINARYIO_SWAP_H
#define BINARYIO_SWAP_H

#include <cstring>
#include <tuple>
#include <type_traits>

#ifdef __APPLE__
#include <libkern/OSByteOrder.h>
#elif defined(__linux__)
#include <byteswap.h>
#elif defined(__FreeBSD__)
#include <sys/endian.h>
#endif

#ifndef _WIN32
#include <arpa/inet.h>
#endif

// ---------------------------------- Added -----------------------------------

// From
// https://github.com/neobrain/blobify/blob/master/include/blobify/endian.hpp
#if defined(__cpp_lib_endian)
// C++20 provides std::endian
#include <bit>
#elif defined(__APPLE__)
#include <machine/endian.h>
#elif !defined(_WIN32)
#include <endian.h>
#endif

#include "binaryio/type_utils.h"

namespace binaryio {
// `binaryio::endian` names a byte order (little / big / native).
// When the standard library advertises std::endian through the
// __cpp_lib_endian feature-test macro, that type is reused directly;
// otherwise an equivalent enum is defined here.
// NOTE(review): the macro is tested above before <bit> is included, so this
// branch is only taken if an earlier header already defined it - confirm on
// the toolchains you care about.
#if defined(__cpp_lib_endian)
using endian = std::endian;
#else
enum class endian {
#ifdef _WIN32
    // Windows is treated as always little-endian.
    little = 0,
    big = 1,
    native = little
#else
    // Values come from the compiler's predefined byte-order macros.
    little = __ORDER_LITTLE_ENDIAN__,
    big = __ORDER_BIG_ENDIAN__,
    native = __BYTE_ORDER__
#endif
};
#endif

// ---------------------------------- Added -----------------------------------

/// Reports the byte order of the machine the code is running on.
/// On Windows this is hard-wired to little-endian; elsewhere it is detected
/// by checking whether htonl() leaves a probe value unchanged.
inline endian get_platform_endianness() {
#ifdef _WIN32
    return endian::little;
#else
    const uint32_t probe = 0x12345678;
    if (htonl(probe) == probe)
        return endian::big;
    return endian::little;
#endif
}

/// Byte-swapping a single byte is the identity; provided for symmetry with
/// the wider swap functions.
inline uint8_t swap8(uint8_t data) {
    const uint8_t unchanged = data;
    return unchanged;
}
/// Assembles a 24-bit value from three bytes, treating data[0] as the most
/// significant byte and data[2] as the least significant.
inline uint32_t swap24(const uint8_t* data) {
    const int high = data[0] << 16;
    const int middle = data[1] << 8;
    const int low = data[2];
    return high | middle | low;
}

#if defined(ANDROID) || defined(__OpenBSD__)
#undef swap16
#undef swap32
#undef swap64
#endif

// Byte-order reversal of 16-, 32- and 64-bit values. Each supported platform
// forwards to its own byte-swap primitive; any other platform uses the
// portable shift-based fallback at the end.
#ifdef _WIN32
inline uint16_t swap16(uint16_t data) { return _byteswap_ushort(data); }
inline uint32_t swap32(uint32_t data) { return _byteswap_ulong(data); }
inline uint64_t swap64(uint64_t data) { return _byteswap_uint64(data); }
#elif defined(__linux__)
inline uint16_t swap16(uint16_t data) { return bswap_16(data); }
inline uint32_t swap32(uint32_t data) { return bswap_32(data); }
inline uint64_t swap64(uint64_t data) { return bswap_64(data); }
#elif defined(__APPLE__)
inline __attribute__((always_inline)) uint16_t swap16(uint16_t data) {
    return OSSwapInt16(data);
}
inline __attribute__((always_inline)) uint32_t swap32(uint32_t data) {
    return OSSwapInt32(data);
}
inline __attribute__((always_inline)) uint64_t swap64(uint64_t data) {
    return OSSwapInt64(data);
}
#elif defined(__FreeBSD__)
inline uint16_t swap16(uint16_t data) { return bswap16(data); }
inline uint32_t swap32(uint32_t data) { return bswap32(data); }
inline uint64_t swap64(uint64_t data) { return bswap64(data); }
#else
// Slow generic implementation.
// Every half is widened to the unsigned result type *before* it is shifted,
// so no shift ever pushes a bit into the sign bit of a promoted int.
inline uint16_t swap16(uint16_t data) {
    return static_cast<uint16_t>((data >> 8) | (data << 8));
}
inline uint32_t swap32(uint32_t data) {
    const uint32_t low_half = swap16(static_cast<uint16_t>(data));
    const uint32_t high_half = swap16(static_cast<uint16_t>(data >> 16));
    return (low_half << 16) | high_half;
}
inline uint64_t swap64(uint64_t data) {
    const uint64_t low_half = swap32(static_cast<uint32_t>(data));
    const uint64_t high_half = swap32(static_cast<uint32_t>(data >> 32));
    return (low_half << 32) | high_half;
}
#endif

/// Copies a 16-bit value out of the bytes at `data` (memcpy, so no alignment
/// is required) and returns it with its byte order reversed.
inline uint16_t swap16(const uint8_t* data) {
    uint16_t raw{};
    std::memcpy(&raw, data, sizeof raw);
    return swap16(raw);
}
/// Copies a 32-bit value out of the bytes at `data` (memcpy, so no alignment
/// is required) and returns it with its byte order reversed.
inline uint32_t swap32(const uint8_t* data) {
    uint32_t raw{};
    std::memcpy(&raw, data, sizeof raw);
    return swap32(raw);
}
/// Copies a 64-bit value out of the bytes at `data` (memcpy, so no alignment
/// is required) and returns it with its byte order reversed.
inline uint64_t swap64(const uint8_t* data) {
    uint64_t raw{};
    std::memcpy(&raw, data, sizeof raw);
    return swap64(raw);
}

/// Reverses, in place, the byte order of the `count`-byte value stored at
/// the given address.
///
/// Only the widths specialised below (1, 2, 4 and 8 bytes) are supported.
/// The primary template is defined solely to reject any other width with a
/// readable compile-time error; previously it was only declared, so an
/// unsupported width surfaced as an unresolved symbol at link time.
template <int count>
void swap(uint8_t*) {
    static_assert(count == 1 || count == 2 || count == 4 || count == 8,
                  "swap<count> only supports 1-, 2-, 4- and 8-byte values");
}

/// A single byte has no byte order: nothing to do.
template <>
inline void swap<1>(uint8_t*) {}

/// Reverses the two bytes at `data`.
template <>
inline void swap<2>(uint8_t* data) {
    const uint16_t value = swap16(data);

    std::memcpy(data, &value, sizeof(uint16_t));
}

/// Reverses the four bytes at `data`.
template <>
inline void swap<4>(uint8_t* data) {
    const uint32_t value = swap32(data);

    std::memcpy(data, &value, sizeof(uint32_t));
}

/// Reverses the eight bytes at `data`.
template <>
inline void swap<8>(uint8_t* data) {
    const uint64_t value = swap64(data);

    std::memcpy(data, &value, sizeof(uint64_t));
}

/// Returns a copy of `data` with its bytes in reverse order.
/// Only arithmetic types are accepted.
template <typename T>
inline T swap_value(T data) {
    static_assert(std::is_arithmetic<T>(),
                  "function only makes sense with arithmetic types");

    // Work on the by-value copy through a byte pointer, then hand it back.
    auto* const raw_bytes = reinterpret_cast<uint8_t*>(&data);
    swap<sizeof(T)>(raw_bytes);
    return data;
}

/// Converts `value` in place between the given byte order and the byte order
/// of the running machine; a no-op when the two already match.
/// Arithmetic values are byte-swapped directly; types exposing `fields()`
/// have each field converted recursively. Other types are left untouched.
/// @param endian  The endianness of the value.
template <typename T>
void swap_if_needed_in_place(T& value, endian endian) {
    const bool already_native = (get_platform_endianness() == endian);
    if (!already_native) {
        if constexpr (std::is_arithmetic<T>()) {
            value = swap_value(value);
        }

        if constexpr (ExposesFields<T>()) {
            const auto convert_each = [endian](auto&... fields) {
                (swap_if_needed_in_place(fields, endian), ...);
            };
            std::apply(convert_each, value.fields());
        }
    }
}

/// By-value counterpart of swap_if_needed_in_place(): converts a copy of
/// `value` and returns it, leaving the caller's object untouched.
template <typename T>
T swap_if_needed(T value, endian endian) {
    T& converted = value;
    swap_if_needed_in_place(converted, endian);
    return value;
}
} // namespace binaryio
#endif

binaryio/type_utils.h

/**
 * Copyright (C) 2020 leoetlino
 *
 * This file is part of oead.
 *
 * oead is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 2 of the License, or
 * (at your option) any later version.
 *
 * oead is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with oead.  If not, see <http://www.gnu.org/licenses/>.
 */

#include <algorithm>
#include <array>
#include <cstddef>
#include <cstdint>
#include <string_view>
#include <tuple>
#include <utility>

#ifndef BINARYIO_TYPE_UTILS_H
#define BINARYIO_TYPE_UTILS_H

namespace binaryio {

// Intended to be expanded inside the definition of TYPE, with the data
// members to expose listed after it. The expansion adds:
//   - fields() (const and non-const), returning a std::tie of those members;
//   - friend operator== / operator!=, which compare the fields() tuples;
//   - a friend AbslHashValue() overload that feeds fields() to H::combine
//     (presumably the Abseil hashing extension point - confirm).
// A trailing semicolon after the macro invocation is harmless.
#define BINARYIO_DEFINE_FIELDS(TYPE, ...)                                      \
    constexpr auto fields() { return std::tie(__VA_ARGS__); }                  \
    constexpr auto fields() const { return std::tie(__VA_ARGS__); }            \
    constexpr friend bool operator==(const TYPE& lhs, const TYPE& rhs) {       \
        return lhs.fields() == rhs.fields();                                   \
    }                                                                          \
    constexpr friend bool operator!=(const TYPE& lhs, const TYPE& rhs) {       \
        return !(lhs == rhs);                                                  \
    }                                                                          \
    template <typename H>                                                      \
    friend H AbslHashValue(H h, const TYPE& self) {                            \
        return H::combine(std::move(h), self.fields());                        \
    }

/// Strongly typed wrapper around arithmetic types
/// to make types clear especially for Python bindings.
///
/// Construction from T is explicit; conversion back to T is implicit.
/// The wrapped value is exposed through fields(), so the wrapper can be
/// compared and byte-swapped via the fields() mechanism.
template <typename T>
struct number {
    static_assert(std::is_arithmetic<T>(), "T must be an arithmetic type");
    constexpr number() = default;
    constexpr explicit number(T v) : value{v} {}
    constexpr operator T() const { return value; }
    constexpr number& operator=(T v) { return value = v, *this; }

    // Prefix forms: update, then return the updated object.
    constexpr number& operator++() { return ++value, *this; }
    constexpr number& operator--() { return --value, *this; }

    // Postfix forms: update, but return the value held *before* the update,
    // as the built-in operators do. (They used to return a reference to the
    // already-updated object, so `auto old = n++;` yielded the new value.)
    constexpr number operator++(int) {
        const number previous = *this;
        ++value;
        return previous;
    }
    constexpr number operator--(int) {
        const number previous = *this;
        --value;
        return previous;
    }

    constexpr number& operator+=(T rhs) { return value += rhs, *this; }
    constexpr number& operator-=(T rhs) { return value -= rhs, *this; }
    constexpr number& operator*=(T rhs) { return value *= rhs, *this; }
    constexpr number& operator/=(T rhs) { return value /= rhs, *this; }
    constexpr number& operator%=(T rhs) { return value %= rhs, *this; }
    constexpr number& operator&=(T rhs) { return value &= rhs, *this; }
    constexpr number& operator|=(T rhs) { return value |= rhs, *this; }
    constexpr number& operator^=(T rhs) { return value ^= rhs, *this; }
    constexpr number& operator<<=(T rhs) { return value <<= rhs, *this; }
    constexpr number& operator>>=(T rhs) { return value >>= rhs, *this; }
    BINARYIO_DEFINE_FIELDS(number, value);

    T value;
};

/// Produces a std::string_view over any object that can be explicitly
/// converted to one. The view does not own the characters, so it is only
/// valid while the source object is.
template <typename T>
std::string_view str(const T& str_like) {
    const std::string_view view = static_cast<std::string_view>(str_like);
    return view;
}

/// Detect<T, Op> derives from std::true_type when Op<T> names a valid type,
/// and from std::false_type otherwise.
template <class, template <class> class, class = void>
struct Detect : std::false_type {};

template <class Candidate, template <class> class Probe>
struct Detect<Candidate, Probe, std::void_t<Probe<Candidate>>>
    : std::true_type {};

/// The type of `t.fields()` for an rvalue `t` of type Candidate; ill-formed
/// when there is no such callable member.
template <class Candidate>
using ExposesFieldsImpl = decltype(std::declval<Candidate>().fields());

/// True-type when Candidate has a callable fields() member.
template <class Candidate>
using ExposesFields = Detect<Candidate, ExposesFieldsImpl>;

} // namespace binaryio
#endif
```