Currently, in my games, I'm using new and delete to create entities and components. It works fine, but there are slowdowns when creating/deleting many (5000+) entities or components at once.
After asking for advice on StackOverflow, I decided to create my own memory pre-allocator.
- It allocates a specific amount of memory on the heap when constructed.
- It creates objects on "pieces" of the allocated memory by using
Preallocator::create<T>() - It destroys (calls destructor and reclaims memory "pieces") objects with
Preallocator::destroy<T> - It holds no specific type - it can hold different type of objects at once
- It is faster than using
newanddelete.
But is it ready to use in production code?
Could it be even faster?
class PreAllocator
{
private:
constexpr static unsigned int bufferSize{1000};
using MemoryUnit = char;
using MemoryPtr = MemoryUnit*;
struct Piece
{
// Piece is a range of memory [begin, end)
MemoryPtr begin, end;
inline Piece(MemoryPtr mStart, MemoryPtr mEnd) : begin{mStart}, end{mEnd} { }
inline size_t getSize() const { return sizeof(MemoryUnit) * (end - begin); }
};
MemoryUnit* buffer{new MemoryUnit[bufferSize]};
vector<Piece> available;
inline void unifyFrom(unsigned int mIndex)
{
auto lastEnd(available[mIndex].end);
auto itr(std::begin(available) + mIndex + 1);
for(; itr != std::end(available); ++itr)
if(itr->begin == lastEnd) lastEnd = itr->end;
else break;
available.erase(std::begin(available) + mIndex, itr);
available.emplace_back(available[mIndex].begin, lastEnd);
}
inline void unifyContiguous()
{
std::sort(std::begin(available), std::end(available), [](const Piece& mA, const Piece& mB){ return mA.begin < mB.begin; });
for(unsigned int i{0}; i < available.size(); ++i) unifyFrom(i);
}
inline std::vector<Piece>::iterator findSuitableMemory(size_t mRequiredSize)
{
// Tries to find a memory piece big enough to hold mRequiredSize
// If it is not found, contiguous memory pieces are unified
// If it is not found again, throws an exception
for(int i{0}; i < 2; ++i)
{
for(auto itr(std::begin(available)); itr != std::end(available); ++itr) if(itr->getSize() >= mRequiredSize) return itr;
unifyContiguous();
}
throw;
}
public:
PreAllocator()
{
// Add the whole buffer to the available memory vector
available.emplace_back(&buffer[0], &buffer[bufferSize]);
}
~PreAllocator() { delete[] buffer; }
template<typename T> inline T* create()
{
// Creates and returns a T* allocated with "placement new" on an available piece of the buffer
// T must be the "real object type" - this method will fail with pointers to bases that store derived instances!
auto requiredSize(sizeof(T));
auto suitable(findSuitableMemory(requiredSize));
MemoryPtr toUse{suitable->begin};
Piece leftover{toUse + requiredSize, suitable->end};
available.erase(suitable);
if(leftover.getSize() > 0) available.push_back(leftover);
return new (toUse) T;
}
template<typename T> inline void destroy(T* mObject)
{
// Destroys a previously allocated object, calling its destructor and reclaiming its memory piece
// T must be the "real object type" - this method will fail with pointers to bases that store derived instances!
mObject->~T();
auto objStart(reinterpret_cast<MemoryPtr>(mObject));
available.emplace_back(objStart, objStart + sizeof(T));
}
};
Test/benchmark:
struct ObjBase { };
struct TestObj : ObjBase { char data[100]; };
struct TestObjBig : ObjBase { char data[500]; };
int main()
{
PreAllocator p;
startBenchmark();
{
for(int k = 0; k < 10000; ++k)
{
vector<TestObj*> objs;
vector<TestObjBig*> objsbig;
for(int n = 0; n < 300; ++n)
{
for(int i = 0; i < 5; ++i) objs.push_back(p.create<TestObj>());
for(int i = 0; i < 5; ++i) p.destroy(objs[i]);
objs.clear();
}
for(int n = 0; n < 100; ++n)
{
for(int i = 0; i < 2; ++i) objsbig.push_back(p.create<TestObjBig>());
for(int i = 0; i < 2; ++i) p.destroy(objsbig[i]);
objsbig.clear();
}
}
}
string b1 = endBenchmark();
startBenchmark();
{
for(int k = 0; k < 10000; ++k)
{
vector<TestObj*> objs;
vector<TestObjBig*> objsbig;
for(int n = 0; n < 300; ++n)
{
for(int i = 0; i < 5; ++i) objs.push_back(new TestObj());
for(int i = 0; i < 5; ++i) delete objs[i];
objs.clear();
}
for(int n = 0; n < 100; ++n)
{
for(int i = 0; i < 2; ++i) objsbig.push_back(new TestObjBig());
for(int i = 0; i < 2; ++i) delete objsbig[i];
objsbig.clear();
}
}
}
string b2 = endBenchmark();
cout << b1 << endl; // prints "193ms"
cout << b2 << endl; // prints "957ms"
return 0;
realcode. But put print statements in your constructors and destructors to get an idea of the number of objects being created and destroyed (and an approximate order). Then write a benchmark that imitates a real run of your application. \$\endgroup\$