I finally ended up writing a C extension, since it did not seem like there was any way to squeeze more performance out of python/numpy implementation.
First I was using sprintf for formatting and got these results -
In [7]: a = np.random.normal(0, 1, (1234567, 3))
In [8]: %timeit ObjWrite.write(a, 'a.txt')
1.21 s ± 17.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
Compared to ~2.5 seconds this is a bit of an improvement, but not nearly enough to justify writing the extension
Since almost all of the time was being spent on formatting the string, I wrote a sprintf replacement just for formatting doubles (accurate to 15-17th decimal place for values b/w -10^7 and 10^7, which is acceptable for my use case)
In [9]: %timeit ObjWrite.writeFast(a, 'a-fast.txt')
302 ms ± 22.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
~300ms - Decent!
Here is the module -
ObjWrite.c
#include <stdio.h>
#include <Python.h>
#include <numpy/arrayobject.h>
#define CHUNK_SIZE 32768
/*
Write vertices to given file, use sprintf for formatting
python-interface: ObjWrite.write(arr: ndarray, filepath: string)
*/
static PyObject* methodWriteIter(PyObject *self, PyObject *args) {
// Parse arguments
PyArrayObject *arr;
char *filepath = NULL;
if (!PyArg_ParseTuple(args, "O!s", &PyArray_Type, &arr, &filepath)) return PyLong_FromLong(-1);
npy_intp size = PyArray_SIZE(arr);
// Handle zero-sized arrays specially, if size is not a multiple of 3, exit
if (size == 0 || size % 3 != 0) return PyLong_FromLong(-1);
// get iterator
NpyIter* iter;
NpyIter_IterNextFunc *iternext;
PyArray_Descr *dtype;
dtype = PyArray_DescrFromType(NPY_DOUBLE);
iter = NpyIter_New(arr, NPY_ITER_READONLY, NPY_KEEPORDER, NPY_NO_CASTING, dtype);
if (iter == NULL) return PyLong_FromLong(-1);
// get iternext function for fast access
iternext = NpyIter_GetIterNext(iter, NULL);
if (iternext == NULL) {
NpyIter_Deallocate(iter);
return PyLong_FromLong(-1);
}
// get data pointer, this will get updated by the iterator
double **dataptr;
dataptr = (double **) NpyIter_GetDataPtrArray(iter);
// open file, exit if null
FILE *fp = fopen(filepath, "w");
if (fp == NULL) {
NpyIter_Deallocate(iter);
return PyLong_FromLong(-1);
}
// init file buffer, writing in chunks does not seem to offer any significant benefit
// but it should still will be useful when disk utilization is high
char fileBuffer[CHUNK_SIZE + 128];
int bufferCount = 0;
double x, y, z;
do {
// get 3 doubles from array
x = **dataptr;
iternext(iter);
y = **dataptr;
iternext(iter);
z = **dataptr;
// use sprintf to format and write to buffer
bufferCount += sprintf(&fileBuffer[bufferCount], "v %.17f %.17f %.17f\n", x, y, z);
// if the chunk is big enough, write it.
if (bufferCount >= CHUNK_SIZE) {
fwrite(fileBuffer, bufferCount, 1, fp);
bufferCount = 0;
}
} while (iternext(iter));
// write remainder
if (bufferCount > 0) fwrite(fileBuffer, 1, bufferCount, fp);
// clean-up and exit with success
NpyIter_Deallocate(iter);
fclose(fp);
return PyLong_FromLong(0);
}
/*
Turns out that maximum proportion of time is taken by sprintf call in the above implementation
So, the next part is basically implementing a faster way to format doubles
*/
static const char DIGITS[] = "0123456789"; // digit-char lookup table
/* get powers of 10, can overflow but we only need this for digits <= 9 */
int powOf10(int digits) {
int res = 1;
while (digits > 0) {
res *= 10;
digits--;
}
return res;
}
/* a fast way to get number of digits in a positive integer */
int countDigitsPosInt(int n) {
if (n < 100000) { // 5 or less
if (n < 100) { // 1 or 2
if (n < 10) { return 1; } else { return 2; }
} else { // 3 or 4 or 5
if (n < 1000) { return 3; }
else { // 4 or 5
if (n < 10000) { return 4; } else { return 5; }
}
}
} else { // 6 or more
if (n < 10000000) { // 6 or 7
if (n < 1000000) { return 6; } else { return 7; }
} else { // 8 to 10
if (n < 100000000) { return 8; }
else { // 9 or 10
if (n < 1000000000) { return 9; } else { return 10; }
}
}
}
}
/* format positive integers into `digits` length strings, zero-pad if number of digits too high
if number digits are greater then `digits`, it will get truncated, so watch out */
int posIntToStringDigs(char *s, int n, int digits) {
int q = n;
int r;
int i = digits - 1;
while (i >= 0 && q > 0) { // assign digits from last to first
r = q % 10;
q = q / 10;
*(s + i) = DIGITS[r]; // get char from lookup table
i--;
}
while (i >= 0) { // we are here because q=0 and still some digits remain
*(s + i) = '0'; // 0 pad these
i--;
}
return digits;
}
/* format positive integers - no zero padding */
int posIntToString(char *s, int n) {
if (n == 0) { // handle 0 case, no need of counting digits in this case
*s = '0';
return 1;
}
// call posIntToStringDigs with exactly the number of digits as in the integer
return posIntToStringDigs(s, n, countDigitsPosInt(n));
}
static const int MAX_D = 8; // max number of digits we'll break things into
static const int _10D = 100000000; // 10 ^ MAX_D
/*
format positive doubles
accurate to 15-17th digit for numbers that are not huge (< 10^7), fairly accurate for huge numbers
I personally do not need this to be crazy accurate, for the range of numbers I am expecting, this will do just fine
*/
int posDoubleToString(char *s, double f, int precision) {
// length of the generated string
int len = 0;
// to make big numbers int friendly, divide by 10 ^ MAX_D until the whole part would fit in an int
int steps = 0;
while (f > _10D) {
f /= _10D;
steps++;
}
int intPart = (int) f;
double decPart = f - intPart;
// add the first whole part to the string, we have no idea how many digits would be there
len += posIntToString(&s[len], intPart);
// if the number was bigger then 10 ^ MAX_D, we need to return it to its former glory, i.e. add rest to integer string
while (steps > 0) {
decPart = decPart * _10D;
intPart = (int) decPart;
decPart = decPart - intPart;
len += posIntToStringDigs(&s[len], intPart, MAX_D); // appending 0's important here
steps--;
}
// add the decimal point
s[len++] = '.';
// after the decimal, piggy back int-to-string function to `precision` number of digits
while (precision > 0) {
if (precision > MAX_D) {
decPart = decPart * _10D;
intPart = (int) decPart;
decPart = decPart - intPart;
len += posIntToStringDigs(&s[len], intPart, MAX_D);
precision -= MAX_D;
} else {
decPart = decPart * powOf10(precision);
intPart = (int) decPart;
decPart = decPart - intPart;
if (decPart > 0.5) intPart += 1; // round of
len += posIntToStringDigs(&s[len], intPart, precision);
precision = 0;
}
}
// truncate following zeros, loop on string in reverse
/* commented to mimic sprintf
int index = len - 1;
while (index > 0) {
if (s[index] != '0') break; // if last char is not 0 our work is done, nothing more to do
if (s[index - 1] == '.') break; // if char is 0 but its the last 0 before decimal point, stop
len--;
index--;
}*/
return len;
}
/* format positive or negative doubles */
int doubleToString(char *s, double f, int pre) {
// handle negatives
int len = 0;
if (f < 0) {
*s = '-';
len++;
f *= -1; // change to positive
}
len += posDoubleToString(&s[len], f, pre);
return len;
}
/*
Write vertices to given file, use our doubleToString for formatting
python-interface: ObjWrite.writeFast(arr: ndarray, filepath: string)
*/
static PyObject* methodWriteIterFast(PyObject *self, PyObject *args) {
// Parse arguments
PyArrayObject *arr;
char *filepath = NULL;
if (!PyArg_ParseTuple(args, "O!s", &PyArray_Type, &arr, &filepath)) return PyLong_FromLong(-1);
npy_intp size = PyArray_SIZE(arr);
// Handle zero-sized arrays specially, if size is not a multiple of 3, exit
if (size == 0 || size % 3 != 0) return PyLong_FromLong(-1);
// get iterator
NpyIter* iter;
NpyIter_IterNextFunc *iternext;
PyArray_Descr *dtype;
dtype = PyArray_DescrFromType(NPY_DOUBLE);
iter = NpyIter_New(arr, NPY_ITER_READONLY, NPY_KEEPORDER, NPY_NO_CASTING, dtype);
if (iter == NULL) return PyLong_FromLong(-1);
// get iternext function for fast access
iternext = NpyIter_GetIterNext(iter, NULL);
if (iternext == NULL) {
NpyIter_Deallocate(iter);
return PyLong_FromLong(-1);
}
// get data pointer, this will get updated by the iterator
double **dataptr;
dataptr = (double **) NpyIter_GetDataPtrArray(iter);
// open file, exit if null
FILE *fp = fopen(filepath, "w");
if (fp == NULL) {
NpyIter_Deallocate(iter);
return PyLong_FromLong(-1);
}
// init file buffer, writing in chunks does not seem to offer any significant benefit
// but it should still will be useful when disk utilization is high
char fileBuffer[CHUNK_SIZE + 128];
int bufferCount = 0;
double x, y, z;
do {
// get 3 doubles from array
x = **dataptr;
iternext(iter);
y = **dataptr;
iternext(iter);
z = **dataptr;
// use doubleToString to format and write to buffer
fileBuffer[bufferCount++] = 'v';
fileBuffer[bufferCount++] = ' ';
bufferCount += doubleToString(&fileBuffer[bufferCount], x, 17);
fileBuffer[bufferCount++] = ' ';
bufferCount += doubleToString(&fileBuffer[bufferCount], y, 17);
fileBuffer[bufferCount++] = ' ';
bufferCount += doubleToString(&fileBuffer[bufferCount], z, 17);
fileBuffer[bufferCount++] = '\n';
// if the chunk is big enough, write it.
if (bufferCount >= CHUNK_SIZE) {
fwrite(fileBuffer, bufferCount, 1, fp);
bufferCount = 0;
}
} while (iternext(iter));
// write remainder
if (bufferCount > 0) fwrite(fileBuffer, 1, bufferCount, fp);
// clean-up and exit with success
NpyIter_Deallocate(iter);
fclose(fp);
return PyLong_FromLong(0);
}
/* Set up the methods table */
static PyMethodDef objWriteMethods[] = {
{"write", methodWriteIter, METH_VARARGS, "write numpy array to a text file in .obj format"},
{"writeFast", methodWriteIterFast, METH_VARARGS, "write numpy array to a text file in .obj format"},
{NULL, NULL, 0, NULL} /* Sentinel - marks the end of this structure */
};
/* Set up module definition */
static struct PyModuleDef objWriteModule = {
PyModuleDef_HEAD_INIT,
"ObjWrite",
"write numpy array to a text file in .obj format",
-1,
objWriteMethods
};
/* module init function */
PyMODINIT_FUNC PyInit_ObjWrite(void) {
import_array();
return PyModule_Create(&objWriteModule);
}
setup.py
from distutils.core import setup, Extension
import numpy
def main():
setup(
name="ObjWrite",
version="1.0.0",
description="Python interface for the function to write numpy array to a file",
author="Shobhit Vashistha",
author_email="[email protected]",
ext_modules=[
Extension("ObjWrite", ["ObjWrite.c"], include_dirs=[numpy.get_include()])
]
)
if __name__ == "__main__":
main()
I am aware that this is probably overkill, but I had great fun diving into C and Python/Numpy C-Extension world, and hopefully someone else will find this useful in the future.
np.savetxt, my impression is that there's extra overhead to handle the various data types and optionsnp.savetxt()supports, which is why it's slower than your own loop which does exactly what you need it to do.obj_file.write('')withpass. Better yet, don't open and close the file twice if you want overwrite it. Just domode = 'w' if overwrite else 'a'%timeit '{}'.format(1.123456789)- 380 ns ± 3.75 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each),%timeit '%s' % 1.123456789- 330 ns ± 3.93 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)