Skip to main content
edited tags; edited title; edited tags
Link
200_success
  • 145.6k
  • 22
  • 191
  • 481

Reading a binary file containing periodic samples

deleted 9 characters in body
Source Link
Quill
  • 12.1k
  • 5
  • 41
  • 94

I have the following code for reading HTK feature files. The code below is working completely correct (verified it with unit tests and the output of the original HTK toolkit).

from HTK_model import FLOAT_TYPE
from numpy import array
from struct import unpack

def feature_reader(file_name):
    """Yield one feature vector per sample from an HTK feature file.

    Parameters:
        file_name: path to the HTK feature file.

    Yields:
        numpy array (dtype FLOAT_TYPE) with one decoded feature vector per
        sample; compressed files are decompressed as (x + B) / A.
    """
    with open(file_name, 'rb') as in_f:
        # Four standard big-endian header fields. Sample period is not used.
        num_samples = unpack('>i', in_f.read(4))[0]
        sample_period = unpack('>i', in_f.read(4))[0]
        sample_size = unpack('>h', in_f.read(2))[0]
        param_kind = unpack('>h', in_f.read(2))[0]

        # Bit 0o2000 of the parameter kind flags compressed data.
        compressed = bool(param_kind & 0o2000)

        # If compression is used, two vectors A and B are stored first; the
        # samples are then shorts and the real values are (x + B) / A.
        A = B = None
        if compressed:
            num_coeffs = sample_size // 2
            # One unpack call per vector instead of one per float.
            A = array(unpack('>%df' % num_coeffs, in_f.read(4 * num_coeffs)),
                      dtype=FLOAT_TYPE)
            B = array(unpack('>%df' % num_coeffs, in_f.read(4 * num_coeffs)),
                      dtype=FLOAT_TYPE)
            # A and B together occupy the space of 4 samples, and the header's
            # sample count includes them.
            num_samples -= 4

        # Hoist the per-row format string out of the loop: shorts when
        # compressed, floats otherwise.
        if compressed:
            row_fmt = '>%dh' % (sample_size // 2)
        else:
            row_fmt = '>%df' % (sample_size // 4)

        for _ in xrange(num_samples):
            row = array(unpack(row_fmt, in_f.read(sample_size)),
                        dtype=FLOAT_TYPE)
            if compressed:
                yield (row + B) / A
            else:
                yield row

How can I speed up this code? And, are there any things I should improve in the code?

I have the following code for reading HTK feature files. The code below is working completely correct (verified it with unit tests and the output of the original HTK toolkit).

from HTK_model import FLOAT_TYPE
from numpy import array
from struct import unpack

def feature_reader(file_name):
    """Yield one feature vector per sample from an HTK feature file.

    Parameters:
        file_name: path to the HTK feature file.

    Yields:
        numpy array (dtype FLOAT_TYPE) with one decoded feature vector per
        sample; compressed files are decompressed as (x + B) / A.
    """
    with open(file_name, 'rb') as in_f:
        # Four standard big-endian header fields. Sample period is not used.
        num_samples = unpack('>i', in_f.read(4))[0]
        sample_period = unpack('>i', in_f.read(4))[0]
        sample_size = unpack('>h', in_f.read(2))[0]
        param_kind = unpack('>h', in_f.read(2))[0]

        # Bit 0o2000 of the parameter kind flags compressed data.
        compressed = bool(param_kind & 0o2000)

        # If compression is used, two vectors A and B are stored first; the
        # samples are then shorts and the real values are (x + B) / A.
        A = B = None
        if compressed:
            num_coeffs = sample_size // 2
            # One unpack call per vector instead of one per float.
            A = array(unpack('>%df' % num_coeffs, in_f.read(4 * num_coeffs)),
                      dtype=FLOAT_TYPE)
            B = array(unpack('>%df' % num_coeffs, in_f.read(4 * num_coeffs)),
                      dtype=FLOAT_TYPE)
            # A and B together occupy the space of 4 samples, and the header's
            # sample count includes them.
            num_samples -= 4

        # Hoist the per-row format string out of the loop: shorts when
        # compressed, floats otherwise.
        if compressed:
            row_fmt = '>%dh' % (sample_size // 2)
        else:
            row_fmt = '>%df' % (sample_size // 4)

        for _ in xrange(num_samples):
            row = array(unpack(row_fmt, in_f.read(sample_size)),
                        dtype=FLOAT_TYPE)
            if compressed:
                yield (row + B) / A
            else:
                yield row

How can I speed up this code? And are there any things I should improve in this code?

I have the following code for reading HTK feature files. The code below is working completely correct (verified it with unit tests and the output of the original HTK toolkit).

from HTK_model import FLOAT_TYPE
from numpy import array
from struct import unpack

def feature_reader(file_name):
    """Yield one feature vector per sample from an HTK feature file.

    Parameters:
        file_name: path to the HTK feature file.

    Yields:
        numpy array (dtype FLOAT_TYPE) with one decoded feature vector per
        sample; compressed files are decompressed as (x + B) / A.
    """
    with open(file_name, 'rb') as in_f:
        # Four standard big-endian header fields. Sample period is not used.
        num_samples = unpack('>i', in_f.read(4))[0]
        sample_period = unpack('>i', in_f.read(4))[0]
        sample_size = unpack('>h', in_f.read(2))[0]
        param_kind = unpack('>h', in_f.read(2))[0]

        # Bit 0o2000 of the parameter kind flags compressed data.
        compressed = bool(param_kind & 0o2000)

        # If compression is used, two vectors A and B are stored first; the
        # samples are then shorts and the real values are (x + B) / A.
        A = B = None
        if compressed:
            num_coeffs = sample_size // 2
            # One unpack call per vector instead of one per float.
            A = array(unpack('>%df' % num_coeffs, in_f.read(4 * num_coeffs)),
                      dtype=FLOAT_TYPE)
            B = array(unpack('>%df' % num_coeffs, in_f.read(4 * num_coeffs)),
                      dtype=FLOAT_TYPE)
            # A and B together occupy the space of 4 samples, and the header's
            # sample count includes them.
            num_samples -= 4

        # Hoist the per-row format string out of the loop: shorts when
        # compressed, floats otherwise.
        if compressed:
            row_fmt = '>%dh' % (sample_size // 2)
        else:
            row_fmt = '>%df' % (sample_size // 4)

        for _ in xrange(num_samples):
            row = array(unpack(row_fmt, in_f.read(sample_size)),
                        dtype=FLOAT_TYPE)
            if compressed:
                yield (row + B) / A
            else:
                yield row

How can I speed up this code, are there things I should improve in the code?

Tweeted twitter.com/#!/StackCodeReview/status/52384316369547264
Source Link

How can I speed up this python code for reading a binary file

I have the following code for reading HTK feature files. The code below is working completely correct (verified it with unit tests and the output of the original HTK toolkit).

from HTK_model import FLOAT_TYPE
from numpy import array
from struct import unpack

def feature_reader(file_name):
    """Yield one feature vector per sample from an HTK feature file.

    Parameters:
        file_name: path to the HTK feature file.

    Yields:
        numpy array (dtype FLOAT_TYPE) with one decoded feature vector per
        sample; compressed files are decompressed as (x + B) / A.
    """
    with open(file_name, 'rb') as in_f:
        # Four standard big-endian header fields. Sample period is not used.
        num_samples = unpack('>i', in_f.read(4))[0]
        sample_period = unpack('>i', in_f.read(4))[0]
        sample_size = unpack('>h', in_f.read(2))[0]
        param_kind = unpack('>h', in_f.read(2))[0]

        # Bit 0o2000 of the parameter kind flags compressed data.
        compressed = bool(param_kind & 0o2000)

        # If compression is used, two vectors A and B are stored first; the
        # samples are then shorts and the real values are (x + B) / A.
        A = B = None
        if compressed:
            num_coeffs = sample_size // 2
            # One unpack call per vector instead of one per float.
            A = array(unpack('>%df' % num_coeffs, in_f.read(4 * num_coeffs)),
                      dtype=FLOAT_TYPE)
            B = array(unpack('>%df' % num_coeffs, in_f.read(4 * num_coeffs)),
                      dtype=FLOAT_TYPE)
            # A and B together occupy the space of 4 samples, and the header's
            # sample count includes them.
            num_samples -= 4

        # Hoist the per-row format string out of the loop: shorts when
        # compressed, floats otherwise.
        if compressed:
            row_fmt = '>%dh' % (sample_size // 2)
        else:
            row_fmt = '>%df' % (sample_size // 4)

        for _ in xrange(num_samples):
            row = array(unpack(row_fmt, in_f.read(sample_size)),
                        dtype=FLOAT_TYPE)
            if compressed:
                yield (row + B) / A
            else:
                yield row

How can I speed up this code? And are there any things I should improve in this code?