I use these four functions to work with the binary representation of floating-point numbers:
- float32ToBits
- float64ToBits
- bitsToFloat32
- bitsToFloat64
The first two functions return an array of three strings made of '0' and '1' characters (the sign, the exponent and the mantissa), and the last two return a floating-point number.
To convert a number to the 32 bits of its IEEE 754 single-precision representation (returned as sign, exponent and mantissa strings):
/**
 * Split the IEEE 754 single-precision encoding of a number into its fields.
 *
 * @param {number} floatNumber - Value to encode; it is stored as a float32 first.
 * @returns {string[]} `[sign, exponent, mantissa]` as strings of '0'/'1'
 *   characters, 1, 8 and 23 characters long respectively.
 * @throws {Error} If `floatNumber` is not of type "number".
 */
function float32ToBits(floatNumber) {
  if (typeof floatNumber !== "number") {
    throw new Error("A float number is expected.");
  }

  // Two views over the same 4 bytes: write them as a float32, read them as a uint32.
  const floatView = new Float32Array(1);
  floatView[0] = floatNumber;
  const rawBits = new Uint32Array(floatView.buffer)[0];

  // toString(2) drops leading zeros, so restore the full 32-character width.
  const bits = rawBits.toString(2).padStart(32, "0");

  return [bits.slice(0, 1), bits.slice(1, 9), bits.slice(9)];
}
// Demo: glue the three fields of pi back together into one 32-character bit string.
console.log(float32ToBits(Math.PI).reduce((bits, field) => bits + field, ""));
// -> 01000000010010010000111111011011
To convert a number to the 64 bits of its IEEE 754 double-precision representation (returned as sign, exponent and mantissa strings):
/**
 * Split the IEEE 754 double-precision encoding of a number into its fields.
 *
 * The bytes are accessed through a DataView with an explicit byte order, so
 * the result does not depend on the endianness of the host platform.
 *
 * @param {number} doubleNumber - Value to encode.
 * @returns {string[]} `[sign, exponent, mantissa]` as strings of '0'/'1'
 *   characters, 1, 11 and 52 characters long respectively.
 * @throws {Error} If `doubleNumber` is not of type "number".
 */
function float64ToBits(doubleNumber) {
  if (typeof doubleNumber !== "number") {
    throw new Error("A float number is expected.");
  }

  const view = new DataView(new ArrayBuffer(8));
  // Big-endian layout: byte 0 holds the sign bit and the top of the exponent.
  view.setFloat64(0, doubleNumber, false);

  // A 64-bit integer does not fit in a Number, so read two 32-bit words,
  // most significant first, and pad each one back to its full width.
  const highWord = view.getUint32(0, false).toString(2).padStart(32, "0");
  const lowWord = view.getUint32(4, false).toString(2).padStart(32, "0");
  const binaryString = highWord + lowWord;

  const signBit = binaryString.substring(0, 1);
  const exponentBits = binaryString.substring(1, 12);
  const mantissaBits = binaryString.substring(12, 64);
  return [signBit, exponentBits, mantissaBits];
}
// Demo: pi's double-precision fields joined into one 64-character bit string.
console.log(float64ToBits(Math.PI).reduce((bits, field) => bits + field, ""));
// -> 0100000000001001001000011111101101010100010001000010110100011000
To convert a string of bits ('0' and '1' characters) to a 32-bit IEEE 754 floating-point number:
/**
 * Decode a string of '0'/'1' characters as an IEEE 754 single-precision number.
 *
 * Only the last 32 characters are used: shorter input is left-padded with
 * '0', longer input is truncated from the left. The bits are written through
 * a DataView with an explicit byte order, so the result does not depend on
 * the endianness of the host platform.
 *
 * @param {string} bitString - Bits, most significant (sign) bit first.
 * @returns {number} The decoded float32 value.
 * @throws {Error} If any of the 32 characters used is not '0' or '1'.
 */
function bitsToFloat32(bitString) {
  const bits = bitString.padStart(32, "0").slice(-32);
  if (!/^[01]{32}$/.test(bits)) {
    throw new Error("A 32-bit string is expected.");
  }

  const view = new DataView(new ArrayBuffer(4));
  // 32 bits fit exactly in a Number, so the whole pattern is written as one word.
  view.setUint32(0, Number.parseInt(bits, 2), false);
  return view.getFloat32(0, false);
}
To convert a string of bits ('0' and '1' characters) to a 64-bit IEEE 754 floating-point number:
/**
 * Decode a string of '0'/'1' characters as an IEEE 754 double-precision number.
 *
 * Only the last 64 characters are used: shorter input is left-padded with
 * '0', longer input is truncated from the left. The bits are written through
 * a DataView with an explicit byte order, so the result does not depend on
 * the endianness of the host platform.
 *
 * @param {string} bitString - Bits, most significant (sign) bit first.
 * @returns {number} The decoded float64 value.
 * @throws {Error} If any of the 64 characters used is not '0' or '1'.
 */
function bitsToFloat64(bitString) {
  const bits = bitString.padStart(64, "0").slice(-64);
  if (!/^[01]{64}$/.test(bits)) {
    throw new Error("A 64-bit string is expected.");
  }

  const view = new DataView(new ArrayBuffer(8));
  // 64 bits exceed what a Number holds exactly, so write two 32-bit words,
  // most significant first.
  view.setUint32(0, Number.parseInt(bits.slice(0, 32), 2), false);
  view.setUint32(4, Number.parseInt(bits.slice(32), 2), false);
  return view.getFloat64(0, false);
}
Example usage: a condensed snake_case version of the same functions, applied to pi in 32-bit and 64-bit precision:
// Break the IEEE 754 single-precision encoding of n_float into
// [sign, exponent, mantissa] bit strings (1, 8 and 23 characters).
// Throws an Error when n_float is not of type "number".
const float_32_to_bits = (n_float) => {
  if (typeof n_float !== "number") {
    throw new Error("A float number is expected.")
  }
  // Store the value as a float32, then reread the same 4 bytes as one unsigned integer.
  const { buffer } = Float32Array.of(n_float)
  const [word] = new Uint32Array(buffer)
  // toString(2) drops leading zeros; pad back to the full 32 characters.
  const bits = word.toString(2).padStart(32, "0")
  return [bits.slice(0, 1), bits.slice(1, 9), bits.slice(9)]
}
// Break the IEEE 754 double-precision encoding of n_double into
// [sign, exponent, mantissa] bit strings (1, 11 and 52 characters).
// Throws an Error when n_double is not of type "number".
// The bytes are read through a DataView with an explicit byte order, so the
// result does not depend on the endianness of the host platform.
const float_64_to_bits = (n_double) => {
  if (typeof n_double !== "number") {
    throw new Error("A float number is expected.")
  }
  const view = new DataView(new ArrayBuffer(8))
  // Big-endian layout: byte 0 holds the sign bit and the top of the exponent.
  view.setFloat64(0, n_double, false)
  let bits = ""
  // Read the two 32-bit words most significant first, restoring leading zeros.
  for (const offset of [0, 4]) {
    bits += view.getUint32(offset, false).toString(2).padStart(32, "0")
  }
  const sign = bits.substring(0, 1)
  const expo = bits.substring(1, 12)
  const mant = bits.substring(12, 64)
  return [sign, expo, mant]
}
// Decode a string of '0'/'1' characters (sign bit first) as an IEEE 754
// single-precision number.
// Only the last 32 characters are used: shorter input is left-padded with
// '0', longer input is truncated from the left.
// Throws an Error when any of the 32 characters used is not '0' or '1'.
// The bits are written through a DataView with an explicit byte order, so the
// result does not depend on the endianness of the host platform.
const bits_to_float_32 = (bits_string) => {
  const bits = bits_string.padStart(32, "0").slice(-32)
  if (!/^[01]{32}$/.test(bits)) {
    throw new Error("A 32-bit string is expected.")
  }
  const view = new DataView(new ArrayBuffer(4))
  // 32 bits fit exactly in a Number, so the whole pattern is written as one word.
  view.setUint32(0, Number.parseInt(bits, 2), false)
  return view.getFloat32(0, false)
}
// Decode a string of '0'/'1' characters (sign bit first) as an IEEE 754
// double-precision number.
// Only the last 64 characters are used: shorter input is left-padded with
// '0', longer input is truncated from the left.
// Throws an Error when any of the 64 characters used is not '0' or '1'.
// The bits are written through a DataView with an explicit byte order, so the
// result does not depend on the endianness of the host platform.
const bits_to_float_64 = (bits_string) => {
  // Normalise the length BEFORE validating: checking the raw input would
  // reject short strings and would inspect the wrong characters of long ones.
  const bits = bits_string.padStart(64, "0").slice(-64)
  if (!/^[01]{64}$/.test(bits)) {
    throw new Error("A 64-bit string is expected.")
  }
  const view = new DataView(new ArrayBuffer(8))
  // 64 bits exceed what a Number holds exactly, so write two 32-bit words,
  // most significant first.
  view.setUint32(0, Number.parseInt(bits.slice(0, 32), 2), false)
  view.setUint32(4, Number.parseInt(bits.slice(32), 2), false)
  return view.getFloat64(0, false)
}
// Print pi's sign, exponent and mantissa fields (space-separated), first in
// single precision and then in double precision.
for (const to_bits of [float_32_to_bits, float_64_to_bits]) {
  console.log(to_bits(Math.PI).join(' '))
}
// Decode the 32-bit and 64-bit patterns of pi back into numbers.
console.log(bits_to_float_32('01000000010010010000111111011011'))
console.log(bits_to_float_64('0100000000001001001000011111101101010100010001000010110100011000'))