I want to process a file chunk by chunk to avoid memory exhaustion, so I need to consume the file through a read stream. The implementation below seems to work fine when I try it.
I am asking for your expert eyes:
- have I overlooked anything or forgotten some edge case?
- have I made a mistake somewhere that is going to bite me in prod?
- could this be improved? (For example, I'm wondering whether async iteration over the stream would be simpler; there's a sketch after the output below.)
The main code:
const fs = require("fs");

async function processFileByChunk(filePath) {
  try {
    const videoStream = fs.createReadStream(filePath);
    const stats = fs.statSync(filePath);
    await new Promise((resolve, reject) => {
      let bytesRead = 0;
      let countCurrentUploads = 0;
      videoStream.on("readable", async function () {
        while (true) {
          // wait until the previous chunk has finished processing
          await wait(() => countCurrentUploads <= 0, 1000);
          // pull up to 16 MiB from the stream's internal buffer
          const chunk = videoStream.read(16 * 1024 * 1024);
          if (!chunk || !chunk.length) {
            break;
          }
          bytesRead += chunk.length;
          console.log("bytesRead", bytesRead);
          countCurrentUploads++;
          await processChunk(chunk);
          countCurrentUploads--;
        }
        // resolve once the whole file has been consumed
        if (bytesRead >= stats.size) {
          resolve();
        }
      });
      videoStream.on("error", function (error) {
        reject(error);
      });
    });
  } catch (error) {
    console.log(error);
  }
}
Other functions:
async function processChunk(chunk) {
  console.log("process chunk...");
  await delay(2000);
  console.log("process chunk... done");
}

async function wait(fn, ms) {
  while (!fn()) {
    await delay(ms);
  }
}

function delay(ms) {
  return new Promise(resolve => setTimeout(resolve, ms));
}
Applied to a ~63 MB file, it prints out:
bytesRead 16777216
process chunk...
process chunk... done
bytesRead 33554432
process chunk...
process chunk... done
bytesRead 50331648
process chunk...
process chunk... done
bytesRead 63598316
process chunk...
process chunk... done
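So the stream is consumed as three full 16 MiB chunks (3 × 16,777,216 = 50,331,648 bytes) plus a final chunk of 13,266,668 bytes, which adds up to the file size, so the whole file does get read.

For comparison, here is a rough sketch of the async-iteration alternative I was wondering about. This assumes Node.js 12+ (where readable streams are async iterable); processFileByChunkAlt is just a throwaway name and processChunk is the same stub as above:

const fs = require("fs");

async function processFileByChunkAlt(filePath) {
  // highWaterMark makes the stream hand out buffers of up to 16 MiB each
  const stream = fs.createReadStream(filePath, { highWaterMark: 16 * 1024 * 1024 });
  let bytesRead = 0;
  // for await...of pulls one chunk at a time: the next chunk is not read
  // until processChunk(chunk) has resolved, and stream errors are thrown here
  for await (const chunk of stream) {
    bytesRead += chunk.length;
    console.log("bytesRead", bytesRead);
    await processChunk(chunk);
  }
}

My understanding is that this gives the same one-chunk-at-a-time behaviour without the manual wait/counter bookkeeping, but I may be missing something.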