I want to process a file chunk by chunk to prevent memory exhaustion. I need to consume the file using a read stream. When trying it, this implementation seems to work fine.
I am asking your expert eyes:
- have I overlooked anything or did I forget some case?
- have I made a mistake somewhere and it's going to bite me in prod?
- could this be improved?
The main code:
async function processFileByChunk(filePath) {
  try {
    const videoStream = fs.createReadStream(filePath);
    const stats = fs.statSync(filePath);
    await new Promise((resolve, reject) => {
      let bytesRead = 0;
      let countCurrentUploads = 0;
      videoStream.on("readable", async function () {
        while (true) {
          await wait(() => countCurrentUploads <= 0, 1000);
          const chunk = videoStream.read(16 * 1024 * 1024);
          if (!chunk || !chunk.length) {
            break;
          }
          bytesRead += chunk.length;
          console.log("bytesRead", bytesRead);
          countCurrentUploads++;
          await processChunk(chunk);
          countCurrentUploads--;
        }
        if (bytesRead >= stats.size) {
          resolve();
        }
      });
      videoStream.on("error", function (error) {
        reject(error);
      });
    });
  } catch (error) {
    console.log(error);
  }
}
Other functions:
async function processChunk(chunk) {
  console.log("process chunk...");
  await delay(2000);
  console.log("process chunk... done");
}
async function wait(fn, ms) {
  while (!fn()) {
    await delay(ms);
  }
}
function delay(ms) {
  return new Promise(resolve => setTimeout(resolve, ms));
}
Applied on a ~63MB file, it prints out:
bytesRead 16777216
process chunk...
process chunk... done
bytesRead 33554432
process chunk...
process chunk... done
bytesRead 50331648
process chunk...
process chunk... done
bytesRead 63598316
process chunk...
process chunk... done

