I'm trying to write a scraping script with node.js, cheerio and puppeteer.
The issue I'm having at the moment is trying to get the writeFile() function to work correctly.
const puppeteer = require('puppeteer');
const $ = require('cheerio');
const fs = require('fs');
const url = 'https://www.made-up-url.com/';
puppeteer
.launch()
.then(function(browser) {
return browser.newPage();
})
.then(function(page) {
return page.goto(url).then(function() {
return page.content();
});
})
.then(function(html) {
let scrapedTitle = $('h1', html).each(function() {
return $(this).text();
});
let scrapedDesc = $('#something-else > div > p', html).each(function() {
return $(this).text();
});
// Both console logs are outputted as strings, as expected
console.log(scrapedTitle[0].children[0].data); // "Some title"
console.log(scrapedDesc[0].children[0].data); // "The description is like this."
fs.writeFile("data.json", JSON.stringify(scrapedTitle[0].children[0].data), 'utf8', function(err) {
if(err) {
return console.log(err);
}
console.log("The data has been scraped and saved successfully! View it at './data.json'");
});
process.exit();
})
.catch(function(err) {
console.log(err);
process.exit();
});
writeFile() has worked in the past with something like this fs.writeFile("data.json", JSON.stringify(html), 'utf8', function(err) {, but now this appears to not be working either. When I run this script the file is created, as expected, but it is always empty, even when the two console.log() shows there should be valid strings to write. There are no errors. Any ideas please?