Your function `getCompanies` returns from inside the `for` loop during its first iteration. A `return` ends the function immediately, so the remaining iterations are never executed. That is why you only get the results for the first page.
To fix this, create a buffer array outside the `for` loop, append each page's results to it inside the loop, and return the array only after the loop has finished.
For example (without testing your code):
/**
 * Scrapes every listing page in `baseUrl` and collects one record per
 * `h3 > a` link found on those pages.
 *
 * Listing pages are fetched one after another; the detail pages linked from a
 * single listing page are requested concurrently and awaited together.
 *
 * @returns {Promise<Array<Object>>} Records with `title`, `rating` and `name`
 *   keys, accumulated across all pages in page order.
 */
const getCompanies = async () => {
  const results = [];
  // `for...of` with `const` replaces the undeclared counter `n`, which would
  // be an implicit global (and a ReferenceError in strict mode / ES modules).
  for (const pageUrl of baseUrl) {
    const html = await rp(pageUrl);
    const businessMap = cheerio('h3 > a', html).map(async (i, e) => {
      const link = `https://clutch.co${e.attribs.href}`;
      const innerHtml = await rp(link);
      // Collapse runs of whitespace into one space. The previous pattern
      // /ss+/ matched the literal letters "ss" and mangled titles.
      const title = cheerio('.h2_title', innerHtml).text().replace(/\s\s+/g, ' ');
      const rating = cheerio('#summary_section > div > div.col-md-6.summary-description > div:nth-child(2) > div.col-md-9 > div > span', innerHtml).text();
      const name = e.children[0].data;
      return { title, rating, name };
    }).get();
    const pageResults = await Promise.all(businessMap);
    // `concat` returns a new array and leaves `results` untouched, so its
    // result was being thrown away; `push` appends in place.
    results.push(...pageResults);
  }
  return results;
};
I am very new to coding and am currently working through some web scraping tutorials.
I have an issue in my code: I want to write all results to a CSV file, but it only writes the results from the first page.
// HTTP client; called below as rp(url) and awaited to obtain a page's HTML.
const rp = require('request-promise');
// CSV writer; instantiated below with the result array and saved via toDisk().
const otcsv = require('objects-to-csv');
// HTML parser; called below as cheerio(selector, html).
const cheerio = require('cheerio');
// NOTE(review): Console is not referenced anywhere in the code shown here.
const { Console } = require('console');
// Listing pages to scrape, one URL per results page.
const baseUrl = ['https://clutch.co/directory/mobile-application-developers?page=1','https://clutch.co/directory/mobile-application-developers?page=2'];
// Fetches the listing pages in baseUrl and builds one { title, rating, name }
// record for each 'h3 > a' link, requesting the linked detail page for each.
const getCompanies = async () => {
// NOTE(review): n is never declared, so this assigns an implicit global
// (a ReferenceError in strict mode).
for (n = 0; n < baseUrl.length; n++) {
const html = await rp(baseUrl[n]);
// The async callback yields one promise per matched link; .get() is called on
// the mapped result before the promises are handed to Promise.all below.
const businessMap = cheerio('h3 > a', html).map(async (i, e) => {
const link = "https://clutch.co" + e.attribs.href;
const innerHtml = await rp(link);
// NOTE(review): /ss+/ matches a literal "s" followed by one or more "s";
// presumably /\s\s+/ (runs of whitespace) was intended — confirm.
let title = cheerio('.h2_title', innerHtml).text().replace(/ss+/g, ' ');
let rating = cheerio('#summary_section > div > div.col-md-6.summary-description > div:nth-child(2) > div.col-md-9 > div > span', innerHtml).text();
let name = e.children[0].data;
//console.log(name)
return {
title,
rating,
name
}
}).get();
// NOTE(review): this return sits inside the for loop, so the function exits
// during the first iteration and only baseUrl[0] is ever processed.
return Promise.all(businessMap);
}
};
// Run the scraper, write the collected records to ./output.csv, then log 'Done'.
getCompanies()
  .then((result) => {
    const transformed = new otcsv(result);
    return transformed.toDisk('./output.csv');
  })
  .then(() => console.log('Done'))
  // Without a rejection handler, a failed request or file write would only
  // surface as an unhandled promise rejection.
  .catch((err) => {
    console.error('Scraping failed:', err);
    process.exitCode = 1;
  });
One more question: can anyone please explain this line?
const businessMap = cheerio('h3 > a', html).map(async (i, e) => {
Thanks in advance
Can you please show me an example? I am very new to coding. @mattigrthr
Hey @ArunkumarS, sure, I added one. Let me know if that works for you now.