init
This commit is contained in:
commit
6019c7eec8
|
|
@ -0,0 +1 @@
|
||||||
|
node_modules
|
||||||
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,17 @@
|
||||||
|
{
|
||||||
|
"name": "sd",
|
||||||
|
"version": "1.0.0",
|
||||||
|
"description": "",
|
||||||
|
"main": "test.js",
|
||||||
|
"scripts": {
|
||||||
|
"test": "echo \"Error: no test specified\" && exit 1"
|
||||||
|
},
|
||||||
|
"author": "",
|
||||||
|
"license": "ISC",
|
||||||
|
"dependencies": {
|
||||||
|
"cherio": "^1.0.0-rc.2",
|
||||||
|
"puppeteer": "^23.0.2",
|
||||||
|
"puppeteer-extra": "^3.3.6",
|
||||||
|
"puppeteer-extra-plugin-stealth": "^2.11.2"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,140 @@
|
||||||
|
const puppeteerExtra = require('puppeteer-extra')
|
||||||
|
const cheerio = require('cherio')
|
||||||
|
const stealthPlugin = require('puppeteer-extra-plugin-stealth')
|
||||||
|
|
||||||
|
/**
 * Builds the Google Maps search URL for a free-text query.
 *
 * Spaces become `+` (as before); every other reserved character is
 * percent-encoded so queries containing `&`, `?`, `#`, `/` or non-ASCII
 * text no longer corrupt the URL.
 *
 * @param {string} query - Free-text search, e.g. "toko golf jakarta".
 * @returns {string} Absolute search URL.
 */
function buildSearchUrl(query) {
  const encodedQuery = encodeURIComponent(query).replace(/%20/g, "+");
  return `https://www.google.com/maps/search/${encodedQuery}`;
}

/**
 * Extracts the "ChI…" identifier embedded in a Google Maps place URL.
 *
 * @param {string|undefined} url - href of a place link.
 * @returns {string|null} The identifier, or null when the URL is missing or
 *   contains no "ChI" marker (previously this produced "ChIundefined").
 */
function extractPlaceId(url) {
  // Ignore the query string, then take what follows the first "ChI" marker.
  const idTail = url?.split("?")[0].split("ChI")[1];
  return idTail ? `ChI${idTail}` : null;
}

/**
 * Parses a rating label of the form "<stars> stars <count> Reviews".
 *
 * @param {string|undefined} ratingText - aria-label text of the rating span.
 * @returns {{stars: number|null, numberOfReviews: number|null}} Parsed values;
 *   a field is null when it is absent or not numeric.
 */
function parseRating(ratingText) {
  const [starsPart, reviewsPart] = (ratingText ?? "").split("stars");

  const starsText = starsPart.trim();
  const stars = starsText ? Number(starsText) : null;

  // Keep digits only so grouped counts such as "1,234" parse as 1234, not NaN.
  const reviewDigits = (reviewsPart ?? "").replace(/\D/g, "");

  return {
    stars: Number.isFinite(stars) ? stars : null,
    numberOfReviews: reviewDigits ? Number(reviewDigits) : null,
  };
}

/**
 * Scrolls the results feed (`div[role="feed"]`) until its height stops growing.
 *
 * Each round scrolls the feed in `distance`-pixel steps, waits `settleDelay`
 * for more results to load, and stops once the scroll height did not increase.
 * The rounds run strictly one after another, so no two scroll passes overlap.
 *
 * @param {object} page - Puppeteer page showing the search results.
 * @param {object} [options]
 * @param {number} [options.distance=1000] - Pixels scrolled per step.
 * @param {number} [options.stepDelay=200] - Milliseconds between steps.
 * @param {number} [options.settleDelay=3000] - Milliseconds to wait for new content.
 * @returns {Promise<boolean>} false when the feed element is not on the page,
 *   true once the feed stopped growing.
 */
async function autoScroll(
  page,
  { distance = 1000, stepDelay = 200, settleDelay = 3000 } = {}
) {
  // The callback runs inside the browser page, so everything it needs is
  // passed in as a serializable argument rather than captured by closure.
  return page.evaluate(
    async (opts) => {
      const feed = document.querySelector('div[role="feed"]');
      if (!feed) {
        return false;
      }

      const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

      for (;;) {
        const heightBefore = feed.scrollHeight;

        for (let scrolled = 0; scrolled < heightBefore; scrolled += opts.distance) {
          feed.scrollBy(0, opts.distance);
          await sleep(opts.stepDelay);
        }

        await sleep(opts.settleDelay);

        // No more content loaded, stop scrolling.
        if (feed.scrollHeight <= heightBefore) {
          return true;
        }
      }
    },
    { distance, stepDelay, settleDelay }
  );
}

/**
 * Opens a browser, runs the Maps search, scrolls through the results and
 * returns the page HTML. The browser is always closed, even on failure.
 *
 * @param {string} query - Free-text search.
 * @returns {Promise<string|null>} Page HTML, or null when navigation failed.
 */
async function fetchResultsHtml(query) {
  const browser = await puppeteerExtra.launch({
    headless: false,
    // Left empty as in the original configuration.
    // NOTE(review): presumably falls back to the default browser — confirm.
    executablePath: "",
  });

  try {
    const page = await browser.newPage();

    try {
      await page.goto(buildSearchUrl(query));
    } catch (error) {
      // Nothing useful can be scraped from a page that never loaded.
      console.log("error going to page", error?.message ?? error);
      return null;
    }

    const feedFound = await autoScroll(page);
    if (!feedFound) {
      console.log('results feed div[role="feed"] not found, skipping scroll');
    }

    return await page.content();
  } finally {
    await browser.close();
    console.log("browser closed");
  }
}

/**
 * Extracts one record per place link found in the results HTML.
 *
 * @param {string} html - HTML of the search results page.
 * @returns {Array<object>} Records with placeId, address, category, phone,
 *   googleUrl, bizWebsite, storeName, ratingText, stars and numberOfReviews.
 */
function parseBusinesses(html) {
  const $ = cheerio.load(html);

  // Collect the parent of every anchor whose href points at /maps/place/.
  const cards = [];
  $("a").each((i, el) => {
    const href = $(el).attr("href");
    if (href?.includes("/maps/place/")) {
      cards.push($(el).parent());
    }
  });

  console.log("parents", cards.length);

  const splitOnDot = (text) => text.split("·").map((part) => part.trim());

  return cards.map((card) => {
    const url = card.find("a").attr("href");
    const website = card.find('a[data-value="Website"]').attr("href");
    const storeName = card.find("div.fontHeadlineSmall").text();
    const ratingText = card
      .find("span.fontBodyMedium > span")
      .attr("aria-label");

    // The last child of the first div.fontBodyMedium holds the detail rows:
    // its first child reads "category · address", its last "… · phone".
    const detailRows = card
      .find("div.fontBodyMedium")
      .first()
      .children()
      .last()
      .children();
    const [category, address] = splitOnDot(detailRows.first().text());
    const [, phone] = splitOnDot(detailRows.last().text());

    const { stars, numberOfReviews } = parseRating(ratingText);

    return {
      placeId: extractPlaceId(url),
      address,
      category,
      phone,
      googleUrl: url,
      bizWebsite: website,
      storeName,
      ratingText,
      stars,
      numberOfReviews,
    };
  });
}

/**
 * Searches Google Maps for a fixed query, scrolls through all results and
 * logs the extracted businesses together with the elapsed time.
 *
 * Failures are caught and logged rather than propagated to the caller.
 *
 * @returns {Promise<void>}
 */
async function searchGoogleMaps() {
  try {
    const start = Date.now();

    puppeteerExtra.use(stealthPlugin());

    const query = "toko golf jakarta";

    const html = await fetchResultsHtml(query);
    if (html === null) {
      return;
    }

    const buisnesses = parseBusinesses(html);

    const end = Date.now();

    console.log(`time in seconds ${Math.floor((end - start) / 1000)}`);

    console.log(buisnesses);
  } catch (error) {
    console.log("error at googleMaps", error?.message ?? error);
  }
}
|
||||||
|
|
||||||
|
// Script entry point: start the scrape when this file is run. The returned
// promise is not awaited here.
searchGoogleMaps()
|
||||||
Loading…
Reference in New Issue