This commit is contained in:
ZKRA000 2025-01-15 16:08:19 +07:00
commit 6019c7eec8
4 changed files with 3011 additions and 0 deletions

1
.gitignore vendored Normal file
View File

@ -0,0 +1 @@
node_modules

2853
package-lock.json generated Normal file

File diff suppressed because it is too large Load Diff

17
package.json Normal file
View File

@ -0,0 +1,17 @@
{
"name": "sd",
"version": "1.0.0",
"description": "",
"main": "test.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
},
"author": "",
"license": "ISC",
"dependencies": {
"cherio": "^1.0.0-rc.2",
"puppeteer": "^23.0.2",
"puppeteer-extra": "^3.3.6",
"puppeteer-extra-plugin-stealth": "^2.11.2"
}
}

140
test.js Normal file
View File

@ -0,0 +1,140 @@
const puppeteerExtra = require('puppeteer-extra')
const cheerio = require('cherio')
const stealthPlugin = require('puppeteer-extra-plugin-stealth')
/**
 * Build the Google Maps search URL for a free-text query.
 *
 * The query is percent-encoded so characters such as `#`, `?`, `&` or `/`
 * cannot truncate or alter the URL; spaces are rendered as `+`, which keeps
 * the URL for plain multi-word queries identical to a simple space-to-plus
 * substitution.
 *
 * @param {string} query - Search phrase, e.g. "toko golf jakarta".
 * @returns {string} Absolute search URL.
 */
function buildSearchUrl(query) {
  const encoded = encodeURIComponent(query).replace(/%20/g, "+");
  return `https://www.google.com/maps/search/${encoded}`;
}

/**
 * Extract the place id (the token starting with "ChI") from a place URL.
 *
 * Only the part of the URL before the query string is inspected.
 *
 * @param {string|undefined} url - Value of the result link's href.
 * @returns {string|null} The id including its "ChI" prefix, or null when the
 *   URL is missing or contains no such token.
 */
function parsePlaceId(url) {
  const suffix = url?.split("?")[0].split("ChI")[1];
  return suffix ? `ChI${suffix}` : null;
}

/**
 * Convert a text fragment to a number.
 *
 * @param {string|undefined} text - Fragment to convert.
 * @returns {number|null} The number, or null for missing, blank or
 *   non-numeric input.
 */
function toNumberOrNull(text) {
  const cleaned = text?.trim();
  if (!cleaned) {
    return null;
  }
  const value = Number(cleaned);
  return Number.isNaN(value) ? null : value;
}

/**
 * Parse a rating label of the form "<stars> stars <count> Reviews".
 *
 * Thousands separators in the count ("1,234") and the singular "Review" are
 * accepted. Anything that cannot be parsed yields null rather than NaN.
 *
 * @param {string|undefined} ratingText - The aria-label text of the rating.
 * @returns {{stars: number|null, numberOfReviews: number|null}}
 */
function parseRating(ratingText) {
  const [starsPart, reviewsPart] = ratingText?.split("stars") ?? [];
  const reviewsDigits = reviewsPart
    ?.replace(/Reviews?/i, "")
    .replace(/,/g, "");
  return {
    stars: toNumberOrNull(starsPart),
    numberOfReviews: toNumberOrNull(reviewsDigits),
  };
}

/**
 * Return the trimmed segment at `index` of a "·"-separated text.
 *
 * @param {string} text - Text such as "Category · Address".
 * @param {number} index - Zero-based segment index.
 * @returns {string|undefined} The segment, or undefined when absent.
 */
function dotSegment(text, index) {
  return text.split("·")[index]?.trim();
}

/**
 * Scroll the results feed until it stops growing.
 *
 * Runs inside the page. The feed is scrolled in fixed steps; each time the
 * accumulated distance reaches the feed's current height the loop pauses to
 * give more results a chance to load, and stops if the height did not grow.
 * The loop is strictly sequential, so the pause can never overlap with
 * further scroll steps. If the feed element is absent (for example when
 * navigation failed) the function returns immediately instead of waiting
 * forever.
 *
 * @param {object} page - Puppeteer page showing the search results.
 * @returns {Promise<void>}
 */
async function autoScroll(page) {
  await page.evaluate(async () => {
    const feed = document.querySelector('div[role="feed"]');
    if (!feed) {
      return;
    }
    const distance = 1000;
    const stepDelay = 200;
    const settleDelay = 3000;
    const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

    let scrolled = 0;
    for (;;) {
      const heightBefore = feed.scrollHeight;
      feed.scrollBy(0, distance);
      scrolled += distance;
      if (scrolled < heightBefore) {
        await sleep(stepDelay);
        continue;
      }
      // Reached the (current) bottom: wait, then see whether more loaded.
      scrolled = 0;
      await sleep(settleDelay);
      if (feed.scrollHeight <= heightBefore) {
        return;
      }
    }
  });
}

/**
 * Open a browser, load the search results for `query`, scroll through them
 * and return the page's HTML.
 *
 * The browser is always closed, including when scrolling or reading the
 * page content throws.
 *
 * @param {string} query - Search phrase.
 * @returns {Promise<string>} Serialized HTML of the results page.
 */
async function fetchResultsHtml(query) {
  puppeteerExtra.use(stealthPlugin());
  const browser = await puppeteerExtra.launch({
    headless: false,
    // NOTE(review): an empty path presumably makes Puppeteer fall back to
    // its default browser - confirm, or set an explicit path.
    executablePath: "",
  });
  try {
    const page = await browser.newPage();
    try {
      await page.goto(buildSearchUrl(query));
    } catch (error) {
      // Best effort: keep going so that the caller still gets a (probably
      // empty) result instead of an exception.
      console.log("error going to page", error.message);
    }
    await autoScroll(page);
    return await page.content();
  } finally {
    await browser.close();
    console.log("browser closed");
  }
}

/**
 * Extract one record per result card from the results page HTML.
 *
 * A card is the parent element of any link whose href contains
 * "/maps/place/". The CSS class names used below mirror the markup this
 * script was written against; presumably they change whenever Google
 * updates the page - verify when results come back empty.
 *
 * @param {string} html - Serialized HTML of the results page.
 * @returns {Array<object>} Extracted business records.
 */
function extractBusinesses(html) {
  const $ = cheerio.load(html);
  const cards = [];
  $("a").each((_, el) => {
    const href = $(el).attr("href");
    if (href?.includes("/maps/place/")) {
      cards.push($(el).parent());
    }
  });
  console.log("parents", cards.length);

  return cards.map((card) => {
    const url = card.find("a").attr("href");
    const website = card.find('a[data-value="Website"]').attr("href");
    const storeName = card.find("div.fontHeadlineSmall").text();
    const ratingText = card
      .find("span.fontBodyMedium > span")
      .attr("aria-label");

    // The last child of the first body block holds two rows:
    // "category · address" first and "... · phone" last.
    const detailRows = card
      .find("div.fontBodyMedium")
      .first()
      .children()
      .last()
      .children();
    const firstRowText = detailRows.first().text();
    const lastRowText = detailRows.last().text();

    const { stars, numberOfReviews } = parseRating(ratingText);
    return {
      placeId: parsePlaceId(url),
      address: dotSegment(firstRowText, 1),
      category: dotSegment(firstRowText, 0),
      phone: dotSegment(lastRowText, 1),
      googleUrl: url,
      bizWebsite: website,
      storeName,
      ratingText,
      stars,
      numberOfReviews,
    };
  });
}

/**
 * Search Google Maps for `query`, scrape the result list and print it.
 *
 * Never rejects: any failure is logged and the function resolves to
 * undefined.
 *
 * @param {string} [query="toko golf jakarta"] - Search phrase.
 * @returns {Promise<Array<object>|undefined>} The scraped records, or
 *   undefined when scraping failed.
 */
async function searchGoogleMaps(query = "toko golf jakarta") {
  try {
    const start = Date.now();
    const html = await fetchResultsHtml(query);
    const businesses = extractBusinesses(html);
    const end = Date.now();
    console.log(`time in seconds ${Math.floor((end - start) / 1000)}`);
    console.log(businesses);
    return businesses;
  } catch (error) {
    console.log("error at googleMaps", error.message);
    return undefined;
  }
}
// Script entry point: start the scrape as soon as the file is executed.
// The function catches and logs its own errors, so no handler is attached.
searchGoogleMaps();