Walker26
01/31/2025, 10:53 AM
import { PlaywrightCrawler } from 'crawlee';
/**
 * Reproduction script: crawls two HedgehogPortal pages with two separate
 * PlaywrightCrawler instances, one run after the other, and logs the URL and
 * page title of every handled request.
 *
 * NOTE(review): the two start URLs are identical up to the `#` and differ only
 * in the fragment. Crawlee drops the fragment when it derives a request's
 * default uniqueKey (unless `keepUrlFragment` or an explicit `uniqueKey` is
 * given), and both crawlers presumably share the default request queue, which
 * would explain a second run that finds nothing to process. The behavior is
 * intentionally left exactly as posted.
 *
 * @returns {Promise<void>} Resolves once both crawler runs have finished.
 */
export async function runExample() {
    const facilityUrl =
        'https://inspections.healthunit.com/HedgehogPortal/#/18fbee00-f0a3-49e3-b323-9153b6c4924c/disclosure/facility/3448568d-737b-4b41-ab63-1f2d7a2252b5';
    const inspectionUrl =
        'https://inspections.healthunit.com/HedgehogPortal/#/18fbee00-f0a3-49e3-b323-9153b6c4924c/disclosure/facility/3448568d-737b-4b41-ab63-1f2d7a2252b5/inspection/ac3196c5-13e6-486c-8b9c-b85dd019fc05';

    // Both crawlers are configured identically, so the options live in one place.
    const buildCrawler = () =>
        new PlaywrightCrawler({
            launchContext: {
                launchOptions: {
                    args: ['--ignore-certificate-errors'],
                },
            },
            requestHandler: async ({ request, page, log }) => {
                const pageTitle = await page.title();
                log.info(`URL: ${request.url}\nTITLE: ${pageTitle}`);
            },
        });

    // Create both instances up front, then run them strictly in sequence.
    const facilityCrawler = buildCrawler();
    const inspectionCrawler = buildCrawler();

    await facilityCrawler.run([facilityUrl]);
    await inspectionCrawler.run([inspectionUrl]);
}
runExample();
Hall
01/31/2025, 10:53 AMWalker26
01/31/2025, 10:53 AM
INFO PlaywrightCrawler: Starting the crawler.
INFO PlaywrightCrawler: URL: https://inspections.healthunit.com/HedgehogPortal/#/18fbee00-f0a3-49e3-b323-9153b6c4924c/disclosure/facility/3448568d-737b-4b41-ab63-1f2d7a2252b5
TITLE:
INFO PlaywrightCrawler: All requests from the queue have been processed, the crawler will shut down.
INFO PlaywrightCrawler: Final request statistics: {"requestsFinished":1,"requestsFailed":0,"retryHistogram":[1],"requestAvgFailedDurationMillis":null,"requestAvgFinishedDurationMillis":782,"requestsFinishedPerMinute":55,"requestsFailedPerMinute":0,"requestTotalDurationMillis":782,"requestsTotal":1,"crawlerRuntimeMillis":1083}
INFO PlaywrightCrawler: Finished! Total 1 requests: 1 succeeded, 0 failed. {"terminal":true}
INFO PlaywrightCrawler: Starting the crawler.
INFO PlaywrightCrawler: All requests from the queue have been processed, the crawler will shut down.
INFO PlaywrightCrawler: Final request statistics: {"requestsFinished":0,"requestsFailed":0,"retryHistogram":[],"requestAvgFailedDurationMillis":null,"requestAvgFinishedDurationMillis":null,"requestsFinishedPerMinute":0,"requestsFailedPerMinute":0,"requestTotalDurationMillis":0,"requestsTotal":0,"crawlerRuntimeMillis":238}
INFO PlaywrightCrawler: Finished! Total 0 requests: 0 succeeded, 0 failed. {"terminal":true}
HonzaS
01/31/2025, 11:32 AMjavascript
// Give the crawler its own named RequestList instead of passing URLs to run(),
// so its start URL is tracked separately from any other crawler's requests.
// `RequestList` is exported by the same 'crawlee' package as PlaywrightCrawler.
const requestList1 = await RequestList.open('my-request-list1', [
    'https://inspections.healthunit.com/HedgehogPortal/#/18fbee00-f0a3-49e3-b323-9153b6c4924c/disclosure/facility/3448568d-737b-4b41-ab63-1f2d7a2252b5',
]);
const crawler1 = new PlaywrightCrawler({
    requestList: requestList1,
    requestHandler: async ({ request, page, log }) => {
        // ... handler body elided in the original message ...
    },
});
// No arguments: the start URLs come from the request list above.
await crawler1.run();
Walker26
01/31/2025, 4:46 PM
import { createPlaywrightRouter, PlaywrightCrawler } from 'crawlee';
/**
 * Second attempt at the reproduction: a single PlaywrightCrawler driven by a
 * router. The default route logs the page and then tries to enqueue the
 * inspection page under the 'ROUTE_2' label; the 'ROUTE_2' route only logs.
 *
 * NOTE(review): the enqueued URL differs from the start URL only in its
 * `#fragment`. Crawlee ignores the fragment when computing the default
 * uniqueKey, so the request is presumably dropped as a duplicate of the start
 * URL. The behavior is intentionally left exactly as posted.
 *
 * @returns {Promise<void>} Resolves once the crawler run has finished.
 */
export async function runExample() {
    const facilityUrl =
        'https://inspections.healthunit.com/HedgehogPortal/#/18fbee00-f0a3-49e3-b323-9153b6c4924c/disclosure/facility/3448568d-737b-4b41-ab63-1f2d7a2252b5';
    const inspectionUrl =
        'https://inspections.healthunit.com/HedgehogPortal/#/18fbee00-f0a3-49e3-b323-9153b6c4924c/disclosure/facility/3448568d-737b-4b41-ab63-1f2d7a2252b5/inspection/ac3196c5-13e6-486c-8b9c-b85dd019fc05';

    // Shared by both routes: log the request URL together with the page title.
    const logPageTitle = async ({ page, log, request }) => {
        const pageTitle = await page.title();
        log.info(`URL: ${request.url}\nTITLE: ${pageTitle}`);
    };

    const router = createPlaywrightRouter();

    router.addDefaultHandler(async (context) => {
        const { enqueueLinks } = context;
        await logPageTitle(context);
        await enqueueLinks({
            urls: [inspectionUrl],
            label: 'ROUTE_2',
        });
    });

    router.addHandler('ROUTE_2', async (context) => {
        await logPageTitle(context);
    });

    const crawler = new PlaywrightCrawler({
        launchContext: {
            launchOptions: {
                args: ['--ignore-certificate-errors'],
            },
        },
        requestHandler: router,
    });

    await crawler.run([facilityUrl]);
}
runExample();
However, it doesn't seem to enqueue the links for the second route either. I have a similar setup for another site and it works fine, but this one doesn't. Any ideas?
HonzaS
01/31/2025, 4:56 PMjavascript
import { createPlaywrightRouter, PlaywrightCrawler } from 'crawlee';
/**
 * Crawls the facility page and, from its handler, queues the inspection page
 * under the 'ROUTE_2' label so that a dedicated route handles it.
 *
 * Both pages live in a hash-routed single-page app: their URLs are identical
 * up to the `#` and differ only in the fragment. Crawlee removes the fragment
 * when it derives a request's default uniqueKey, so without an explicit key the
 * inspection page is deduplicated against the start URL and never handled.
 *
 * @returns {Promise<void>} Resolves once the crawler run has finished.
 */
export async function runExample() {
    const testPage1 =
        'https://inspections.healthunit.com/HedgehogPortal/#/18fbee00-f0a3-49e3-b323-9153b6c4924c/disclosure/facility/3448568d-737b-4b41-ab63-1f2d7a2252b5';
    const testPage2 =
        'https://inspections.healthunit.com/HedgehogPortal/#/18fbee00-f0a3-49e3-b323-9153b6c4924c/disclosure/facility/3448568d-737b-4b41-ab63-1f2d7a2252b5/inspection/ac3196c5-13e6-486c-8b9c-b85dd019fc05';

    const router = createPlaywrightRouter();

    // Default route: handles the start URL, then queues the inspection page.
    router.addDefaultHandler(async (params) => {
        const { page, log, request, crawler } = params;
        const title = await page.title();
        log.info(`URL: ${request.url}\nTITLE: ${title}`);
        await crawler.addRequests([
            {
                label: 'ROUTE_2',
                url: testPage2,
                // Use the full URL (fragment included) as the dedup key;
                // the default key would collide with testPage1.
                uniqueKey: testPage2,
            },
        ]);
    });

    // ROUTE_2: handles the inspection page queued by the default route.
    router.addHandler('ROUTE_2', async (params) => {
        const { page, log, request } = params;
        const title = await page.title();
        log.info(`URL: ${request.url}\nTITLE: ${title}`);
    });

    const crawler1 = new PlaywrightCrawler({
        requestHandler: router,
        launchContext: {
            launchOptions: {
                // The target site is crawled with certificate errors ignored.
                args: ['--ignore-certificate-errors'],
            },
        },
    });

    await crawler1.run([testPage1]);
}
runExample();
Walker26
01/31/2025, 5:05 PMHonzaS
01/31/2025, 5:08 PM#
HonzaS
01/31/2025, 5:08 PMjavascript
// Crawlee ignores the URL fragment when it derives a request's default
// uniqueKey. Setting the key to the full URL keeps this hash-routed page from
// being treated as a duplicate of a URL that differs only after the `#`.
const route2Request = {
    url: testPage2,
    label: 'ROUTE_2',
    uniqueKey: testPage2,
};
await crawler.addRequests([route2Request]);
});
Walker26
01/31/2025, 5:09 PMWalker26
01/31/2025, 5:10 PM