Only the first crawler in the function runs
# crawlee-js
w
When I run the example below, only the first crawler (crawler1) runs; the second crawler (crawler2) does not work as intended. Running either crawler on its own works fine, and changing the URL to something completely different also works fine. Here is the example:
```javascript
import { PlaywrightCrawler } from 'crawlee';

export async function runExample() {
  const testPage1 =
    'https://inspections.healthunit.com/HedgehogPortal/#/18fbee00-f0a3-49e3-b323-9153b6c4924c/disclosure/facility/3448568d-737b-4b41-ab63-1f2d7a2252b5';

  const testPage2 =
    'https://inspections.healthunit.com/HedgehogPortal/#/18fbee00-f0a3-49e3-b323-9153b6c4924c/disclosure/facility/3448568d-737b-4b41-ab63-1f2d7a2252b5/inspection/ac3196c5-13e6-486c-8b9c-b85dd019fc05';

  const crawler1 = new PlaywrightCrawler({
    requestHandler: async ({ request, page, log }) => {
      const title = await page.title();
      log.info(`URL: ${request.url}\nTITLE: ${title}`);
    },
    launchContext: {
      launchOptions: {
        args: ['--ignore-certificate-errors'],
      },
    },
  });

  const crawler2 = new PlaywrightCrawler({
    requestHandler: async ({ request, page, log }) => {
      const title = await page.title();
      log.info(`URL: ${request.url}\nTITLE: ${title}`);
    },
    launchContext: {
      launchOptions: {
        args: ['--ignore-certificate-errors'],
      },
    },
  });

  await crawler1.run([testPage1]);
  await crawler2.run([testPage2]);
}

runExample();
```
w
Here is the output I get:
```
INFO  PlaywrightCrawler: Starting the crawler.
INFO  PlaywrightCrawler: URL: https://inspections.healthunit.com/HedgehogPortal/#/18fbee00-f0a3-49e3-b323-9153b6c4924c/disclosure/facility/3448568d-737b-4b41-ab63-1f2d7a2252b5
TITLE:
INFO  PlaywrightCrawler: All requests from the queue have been processed, the crawler will shut down.
INFO  PlaywrightCrawler: Final request statistics: {"requestsFinished":1,"requestsFailed":0,"retryHistogram":[1],"requestAvgFailedDurationMillis":null,"requestAvgFinishedDurationMillis":782,"requestsFinishedPerMinute":55,"requestsFailedPerMinute":0,"requestTotalDurationMillis":782,"requestsTotal":1,"crawlerRuntimeMillis":1083}
INFO  PlaywrightCrawler: Finished! Total 1 requests: 1 succeeded, 0 failed. {"terminal":true}
INFO  PlaywrightCrawler: Starting the crawler.
INFO  PlaywrightCrawler: All requests from the queue have been processed, the crawler will shut down.
INFO  PlaywrightCrawler: Final request statistics: {"requestsFinished":0,"requestsFailed":0,"retryHistogram":[],"requestAvgFailedDurationMillis":null,"requestAvgFinishedDurationMillis":null,"requestsFinishedPerMinute":0,"requestsFailedPerMinute":0,"requestTotalDurationMillis":0,"requestsTotal":0,"crawlerRuntimeMillis":238}
INFO  PlaywrightCrawler: Finished! Total 0 requests: 0 succeeded, 0 failed. {"terminal":true}
```
h
This is very nonstandard. I think that this way both crawlers share a single request queue. Try giving each crawler its own requestList/requestQueue in the options. Simple example:
```javascript
import { PlaywrightCrawler, RequestList } from 'crawlee';

const requestList1 = await RequestList.open('my-request-list1', [
  'https://inspections.healthunit.com/HedgehogPortal/#/18fbee00-f0a3-49e3-b323-9153b6c4924c/disclosure/facility/3448568d-737b-4b41-ab63-1f2d7a2252b5',
]);

const crawler1 = new PlaywrightCrawler({
  requestList: requestList1,
  requestHandler: async ({ request, page, log }) => {
    // ... handle the page
  },
});

await crawler1.run();
```
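For the requestQueue variant mentioned above, a minimal sketch along the same lines (my own illustration, not from the thread; the queue names 'facility-queue' and 'inspection-queue' are made up) gives each crawler its own named queue instead of the shared default one:

```javascript
import { PlaywrightCrawler, RequestQueue } from 'crawlee';

// Same two URLs as in the original example.
const testPage1 =
  'https://inspections.healthunit.com/HedgehogPortal/#/18fbee00-f0a3-49e3-b323-9153b6c4924c/disclosure/facility/3448568d-737b-4b41-ab63-1f2d7a2252b5';
const testPage2 = `${testPage1}/inspection/ac3196c5-13e6-486c-8b9c-b85dd019fc05`;

// Open a separate, named queue for each crawler so neither falls back to the
// shared default request queue. Note that named queues persist in ./storage
// between runs, so re-running may find the requests already handled.
const queue1 = await RequestQueue.open('facility-queue');
await queue1.addRequest({ url: testPage1 });

const queue2 = await RequestQueue.open('inspection-queue');
await queue2.addRequest({ url: testPage2 });

const makeCrawler = (requestQueue) =>
  new PlaywrightCrawler({
    requestQueue,
    requestHandler: async ({ request, page, log }) => {
      log.info(`URL: ${request.url}\nTITLE: ${await page.title()}`);
    },
  });

await makeCrawler(queue1).run();
await makeCrawler(queue2).run();
```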
w
Yeah, I agree. I was initially trying to do something like this:
```javascript
import { createPlaywrightRouter, PlaywrightCrawler } from 'crawlee';


export async function runExample() {
  const testPage1 =
    'https://inspections.healthunit.com/HedgehogPortal/#/18fbee00-f0a3-49e3-b323-9153b6c4924c/disclosure/facility/3448568d-737b-4b41-ab63-1f2d7a2252b5';

  const testPage2 =
    'https://inspections.healthunit.com/HedgehogPortal/#/18fbee00-f0a3-49e3-b323-9153b6c4924c/disclosure/facility/3448568d-737b-4b41-ab63-1f2d7a2252b5/inspection/ac3196c5-13e6-486c-8b9c-b85dd019fc05';

  const router = createPlaywrightRouter();
  router.addDefaultHandler(async (params) => {
    const { page, log, request, enqueueLinks } = params;

    const title = await page.title();
    log.info(`URL: ${request.url}\nTITLE: ${title}`);

    await enqueueLinks({
      label: 'ROUTE_2',
      urls: [testPage2],
    });
  });

  router.addHandler('ROUTE_2', async (params) => {
    const { page, log, request } = params;

    const title = await page.title();
    log.info(`URL: ${request.url}\nTITLE: ${title}`);
  });

  const crawler1 = new PlaywrightCrawler({
    requestHandler: router,
    launchContext: {
      launchOptions: {
        args: ['--ignore-certificate-errors'],
      },
    },
  });

  await crawler1.run([testPage1]);
}

runExample();
```
However, it doesn't seem to enqueue the link for the second route either. I have a similar setup for another site and it works fine, but this one doesn't. Any ideas?
h
It should work like this:
```javascript
import { createPlaywrightRouter, PlaywrightCrawler } from 'crawlee';


export async function runExample() {
  const testPage1 =
    'https://inspections.healthunit.com/HedgehogPortal/#/18fbee00-f0a3-49e3-b323-9153b6c4924c/disclosure/facility/3448568d-737b-4b41-ab63-1f2d7a2252b5';

  const testPage2 =
    'https://inspections.healthunit.com/HedgehogPortal/#/18fbee00-f0a3-49e3-b323-9153b6c4924c/disclosure/facility/3448568d-737b-4b41-ab63-1f2d7a2252b5/inspection/ac3196c5-13e6-486c-8b9c-b85dd019fc05';

  const router = createPlaywrightRouter();
  router.addDefaultHandler(async (params) => {
    const { page, log, request, crawler } = params;

    const title = await page.title();
    log.info(`URL: ${request.url}\nTITLE: ${title}`);

    await crawler.addRequests([{
      label: 'ROUTE_2',
      url: testPage2,
    }]);
  });

  router.addHandler('ROUTE_2', async (params) => {
    const { page, log, request } = params;

    const title = await page.title();
    log.info(`URL: ${request.url}\nTITLE: ${title}`);
  });

  const crawler1 = new PlaywrightCrawler({
    requestHandler: router,
    launchContext: {
      launchOptions: {
        args: ['--ignore-certificate-errors'],
      },
    },
  });

  await crawler1.run([testPage1]);
}

runExample();
```
w
*edit: re-ran after copy-pasting again and it ran fine, but it still doesn't run the second route.
h
Oh, maybe it treats it as the same URL because of the `#`, so try adding one line here:
```javascript
await crawler.addRequests([{
  label: 'ROUTE_2',
  url: testPage2,
  // both URLs differ only after the '#', so without an explicit uniqueKey
  // the request is deduplicated against testPage1 and silently dropped
  uniqueKey: testPage2,
}]);
```
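The reason this helps: Crawlee derives each request's default uniqueKey from a normalized URL, and that normalization drops everything after the `#`, so both hash-routed URLs collapse to the same key and the second request is skipped as a duplicate. An alternative sketch (this assumes the keepUrlFragment request option; verify it against the Crawlee version you use) keeps the fragment when the key is computed:

```javascript
// Hypothetical alternative to the explicit uniqueKey above: keep the '#...'
// part when Crawlee computes the uniqueKey, so the two hash-routed URLs are
// no longer treated as the same request.
await crawler.addRequests([{
  label: 'ROUTE_2',
  url: testPage2,
  keepUrlFragment: true,
}]);
```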
w
That works! Thank you! I was losing my mind trying to debug the issue 😭
Really appreciate it!