Ad

How To Print An HTML Document Using Puppeteer?

I recently started crawling the web with Puppeteer. Below is the code I use to extract product names from a shopping site.

const puppeteer = require('puppeteer');

// Opens the Naver Shopping home page in a visible Chromium window, searches
// for "양말" ("socks" in Korean), and prints the text of the first ten
// product-title links found on the results page.
(async () => {

    const width = 1600;
    const height = 1040;

    // slowMo is documented as a delay in milliseconds, not a boolean flag.
    // 50 ms is an arbitrary, adjustable value.
    const option = { headless: false, slowMo: 50, args: [`--window-size=${width},${height}`] };

    const browser = await puppeteer.launch(option);

    // Everything after launch runs inside try/finally so the browser process
    // is closed even when a navigation, selector wait, or evaluate call throws.
    try {
        const page = await browser.newPage();
        await page.setViewport({ width, height });

        // page.goto() already resolves only after the navigation it starts has
        // finished loading, so a separate page.waitForNavigation() is not needed.
        await page.goto('https://shopping.naver.com/home/p/index.nhn');
        // NOTE(review): page.waitFor() is deprecated in later Puppeteer
        // releases -- confirm against the installed version.
        await page.waitFor(2000);

        // The search box is addressed by CSS class (note the leading dot).
        const searchBoxClass = 'co_srh_input';
        await page.type(`.${searchBoxClass}`, '양말', { delay: 100 });
        await page.keyboard.press('Enter');

        await page.waitFor(5000);
        await page.waitForSelector('div.info > a.tit');

        const stores = await page.evaluate(() => {
            const links = Array.from(document.querySelectorAll('div.info > a.tit'));
            // Keep only the first 10 products.
            return links.map((link) => link.innerText).slice(0, 10);
        });

        console.log(stores);
    } finally {
        await browser.close();
    }

})().catch((error) => {
    // Report the failure instead of leaving the promise rejection unhandled.
    console.error(error);
    process.exitCode = 1;
});

I have a question: how can I output the crawled results to an HTML document (without using a database)? Please include sample code in your explanation.

Ad

Answer

fs.writeFile()

You can use the following write_file function, which returns a Promise that resolves when fs.writeFile() succeeds and rejects when it fails.

Then, you can await the Promise from within your anonymous, asynchronous function and check whether or not the data was written to the file:

'use strict';

const fs = require('fs');
const puppeteer = require('puppeteer');

/**
 * Promise wrapper around the callback-based fs.writeFile().
 *
 * @param {string} file - Path of the file to write.
 * @param {string} data - Content to write, encoded as UTF-8.
 * @returns {Promise<boolean>} Resolves with `true` once the data is written.
 *   Rejects with the Error reported by fs.writeFile() when the write fails.
 */
const write_file = (file, data) => new Promise((resolve, reject) => {
  fs.writeFile(file, data, 'utf8', error => {
    if (error) {
      console.error(error);
      // Reject with the real Error (not a bare `false`) so callers can
      // inspect its code/message and keep the stack trace.
      reject(error);
    } else {
      resolve(true);
    }
  });
});

// Sample flow: collect the first ten product titles from the page and save
// them to example.html. The `// ...` markers stand for the browser/page setup
// and teardown code that this snippet leaves out.
(async () => {
  
  // ...
  
  const stores = await page.evaluate(() => {
    // Keep only the first 10 products.
    return Array.from(document.querySelectorAll('div.info > a.tit'), link => link.innerText).slice(0, 10);
  });
  
  // write_file() reports failure by rejecting, so `await` throws rather than
  // returning false; the failure therefore has to be handled with try/catch.
  // stores.toString() writes the titles as one comma-separated line.
  try {
    await write_file('example.html', stores.toString());
  } catch (error) {
    console.error('Error: Unable to write stores to example.html.');
  }
  
  // ...
  
})(); // The trailing () actually invokes the function; without it nothing runs.
Ad
source: stackoverflow.com
Ad