could not find expected browser chrome locally 问题解决

54 min read

could not find expected browser chrome locally 问题解决

puppeteer 运行时报错:Could not find browser revision 809590. Run "PUPPETEER_PRODUCT=firefox npm install" or "PUPPETEER_PRODUCT=firefox yarn install",

显然是没找到对应版本的浏览器。

GitHub 的 issues 里提供的解决方案试了一圈都没用。后来在官方 API 文档里发现:

createBrowserFetcher 是官方提供的浏览器版本管理工具,只需指定版本号(revision)就能下载对应版本的浏览器,download() 返回的 Promise 会给出安装后的可执行文件路径(executablePath)。

实例:

const puppeteer = require("puppeteer");
const browserFetcher = puppeteer.createBrowserFetcher();

/**
 * Downloads Chromium revision 809590 through the BrowserFetcher, launches it
 * from the downloaded executable, then disconnects and reconnects to the same
 * instance through its WebSocket endpoint.
 *
 * Any failure (download, launch, connect) is reported on stderr and sets a
 * non-zero exit code instead of surfacing as an unhandled promise rejection.
 */
(async () => {
  // Resolves with revision info that includes the installed executable path.
  const revisionInfo = await browserFetcher.download("809590");

  const browser = await puppeteer.launch({
    executablePath: revisionInfo.executablePath, // path to the downloaded Chrome executable
    headless: false, // false = show the browser window; set to true for headless mode
  });

  // Save the endpoint so that we can reconnect to Chromium later.
  const browserWSEndpoint = browser.wsEndpoint();
  // Disconnect from Chromium; the browser process itself keeps running.
  browser.disconnect();

  // Use the saved endpoint to re-establish the connection to Chromium.
  const browser2 = await puppeteer.connect({ browserWSEndpoint });
  // Chromium is left running here; uncomment the next line to shut it down.
  // await browser2.close();
})().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});

下面是一个更完整的示例(把 launch 参数中的 headless 设为 true 即可使用 headless 模式):

/**
 * Load blog.csdn.net articles into local files.
 */
const puppeteer = require('puppeteer');
const fs = require("fs");

// User agent string used to emulate an iPhone.
const userAgent = 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1';

// Output directory for the screenshot and the downloaded article text files.
const workPath = './contents';
// `recursive: true` makes this a no-op when the directory already exists,
// which avoids the check-then-create race of existsSync() + mkdirSync().
fs.mkdirSync(workPath, { recursive: true });

// Base URL whose feed list is crawled.
const rootUrl = 'https://blog.csdn.net/';
// Maximum wait, in milliseconds, after each scroll.
const maxWait = 100;
// Maximum number of scroll-to-bottom iterations.
const makLoop = 10;
/**
 * Crawl entry point.
 *
 * 1. Opens rootUrl in an emulated mobile page with image requests blocked.
 * 2. Scrolls to the bottom makLoop times so the feed can load more entries.
 * 3. Saves a full-page JPEG screenshot into workPath.
 * 4. Collects the blog links from the feed list and, one at a time, saves each
 *    article's title, URL and text content as a .txt file in workPath.
 *
 * A failure on a single article is logged and the crawl continues. The browser
 * is always closed at the end, and a fatal error sets a non-zero exit code.
 */
(async () => {
    // Set headless: true to hide the Chromium UI.
    const browser = await puppeteer.launch({headless: false});

    /** Opens a new tab configured as a mobile device that skips image downloads. */
    async function openMobilePage() {
        const newPage = await browser.newPage();
        await newPage.setUserAgent(userAgent);
        await newPage.setViewport({width: 414, height: 736});
        await newPage.setRequestInterception(true);
        // Block images; let every other request through.
        newPage.on('request', (request) => {
            if (request.resourceType() === 'image') {
                request.abort();
            } else {
                request.continue();
            }
        });
        return newPage;
    }

    /** Downloads one article into workPath; errors are logged, never thrown. */
    async function saveArticle(url) {
        console.log('processing url: ' + url);
        // Declared outside the try so the catch/finally clauses can see it.
        let tab;
        try {
            tab = await openMobilePage();
            await tab.goto(url);
            // Expand the article if it is collapsed behind a "read more" button.
            try {
                await tab.tap('.read_more_btn');
            } catch (err) {
                console.log('there\'s none read more button. No need to TAP');
            }
            const title = await tab.evaluate(() => document.querySelector('#article .article_title').innerText);
            const body = await tab.evaluate(() => document.querySelector('#article .article_content').innerText);
            const contents = 'TITLE: ' + title + '\nURL: ' + url + '\nCONTENTS: \n' + body;
            // Name the file after the last path segment of the final URL, so that
            // query strings and fragments never end up in the file name.
            const segments = new URL(tab.url()).pathname.split('/').filter(Boolean);
            const fileName = segments.pop() || 'article';
            fs.writeFileSync(`${workPath}/${fileName}.txt`, contents);
            console.log(title + " has been downloaded to local.");
        } catch (err) {
            console.log('url: ' + url + ' \n' + err.toString());
        } finally {
            // Always release the tab, even when the download failed.
            if (tab) {
                await tab.close().catch((closeErr) => {
                    console.log('failed to close tab for ' + url + ' \n' + closeErr.toString());
                });
            }
        }
    }

    try {
        const page = await openMobilePage();
        await page.goto(rootUrl);

        for (let i = 0; i < makLoop; i++) {
            try {
                await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
                // Used as a bounded pause: this wait is allowed to fail (for
                // example by exceeding maxWait), and the failure is only logged.
                await page.waitForNavigation({timeout: maxWait, waitUntil: ['networkidle0']});
            } catch (err) {
                console.log('scroll to bottom and then wait ' + maxWait + 'ms.');
            }
        }

        // `quality` only applies to JPEG, so the file extension matches the type.
        await page.screenshot({path: `${workPath}/screenshot.jpg`, fullPage: true, quality: 100, type: 'jpeg'});

        // Links to blog articles inside the feed list.
        const sel = '#feedlist_id li[data-type="blog"] h2 a';
        const hrefs = await page.$$eval(sel, (anchors) => anchors.map((anchor) => anchor.href));
        console.log('total links: ' + hrefs.length);

        // Process articles sequentially so only one extra tab is open at a time.
        for (const url of hrefs) {
            await saveArticle(url);
        }
    } finally {
        await browser.close();
    }
})().catch((err) => {
    console.error(err);
    process.exitCode = 1;
});