Phantomjs教你如何实现浏览器截图并上传截图文件

phantomjs

是一个无界面的webkit内核浏览器，你可以把它当作一个没有界面的Safari浏览器

主要是用来做自动化测试。但是可以实现访问一个url链接，对这个链接里面所有的内容进行截屏。

phantomjs可以实现的功能

无界面的网站测试、网页截屏、页面自动化、网络监控

mac环境下安装phantomjs

下载phantomjs。下载后直接解压，phantomjs是一个应用程序。解压后，可以看到phantomjs有一个example的文件夹，有很多相关例子可以参考，包括计算链接加载时间、child_process等相关功能。将phantomjs设置为全局：配置环境变量，将下面指令复制到 ~/.bash_profile

export PATH="$PATH:/Users/kai/phantomjs/bin/"

判断是否成功

phantomjs --version 显示phantomjs版本号

phantomjs实现截屏

在命令行直接执行 phantomjs test.js 就可以获取到百度网页的截图。

// test.js — run with `phantomjs test.js` to save a screenshot of Baidu's
// home page as "hello world.jpg" in the current working directory.
// NOTE: this file is executed by PhantomJS, not Node, so it sticks to ES5.
var page = require('webpage').create();

page.open('http://www.baidu.com', function(status) {
  if (status !== 'success') {
    console.log('error');
    phantom.exit(1);
    // phantom.exit() does not stop the current callback, so return explicitly
    // to avoid scheduling the render below for a page that failed to load.
    return;
  }

  // Wait 5 seconds before rendering so resources requested after the load
  // event have a chance to finish.
  window.setTimeout(function() {
    page.render('hello world.jpg', {format: 'jpeg', quality: '100'});
    console.log(status);
    phantom.exit();
  }, 5000);
});

在NodeJs环境下使用phantomjs

有一点要明确，phantomjs和NodeJs是两个独立的应用程序，因此在NodeJs中不能把phantomjs当作是NodeJs的模块去处理。在NodeJs中使用phantomjs 应该使用child_process模块，新建子进程，调用phantomjs，去执行文件的处理。child_process模块请参照NodeJs官方文档，主要功能就是实现对shell命令的处理

Node对phantomjs的封装

cnpm install --save-dev phantomjs-prebuilt

NodeJs下使用phantomjs实现网页截图

screencapture.coffee

path = require 'path'
spawn = require('child_process').spawn
uuid = require 'node-uuid'
ImageService = require '../image.coffee'
TemplateService = require '../template.coffee'
PhantomJs = require 'phantomjs-prebuilt'
PhantomJsExcutePath = PhantomJs.path

ScreenCaptureService = {
    # Spawns a PhantomJS child process that runs capture.js against the
    # preview URL and, once the child reports 'success' on stdout, hands the
    # rendered file over to updateAppTemplateSampleImg.
    #
    # params.previewUrl - page to capture; "?mobile=1&captureimg=1" is appended
    # params.height     - viewport height passed to capture.js (defaults to 1000)
    # params.billKey    - forwarded untouched to updateAppTemplateSampleImg
    regPhantomJsProgess: (params)->
        _this = @
        execPath = path.join(__dirname, 'capture.js')

        url = "#{params.previewUrl}?mobile=1&captureimg=1"
        height = params.height || 1000

        # A fresh UUID per capture serves as both the file name and the upload key.
        key = "#{uuid.v4()}"
        fileName = "#{key}.jpg"

        filePath = path.resolve(__dirname, "../../assets/dist/captureimg/#{fileName}")

        # spawn runs the binary shipped with phantomjs-prebuilt. The alternative,
        # spawn('phantomjs', ...), would require PhantomJS to be installed on
        # every server.
        # The handle is named `child` so it does not shadow Node's global
        # `process`; height is stringified because spawn arguments are strings.
        child = spawn(PhantomJsExcutePath, [execPath, url, filePath, String(height)])
        child.stdout.setEncoding('utf8')

        child.stdout.on "data", (data)->
            # Every console.log in capture.js arrives here, possibly several in
            # one chunk, so compare line by line instead of fusing the chunk.
            lines = data.split(/[\r\n]+/)
            if 'success' in lines
                # Post-processing after the capture, including the CDN upload.
                opts = {
                    key: key
                    billKey: params.billKey
                    filePath: filePath
                }
                _this.updateAppTemplateSampleImg opts

        child.stderr.on 'data', (data)->
            console.log("stderr"+data)

        # Without an 'error' listener a failed spawn (e.g. missing binary)
        # would surface as an unhandled 'error' event.
        child.on 'error', (err)->
            console.log('phantomjs-child process failed to start: ' + err)

        child.on 'exit', (code)->
            console.log('phantomjs-child process exited with code ' + code)

    # After a successful capture, uploads the image file to the CDN so it can
    # become the template's cover image.
    #
    # params.filePath - path of the rendered screenshot
    # params.key      - key passed to ImageService.uploadCaptureImg
    # params.billKey  - read here but not used by the code shown
    #                   (NOTE(review): presumably for the template update step)
    updateAppTemplateSampleImg: (params)->
        billKey = params.billKey
        filePath = params.filePath
        key = params.key
        # Upload the file.
        ImageService.uploadCaptureImg filePath, key, (err, result)->
            if err
                console.log err
                return
}

capture.js! 为什么不是capture.coffee？phantomjs只能执行js文件

// capture.js — executed by PhantomJS (not Node), so it sticks to ES5.
// Usage: phantomjs capture.js <url> <filePath> <height>
// Prints 'success' on stdout once the screenshot has been written; the
// parent process listens for that line.
var page = require('webpage').create();

var system = require('system');

var url = system.args[1];
var filePath = system.args[2];
var height = parseInt(system.args[3], 10);

page.viewportSize = {
  width: 320,
  // Fall back to 1000 when the height argument is missing or not numeric.
  height: isNaN(height) ? 1000 : height
};

page.open(url, function(status) {
  if (status !== 'success') {
    console.log('error');
    phantom.exit(1);
    // phantom.exit() does not stop the current callback, so return explicitly
    // to avoid scheduling the render below for a page that failed to load.
    return;
  }

  // Wait 5 seconds before rendering so resources requested after the load
  // event have a chance to finish.
  window.setTimeout(function() {
    page.render(filePath, {format: 'jpeg', quality: '100'});
    console.log(status);
    phantom.exit();
  }, 5000);
});

这样，就实现了Node下链接的截屏。

注意的几个地方

var system = require('system');这个模块不是Node的模块，这是phantomjs的模块。NodeJs和phantomjs没有半毛钱关系。
console.log(status),要特别注意！打印出的数据 才会进入 process.stdout.on "data", (data)->！如果没有console.log()，将没有返回数据。同时，如果在这个js中有很多个 console.log(), 都会将打印的数据当作是返回结果。

看到这里是不是以为大功告成了？我跟你港，你还是太年轻。

第一个问题：使用Node封装好的phantomjs如何能看到报错信息？你能保证截屏的效果能和你预期的一致么？NO. phantomjs对ES6特性支持不是很友好哦。前面已经说了，phantomjs是一个无界面的webkit内核浏览器，你可以把它当作一个没有界面的Safari浏览器。只要是浏览器就会有兼容性的问题

第二个问题：页面资源加载。能保证所有页面资源加载能快速加载完毕？尤其是图片的加载，当某个或者某些图片没有被加载完成，应不应该截图呢？NO.

碰到一个问题，是因为感觉所有配置和文档上一模一样，当请求百度，可以正确截图。而当请求一个作品链接，发现无论如何都截取不到这个链接的图片，错误信息也没有。如何调试呢？换用命令行，不用Node封装的phantomjs模块，直接执行 phantomjs capture.js，就能看到打印日志以及错误信息 ReferenceError: Can't find variable: Promise。谷歌查看issue，发现phantomjs版本对ES6特性不支持。解决方式：在页面入口文件引入 babel-polyfill
判断页面资源全部加载完毕之后，再去截屏。page.onResourceRequested 和 page.onResourceReceived
```
// Create the PhantomJS page object that the rest of this script configures and opens.
var page  = require('webpage').create();
```

// Executed by PhantomJS (not Node), so this sticks to ES5.
// Usage: phantomjs capture.js <url> <filePath> <height>
var system = require('system');

var url = system.args[1];
var filePath = system.args[2];
var height = parseInt(system.args[3], 10);

// How often to re-check for outstanding requests, and how long to wait at
// most before rendering anyway so a stuck request cannot hang the process.
var POLL_INTERVAL_MS = 500;
var MAX_WAIT_MS = 30000;

page.viewportSize = {
  width: 320,
  // Fall back to 1000 when the height argument is missing or not numeric.
  height: isNaN(height) ? 1000 : height
};

// Ids of requests that have been issued but have not finished yet.
var requestsArray = [];

// Removes a request id from the pending list. Guards against indexOf
// returning -1, because splice(-1, 1) would drop an unrelated pending request.
function forgetRequest(id) {
  var index = requestsArray.indexOf(id);
  if (index !== -1) {
    requestsArray.splice(index, 1);
  }
}

// Some pages lazy-load images, which are only requested once the scroll
// position reaches a certain range, so scroll far down to trigger them.
function scrollDown() {
  page.evaluate(function() {
    window.scrollTo(0, 10000);
  });
}

page.onResourceRequested = function(requestData, networkRequest) {
  requestsArray.push(requestData.id);
};

page.onResourceReceived = function(response) {
  // This callback fires for both the 'start' and 'end' stage of a response;
  // only 'end' means the resource has been fully received.
  if (response.stage === 'end') {
    forgetRequest(response.id);
  }
};

// Failed or timed-out requests must also be dropped from the pending list.
page.onResourceError = function(resourceError) {
  forgetRequest(resourceError.id);
};

page.onResourceTimeout = function(request) {
  forgetRequest(request.id);
};

page.open(url, function(status) {
  if (status !== 'success') {
    console.log('error');
    phantom.exit(1);
    // phantom.exit() does not stop the current callback, so return explicitly.
    return;
  }

  scrollDown();

  var startedAt = Date.now();
  var interval = setInterval(function() {
    scrollDown();

    var timedOut = Date.now() - startedAt >= MAX_WAIT_MS;
    if (requestsArray.length === 0 || timedOut) {
      clearInterval(interval);
      page.render(filePath, {format: 'jpeg', quality: '100'});
      console.log(status);
      phantom.exit();
    }
  }, POLL_INTERVAL_MS);
});



这就是phantomjs截屏。这只是phantomjs的一个功能，还有其他的功能，有待研究。

PaicFE / blog