Closed atian25 closed 6 years ago
After reading the source, I solved this by using `async` (code below).
The remaining question is whether the library should provide a sugar method, scraperPromise.request(optionFunction):
var expect = require('chai').expect;
var _ = require('lodash');
var URL = require('url');
var scraperjs = require('scraperjs');
var request = require('request');
// describe/it spec: fetch one page with scraperjs, then use the scraped value
// to build a second HTTP call (made with the `request` module) inside an
// async step, and finish the test once the chain reaches `then`.
describe('scraperjs', function(){
it('should chain', function(done){
var scraperPromise = scraperjs.StaticScraper.create();
scraperPromise
// First fetch.
.get('http://echo.jsontest.com/k1/v1')
// First callback returns whatever $.html() yields; the second callback
// returns its argument unchanged.
.scrape(function($){
return $.html();
}, function(result){
return result;
})
// Async step: read the previous value from utils.lastReturn and append it
// to the second URL.
.async(function(callback, utils) {
var result = utils.lastReturn;
request({
url: 'http://echo.jsontest.com/k2/' + result,
method: 'get'
}, function(error, response, body){
// NOTE(review): `error` is never checked, so a failed request still
// stores `body` and continues the chain.
// Replace lastReturn with both the first result and the second body.
utils.lastReturn = {
first: result,
last: body
};
// Presumably signals scraperjs that the async step is finished — confirm
// against the library docs.
callback();
});
})
.then(function(utils){
// NOTE(review): `result` is read but never asserted on; the test only
// verifies that the chain reaches this point.
var result = utils.lastReturn;
done();
})
});
});
It could be resolved by "unrelaxing" the promise chain. Right now the request and get promises can be declared at any point in the chain and will be executed first. The API would look like this,
scraper
.request(..) // or get
.scrape(..)
.request(..) // or get
.scrape(..)
I'll look into it.
After researching all of the source, I found that maybe I was wrong.
// Intended flow as written: get url1 -> scrape -> get url2 -> scrape.
// NOTE(review): the author reports that the chain does not run in this
// written order (both scrape handlers run after each get) — confirm against
// the scraperjs implementation.
var scraperPromise = scraperjs.StaticScraper.create();
scraperPromise
.get('http://echo.jsontest.com/k1/v1')
.scrape(function($){
return $.html();
})
.get('http://echo.jsontest.com/k2/v2')
.scrape(function($){
return $.html();
})
What I want this code to do is: get url1 -> scrape -> get url2 -> scrape.
But what actually happens is: get url1 -> exec scrape1 -> exec scrape2 -> get url2 -> exec scrape1 -> exec scrape2.
So:
`onStatusCode` / `scrape` / `then` are just ways to register process handlers;
`request` / `get` is the real main(), and this promise should be used only once per scraper.
// Register the process handlers first (`fn` and `url` appear to be
// placeholders in this sketch).
scraperPromise.scrape(fn, fn).onStatusCode(fn).onError(fn);
// Then start the work with a single get().
scraperPromise.get(url);
So we don't need to use the last scrape result to create request options; we can just create another scraper.
The docs should also separate the two kinds of methods (handler-registering methods vs. trigger methods).
Correct me if I am wrong.
Sometimes we need to use the last scrape result to create the request options.