Answer the question
In order to leave comments, you need to log in
NodeJS + jsDom synchronous call?
Once again I am going around in circles with my problem and cannot solve it.
The task itself is actually simple, but because of Node's asynchronous (rather than synchronous) nature I keep making the same mistakes and cannot get it to work.
The task boils down to something simple: there is an array of URLs. In a loop, using jsdom + jQuery, we parse each page, select all the URLs on it, and push them onto the end of the array so that the same loop processes them later.
Any ideas how to implement?
var jsdom = require("jsdom");
var fs = require("fs");
var jquery = fs.readFileSync("./jquery.js").toString();

var fields = ['http://some.url/page.html']

for(var b=0, len = fields.length;b<len;b++){

jsdom.env({
html: fields[b],
src: [jquery],
done: function (errors, window) {
var $ = window.$;
$("div.pager a").each(function() {
//Push new finded hyperlinks in array
fields.push($(this).attr('href'));
});

}
});
//recalculate actual length of array to make more loop
len=fields.length;
}
Answer the question
In order to leave comments, you need to log in
var jsdom = require('jsdom');
var fs = require('fs');
var jQuery = fs.readFileSync('jquery.js', 'utf8');
var fields = ['http://some.url/page.html']
// URLs that have already been queued or handed to jsdom. Pager links usually
// point back at earlier pages, so without this record the crawl never ends.
// Created without a prototype so any URL string is a safe key.
var visitedFields = Object.create(null);

/**
 * Loads one page with jsdom, queues every new "div.pager a" link found on it
 * into the shared `fields` array, then schedules the next queued URL.
 *
 * Start the crawl with `processNextField(fields.pop());`. The crawl stops by
 * itself once `fields` is empty.
 *
 * NOTE(review): hrefs are queued exactly as written in the page; relative
 * links are presumably not resolved against the page URL - confirm.
 *
 * @param {string} nextField URL of the page to load. Empty or missing values
 *     are ignored.
 */
var processNextField = function(nextField){
  if (!nextField) {
    return;
  }
  visitedFields[nextField] = true;

  jsdom.env({
    html: nextField,
    src: [jQuery],
    done: function (errors, window) {
      // A failed load may report errors and provide no usable window; skip
      // that page instead of throwing on `window.$` and killing the crawl.
      if (window && typeof window.$ === 'function') {
        var $ = window.$;
        $("div.pager a").each(function() {
          var href = $(this).attr('href');
          // Queue only real links that have not been seen before.
          if (href && !visitedFields[href]) {
            visitedFields[href] = true;
            fields.push(href);
          }
        });
      } else {
        console.error('Could not load ' + nextField, errors);
      }

      // Release the page's DOM so that long crawls do not pile up windows.
      if (window && typeof window.close === 'function') {
        window.close();
      }

      if (fields.length > 0) {
        // Defer the next page so the call stack unwinds between pages.
        process.nextTick(function(){
          processNextField( fields.pop() );
        });
      }
    }
  });
};
Sometimes I parse sites with Node.js, and for that I use node.io.
I do it in two passes: in the first run I collect the URLs into files; in the second run I feed the file of URLs through node.io's input/output and get what I need at the output.
Unlike jsdom, which leaks memory badly when running for a long time (and nothing can be done about it), node.io works without problems. Its wiki has many clear examples.
Note that the node.io parser only works with CSS1 selectors, so it is enough only if you don't need constructs such as "div > a"; if you do need them, jsdom can be included as well.
Inside done, you need to check if the work is finished and call callback ...
// One-level fan-out: load every URL currently in `fields` in parallel, collect
// the "div.pager a" links from each page, and call `callback(fields)` once all
// pages have reported back. Links discovered here are appended to `fields` but
// are NOT loaded themselves (`len` is fixed before the loop starts).
var waiting = fields.length;

// With nothing to load, no `done` would ever run, so report immediately.
if (waiting === 0) {
  callback(fields);
}

for (var b = 0, len = fields.length; b < len; b++) {
  jsdom.env({
    html: fields[b],
    src: [jquery],
    done: function (errors, window) {
      // A failed load may provide no usable window; skip the page rather
      // than throwing, otherwise `waiting` never reaches zero.
      if (window && typeof window.$ === 'function') {
        var $ = window.$;
        $("div.pager a").each(function() {
          //Push newly found hyperlinks into the array
          fields.push($(this).attr('href'));
        });
      } else {
        console.error('Could not load page', errors);
      }

      // Release the page's DOM once its links have been collected.
      if (window && typeof window.close === 'function') {
        window.close();
      }

      // The last page to finish hands the combined list to the caller.
      if (--waiting === 0) {
        callback(fields);
      }
    }
  });
}
I apologize for my previous answer - I did not realize that a recursive traversal was required.
Unfortunately, I do not know node.js and jsdom well enough to describe everything in more detail.
Didn't find what you were looking for?
Ask your questionAsk a Question
731 491 924 answers to any question