To retrieve a web page, do the following:
/**
 * Script entry point: retrieves a web page as a parsed document.
 *
 * @param {object} env - Host-provided environment; this script uses its
 *   `newJsoup()` and `newURL()` factory methods.
 * @param {*} args - Script arguments (not used by this example).
 */
function main(env, args) {
  var link = 'http://astore.amazon.com/paesia-20?node=22';
  // Timeout in milliseconds. The comment sits on its own line so it cannot
  // swallow the statements that follow it.
  var timeout = 60000;
  // `doc` holds the parsed page; it is not used further in this example.
  var doc = env.newJsoup().parse(env.newURL(link), timeout);
}
The 'doc' variable holds an object of the Document class.
With the 'doc' variable, data can be extracted as follows:
/**
 * Script entry point: retrieves a web page and logs the text and `href`
 * of every element matching the `#searchbrowse a` selector.
 *
 * @param {object} env - Host-provided environment; this script uses its
 *   `newJsoup()`, `newURL()` and `info()` methods.
 * @param {*} args - Script arguments (not used by this example).
 */
function main(env, args) {
  var link = 'http://astore.amazon.com/paesia-20?node=22';
  // Timeout in milliseconds. The comment sits on its own line so it cannot
  // swallow the statements that follow it.
  var timeout = 60000;
  var doc = env.newJsoup().parse(env.newURL(link), timeout);

  // `elements` is accessed through size()/get(i) rather than as a JS array,
  // so an index loop is used instead of array iteration helpers.
  var elements = doc.select('#searchbrowse a');
  var count = elements.size();
  for (var i = 0; i < count; i++) {
    var element = elements.get(i);
    var title = element.text();
    var url = element.attr('href');
    env.info('Title: ' + title + '\nUrl: ' + url);
  }
}
The 'elements' variable is an object of the Elements class.
The 'element' variable is an object of the Element class.
No comments:
Post a Comment