To retrieve a web page, do the following:
1 | function main(env, args) { |
2 | var link = 'http://astore.amazon.com/paesia-20?node=22'; |
3 | var timeout = 60000; // milliseconds |
4 | var doc = env.newJsoup().parse(env.newURL(link), timeout); |
5 | } |
/**
 * Retrieves the example web page and parses it through the Jsoup helper
 * obtained from `env`.
 *
 * @param env  host-provided object; this function calls env.newJsoup() and
 *             env.newURL() on it
 * @param args not used by this example
 */
function main(env, args) {
  var link = 'http://astore.amazon.com/paesia-20?node=22';
  var timeout = 60000; // milliseconds

  // Keep the comment above on its own line: a `//` comment runs to the end of
  // the line, so joining these statements onto one line disables the fetch.
  var doc = env.newJsoup().parse(env.newURL(link), timeout);
}
The 'doc' variable contains an object of the Document class.
With the 'doc' variable, data can be extracted as follows:
1 | function main(env, args) { |
2 | var link = 'http://astore.amazon.com/paesia-20?node=22'; |
3 | var timeout = 60000; // milliseconds |
4 | var doc = env.newJsoup().parse(env.newURL(link), timeout); |
5 | |
6 | var elements = doc.select('#searchbrowse a'); |
7 | for (var i = 0; i < elements.size(); i++) { |
8 | var element = elements.get(i); |
9 | var title = element.text(); |
10 | var url = element.attr('href'); |
11 | env.info('Title: ' + title + '\nUrl: ' + url); |
12 | } |
13 | } |
/**
 * Retrieves the example web page, selects the elements matching
 * '#searchbrowse a', and reports each one's text and 'href' attribute
 * through env.info().
 *
 * @param env  host-provided object; this function calls env.newJsoup(),
 *             env.newURL() and env.info() on it
 * @param args not used by this example
 */
function main(env, args) {
  var link = 'http://astore.amazon.com/paesia-20?node=22';
  var timeout = 60000; // milliseconds

  // Keep the comment above on its own line: a `//` comment runs to the end of
  // the line, so joining these statements onto one line disables everything
  // that follows it.
  var doc = env.newJsoup().parse(env.newURL(link), timeout);

  var elements = doc.select('#searchbrowse a');
  // The selection is not modified inside the loop, so read its size once.
  var count = elements.size();
  for (var i = 0; i < count; i++) {
    var element = elements.get(i);
    var title = element.text();
    var url = element.attr('href');
    env.info('Title: ' + title + '\nUrl: ' + url);
  }
}
The 'elements' variable is an object of the Elements class.
The 'element' variable is an object of the Element class.
No comments:
Post a Comment