Tuesday, 24 April 2012

Extract data from web page

To retrieve a web page, do the following:

1  function main(env, args) {
2    var link = 'http://astore.amazon.com/paesia-20?node=22';
3    var timeout = 60000; // milliseconds
4    var doc = env.newJsoup().parse(env.newURL(link), timeout);
5  }
/**
 * Script entry point: downloads and parses one web page.
 *
 * @param {Object} env - Host-provided object; this snippet calls its
 *     newJsoup() and newURL() methods.
 * @param {*} args - Script arguments (not used by this snippet).
 */
function main(env, args) {
  var pageAddress = 'http://astore.amazon.com/paesia-20?node=22';
  var timeoutMs = 60000; // how long parse() is allowed to take, in milliseconds

  // Obtain the parser first, then build the URL object, then parse.
  var jsoup = env.newJsoup();
  var pageUrl = env.newURL(pageAddress);

  // 'doc' is the parsed page; this snippet stops here without using it.
  var doc = jsoup.parse(pageUrl, timeoutMs);
}

The 'doc' variable holds an object of the Document class.

With the 'doc' variable, data can be extracted as follows:

 1  function main(env, args) {
 2    var link = 'http://astore.amazon.com/paesia-20?node=22';
 3    var timeout = 60000; // milliseconds
 4    var doc = env.newJsoup().parse(env.newURL(link), timeout);
 5
 6    var elements = doc.select('#searchbrowse a');
 7    for (var i = 0; i < elements.size(); i++) {
 8      var element = elements.get(i);
 9      var title = element.text();
10      var url = element.attr('href');
11      env.info('Title: ' + title + '\nUrl: ' + url);
12    }
13  }
/**
 * Script entry point: downloads a web page, selects the anchors matching
 * '#searchbrowse a', and logs each anchor's text and 'href' attribute.
 *
 * @param {Object} env - Host-provided object; this snippet calls its
 *     newJsoup(), newURL() and info() methods.
 * @param {*} args - Script arguments (not used by this snippet).
 */
function main(env, args) {
  var pageAddress = 'http://astore.amazon.com/paesia-20?node=22';
  var timeoutMs = 60000; // how long parse() is allowed to take, in milliseconds

  // Obtain the parser first, then build the URL object, then parse.
  var jsoup = env.newJsoup();
  var doc = jsoup.parse(env.newURL(pageAddress), timeoutMs);

  // 'elements' is the collection of matches; it is walked by index
  // through its size()/get() methods.
  var elements = doc.select('#searchbrowse a');
  var index = 0;
  while (index < elements.size()) {
    var element = elements.get(index);
    var linkText = element.text();
    var linkTarget = element.attr('href');
    env.info('Title: ' + linkText + '\nUrl: ' + linkTarget);
    index += 1;
  }
}

The 'elements' variable is an object of the Elements class.

The 'element' variable is an object of the Element class.

No comments:

Post a Comment