// See also: Orkut Friends Scraper

//
// See also: http://simile.mit.edu/wiki/Orkut_Friends_Scraper

//
// Collector is a small helper for scraping: it tracks a position in the
// source document and the URI of the item currently being described.
//

/**
 * Collector pairs two cursors used while scraping: a position in the source
 * DOM document (queried with XPath) and the URI of the subject currently
 * being described in the output model.
 *
 * The methods rely on names Collector does not define itself and that must
 * be supplied by the host environment: `data.addStatement`,
 * `utilities.trimString`, `utilities.gatherElementsOnXPath`, `log` and
 * `XPathResult`.
 */
function Collector() {
  this.uri = null;
  // Per-instance maps. They shadow the shared defaults on the prototype so
  // that two collectors never write into the same object.
  this.outputNamespaces = {};
  this.outboundVocabulary = {};
}

Collector.prototype = {

  // Our focus of attention in the source document.
  document: null,
  currentElement: null,
  currentElements: null,

  // Our focus of attention in the model being built: the subject used by the
  // assert* methods.
  uri: null,

  // Tools for picking apart the source document.

  // Namespace resolver passed to XPath evaluation; installed by setDoc.
  nsResolver: null,

  /**
   * Makes `doc` the document being scraped and resets the element cursor to
   * the document itself.
   *
   * When the document element has a namespace URI, a resolver is installed
   * that maps the prefix 'x' to that namespace (and every other prefix to
   * null); otherwise no resolver is used.
   *
   * @param doc DOM document to scrape.
   */
  setDoc: function(doc) {
    this.document = doc;
    this.currentElement = doc;
    this.currentElements = null;
    this.nsResolver = null;
    var namespace = doc.documentElement.namespaceURI;
    if (namespace) {
      this.nsResolver = function(prefix) {
        return (prefix == 'x') ? namespace : null;
      };
    }
  },

  /**
   * Stores in `currentElements` whatever utilities.gatherElementsOnXPath
   * returns for `xpath`, evaluated from the current element.
   *
   * @param xpath XPath expression selecting the elements to iterate over.
   */
  setElmts: function(xpath) {
    this.currentElements = utilities.gatherElementsOnXPath(
      this.document, this.currentElement, xpath, this.nsResolver);
  },

  /**
   * Visits `currentElements` from index `start` in increments of `step`.
   * Each visited element becomes `currentElement` before `func` is called.
   *
   * @param start Index of the first element to visit.
   * @param step  Index increment between visited elements.
   * @param func  Callback invoked as func(index, element); it is called
   *              without a `this` binding.
   */
  mapElmts: function(start, step, func) {
    var len = this.currentElements.length;
    for (var i = start; i < len; i += step) {
      this.currentElement = this.currentElements[i];
      func(i, this.currentElement);
    }
  },

  /**
   * Evaluates `xpath` relative to the current element and returns the first
   * node of the result (null when the result contains no node).
   *
   * @param xpath XPath expression.
   */
  getNode: function(xpath) {
    var result = this.document.evaluate(xpath,
                                        this.currentElement,
                                        this.nsResolver,
                                        XPathResult.ANY_TYPE,
                                        null);
    return result.iterateNext();
  },

  // Tools for assembling the model.

  // Shared default; every instance gets its own map in the constructor.
  outputNamespaces: {},

  /**
   * Registers `prefix` as shorthand for the namespace `urlNamestring`.
   * Names already expanded (and cached) by expandName are not revisited.
   */
  addNamespace: function(prefix, urlNamestring) {
    this.outputNamespaces[prefix] = urlNamestring;
  },

  // Shared default; every instance gets its own map in the constructor.
  // Caches the results of expandName, keyed by the unexpanded name.
  outboundVocabulary: {},

  /**
   * Expands a 'prefix:local' name to a full URI using the registered
   * namespaces, caching the result in `outboundVocabulary`.
   *
   * A name is returned unchanged when it does not have exactly one colon, or
   * when its prefix has not been registered. The latter keeps absolute URIs
   * such as 'http://...' or 'mailto:...' intact instead of producing
   * 'undefined...'. Names with an unregistered prefix are not cached, so
   * they expand properly once the prefix is added.
   *
   * @param name Prefixed name or plain string.
   * @returns The expanded name.
   */
  expandName: function(name) {
    // Own-property checks keep inherited members such as 'constructor' from
    // being mistaken for vocabulary entries or namespace prefixes.
    var hasOwn = Object.prototype.hasOwnProperty;
    var expanded = hasOwn.call(this.outboundVocabulary, name)
      ? this.outboundVocabulary[name]
      : null;
    if (expanded) {
      return expanded;
    }

    var parts = name.split(':');
    if (parts.length != 2) {
      expanded = name;
    } else if (hasOwn.call(this.outputNamespaces, parts[0])) {
      expanded = this.outputNamespaces[parts[0]] + parts[1];
    } else {
      return name;
    }
    this.outboundVocabulary[name] = expanded;
    return expanded;
  },

  /**
   * Adds a statement about the current subject whose value is `text`.
   * The final flag passed to data.addStatement is true here and false in the
   * URI-valued asserts (presumably "value is a literal" -- confirm against
   * data.addStatement).
   */
  assertText: function(propertyName, text) {
    data.addStatement(this.uri,
                      this.expandName(propertyName),
                      text,
                      true);
  },

  /**
   * Adds a statement about the current subject whose value is the URI text
   * `uriText`.
   */
  assertRelation: function(propertyName, uriText) {
    data.addStatement(this.uri,
                      this.expandName(propertyName),
                      uriText,
                      false);
  },

  /**
   * Looks up the node at `selector` and, when found, adds a statement whose
   * value is the node's trimmed `src`. Any error raised on the way is logged
   * rather than propagated.
   *
   * @param propertyName Property to assert (expanded with expandName).
   * @param selector     XPath expression locating the image node.
   */
  assertNodeImage: function(propertyName, selector) {
    try {
      var imageNode = this.getNode(selector);
      if (imageNode) {
        data.addStatement(this.uri,
                          this.expandName(propertyName),
                          utilities.trimString(imageNode.src),
                          false);
      }
    } catch (e) {
      log("Error seeking <" + this.uri + ">'" + propertyName + ": " + e);
    }
  },

  /**
   * Sets the current subject URI from the trimmed `href` of the node at
   * `xpath`, then states that the subject has rdf:type `typeName`.
   *
   * NOTE: when the lookup fails the error is only logged; the type statement
   * is still emitted against whatever `uri` held before the call.
   *
   * @param xpath    XPath expression locating the anchor.
   * @param typeName Type name (expanded with expandName).
   */
  setURIFromAnchor: function(xpath, typeName) {
    try {
      this.uri = utilities.trimString(this.getNode(xpath).href);
    } catch (e) {
      log(e);
    }
    data.addStatement(this.uri,
                      this.expandName('rdf:type'),
                      this.expandName(typeName),
                      false);
  }
};

//
// Now routines specific to the Orkut Friends page.
//

// The collector instance shared by the Orkut routines, primed with the
// namespace prefixes that appear in the property names they assert.
var c = new Collector();
(function (namespaces) {
  for (var k = 0; k < namespaces.length; k++) {
    c.addNamespace(namespaces[k][0], namespaces[k][1]);
  }
})([
  ['rdf',  'http://www.w3.org/1999/02/22-rdf-syntax-ns#'],
  ['rdfs', 'http://www.w3.org/2000/01/rdf-schema#'],
  ['dc',   'http://purl.org/dc/elements/1.1/'],
  ['foaf', 'http://xmlns.com/foaf/0.1/'],
  ['loc',  'http://simile.mit.edu/2005/05/ontologies/location#']
]);

// function to collect one friend.
// Collects one friend from the table row that is the collector's current
// element. Rows without an anchor in the second cell are ignored. The row's
// name, address, e-mail and picture are each asserted only when the
// corresponding node is present, so one incomplete row no longer throws and
// aborts the whole scrape.
//
// i: row index supplied by mapElmts (unused).
c.collectFriend = function (i) {
  var anchorPath = './TD[2]/A[1]';

  // Returns the nodeValue of the node following the one at `xpath`, or null
  // when either node is missing.
  var valueAfter = function (xpath) {
    var marker = c.getNode(xpath);
    var following = marker ? marker.nextSibling : null;
    return following ? following.nodeValue : null;
  };

  var anchor = c.getNode(anchorPath);
  if (!anchor) {
    return;
  }

  c.setURIFromAnchor(anchorPath, 'foaf:Person');
  c.assertRelation('rdfs:seeAlso', anchor.href);

  var nameNode = c.getNode('./TD[4]/A[1]/text()[1]');
  if (nameNode) {
    c.assertText('dc:title', nameNode.nodeValue);
    c.assertText('foaf:name', nameNode.nodeValue);
  }

  // The address and e-mail are read from the nodes that directly follow the
  // first and second <BR> of the fourth cell.
  var address = valueAfter('./TD[4]/BR[1]');
  if (address != null) {
    c.assertText('loc:address', utilities.trimString(address));
  }

  var email = valueAfter('./TD[4]/BR[2]');
  if (email != null) {
    c.assertRelation('foaf:mbox', 'mailto:' + utilities.trimString(email));
  }

  c.assertNodeImage('foaf:depiction', './TD[2]/A[1]/IMG[1]');
};

// function to collect all friends on the page.
// Scrapes every friend row found in document `d`, then looks for a follow-up
// link (third anchor of the first row's td.S cell, falling back to the first
// anchor). Unless that link's text is 'first', the linked page is scraped
// with this same function -- presumably the "next page" link; confirm
// against the page markup.
c.collectFriends = function (d) {
  var rowsPath = '//div[@id="friendtable"]/table[@class="friendtable"]/tbody/tr';
  var reportFailure = function (e) {
    alert("Failed: " + e);
  };

  c.setDoc(d);
  c.setElmts(rowsPath);
  // Visit rows 2, 4, 6, ... of the selected set.
  c.mapElmts(2, 2, c.collectFriend);

  var followLink = c.getNode('//tr[1]/td[@class="S"]/a[3]');
  if (!followLink) {
    followLink = c.getNode('//tr[1]/td[@class="S"]/a[1]');
  }
  if (!followLink || followLink.text == 'first') {
    return;
  }

  piggybank.scrapeURL(followLink.href, c.collectFriends, reportFailure);
};

//
// Finally, just do it!
//

// Start scraping from `document`, which this script expects the host
// environment to provide.
c.collectFriends(document);

//