// See the scraper information

//

// Namespace URI prefixes. Predicate and type URIs handed to
// data.addStatement are formed by appending a local name to one of these
// (for example dc + "title" or bibtex + "Publication").
const rdf    = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
const dc     = "http://purl.org/dc/elements/1.1/";
const bibtex = "http://www.ontoweb.org/ontology/1#";

// Lower-case three-letter month abbreviations in calendar order. An
// entry's index plus one is its month number, so all twelve months must
// be present and in order.
var monthNames = [
    "jan", "feb", "mar", "apr", "may", "jun",
    "jul", "aug", "sep", "oct", "nov", "dec"
];

/**
 * Builds a "YYYY-MM-00T00:00:00Z" date string from a month name and a year.
 *
 * The month is matched case-insensitively by prefix: the given text must
 * start with one of the abbreviations in monthNames (so "June", "jun" and
 * "JUN." all map to 06). Text that matches no abbreviation falls back to
 * month 01.
 *
 * @param {string} month - Month name or abbreviation.
 * @param {string|number} year - Year, copied verbatim into the result.
 * @returns {string} The year, a zero-padded month number and the fixed
 *     suffix "-00T00:00:00Z" (the day-of-month is always emitted as "00").
 */
function makeDate(month, year) {
    var lowered = month.toLowerCase();

    // Default used when no abbreviation matches.
    var monthNumber = 1;
    for (var i = 0; i < monthNames.length; i++) {
        if (lowered.indexOf(monthNames[i]) === 0) {
            monthNumber = i + 1;
            break;
        }
    }

    var paddedMonth = monthNumber < 10 ? "0" + monthNumber : monthNumber;
    return year + "-" + paddedMonth + "-00T00:00:00Z";
}

/**
 * Scrapes a single result entry and records what it finds as statements
 * on the host-provided `data` object.
 *
 * Nothing is recorded unless a link whose href contains "citation.cfm?id="
 * is found among the <a> children of divElement's parent; that href is the
 * subject URI of every statement. Each optional field (date, details,
 * abstract, relevance) is extracted on a best-effort basis: a failure is
 * passed to log() and the field is simply omitted.
 *
 * @param document   Document used to evaluate the XPath expressions.
 * @param trElement  Element whose cell at index 2 is searched for an <img>;
 *                   the image file's base name (between the last "/" and
 *                   the last "." of its src) is recorded as the relevance.
 * @param divElement Element whose innerHTML holds the comma-separated
 *                   author list. Its great-grandparent is treated as the
 *                   table holding the date, details and abstract.
 */
function scrapeAnEntry(document, trElement, divElement) {
    var authors = utilities.trimString(divElement.innerHTML);

    // The first citation link next to the authors block supplies both the
    // subject URI and the title.
    var uri;
    var title;
    var aElements = document.evaluate("a", divElement.parentNode, null, XPathResult.ANY_TYPE, null);
    var aElement = aElements.iterateNext();
    while (aElement) {
        if (aElement.href.indexOf("citation.cfm?id=") >= 0) {
            uri = aElement.href;
            title = utilities.trimString(aElement.innerHTML);
            break;
        }
        aElement = aElements.iterateNext();
    }

    var tableElement = divElement.parentNode.parentNode.parentNode;

    // Row 1, cell 0: the date text.
    var date;
    try {
        date = utilities.trimString(tableElement.rows.item(1).cells.item(0).innerHTML);
    } catch (e) {
        log(e);
    }

    // Row 1, cell 2: the first <strong> holds the additional details.
    var addinfo;
    try {
        addinfo = utilities.trimString(
            document.evaluate(".//strong", tableElement.rows.item(1).cells.item(2), null, XPathResult.ANY_TYPE, null)
                .iterateNext().innerHTML);
    } catch (e) {
        log(e);
    }

    // Row 3: the first td/div holds the abstract.
    var abstrakt;
    try {
        abstrakt = utilities.trimString(
            document.evaluate("./td/div", tableElement.rows.item(3), null, XPathResult.ANY_TYPE, null)
                .iterateNext().innerHTML);
    } catch (e) {
        log(e);
    }

    // The relevance value is encoded in the file name of the entry's image.
    var relevance;
    try {
        var img = document.evaluate(".//img", trElement.cells.item(2), null, XPathResult.ANY_TYPE, null).iterateNext();
        var relevanceURL = img.src;
        var slash = relevanceURL.lastIndexOf("/");
        var dot = relevanceURL.lastIndexOf(".");
        relevance = relevanceURL.substring(slash + 1, dot);
    } catch (e) {
        log(e);
    }

    if (!uri) {
        return;
    }

    data.addStatement(uri, rdf + "type", bibtex + "Publication", false);
    data.addStatement(uri, bibtex + "link", uri, true);
    if (title) {
        data.addStatement(uri, dc + "title", title, true);
    }
    if (authors) {
        // One statement per author; names are separated by a comma and
        // optional spaces.
        var names = authors.split(/, */);
        for (var j = 0; j < names.length; j++) {
            data.addStatement(uri, dc + "author", names[j], true);
        }
    }
    if (date) {
        // The text before the first space is the month, the rest the year.
        var space = date.indexOf(" ");
        data.addStatement(uri, dc + "date",
            makeDate(date.substr(0, space), date.substr(space + 1)), true);
    }
    if (addinfo) {
        data.addStatement(uri, bibtex + "details", addinfo, true);
    }
    if (abstrakt) {
        data.addStatement(uri, bibtex + "abstract", abstrakt, true);
    }
    if (relevance) {
        data.addStatement(uri, bibtex + "relevance", relevance, true);
    }
}

/**
 * Walks every <div> in the document and scrapes an entry for each one
 * whose class name is "authors".
 *
 * For such a div, its sixth ancestor is passed to scrapeAnEntry as the
 * entry's row element, together with the div itself.
 *
 * @param document Document to scan; also the context node for the XPath
 *                 query.
 */
function scrapePage(document) {
    var allDivs = document.evaluate("//div", document, null, XPathResult.ANY_TYPE,null);

    for (var node = allDivs.iterateNext(); node; node = allDivs.iterateNext()) {
        if (node.className != "authors") {
            continue;
        }

        // Climb six levels up from the authors div.
        var ancestor = node;
        for (var level = 0; level < 6; level++) {
            ancestor = ancestor.parentNode;
        }

        scrapeAnEntry(document, ancestor, node);
    }
}


/**
 * Collects the result-page URLs linked from the current document.
 *
 * Looks at every <a> that is a direct child of a <td> in the global
 * `document` and keeps each href containing "results.cfm?query=".
 *
 * @returns {Array} The distinct matching hrefs, in the order they were
 *     first encountered.
 */
function getPagesToScrape() {
    var urls = [];
    // Hrefs already collected. A plain object is used as the string-keyed
    // set, with an own-property check so inherited names never count.
    var seen = {};

    var aElements = document.evaluate("//td/a", document, null, XPathResult.ANY_TYPE, null);
    for (var aElement = aElements.iterateNext(); aElement; aElement = aElements.iterateNext()) {
        var href = aElement.href;
        if (href.indexOf("results.cfm?query=") >= 0 &&
                !Object.prototype.hasOwnProperty.call(seen, href)) {
            urls.push(href);
            seen[href] = true;
        }
    }

    return urls;
}


// Entry point: gather the result pages linked from the current document
// and hand each one to the host, with scrapePage as the callback.
var urls = getPagesToScrape();

// Array.prototype.forEach replaces the non-standard `for each (... in ...)`
// loop, which has been removed from modern engines and which would also
// visit any enumerable non-index property on the array.
// NOTE(review): piggybank.scrapeURL presumably loads the URL and calls the
// callback with the loaded document - confirm against the host API.
urls.forEach(function (url) {
    piggybank.scrapeURL(url, scrapePage);
});

//