xpath.js

Summary

Shared utilities for using XPath.

Version: 0.8 $Id: overview-summary-xpath.js.html,v 1.15 2008/02/20 18:47:10 jameso Exp $

Author: James A. Overton


/* ***** BEGIN LICENSE BLOCK *****
 * Licensed under Version: MPL 1.1/GPL 2.0/LGPL 2.1
 * Full Terms at http://mozile.mozdev.org/0.8/LICENSE
 *
 * Software distributed under the License is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
 * for the specific language governing rights and limitations under the
 * License.
 *
 * The Original Code is James A. Overton's code (james@overton.ca).
 *
 * The Initial Developer of the Original Code is James A. Overton.
 * Portions created by the Initial Developer are Copyright (C) 2005-2006
 * the Initial Developer. All Rights Reserved.
 *
 * Contributor(s):
 *	James A. Overton <james@overton.ca>
 *
 * ***** END LICENSE BLOCK ***** */

/**
 * @fileoverview Shared utilities for using XPath.
 * @link http://mozile.mozdev.org 
 * @author James A. Overton <james@overton.ca>
 * @version 0.8
 * $Id: overview-summary-xpath.js.html,v 1.15 2008/02/20 18:47:10 jameso Exp $
 */


mozile.require("mozile.dom");
mozile.require("mozile.xml");
mozile.provide("mozile.xpath.*");


/**
 * Namespace object that holds the shared XPath utility functions.
 * @type Object
 */
mozile.xpath = {};
// JSDoc hack: attach a Module instance as the "prototype" property
// (presumably so the documentation generator treats this object as a module).
mozile.xpath.prototype = new mozile.Module();



/**
 * Builds a minimal XPath location for a node, addressing every step by child index only.
 * Attributes are written as "/@name"; elements, text nodes and comments are written as
 * "/*[n]" with n = mozile.dom.getIndex(node) + 1. The document element is always "/*[1]",
 * and a node whose id resolves back to itself through mozile.document is written as
 * "//*[@id='...']". Parent steps are produced by mozile.xpath.getXPath.
 * @param {Node} node The target node.
 * @param {Node} root The root of the XPath expression. Defaults to the document.
 * @type String
 * @return The path, or an empty string when node is not a DOM node or is the root itself.
 */
mozile.xpath.getSimplePath = function(node, root) {
	// Anything without a usable nodeType is not a DOM node.
	if(!node || !node.nodeType) return "";
	// The root contributes nothing to its own path.
	if(node == root) return "";

	var type = node.nodeType;
	var container = node.parentNode;

	if(type === mozile.dom.ATTRIBUTE_NODE) {
		// Attributes may lack a parentNode; fall back to the owning element.
		if(!container) container = node.ownerElement;
		return mozile.xpath.getXPath(container, root) +"/@"+ node.nodeName;
	}

	var isIndexed = type === mozile.dom.ELEMENT_NODE
		|| type === mozile.dom.TEXT_NODE
		|| type === mozile.dom.COMMENT_NODE;
	if(!isIndexed) {
		// Every other node type is represented by the path of its parent.
		return container ? mozile.xpath.getXPath(container, root) : "";
	}

	var owner = node.ownerDocument;
	if(owner && owner.documentElement == node) return "/*[1]";

	// A unique id gives a short absolute path.
	if(node.id && mozile.document.getElementById(node.id) == node) {
		return "//*[@id='"+ node.id +"']";
	}

	var parentPath = mozile.xpath.getXPath(container, root);
	return parentPath +"/*["+ (mozile.dom.getIndex(node) + 1) +"]";
};

/**
 * Builds an XPath location whose last step names the node: "/name[n]" for elements,
 * "/@name" for attributes, "/text()[n]" for text nodes and "/comment()[n]" for comments,
 * with n taken from mozile.dom.getPosition(node). Parent steps are produced by
 * mozile.xpath.getXPath. The most recent text-node result is remembered on "this"
 * (_lastNode, _lastRoot, _lastXPath) and reused when the same node and root are asked for again.
 * @param {Node} node The target node.
 * @param {Node} root The root of the XPath expression. Defaults to the document.
 * @type String
 * @return The path, or an empty string when node is not a DOM node or is the root itself.
 */
mozile.xpath.getNamedPath = function(node, root) {
	if(!node || !node.nodeType) return "";
	if(node == root) return "";

	var type = node.nodeType;
	var container = node.parentNode;

	if(type === mozile.dom.ELEMENT_NODE) {
		// This should match up with the namespace resolver. Prefix lookup is
		// currently disabled, so the node's own nodeName is used as-is.
		var label = node.nodeName;
		// TODO: This is a hack. Try to fix it.
		if(mozile.dom.isHTML(node)) label = label.toLowerCase();

		var owner = node.ownerDocument;
		if(owner && owner.documentElement == node) return "/"+ label +"[1]";
		if(node.id && mozile.document.getElementById(node.id) == node) {
			return "//*[@id='"+ node.id +"']";
		}
		return mozile.xpath.getXPath(container, root)
			+"/"+ label +"["+ mozile.dom.getPosition(node) +"]";
	}

	if(type === mozile.dom.ATTRIBUTE_NODE) {
		// Attributes may lack a parentNode; fall back to the owning element.
		if(!container) container = node.ownerElement;
		return mozile.xpath.getXPath(container, root) +"/@"+ node.nodeName;
	}

	if(type === mozile.dom.TEXT_NODE) {
		// Basic caching: reuse the previous answer for the same node and root.
		var isCached = (node == this._lastNode && root == this._lastRoot);
		if(isCached) return this._lastXPath;

		var textPath = mozile.xpath.getXPath(container, root)
			+"/text()["+ mozile.dom.getPosition(node) +"]";
		this._lastNode = node;
		this._lastRoot = root;
		this._lastXPath = textPath;
		return textPath;
	}

	if(type === mozile.dom.COMMENT_NODE) {
		return mozile.xpath.getXPath(container, root)
			+"/comment()["+ mozile.dom.getPosition(node) +"]";
	}

	// Every other node type is represented by the path of its parent.
	return container ? mozile.xpath.getXPath(container, root) : "";
};


// NOTE: Optimization
// getXPath is the function the path builders in this file call to obtain the
// path of a parent node. It is bound here to the index-only getSimplePath
// implementation rather than to getNamedPath.
mozile.xpath.getXPath = mozile.xpath.getSimplePath;


/**
 * Takes an XPath expression, splits it at "/" characters, and returns a cleaned array.
 * Every empty component is dropped, including the consecutive ones produced by "//".
 * @param {String} expression The XPath expression to be split.
 * @type Array
 * @return The non-empty components in order, or an empty array when expression is not a string.
 */
mozile.xpath.getComponents = function(expression) {
	if(typeof(expression) != "string") return [];
	var parts = expression.split("/");
	// Collect into a fresh array. Removing entries from the array that is
	// being walked forward skips the element following each removal, which
	// would leave an empty string behind for a leading "//".
	var components = [];
	for(var i=0; i < parts.length; i++) {
		if(parts[i]) components.push(parts[i]);
	}
	return components;
}

/**
 * Takes a simple XPath expression and returns an object with "name", "localName", and "position" properties for the last node in the path.
 * "name" includes any "prefix:" part, "localName" does not. "position" is the digit
 * string found between square brackets, or null when the step has no numeric index.
 * The returned object is empty when the expression has no components or the last
 * component cannot be parsed.
 * @param {String} expression The XPath expression to be used.
 * @type Object
 */
mozile.xpath.getComponent = function(expression) {
	var result = new Object();
	var components = mozile.xpath.getComponents(expression);
	if(components.length == 0) return result;

	var component = components[components.length - 1];
	var match = component.match(/(\S+:)?(\S+)\[(\d+)\]|(\S+:)?(\S+)/);
	if(!match) return result;

	// Groups 1-3 are filled by the "prefix:name[position]" form,
	// groups 4-5 by the bare "prefix:name" form; only one form matches.
	var prefix = match[1] || match[4];
	var localName = match[2] || match[5];
	if(localName) {
		result.name = prefix ? prefix + localName : localName;
		result.localName = localName;
	}
	result.position = match[3] ? match[3] : null;

	return result;
}


/**
 * A basic XPath processor. Expects an XPath expression with only indices as positions which selects a single node, and returns that node.
 * Works on paths of the form /html[1]/body[1]/p[3]/text()[1]
 * <p>The path is resolved by walking child nodes step by step, so simple paths should
 * work in any browser; no native XPath engine is used.
 * <p>A path starting with //*[@id='...'] is resolved through mozile.document.getElementById
 * and the remainder is walked from that element; null is returned when no such element exists.
 * Any other leading "//" is NOT a descendant search: on a document root the walk simply
 * starts at the document element, on any other root it is treated like a single "/".
 * <p>The last successful lookup is remembered on "this" (_lastNode, _lastRoot, _lastXPath).
 * The cache is not invalidated when the document changes.
 * @param {String} expression An XPath expression.
 * @param {Node} root An XML node or document. The path is evaluated relative to this node. Defaults to mozile.document.
 * @type Node
 * @return The selected node, the root when the expression is empty, or null when nothing matches.
 */
mozile.xpath.getNode = function(expression, root) {
	if(!root) root = mozile.document;
	if(!expression) return root;
	// Basic caching method
	if(expression == this._lastXPath && root == this._lastRoot) return this._lastNode;

	// TODO: Make use of native XPath systems.

	var node = root;

	// Try to find id
	var match = expression.match(/^\/\/\*\[@id='(.*?)'\](.*)/m);
	if(match) {
		var target = match[1] ? mozile.document.getElementById(match[1]) : null;
		// An id step cannot be resolved by walking children, so an unknown
		// id means the path selects nothing.
		if(!target) return null;
		root = target;
		node = target;
		expression = match[2];
	}
	else if(expression.indexOf("//") == 0 
		&& root.nodeType == mozile.dom.DOCUMENT_NODE) {
		// Kept for compatibility: a leading "//" on a document has always
		// stepped into the document element before the named steps are walked.
		node = root.documentElement;
		if(!node) return null;
	}

	var components = mozile.xpath.getComponents(expression);
	for(var i=0; i < components.length; i++) {
		var component = mozile.xpath.getComponent(components[i]);
		// A step that could not be parsed cannot select anything.
		if(!component.name) return null;
		node = mozile.xpath._getNthChild(node, component.name, component.position);
		if(!node) return null;
	}

	this._lastNode = node;
	this._lastRoot = root;
	this._lastXPath = expression;
	return node;
}

/**
 * Returns the Nth child of the given name. 
 * Used to resolve simple XPaths.
 * Works on paths of the form /html[1]/body[1]/p[3]/text()[1]
 * <ul>
 * <li>A document parent always yields its document element, whatever the name.</li>
 * <li>"*" yields the child at index position - 1 among all child nodes.</li>
 * <li>"text()" and "comment()" yield the Nth text or comment child.</li>
 * <li>"@name" yields the attribute node of that name; position is ignored.</li>
 * <li>Any other name yields the Nth child whose nodeName equals the name or its
 *     upper-case form. A leading "xmlns:" is ignored.</li>
 * </ul>
 * @private
 * @param {Node} parent The parent node to search.
 * @param {String} name The name of the target.
 * @param {Integer} position The position of the target, counted from 1. A digit string is accepted.
 * @type Node
 * @return The matching node, or null when there is no match or the arguments are unusable.
 */
mozile.xpath._getNthChild = function(parent, name, position) {
	if(!parent) return null;
	//alert(parent +" "+ name +" "+ position);

	if(parent.nodeType == mozile.dom.DOCUMENT_NODE) 
		return parent.documentElement;

	// Without a name there is nothing to match against.
	if(typeof(name) != "string" || !name) return null;

	if(name == "*" && parent.childNodes && parent.childNodes[position - 1])
		return parent.childNodes[position - 1];

	if(name.indexOf("@") == 0) {
		// Only nodes that expose getAttributeNode can carry attributes.
		if(!parent.getAttributeNode) return null;
		return parent.getAttributeNode(name.substring(1));
	}

	var child;
	var count = 0;

	if(name == "text()" || name == "comment()") {
		var wanted = mozile.dom.COMMENT_NODE;
		if(name == "text()") wanted = mozile.dom.TEXT_NODE;
		for(child = parent.firstChild; child; child = child.nextSibling) {
			if(child.nodeType != wanted) continue;
			count++;
			// Loose comparison on purpose: position may be a digit string.
			if(count == position) return child;
		}
		return null;
	}

	// Ignore the null namespace of "xmlns:". Only a real prefix is stripped;
	// cutting six characters when "xmlns:" sits elsewhere would mangle the name.
	if(name.indexOf("xmlns:") == 0) name = name.substring(6);
	var upperName = name.toUpperCase();
	for(child = parent.firstChild; child; child = child.nextSibling) {
		if(child.nodeName == name || child.nodeName == upperName) {
			count++;
			// Loose comparison on purpose: position may be a digit string.
			if(count == position) return child;
		}
	}

	return null;
}


/**
 * Evaluates an XPath expression in the context of the document or a node.
 * <p>This method only supports Mozilla browsers and IE versions with MSXML 3.0+:
 * it uses XPathEvaluator when mozile.window provides one, otherwise the context
 * node's selectNodes method, and returns an empty array when neither exists.
 * For namespace resolution, see http://www.faqts.com/knowledge_base/view.phtml/aid/34022/fid/119
 * Prefixes are resolved with mozile.dom.lookupNamespaceURI on the context node first,
 * then through mozile.xml.ns, and finally fall back to mozile.defaultNS.
 * An exception raised during XPathEvaluator evaluation is shown with alert() and not rethrown.
 * TODO: The namespace resolver should be more clever.
 * @param {String} expression The XPath expression.
 * @param {Node} root Optional. The context within which the expression should be executed. Defaults to mozile.document; a document is replaced by its documentElement.
 * @type Array
 * @return An array of results.
 */
mozile.xpath.evaluate = function(expression, root) {
	var context = root ? root : mozile.document;

	// A node without an ownerDocument is taken to be the document itself.
	var ownerDoc = context.ownerDocument;
	if(!ownerDoc) {
		ownerDoc = context;
		context = context.documentElement;
	}

	var found = [];
	var item;

	if(mozile.window.XPathEvaluator) {
		var evaluator = new XPathEvaluator();

		// Create a namespace resolver based on the context node.
		var resolver = function(prefix) {
			//mozile.debug.debug("mozile.xpath.nsResolver", "Prefix: "+ prefix);
			var uri = mozile.dom.lookupNamespaceURI(context, prefix);
			if(uri) return uri;
			if(mozile.xml.ns[prefix]) return mozile.xml.ns[prefix];
			return mozile.defaultNS;
		};

		try {
			// Result type 0 is XPathResult.ANY_TYPE.
			var iterator = evaluator.evaluate(expression, context, resolver, 0, null);
			if(iterator) {
				for(item = iterator.iterateNext(); item; item = iterator.iterateNext()) {
					found.push(item);
				}
			}
		} catch(e) {
			alert(ownerDoc.documentElement.getAttribute("xmlns") +"\n"+ e);
		}
	}
	else if(context.selectNodes) {
		var list = context.selectNodes(expression);
		for(item = list.nextNode(); item; item = list.nextNode()) {
			found.push(item);
		}
	}

	return found;
};



Documentation generated by JSDoc on Wed Feb 20 13:25:28 2008