extract.js
Summary
Tools for extracting XML from HTML using information in a special "xml" attribute. See the comment for the Instruction object for details on instruction format.
Version: 0.8
$Id: overview-summary-extract.js.html,v 1.1 2008/02/20 18:47:09 jameso Exp $
Author: James A. Overton
mozile.require("mozile.dom");
mozile.require("mozile.save");
mozile.provide("mozile.save.extract.*");
// Namespace object that holds the extraction tools defined in this file.
mozile.save.extract = {};
// The namespace carries a Module instance on its "prototype" property.
mozile.save.extract.prototype = new mozile.Module();
/**
 * Recursively walks a DOM subtree and runs the instructions found in each
 * element's "xml" attribute against the given container.
 *
 * @param node       The node to extract from. Non-element nodes are ignored.
 * @param container  The node that receives the extracted content.
 * @return null when no node is given, otherwise the container passed in.
 */
mozile.save.extract.extract = function(node, container) {
  mozile.require("mozile.dom");
  if (!node) return null;

  // Only elements can carry instructions or have children worth visiting.
  if (node.nodeType != mozile.dom.ELEMENT_NODE) return container;

  var childContainer = container;
  var spec = node.getAttribute("xml");
  if (spec) {
    var steps = mozile.save.extract.parseInstructions(spec);
    var first = steps[0];
    // The first instruction decides where the remaining instructions and
    // the element's children put their output.
    childContainer = first.execute(node, container);
    var s = 1;
    while (s < steps.length) {
      steps[s].execute(node, childContainer);
      s++;
    }
    // When the first instruction sets an attribute, the children are not
    // visited.
    if (first.getType() == "Set Attribute") return container;
  }

  for (var c = 0; c < node.childNodes.length; c++) {
    mozile.save.extract.extract(node.childNodes[c], childContainer);
  }
  return container;
};
/**
 * Parses the contents of an "xml" attribute into a list of Instruction
 * objects.
 *
 * Instructions are separated by whitespace. Within one instruction, the text
 * before "=" is the target. After "=", a single-quoted string is stored as
 * the value and anything else is stored as the select. A backslash is copied
 * to the output together with the character that follows it, so the escaped
 * character is never treated as a delimiter.
 *
 * Inside a quoted value, "=" and whitespace are ordinary characters.
 * Instructions that end up completely empty (caused by leading, trailing or
 * repeated whitespace) are dropped, but the result always contains at least
 * one Instruction so that callers may safely use element 0.
 *
 * @param string  The instruction string to parse.
 * @return An Array of mozile.save.extract.Instruction objects.
 */
mozile.save.extract.parseInstructions = function(string) {
  var instructions = new Array();
  var instruction = new mozile.save.extract.Instruction;
  var mode = "target";
  var c;

  // True when nothing has been written to any field of the instruction.
  var isBlank = function(candidate) {
    return !candidate.target && !candidate.select && !candidate.value;
  };

  for(var i=0; i < string.length; i++) {
    c = string.charAt(i);
    // "=" switches from the target to a value or a select, but it must not
    // do so inside a quoted value, where it is literal text.
    if(c == "=" && mode != "value") {
      i++;
      c = string.charAt(i);
      if(c == "'") {
        mode = "value";
        continue;
      }
      else mode = "select";
    }
    if(c == "'" && mode == "value") {
      // Closing quote: the value is complete.
      mode = "target";
      continue;
    }
    else if(c.match(/\s/) && mode != "value") {
      // Whitespace ends the current instruction. Skip blank ones so that
      // runs of whitespace do not generate empty instructions.
      if(!isBlank(instruction)) {
        instructions.push(instruction);
        instruction = new mozile.save.extract.Instruction;
      }
      mode = "target";
      continue;
    }
    instruction[mode] += c;
    // Copy an escaped character verbatim so it is not seen as a delimiter.
    if(c == "\\" && string.charAt(i+1)) {
      i++;
      instruction[mode] += string.charAt(i);
    }
  }

  // Keep the final instruction unless it is a blank left over from trailing
  // whitespace; never return an empty list.
  if(!isBlank(instruction) || instructions.length == 0) {
    instructions.push(instruction);
  }
  return instructions;
};
/**
 * A single extraction instruction. Every field starts out as an empty
 * string and is filled in by the parser.
 * @constructor
 */
mozile.save.extract.Instruction = function() {
  var fields = ["target", "select", "value"];
  for (var f = 0; f < fields.length; f++) {
    this[fields[f]] = "";
  }
};
/**
 * Describes the instruction as "Instruction :: target :: select :: value".
 * @return A String listing the three fields.
 */
mozile.save.extract.Instruction.prototype.toString = function() {
  var parts = ["Instruction"];
  parts.push(this.target, this.select, this.value);
  return parts.join(" :: ");
};
/**
 * Classifies the instruction by which of its fields are filled in.
 * A value takes precedence over a select, which takes precedence over the
 * form of the target.
 *
 * @return null when there is no target, otherwise one of "Assign Value",
 *         "Map Selection", "Set Attribute" or "Create Element".
 */
mozile.save.extract.Instruction.prototype.getType = function() {
  var kind = null;
  if (this.target) {
    if (this.value) {
      kind = "Assign Value";
    } else if (this.select) {
      kind = "Map Selection";
    } else {
      // A target beginning with "@" names an attribute.
      kind = (this.target.charAt(0) == "@") ? "Set Attribute" : "Create Element";
    }
  }
  return kind;
};
/**
 * Executes this instruction, reading from an HTML element and writing into
 * the XML container.
 *
 * The value written is this.value, unless the select starts with "@", in
 * which case it is read from the element's attribute of that name (falling
 * back to the element property of the same name).
 *
 * A target containing "/" is split into segments. The last segment is the
 * effective target; each ".." segment before it moves the working container
 * up to its parent. Other leading segments are not navigated.
 *
 * - Target starting with "@": the value is set as an attribute on the
 *   working container. With no value and a select that is empty, "*" or
 *   "text()", the element's text (mozile.dom.getText) is used.
 * - Any other target except "..": a new element with that name is created
 *   in the container's namespace and appended to the working container; a
 *   value, if any, is appended to it as a text node.
 * - For non-attribute targets, select "*" deep-clones all of the element's
 *   children into the working container, and select "text()" clones only
 *   its non-whitespace text nodes.
 *
 * Errors raised while executing are reported with alert() and not rethrown.
 *
 * @param element    The source element carrying the instruction.
 * @param container  The node that receives the output.
 * @return The newly created element if one was created, otherwise the given
 *         container; null when the target path climbs past the top of the
 *         tree (or no container was available to climb from).
 */
mozile.save.extract.Instruction.prototype.execute = function(element, container) {
  try {
    var useContainer = container;
    var target = this.target;
    var attribute, text, i;
    var value = this.value;

    if(this.select && this.select.charAt(0) == "@") {
      attribute = this.select.substring(1);
      value = element.getAttribute(attribute);
      // Fall back to the DOM property when there is no such attribute.
      if(!value && element[attribute] != undefined)
        value = element[attribute];
    }

    if(this.target.indexOf("/") != -1) {
      var ancestors = this.target.split("/");
      target = ancestors.pop();
      for(i = ancestors.length - 1; i >= 0; i--) {
        if(useContainer && ancestors[i] == "..") {
          useContainer = useContainer.parentNode;
        }
        // Check after every step, including ".." steps, so that climbing
        // past the top of the tree is reported here instead of failing on a
        // null dereference later.
        if(!useContainer) {
          mozile.debug.debug("mozile.save.extract.Instruction.prototype.execute", "No node matching target: "+ this.target);
          return null;
        }
      }
    }

    if(target.charAt(0) == "@") {
      if(!this.value &&
        (!this.select || this.select == "*" || this.select == "text()") ) {
        value = mozile.dom.getText(element);
      }
    }
    else if(target != "..") {
      var newContainer = mozile.dom.createElementNS(container.namespaceURI, target);
      useContainer.appendChild(newContainer);
      useContainer = newContainer;
      // The created element becomes the return value.
      container = newContainer;
    }

    if(value) {
      if(target.charAt(0) == "@") {
        useContainer.setAttribute(target.substring(1), value);
      }
      else {
        text = useContainer.ownerDocument.createTextNode(value);
        useContainer.appendChild(text);
      }
    }

    if(target.charAt(0) != "@") {
      if(this.select == "*") {
        for(i=0; i < element.childNodes.length; i++) {
          useContainer.appendChild(element.childNodes[i].cloneNode(true));
        }
      }
      if(this.select == "text()") {
        for(i=0; i < element.childNodes.length; i++) {
          if(element.childNodes[i].nodeType != mozile.dom.TEXT_NODE) continue;
          if(mozile.dom.isWhitespace(element.childNodes[i])) continue;
          useContainer.appendChild(element.childNodes[i].cloneNode(true));
        }
      }
    }
  } catch(e) {
    alert(mozile.dumpError(e) +"\n"+ element +" "+ container +"\n"+ element.getAttribute("xml") +"\n"+ this.toString() +"\n"+ target +" "+ value);
  }
  return container;
};
Documentation generated by
JSDoc on Wed Feb 20 13:25:28 2008