/** * @filename * sweetButty01.js * * @author * Julian Turner, Ripley, Derbyshire, England * http://www.baconbutty.com * * @copyright * (c) Julian Turner 2007 * * @version * 0.1 (developed July 2007) * * Called version 0.1, because it is not quite * fully compliant with SweetXML 0.2 * * @fileoverview * sweetButty01.js enables ``quick and dirty`` parsing * of SweetXML files. * * SweetXML is the creation of Paul Phillips Cantrell * http://innig.net/software/sweetxml/index.html * * @dependencies * sweetButty01.js has no dependencies. */
The licence terms are as follows, which you are deemed to agree to by downloading, running, or modifying SweetButty 1.0:-
var classSweetButty /*: Function*/ = (function()
{Encapsulates functionality to provide simple parsing of SweetXML files.
classSweetButty
Nothing
setIndentUnit(indentUnit : String) : void
Specify what characters the SweetButty file uses for indentation (e.g. 4 spaces or a tab character).
parse (sweetXMLMarkup : String) : Object
Parses, and returns the root Object of the parsed tree.
Nothing.
The class returns an Object tree for the parsed nodes. The root Object is added by the parser in addition to the nodes within the SweetXML markup.
Each Object in the tree has the following properties:-
This gives you:
1 = Element
3 = Text
4 = CDATA-Section
8 = Comment
This gives you <tagName>, #text, #comment or #cdata-section.
This gives you the text of a Text, CDATASection and Comment node, but an empty string for Element nodes.
These are the childNodes, but accessed through a direct accessor, rather than a childNodes collection.
The value of any attribute on the Node.
There are 4 possibilities here:-
See examples below if this sounds a little confusing.
None
/**
 * Constructor function for the SweetButty parser.
 *
 * All instance set-up is delegated to this.initialise(), which assigns the
 * default value of indentUnit.
 */
function classSweetButty() /*: void*/
{
    /* No-op property reference kept purely to document the instance field. */
    this.indentUnit /*: String*/;
    this.initialise();
}

/* Short alias for the constructor; it is the first argument of every AddMethod(...) call in this file. */
var constr /*: Function*/ = classSweetButty;
None
void
this::initialise
None
None
/**
 * Initialisation function.
 *
 * Puts the parser into its default state: the indent unit is a single
 * tab character.
 */
function initialise() /*: void*/
{
    var defaultIndentUnit /*: String*/ = "\t";

    this.indentUnit = defaultIndentUnit;
}
AddMethod(constr, initialise);Initialisation function
None
void
this::AddMethod
Specifies that the default indent unit will be a tab character.
None
/**
 * Tells the parser which characters make up one level of indentation
 * (for example a TAB or 4 spaces).
 *
 * @param indentUnit : String  The indent unit; stored as given.
 * @return void
 */
function setIndentUnit(indentUnit /*: String*/) /*: void*/
{
    var parser = this;

    parser.indentUnit = indentUnit;
}
AddMethod(constr, setIndentUnit);You need to tell the parser what indent unit the SweetButty file is using: either TAB or 4 spaces.
indentUnit : String
A TAB or 4 spaces
void
this::AddMethod
None
None
/**
 * The primary parsing function.
 *
 * Runs the three parsing stages in order: splitIntoNodes (markup to flat
 * node list), nodesToTree (flat list to tree) and createTreeShortcuts
 * (which works on the tree by reference).
 *
 * @param markup : String  The SweetXML markup.
 * @return Node  The value returned by nodesToTree.
 */
function parse(markup /*: String*/) /*: Node*/
{
    var tree /*: Node*/ = this.nodesToTree(this.splitIntoNodes(markup));

    this.createTreeShortcuts(tree);

    return tree;
}
AddMethod(constr, parse);The primary parsing function.
sweetXMLMarkup : String
The SweetXML markup.
Node
this::AddMethod
this::createTreeShortcuts
this::nodesToTree
this::parse
this::splitIntoNodes
None
None
/*
 * A range of Regular Expressions for parsing the SweetXML node grammar.
 *
 * NODE_PATTERN matches one line of markup. Its capture groups, as read by
 * splitIntoNodes, are:
 *   1  = indentation (last repetition of the indent group)
 *   2  = tag name
 *   3  = all attributes (groups 4-6 are internal to one attribute)
 *   7  = the ":" that introduces inline content
 *   8  = text (quoted or unquoted, optionally prefixed with "!")
 *   9  = comment (from "#" to the end of the line)
 *   10 = the line terminator (or end of input)
 *
 * NODE_REGEXP carries the "g" flag, so it keeps its lastIndex between
 * exec calls.
 */
var NODE_INDENT = "(\\t+| +)*";
var NODE_TAG_NAME = "([^ \\t\\r\\n\"'=\:\!\#]*)";
var NODE_ATTR_DIVIDER = "([ \\t]*[\\r\\n]+[ \\t]*\\|[ \\t]*|[ \\t]+)";
var NODE_ATTR = "[^ \\t\\r\\n\"'=]+=(\"[^\"]*\"|'[^']*'|[\\w\\-\\.]+)";
var NODE_ATTRS = "((" + NODE_ATTR_DIVIDER + NODE_ATTR + ")*)";
var NODE_INLINE = "[ \\t]*(\\:)?";
var NODE_TEXT = "[ \\t]*(!?\"[^\"]*\"|!?'[^']*'|!?[\\w\\-\\.]+)?";
var NODE_COMMENT = "([ \\t]*\\#[^\\r\\n]+)?";
var NODE_PATTERN =
    NODE_INDENT +
    NODE_TAG_NAME +
    NODE_ATTRS +
    NODE_INLINE +
    NODE_TEXT +
    NODE_COMMENT +
    "[ \\t]*(\\r\\n|\\n\\r|\\r|\\n|$)";
var NODE_REGEXP = new RegExp(NODE_PATTERN, "gi");

/*
 * A range of Regular Expressions for parsing the SweetXML attributes grammar.
 *
 * Groups: 1 = attribute name, 3 = double-quoted value, 4 = single-quoted
 * value, 5 = unquoted value.
 */
var ATTR_PATTERN = "([^ \\t\\r\\n\"'=]+)=(\"([^\"]*)\"|'([^']*)'|([\\w\\-\\.]+))";
var ATTR_REGEXP = new RegExp(ATTR_PATTERN, "gi");

/*
 * A range of Regular Expressions for parsing the SweetXML text nodes grammar.
 *
 * I have extended the SweetXML grammar, by allowing the `!` character as a
 * prefix to identify CDATASections.
 *
 * Groups: 2 = double-quoted text, 3 = single-quoted text, 4 = unquoted text.
 */
var TEXT_PATTERN = "[ \\t]*(!?\"([^\"]*)\"|!?'([^']*)'|!?([\\w\\-\\.]+))";
var TEXT_REGEXP = new RegExp(TEXT_PATTERN);

/*
 * A range of Regular Expressions for parsing the SweetXML comment nodes grammar.
 *
 * Group 1 = the comment text following the "#".
 */
var COMMENT_PATTERN = "[ \\t]*\\#([^\\r\\n]+)";
var COMMENT_REGEXP = new RegExp(COMMENT_PATTERN);
/**
 * A simple Object constructor for the Node objects created by the parser.
 *
 * Every node starts out empty; the parser fills the fields in afterwards.
 * Child nodes are stored on the node itself under the numeric keys
 * 0 .. length - 1 (see nodesToTree).
 */
function Node()
{
    this.isNode /*: Boolean*/ = true;      /* Marks the object as a Node (checked by createTreeShortcuts). */
    this.depth /*: int*/ = 0;              /* Indentation depth. */
    this.length /*: int*/ = 0;             /* Number of child nodes. */
    this.nodeType /*: int*/ = 0;           /* 1 = Element, 3 = Text, 4 = CDATA-Section, 8 = Comment. */
    this.nodeName /*: String*/ = "";       /* Tag name, #text, #cdata-section or #comment. */
    this.nodeValue /*: String*/ = "";      /* Text of Text, CDATASection and Comment nodes. */
    this.parentNode /*: Node*/ = null;     /* Set when the node is attached to the tree. */
}
None
void
Nothing
None
None
/**
 * This is the first (and main) part of the 3-part parsing operation.
 *
 * It uses the regular expressions to parse the SweetXML and creates a single
 * flat Array of Node objects in tree depth-first order. The parent/child
 * relationships are worked out in the next function: nodesToTree.
 *
 * Per matched line:
 *  - a tag name produces an Element node (type 1), and its attributes are
 *    stored as properties directly on the node;
 *  - inline text and an inline comment are only emitted for an element when
 *    the ":" inline marker is present; they become separate nodes one level
 *    deeper than the element;
 *  - a line with only text produces a Text (3) or, with a "!" prefix, a
 *    CDATA-Section (4) node; a comment following it becomes a sibling node;
 *  - a line with only a comment produces a Comment node (8).
 *
 * @param markup : String  The SweetXML text.
 * @return Array.<Node>  A single flat Array of Node objects.
 */
function splitIntoNodes(
    markup /*: String*/
) /*: Array.<Node>*/
{
    var execResult /*: Array*/;
    var nodes /*: Array.<Node>*/ = [];
    var node /*: Node*/;
    var nodeIndentPart /*: String*/ = "";
    var nodeNamePart /*: String*/ = "";
    var nodeAttributesPart /*: String*/ = "";
    var nodeInlinePart /*: String*/ = "";
    var nodeTextPart /*: String*/ = "";
    var nodeCommentPart /*: String*/ = "";
    /* An empty indent unit would make the depth calculation divide by zero (NaN depth). */
    var indentUnitLength /*: int*/ = this.indentUnit.length || 1;
    var attrExecResult /*: Array*/ = [];
    var attrName /*: String*/ = "";
    var attrValue /*: String*/ = "";
    var textExecResult /*: Array*/ = [];
    var text /*: String*/ = "";
    var textNode /*: Object*/;
    var commentExecResult /*: Array*/ = [];
    var commentNode /*: Object*/;
    /* Safety valve: at most 1000 line matches (blank lines included) are processed. */
    var overflow /*: int*/ = 1000;

    /*
     * NODE_REGEXP is a shared global ("g") RegExp, so it remembers where the
     * previous parse stopped. Without this reset a second call would start
     * matching part-way through (or past the end of) the new markup.
     */
    NODE_REGEXP.lastIndex = 0;

    while (execResult = this.exec(NODE_REGEXP, markup))
    {
        /* Clear */
        node = null;
        textNode = null;
        commentNode = null;

        /* At end: the pattern matches the empty string at the end of input. */
        if (execResult.index == markup.length)
        {
            break;
        }

        if (!overflow--)
        {
            break;
        }

        /* Empty */
        if (!this.trim(execResult[0]))
        {
            continue;
        }

        /* Get Node Parts */
        nodeIndentPart = execResult[1];
        nodeNamePart = this.trim(execResult[2]);
        nodeAttributesPart = this.trim(execResult[3]);
        nodeInlinePart = this.trim(execResult[7]);
        nodeTextPart = this.trim(execResult[8]);
        nodeCommentPart = this.trim(execResult[9]);

        /* Node */
        node = new Node();

        /* Calculate Depth: unindented lines are at depth 1. */
        node.depth = Math.ceil(nodeIndentPart.length / indentUnitLength) + 1;

        /* Node Type */
        if (nodeNamePart)
        {
            node.nodeType = 1;
            node.nodeName = nodeNamePart;
        }
        else if (nodeTextPart)
        {
            if (/^!/.test(nodeTextPart))
            {
                node.nodeType = 4;
                node.nodeName = "#cdata-section";
            }
            else
            {
                node.nodeType = 3;
                node.nodeName = "#text";
            }
        }
        else if (nodeCommentPart)
        {
            node.nodeType = 8;
            node.nodeName = "#comment";
        }
        else
        {
            continue;
        }

        /* Node Attributes: stored as plain properties on the element node. */
        if (node.nodeType == 1 && nodeAttributesPart)
        {
            /* ATTR_REGEXP is global; looping until null leaves its lastIndex at 0. */
            while (attrExecResult = this.exec(ATTR_REGEXP, nodeAttributesPart))
            {
                attrName = attrExecResult[1];
                attrValue = attrExecResult[3] || attrExecResult[4] || attrExecResult[5];
                attrValue = this.unescapeAttrHTML(attrValue);
                node[attrName] = attrValue;
            }
        }

        /* Inline Extras: an element without ":" carries no inline text or comment. */
        if (node.nodeType == 1 && !nodeInlinePart)
        {
            nodes[nodes.length] = node;
            continue;
        }

        /* Look for inline parts */
        nodes[nodes.length] = node;

        /* Text */
        if (nodeTextPart)
        {
            textExecResult = this.exec(TEXT_REGEXP, nodeTextPart);

            if ((node.nodeType == 3 || node.nodeType == 4) && !textExecResult)
            {
                continue;
            }

            text = textExecResult[2] || textExecResult[3] || textExecResult[4];
            text = this.unescapeAttrHTML(text);

            if (node.nodeType == 1)
            {
                /* Inline text of an element becomes a child node. */
                textNode = new Node();
                textNode.depth = node.depth + 1;

                if (/^!/.test(nodeTextPart))
                {
                    textNode.nodeType = 4;
                    textNode.nodeName = "#cdata-section";
                }
                else
                {
                    textNode.nodeType = 3;
                    textNode.nodeName = "#text";
                }

                textNode.nodeValue = text;
                nodes[nodes.length] = textNode;
            }
            else if (node.nodeType == 3 || node.nodeType == 4)
            {
                node.nodeValue = text;
            }
        }

        /* Comment */
        if (nodeCommentPart)
        {
            commentExecResult = this.exec(COMMENT_REGEXP, nodeCommentPart);
            text = this.trim(commentExecResult[1]);
            text = this.unescapeAttrHTML(text);

            if (node.nodeType == 1)
            {
                /* Comment after an element: child of that element. */
                commentNode = new Node();
                commentNode.depth = node.depth + 1;
                commentNode.nodeType = 8;
                commentNode.nodeName = "#comment";
                commentNode.nodeValue = text;
                nodes[nodes.length] = commentNode;
            }
            else if (node.nodeType == 3 || node.nodeType == 4)
            {
                /* Comment after a text line: sibling of the text node. */
                commentNode = new Node();
                commentNode.depth = node.depth;
                commentNode.nodeType = 8;
                commentNode.nodeName = "#comment";
                commentNode.nodeValue = text;
                nodes[nodes.length] = commentNode;
            }
            else if (node.nodeType == 8)
            {
                node.nodeValue = text;
            }
        }
    }

    return nodes;
}
AddMethod(constr, splitIntoNodes);This is the first (and main) part of the 3-part parsing operation.
It uses the regular expressions to parse the SweetXML and creates a single flat Array of Node objects in tree depth-first order.
The depth and parent/child relationships are worked out in the next function: nodesToTree.
sweetXMLMarkup : String
The SweetXML text file.
Array.<Node>
A single flat Array of Node objects in tree depth-first order.
this::AddMethod
this::Node
this::execResultToString
this::splitIntoNodes
this::trim
this::unescapeAttrHTML
None
None
/**
 * This organises the Nodes into a tree structure.
 * The result is then passed to createTreeShortcuts.
 *
 * A synthetic element named "root" (depth 0) is always created above the
 * parsed nodes, so that SweetXML fragments with several top-level nodes can
 * be parsed as well. Children are stored on their parent under the numeric
 * keys 0 .. length - 1.
 *
 * Side effect: the root is also inserted at the front of the `nodes` Array.
 *
 * @param nodes : Array.<Node>  The result of splitIntoNodes.
 * @return Node  The root node of the tree.
 */
function nodesToTree(
    nodes /*: Array.<Node>*/
) /*: Node*/
{
    var root /*: Node*/ = new Node();
    root.depth = 0;
    root.nodeType = 1;
    root.nodeName = "root";

    var node /*: Node*/;
    /* Stack of open parents; parents.length always equals lastDepth. */
    var parents /*: Array*/ = [root];
    var currentParent /*: Object*/ = root;
    var lastDepth /*: int*/ = 1;
    var lastElement /*: Object*/ = root;
    var depthDifference /*: int*/ = 0;

    for (var i = 0; i < nodes.length; i++)
    {
        node = nodes[i];

        if (node.depth > lastDepth)
        {
            /*
             * Deeper: descend into the most recent element, one level at a
             * time regardless of how far the line was indented.
             * If there is no new element to descend into (for example an
             * indented comment following another comment), the node stays a
             * sibling at the current depth. Increasing lastDepth without
             * pushing a parent would let a later dedent pop the root.
             */
            if (currentParent != lastElement)
            {
                currentParent = lastElement;
                parents.push(lastElement);
                lastDepth = lastDepth + 1;
            }
            node.depth = lastDepth;
        }
        else if (node.depth < lastDepth)
        {
            /* Shallower: close parents, but never the root itself. */
            depthDifference = lastDepth - node.depth;
            while (depthDifference-- > 0 && parents.length > 1)
            {
                parents.pop();
            }
            currentParent = parents[parents.length - 1];
            lastDepth = parents.length;
        }

        currentParent[currentParent.length++] = node;
        node.parentNode = currentParent;

        /* Only elements can become parents of later nodes. */
        if (node.nodeType == 1)
        {
            lastElement = node;
        }
    }

    nodes.unshift(root);

    return root;
}
AddMethod(constr, nodesToTree);This organises the Nodes into a tree structure.
The result is then passed to createTreeShortcuts.
nodes : Array.<Node>
The result of splitIntoNodes.
Node
The root node of the tree. This function will always create a root node above the root node of the parsed node tree, just to allow you to parse SweetXML fragments as well.
this::AddMethod
this::Node
this::nodesToTree
None
None
/**
 * A nice-to-have function which enables you to access child nodes by
 * nodeName as a property of the parent node.
 *
 * For each element that has a parent, parent[nodeName] is set as follows:
 *  - an element whose only child is a Text or CDATA-Section node is
 *    represented by that child's nodeValue (a String);
 *  - any other element is represented by the element node itself;
 *  - if the property already holds a value, it is turned into (or appended
 *    to) an Array, so repeated names can be indexed.
 *
 * Names that would overwrite the node's own bookkeeping (isNode, depth,
 * length, nodeType, nodeName, nodeValue, parentNode, or a numeric child
 * index) get no shortcut. Previously such names corrupted the node or threw
 * a TypeError.
 *
 * The tree is operated on by reference.
 *
 * @param node : Node  The Node returned from nodesToTree (or any element below it).
 * @return void
 */
function createTreeShortcuts(
    node /*: Node*/
) /*: Node*/
{
    var RESERVED_NAMES /*: Object*/ = {
        isNode: true,
        depth: true,
        length: true,
        nodeType: true,
        nodeName: true,
        nodeValue: true,
        parentNode: true
    };

    var child /*: Node*/ = node[0];
    var parent /*: Node*/ = node.parentNode;
    var nodeName /*: String*/ = node.nodeName;

    /* True for an element whose single child is a Text or CDATA-Section node. */
    var isTextLeaf /*: Boolean*/ = !!(node.length == 1 && child && (child.nodeType == 3 || child.nodeType == 4));

    var isSafeName /*: Boolean*/ =
        !Object.prototype.hasOwnProperty.call(RESERVED_NAMES, nodeName) &&
        !/^\d+$/.test(nodeName);

    if (parent && isSafeName)
    {
        var existing = parent[nodeName];
        var shortcut = isTextLeaf ? child.nodeValue : node;

        if (typeof existing == "undefined")
        {
            parent[nodeName] = shortcut;
        }
        else if (typeof existing == "string")
        {
            parent[nodeName] = [existing, shortcut];
        }
        else if (typeof existing == "object" && existing !== null)
        {
            if (existing.isNode)
            {
                /* A node is already stored under this name: keep nodes together. */
                parent[nodeName] = [existing, node];
            }
            else if (existing instanceof Array)
            {
                /* Keep an Array of Strings homogeneous when the newcomer is a text leaf. */
                if (isTextLeaf && typeof existing[0] == "string")
                {
                    existing.push(child.nodeValue);
                }
                else
                {
                    existing.push(node);
                }
            }
        }
    }

    /* A text leaf below a parent has no element children to visit. */
    if (parent && isTextLeaf)
    {
        return;
    }

    for (var i = 0; i < node.length; i++)
    {
        child = node[i];
        if (child.nodeType == 1)
        {
            this.createTreeShortcuts(child);
        }
    }
}
AddMethod(constr, createTreeShortcuts); // This is a nice-to-have function which enables you to access childNodes by nodeName as a property of the parentNode.
rootNode : Node
The Node returned from nodesToTree.
void
The rootNode is operated on by reference.
this::AddMethod
Given:
contacts
contact
name:George Bush
contact
name:Tony Blair
the following are equivalent:
root.contacts[0][0];
root.contacts[0].name;
root.contacts.contact[0][0]
root.contacts.contact[0].name
None
/**
 * Trims whitespace from start and end of String.
 *
 * @param s : String  The String to trim.
 * @return String  The trimmed String.
 */
function trim(s /*: String*/) /*: String*/
{
    var withoutLeading /*: String*/ = s.replace(/^\s+/, "");

    return withoutLeading.replace(/\s+$/, "");
}
AddMethod(constr, trim);Trims whitespace from start and end of String.
s : String
The String to trim.
String
The trimmed String.
this::AddMethod
None
None
/**
 * [NOT CURRENTLY USED]
 *
 * Escapes the characters ' " < > & as character entity references, in the
 * same way as attribute values are escaped. Intended for use prior to
 * serialising the node tree.
 *
 * "&" is replaced first so that the ampersands introduced by the other
 * replacements are not escaped a second time.
 *
 * @param string : String  String to escape.
 * @return String  Escaped string.
 */
function escapeAttrHTML(
    string /*: String*/
) /*: String*/
{
    return string
        .replace(/&/g, "&amp;")
        .replace(/</g, "&lt;")
        .replace(/>/g, "&gt;")
        .replace(/"/g, "&quot;")
        .replace(/'/g, "&apos;");
}
AddMethod(constr, escapeAttrHTML);[NOT CURRENTLY USED]
The characters ' " < > & are escaped in SweetButty strings in the same way as HTML attributes.
This would be used to do the escaping, prior to serialising the node tree.
string : String
String to escape.
String
Escaped string.
this::AddMethod
The characters ' " < > & are converted to their character entity references (for example, < becomes &lt;).
None
/**
 * Calls the exec method of the RegExp object on the supplied input.
 *
 * Purpose is to convert unmatched parentheses that return undefined into
 * blank strings, as blank strings are preferred. The result is also padded
 * with blank strings up to index 9, so callers can read those slots without
 * checking the number of capture groups.
 *
 * Only genuinely undefined entries are blanked; a captured text that
 * happens to read "undefined" is returned unchanged.
 *
 * @param re : RegExp  The RegExp object on which the exec method is called.
 * @param input : String  The String in respect of which the exec method is called.
 * @return Array  The Array returned by RegExp.exec with undefined items
 *                converted to blank Strings, or null when there is no match.
 */
function exec(
    re /*: RegExp*/,
    input /*: String*/
) /*: Array*/
{
    var execResult /*: Array*/ = re.exec(input);

    if (execResult === null)
    {
        return execResult;
    }

    /* Cover every capture group, and at least the first 10 slots. */
    var slotCount /*: int*/ = Math.max(10, execResult.length);

    for (var i /*: int*/ = 0; i < slotCount; i++)
    {
        if (typeof execResult[i] === "undefined")
        {
            execResult[i] = "";
        }
    }

    return execResult;
}
AddMethod(constr, exec);Calls the exec method of the RegExp object on the supplied input.
Purpose is to convert unmatched parentheses that return undefined into blank strings, as blank strings are preferred.
re : RegExp
The RegExp object on which the exec method is called.
input : String
The String in respect of which the exec method is called.
Array
The Array returned by RegExp.exec with undefined items converted to blank Strings.
this::AddMethod
None
None
/**
 * As SweetButty escapes the contents of attributes and text nodes in the
 * same way as XML, this is used to unescape during the parsing process.
 *
 * Converts &lt; &gt; &quot; &apos; (and the numeric form &#39;) and &amp;
 * back to < > " ' and &. "&amp;" is handled last so that an escaped
 * ampersand such as "&amp;lt;" yields the text "&lt;" rather than "<".
 *
 * @param string : String  String to unescape.
 * @return String  Unescaped String.
 */
function unescapeAttrHTML(
    string /*: String*/
) /*: String*/
{
    return string
        .replace(/&lt;/g, "<")
        .replace(/&gt;/g, ">")
        .replace(/&quot;/g, "\"")
        .replace(/&apos;/g, "'")
        .replace(/&#39;/g, "'")
        .replace(/&amp;/g, "&");
}
AddMethod(constr, unescapeAttrHTML);As SweetButty escapes the contents of attributes and text nodes in the same way as XML, this is used to unescape during the parsing process.
string : String
String to unescape.
String
Unescaped String.
this::AddMethod
None
None
/**
 * [NOT CURRENTLY USED]
 *
 * Converts < > & to &lt; &gt; &amp;. Quotes are left untouched.
 * "&" is replaced first so the ampersands introduced by the other
 * replacements are not escaped again.
 *
 * @param string : String  The string to escape.
 * @return String  The escaped string.
 */
function escapeHTML(
    string /*: String*/
) /*: String*/
{
    return string
        .replace(/&/g, "&amp;")
        .replace(/</g, "&lt;")
        .replace(/>/g, "&gt;");
}
AddMethod(constr, escapeHTML);[NOT CURRENTLY USED]
Converts < > & to &lt; &gt; &amp;.
string : String
The string to escape.
String
The escaped string.
this::AddMethod
None
None
/**
 * [NOT CURRENTLY USED]
 *
 * Converts &lt; &gt; &amp; to < > &.
 * "&amp;" is handled last so that "&amp;lt;" yields the text "&lt;"
 * rather than "<".
 *
 * @param string : String  The string to unescape.
 * @return String  The unescaped string.
 */
function unescapeHTML(
    string /*: String*/
) /*: String*/
{
    return string
        .replace(/&lt;/g, "<")
        .replace(/&gt;/g, ">")
        .replace(/&amp;/g, "&");
}
AddMethod(constr, unescapeHTML);[NOT CURRENTLY USED]
Converts &lt; &gt; &amp; to < > &.
string : String
The string to unescape.
String
The unescaped string.
this::AddMethod
None
None
/**
 * Debugging. Formats the Array returned by the RegExp.exec method.
 *
 * Every enumerable property of the result (capture groups as well as
 * properties such as index and input) is listed on its own line, followed
 * by a "Substr" line showing the matched part of the input.
 *
 * @param execResult : Array  The Array returned by the RegExp.exec method.
 * @return String  The properties of the Array on separate lines, or an
 *                 empty String when execResult is null/undefined.
 */
function execResultToString(
    execResult /*: Array*/
) /*: String*/
{
    if (!execResult)
    {
        return "";
    }

    var a /*: Array.<String>*/ = ["Result", "======="];

    /* for-in is deliberate: it also reports the non-index properties. */
    for (var key in execResult)
    {
        a.push(key + " : |" + execResult[key] + "|");
    }

    /*
     * A match Array has no lastIndex property (that lives on the RegExp),
     * so the end of the match is derived from its start and length.
     */
    var matchEnd /*: int*/ = execResult.index + String(execResult[0]).length;

    a.push("Substr : |" + execResult.input.substring(execResult.index, matchEnd) + "|");

    return a.join("\r\n");
}
AddMethod(constr, execResultToString);Debugging. Formats the Array returned by the RegExp.exec method.
execResult : Array
The Array returned by the RegExp.exec method
String
The properties of the Array on separate lines.
this::AddMethod
None
None
/**
 * Debugging. Formats an Array of Nodes (such as the one returned by
 * splitIntoNodes) as a String.
 *
 * Each enumerable property of a node is written on its own line as
 * `name<padding>\t: "value"`, with the name padded to 12 columns so the
 * values line up. Nodes are separated by a ruler line.
 *
 * @param nodes : Array.<Node>  The Array of nodes to format.
 * @return String  Formatted string, consisting of each Node and its properties.
 */
function nodeListToString(
    nodes /*: Array.<Nodes>*/
) /*: String*/
{
    var KEY_COLUMN_WIDTH /*: int*/ = 12;
    /* A run of KEY_COLUMN_WIDTH spaces to cut the padding from. */
    var padding /*: String*/ = new Array(KEY_COLUMN_WIDTH + 1).join(" ");

    var a /*: Array.<String>*/ = [];
    var node /*: Node*/;

    for (var i /*: int*/ = 0; i < nodes.length; i++)
    {
        node = nodes[i];

        var nodeLines /*: Array.<String>*/ = [];

        for (var k in node)
        {
            /* Names longer than the column width get no padding. */
            var pad /*: String*/ = padding.substring(0, Math.max(0, KEY_COLUMN_WIDTH - k.length));

            nodeLines.push(k + pad + "\t: " + "\"" + node[k] + "\"");
        }

        a.push(nodeLines.join("\r\n"));
    }

    return a.join("\r\n\r\n==============================\r\n");
}
AddMethod(constr, nodeListToString); // Debugging. Formats the Array of Nodes returned by splitIntoNodes as a String.
nodes : Array.<Nodes>
The Array returned by splitIntoNodes.
String
Formatted string, consisting of each Node and its properties.
this::AddMethod
None
None
return constr; })(); //makeModulePublicClass(classSweetXML);
Two cannibals eating a clown. One says to the other: "Does this taste funny to you?"
SweetXML 0.2 and this grammar is the creation of Paul Phillips Cantrell.
sweetXml
: directives
line (EOL line)*
;directives
: (nestedAngleBraces | S | EOL)*
;protected
nestedAngleBraces
: '<' (nestedAngleBraces | ~('<'|'>') )* '>'
;
line
: indent
element?
S*
comment?
;indent
: S*
;element
: tag
| quotedText;tag
: NAME attributes? S* inlineText?
;protected
attributes
: S+ attribute attributes?
;attribute
: NAME S* '=' S* text
;inlineText
: ':' S* (text)?
;text
: quotedText
| unquotedText
;quotedText
: '"' (~('"'))* '"'
| '\'' (~('\''))* '\''
;
unquotedText
: (NAME_START | NAME_CONT)+
;comment
: '#' (~(EOL))*;
NAME
: NAME_START (NAME_START | NAME_CONT)*
;fragment
NAME_START
: 'A'..'Z' | 'a'..'z' | '_'
| '\u00C0'..'\u00D6' | '\u00D8'..'\u00F6' | '\u00F8'..'\u02FF' | '\u0370'..'\u037D'
| '\u037F'..'\u1FFF' | '\u200C'..'\u200D' | '\u2070'..'\u218F' | '\u2C00'..'\u2FEF'
| '\u3001'..'\uD7FF' | '\uF900'..'\uFDCF' | '\uFDF0'..'\uFFFD'
;fragment
NAME_CONT
: '0'..'9' | '-' | '.'
| '\u00B7' | '\u0300'..'\u036F' | '\u203F'..'\u2040'
;S : ' '
| '\t';EOL
: '\r\n'
| '\r'
| '\n'
;