3 Plus a bunch of object utility functions
6 var XML = require('pixl-xml');
7 var myxmlstring = '<?xml version="1.0"?><Document>' +
8 '<Simple>Hello</Simple>' +
9 '<Node Key="Value">Content</Node>' +
12 var tree = XML.parse( myxmlstring, { preserveAttributes: true });
15 tree.Simple = "Hello2";
16 tree.Node._Attribs.Key = "Value2";
17 tree.Node._Data = "Content2";
18 tree.New = "I added this";
20 console.log( XML.stringify( tree, 'Document' ) );
22 Copyright (c) 2004 - 2015 Joseph Huckaby
23 Released under the MIT License
24 This version is for Node.JS, converted in 2012.
27 var fs = require('fs');
// One level of indentation in composed XML (a tab); compose_xml repeats it `indent` times.
29 var indent_string = "\t";
// XML declaration line that compose_xml and XML.prototype.compose put at the top of their output.
30 var xml_header = '<?xml version="1.0"?>';
// A key is emitted as a child element by compose_xml only if it matches this pattern:
// one word character followed by any mix of word characters, hyphens and colons.
32 var re_valid_tag_name = /^\w[\w\-\:]*$/;
// XML parser constructor, also exported as exports.XML.
// `args` is either an options hash, whose keys are copied onto the instance, or the
// text to parse. When text is present, parsing is kicked off from the constructor itself.
34 var XML = exports.XML = function XML(args) {
35 // class constructor for XML parser class
36 // pass in args hash or text to parse
// copy every enumerable key of the args hash onto this instance (options and `text`)
39 for (var key in args) this[key] = args[key];
41 else this.text = args || '';
// Buffers are converted to strings before any regex matching is attempted
44 if (this.text instanceof Buffer) {
45 this.text = this.text.toString();
// text that does not start with "<" (after optional whitespace) is not treated as inline XML
48 if (!this.text.match(/^\s*</)) {
// NOTE(review): `file` is assigned on a line not visible here -- presumably the
// original text, used as a file path; confirm.
51 this.text = fs.readFileSync(file, { encoding: 'utf8' });
52 if (!this.text) throw new Error("File not found: " + file);
58 this.dtdNodeList = [];
59 this.documentNodeName = '';
// NOTE(review): the condition guarding these two lines is not visible -- presumably
// the lowerCase option; confirm.
62 this.attribsKey = this.attribsKey.toLowerCase();
63 this.dataKey = this.dataKey.toLowerCase();
// patTag lives on the prototype and carries the "g" flag, so its scan position is
// reset before each new parse
66 this.patTag.lastIndex = 0;
67 if (this.text) this.parse();
// Option defaults. They live on the prototype, so an instance only overrides them
// when the constructor copies a same-named key from its args hash.
70 XML.prototype.preserveAttributes = false;
71 XML.prototype.lowerCase = false;

// Regular expressions used by the parser methods. Those with the "g" flag (patTag,
// patAttrib, patNextClose) keep their scan position in lastIndex, which the parser
// reads and writes directly.

// one tag plus the text before it: [1] = preceding text, [2] = tag contents without < >
73 XML.prototype.patTag = /([^<]*?)<([^>]+)>/g;
// tag contents starting with "!" or "?" (comment, DOCTYPE, CDATA, processing instruction)
74 XML.prototype.patSpecialTag = /^\s*([\!\?])/;
75 XML.prototype.patPITag = /^\s*\?/;
76 XML.prototype.patCommentTag = /^\s*\!--/;
77 XML.prototype.patDTDTag = /^\s*\!DOCTYPE/;
78 XML.prototype.patCDATATag = /^\s*\!\s*\[\s*CDATA/;
// ordinary tag: [1] = "/" when closing, [2] = node name, [3] = raw remainder (attributes)
79 XML.prototype.patStandardTag = /^\s*(\/?)([\w\-\:\.]+)\s*(.*)$/;
// raw attribute text ending in "/" marks a self-closing tag
80 XML.prototype.patSelfClosing = /\/\s*$/;
// one attribute: [1] = name, [2] = quote character, [3] = value
// NOTE(review): inside a character class "\2" is not a back-reference, so "[^\2]" does
// not mean "any character except the opening quote"; the lazy "*?" followed by the real
// "\2" back-reference is what ends the value -- confirm this is intended.
81 XML.prototype.patAttrib = new RegExp("([\\w\\-\\:\\.]+)\\s*=\\s*([\\\"\\'])([^\\2]*?)\\2", "g");
82 XML.prototype.patPINode = /^\s*\?\s*([\w\-\:]+)\s*(.*)$/;
// a complete comment ends in "--" (the closing ">" is not part of the tag contents)
83 XML.prototype.patEndComment = /--$/;
// scans forward to the next ">"; used to extend special tags that contain ">" themselves
84 XML.prototype.patNextClose = /([^>]*?)>/g;
85 XML.prototype.patExternalDTDNode = new RegExp("^\\s*\\!DOCTYPE\\s+([\\w\\-\\:]+)\\s+(SYSTEM|PUBLIC)\\s+\\\"([^\\\"]+)\\\"");
86 XML.prototype.patInlineDTDNode = /^\s*\!DOCTYPE\s+([\w\-\:]+)\s+\[/;
87 XML.prototype.patEndDTD = /\]$/;
88 XML.prototype.patDTDNode = /^\s*\!DOCTYPE\s+([\w\-\:]+)\s+\[(.*)\]/;
89 XML.prototype.patEndCDATA = /\]\]$/;
// [1] = CDATA payload; "[^]" matches any character, including newlines
90 XML.prototype.patCDATANode = /^\s*\!\s*\[\s*CDATA\s*\[([^]*)\]\]/;

// Default key names under which the parser stores a node's attribute sub-hash and
// its text content.
92 XML.prototype.attribsKey = '_Attribs';
93 XML.prototype.dataKey = '_Data';
// Recursive-descent parse of this.text into a tree of plain objects.
// branch: the object that receives the nodes found at this level (defaults to this.tree).
// name:   the tag name whose closing tag ends this level (null at the top level).
// All recursion levels share one scan position: this.patTag.lastIndex.
// Problems are reported through throwParseError(), which throws.
95 XML.prototype.parse = function(branch, name) {
96 // parse text into XML tree, recurse for nested nodes
97 if (!branch) branch = this.tree;
98 if (!name) name = null;
99 var foundClosing = false;
102 // match each tag, plus preceding text
// (`matches` is function-scoped through the `var matches` declaration further down)
103 while ( matches = this.patTag.exec(this.text) ) {
104 var before = matches[1];
105 var tag = matches[2];
107 // text leading up to tag = content of parent node
108 if (before.match(/\S/)) {
// separate text chunks of the same node are joined with a single space
109 if (typeof(branch[this.dataKey]) != 'undefined') branch[this.dataKey] += ' '; else branch[this.dataKey] = '';
110 branch[this.dataKey] += trim(decode_entities(before));
113 // parse based on tag type
114 if (tag.match(this.patSpecialTag)) {
116 if (tag.match(this.patPITag)) tag = this.parsePINode(tag);
117 else if (tag.match(this.patCommentTag)) tag = this.parseCommentNode(tag);
118 else if (tag.match(this.patDTDTag)) tag = this.parseDTDNode(tag);
119 else if (tag.match(this.patCDATATag)) {
// CDATA payload is appended to the node's text content, like ordinary text
120 tag = this.parseCDATANode(tag);
121 if (typeof(branch[this.dataKey]) != 'undefined') branch[this.dataKey] += ' '; else branch[this.dataKey] = '';
122 branch[this.dataKey] += trim(decode_entities(tag));
125 this.throwParseError( "Malformed special tag", tag );
// a null/undefined result from one of the special-node parsers stops the loop
129 if (tag == null) break;
133 // Tag is standard, so parse name and attributes (if any)
134 var matches = tag.match(this.patStandardTag);
136 this.throwParseError( "Malformed tag", tag );
140 var closing = matches[1];
141 var nodeName = this.lowerCase ? matches[2].toLowerCase() : matches[2];
142 var attribsRaw = matches[3];
144 // If this is a closing tag, make sure it matches its opening tag
146 if (nodeName == (name || '')) {
151 this.throwParseError( "Mismatched closing tag (expected </" + name + ">)", tag );
156 // Not a closing tag, so parse attributes into hash. If tag
157 // is self-closing, no recursive parsing is needed.
158 var selfClosing = !!attribsRaw.match(this.patSelfClosing);
162 // preserve attributes means they go into a sub-hash named "_Attribs"
163 // the XML composer honors this for restoring the tree back into XML
164 if (this.preserveAttributes) {
165 leaf[this.attribsKey] = {};
166 attribs = leaf[this.attribsKey];
// patAttrib is a shared "g" regex, so rewind it before scanning this tag's attributes
170 this.patAttrib.lastIndex = 0;
171 while ( matches = this.patAttrib.exec(attribsRaw) ) {
172 var key = this.lowerCase ? matches[1].toLowerCase() : matches[1];
173 attribs[ key ] = decode_entities( matches[3] );
176 // if no attribs found, but we created the _Attribs subhash, clean it up now
177 if (this.preserveAttributes && !num_keys(attribs)) {
178 delete leaf[this.attribsKey];
181 // Recurse for nested nodes
183 this.parse( leaf, nodeName );
184 if (this.error()) break;
187 // Compress into simple node if text only
188 var num_leaf_keys = num_keys(leaf);
189 if ((typeof(leaf[this.dataKey]) != 'undefined') && (num_leaf_keys == 1)) {
190 leaf = leaf[this.dataKey];
192 else if (!num_leaf_keys) {
196 // Add leaf to parent branch
// a repeated node name turns the entry into an array of leaves
197 if (typeof(branch[nodeName]) != 'undefined') {
198 if (isa_array(branch[nodeName])) {
199 branch[nodeName].push( leaf );
202 var temp = branch[nodeName];
203 branch[nodeName] = [ temp, leaf ];
207 branch[nodeName] = leaf;
// at the top level the loop stops after the first complete node
210 if (this.error() || (branch == this.tree)) break;
215 // Make sure we found the closing tag
216 if (name && !foundClosing) {
217 this.throwParseError( "Missing closing tag (expected </" + name + ">)", name );
220 // If we are the master node, finish parsing and setup our doc node
221 if (branch == this.tree) {
// text collected outside the document node is dropped
222 if (typeof(this.tree[this.dataKey]) != 'undefined') delete this.tree[this.dataKey];
224 if (num_keys(this.tree) > 1) {
225 this.throwParseError( 'Only one top-level node is allowed in document', first_key(this.tree) );
// unwrap: this.tree becomes the contents of the single document node
229 this.documentNodeName = first_key(this.tree);
230 if (this.documentNodeName) {
231 this.tree = this.tree[this.documentNodeName];
// Work out the source line number for a parse failure and throw an Error whose
// message is getLastError()'s text.
// key: short description of the problem; tag: contents of the offending tag (without < >).
236 XML.prototype.throwParseError = function(key, tag) {
237 // log error and locate current line number in source XML document
// count the newlines in everything scanned so far (up to patTag's current position)...
238 var parsedSource = this.text.substring(0, this.patTag.lastIndex);
239 var eolMatch = parsedSource.match(/\n/g);
240 var lineNum = (eolMatch ? eolMatch.length : 0) + 1;
// ...then subtract the newlines inside the tag itself
241 lineNum -= tag.match(/\n/) ? tag.match(/\n/g).length : 0;
// NOTE(review): the statement this property belongs to is not visible here --
// presumably the error record appended to this.errors; confirm.
246 text: '<' + tag + '>',
250 // Throw actual error (must wrap parse in try/catch)
251 throw new Error( this.getLastError() );
XML.prototype.error = function() {
	// Return the number of errors recorded on this parser.
	// 0 means no errors, so callers also use the result as a boolean flag.
	return this.errors.length;
};
XML.prototype.getError = function(error) {
	// Format one error record as a single line of plain text:
	//   "<type> Error[ <code>]: <key>[ on line <line>][: <text>]"
	// error: object with `key`, plus optional `type` (defaults to "General"),
	//        `code`, `line` and `text`.
	// Returns '' when no error record is passed.
	if (!error) return '';
	
	// `text` is a local; without the declaration it would leak as an implicit global
	var text = (error.type || 'General') + ' Error';
	if (error.code) text += ' ' + error.code;
	text += ': ' + error.key;
	
	if (error.line) text += ' on line ' + error.line;
	if (error.text) text += ': ' + error.text;
	
	return text;
};
XML.prototype.getLastError = function() {
	// Return the most recently recorded error, formatted by getError(),
	// or '' when no errors have been recorded.
	if (!this.error()) return '';
	return this.getError( this.errors[this.errors.length - 1] );
};
// Validate a processing-instruction tag against patPINode and record it in
// this.piNodeList. `tag` is the tag contents without the surrounding < >.
280 XML.prototype.parsePINode = function(tag) {
281 // Parse Processor Instruction Node, e.g. <?xml version="1.0"?>
282 if (!tag.match(this.patPINode)) {
283 this.throwParseError( "Malformed processor instruction", tag );
// well-formed instructions are kept so compose() can write them back out
287 this.piNodeList.push( tag );
// Consume a complete comment. patTag stops at the first ">", which may sit inside the
// comment body, so the tag is extended until its contents end in "--".
291 XML.prototype.parseCommentNode = function(tag) {
292 // Parse Comment Node, e.g. <!-- hello -->
// continue scanning from where patTag stopped
294 this.patNextClose.lastIndex = this.patTag.lastIndex;
296 while (!tag.match(this.patEndComment)) {
297 if (matches = this.patNextClose.exec(this.text)) {
// put back the ">" that ended the previous chunk, then append the next chunk
298 tag += '>' + matches[1];
301 this.throwParseError( "Unclosed comment tag", tag );
// hand the advanced scan position back to the main tag regex
306 this.patTag.lastIndex = this.patNextClose.lastIndex;
// Consume a DOCTYPE declaration and record it in this.dtdNodeList.
// External declarations (SYSTEM / PUBLIC with a quoted identifier) are complete as matched.
// Inline declarations ("[ ... ]") may contain ">" characters, so they are extended until
// the contents end in "]".
310 XML.prototype.parseDTDNode = function(tag) {
311 // Parse Document Type Descriptor Node, e.g. <!DOCTYPE ... >
314 if (tag.match(this.patExternalDTDNode)) {
315 // tag is external, and thus self-closing
316 this.dtdNodeList.push( tag );
318 else if (tag.match(this.patInlineDTDNode)) {
319 // Tag is inline, so check for nested nodes.
// continue scanning from where patTag stopped
320 this.patNextClose.lastIndex = this.patTag.lastIndex;
322 while (!tag.match(this.patEndDTD)) {
323 if (matches = this.patNextClose.exec(this.text)) {
// put back the ">" that ended the previous chunk, then append the next chunk
324 tag += '>' + matches[1];
327 this.throwParseError( "Unclosed DTD tag", tag );
// hand the advanced scan position back to the main tag regex
332 this.patTag.lastIndex = this.patNextClose.lastIndex;
334 // Make sure complete tag is well-formed, and push onto DTD stack.
335 if (tag.match(this.patDTDNode)) {
336 this.dtdNodeList.push( tag );
339 this.throwParseError( "Malformed DTD tag", tag );
344 this.throwParseError( "Malformed DTD tag", tag );
// Consume a complete CDATA section. The payload may contain ">" characters, so the tag
// is extended until its contents end in "]]", then checked against patCDATANode
// (whose first capture group is the payload).
351 XML.prototype.parseCDATANode = function(tag) {
352 // Parse CDATA Node, e.g. <![CDATA[Brooks & Shields]]>
// continue scanning from where patTag stopped
354 this.patNextClose.lastIndex = this.patTag.lastIndex;
356 while (!tag.match(this.patEndCDATA)) {
357 if (matches = this.patNextClose.exec(this.text)) {
// put back the ">" that ended the previous chunk, then append the next chunk
358 tag += '>' + matches[1];
361 this.throwParseError( "Unclosed CDATA tag", tag );
// hand the advanced scan position back to the main tag regex
366 this.patTag.lastIndex = this.patNextClose.lastIndex;
368 if (matches = tag.match(this.patCDATANode)) {
372 this.throwParseError( "Malformed CDATA tag", tag );
XML.prototype.getTree = function() {
	// Get a reference to the parsed XML tree. This is the live object held by the
	// parser, not a copy, so changes made by the caller are visible to compose().
	return this.tree;
};
// Turn the parsed tree back into XML text, re-emitting the processing instructions
// and DOCTYPE declarations collected during parsing.
382 XML.prototype.compose = function() {
383 // compose tree back into XML
384 var raw = compose_xml( this.tree, this.documentNodeName );
// drop the first line of compose_xml's output (the header line it writes for a root
// node); the header is rebuilt below
385 var body = raw.substring( raw.indexOf("\n") + 1, raw.length );
// stored processing instructions are written back one per line, in the order stored
388 if (this.piNodeList.length) {
389 for (var idx = 0, len = this.piNodeList.length; idx < len; idx++) {
390 xml += '<' + this.piNodeList[idx] + '>' + "\n";
// NOTE(review): presumably the fallback branch when no instructions were stored -- confirm
394 xml += xml_header + "\n";
// stored DOCTYPE declarations follow, one per line
397 if (this.dtdNodeList.length) {
398 for (var idx = 0, len = this.dtdNodeList.length; idx < len; idx++) {
399 xml += '<' + this.dtdNodeList[idx] + '>' + "\n";
408 // Static Utility Functions:
var parse_xml = exports.parse = function parse_xml(text, opts) {
	// Parse XML in one call, without handling a parser object.
	// text: whatever the XML constructor accepts as its `text`
	// opts: optional hash of parser options (e.g. preserveAttributes, lowerCase)
	// Returns the parsed tree, or the text of the last error if the parser recorded any.
	// Parse failures reported through throwParseError() propagate as thrown Errors.
	
	// Build a fresh args hash so the caller's opts object is left untouched, and
	// hand the text to the parser through its `text` key.
	var args = {};
	if (opts) {
		for (var key in opts) args[key] = opts[key];
	}
	args.text = text;
	
	var parser = new XML(args);
	return parser.error() ? parser.getLastError() : parser.getTree();
};
var trim = exports.trim = function trim(text) {
	// Strip whitespace from the beginning and end of a string.
	// null / undefined become ''; any other value without a replace() method
	// (numbers, booleans, ...) is returned unchanged.
	if (text == null) return '';
	
	if (text && text.replace) {
		text = text.replace(/^\s+/, "").replace(/\s+$/, "");
	}
	
	return text;
};
var encode_entities = exports.encodeEntities = function encode_entities(text) {
	// Escape text for use as XML element content: & < > become &amp; &lt; &gt;
	// null / undefined become ''; values without a replace() method are returned unchanged.
	if (text == null) return '';
	
	if (text && text.replace) {
		text = text.replace(/\&/g, "&amp;"); // MUST BE FIRST, or the other entities get double-escaped
		text = text.replace(/</g, "&lt;");
		text = text.replace(/>/g, "&gt;");
	}
	
	return text;
};
var encode_attrib_entities = exports.encodeAttribEntities = function encode_attrib_entities(text) {
	// Escape text for use inside an XML attribute value:
	// & < > " ' become &amp; &lt; &gt; &quot; &apos;
	// null / undefined become ''; values without a replace() method are returned unchanged.
	if (text == null) return '';
	
	if (text && text.replace) {
		text = text.replace(/\&/g, "&amp;"); // MUST BE FIRST, or the other entities get double-escaped
		text = text.replace(/</g, "&lt;");
		text = text.replace(/>/g, "&gt;");
		text = text.replace(/\"/g, "&quot;");
		text = text.replace(/\'/g, "&apos;");
	}
	
	return text;
};
var decode_entities = exports.decodeEntities = function decode_entities(text) {
	// Decode the five predefined XML entities (&lt; &gt; &quot; &apos; &amp;) into raw characters.
	// null / undefined become ''; values without replace() are returned unchanged.
	// Text containing no "&" is returned as-is without running any replacement.
	if (text == null) return '';
	
	if (text && text.replace && text.match(/\&/)) {
		text = text.replace(/\&lt\;/g, "<");
		text = text.replace(/\&gt\;/g, ">");
		text = text.replace(/\&quot\;/g, '"');
		text = text.replace(/\&apos\;/g, "'");
		text = text.replace(/\&amp\;/g, "&"); // MUST BE LAST, so "&amp;lt;" decodes to "&lt;" and not "<"
	}
	
	return text;
};
// Serialize a tree node to XML text, recursing into children; exported as exports.stringify.
// node:   object, array (or array-like), or simple value to serialize
// name:   element name to wrap the node in
// indent: nesting depth, used to build the leading indentation
// Attribute and text keys are the literal "_Attribs" and "_Data" here; the parser's
// configurable attribsKey / dataKey are not consulted.
474 var compose_xml = exports.stringify = function compose_xml(node, name, indent) {
475 // Compose node into XML including attributes
476 // Recurse for child nodes
479 // If this is the root node, set the indent to 0
480 // and setup the XML header (PI node)
483 xml = xml_header + "\n";
486 // no name provided, assume content is wrapped in it
487 name = first_key(node);
492 // Setup the indent text
493 var indent_text = "";
494 for (var k = 0; k < indent; k++) indent_text += indent_string;
496 if ((typeof(node) == 'object') && (node != null)) {
497 // node is object -- now see if it is an array or hash
// a missing or zero length selects the hash branch, so an empty array is handled as a hash
498 if (!node.length) { // what about zero-length array?
500 xml += indent_text + "<" + name;
// NOTE(review): `num_keys` here must be a local counter declared on a line not
// visible -- it would otherwise be the module-level num_keys function; confirm.
504 for (var key in node) num_keys++; // there must be a better way...
506 if (node["_Attribs"]) {
// attributes are written in sorted key order, values escaped for attribute context
508 var sorted_keys = hash_keys_to_array(node["_Attribs"]).sort();
509 for (var idx = 0, len = sorted_keys.length; idx < len; idx++) {
510 var key = sorted_keys[idx];
511 xml += " " + key + "=\"" + encode_attrib_entities(node["_Attribs"][key]) + "\"";
// any key beyond the attributes sub-hash means the element has content
515 if (num_keys > has_attribs) {
516 // has child elements
520 // simple text child node
521 xml += encode_entities(node["_Data"]) + "</" + name + ">\n";
// child elements are written in sorted key order
526 var sorted_keys = hash_keys_to_array(node).sort();
527 for (var idx = 0, len = sorted_keys.length; idx < len; idx++) {
528 var key = sorted_keys[idx];
// keys that are not valid tag names are skipped, as is the attributes sub-hash
529 if ((key != "_Attribs") && key.match(re_valid_tag_name)) {
530 // recurse for node, with incremented indent value
531 xml += compose_xml( node[key], key, indent + 1 );
532 } // not _Attribs key
535 xml += indent_text + "</" + name + ">\n";
539 // no child elements, so self-close
// array branch: each element becomes its own <name> element at the same depth
545 for (var idx = 0; idx < node.length; idx++) {
546 // recurse for node in array with same indent
547 xml += compose_xml( node[idx], name, indent );
552 // node is simple string
553 xml += indent_text + "<" + name + ">" + encode_entities(node) + "</" + name + ">\n";
554 } // simple text node
// Normalize a value to an array. Two modes are visible below: one that works on
// obj[key], and one that works on obj itself.
// "Array" here means any object that has a length property.
559 var always_array = exports.alwaysArray = function always_array(obj, key) {
560 // if object is not array, return array containing object
561 // if key is passed, work like XMLalwaysarray() instead
// key mode: obj[key] is not an object with a length
563 if ((typeof(obj[key]) != 'object') || (typeof(obj[key].length) == 'undefined')) {
// NOTE(review): the lines that fill this new array are not visible -- presumably the
// previous obj[key] value becomes its only element; confirm.
566 obj[key] = new Array();
// no-key mode: a value that is not an object with a length comes back wrapped in a one-element array
572 if ((typeof(obj) != 'object') || (typeof(obj.length) == 'undefined')) { return [ obj ]; }
var hash_keys_to_array = exports.hashKeysToArray = function hash_keys_to_array(hash) {
	// Convert hash keys to an array (values are discarded).
	// Uses for..in, so inherited enumerable keys are included, in enumeration order.
	// null / undefined yield an empty array.
	var array = [];
	for (var key in hash) array.push(key);
	return array;
};
var isa_hash = exports.isaHash = function isa_hash(arg) {
	// Determine if arg is a hash: a non-null object with no `length` property.
	// Arrays and array-like objects are therefore not hashes; neither are primitives.
	return( !!arg && (typeof(arg) == 'object') && (typeof(arg.length) == 'undefined') );
};
var isa_array = exports.isaArray = function isa_array(arg) {
	// Determine if arg is an array or is array-like: a non-null object that has a
	// `length` property. Strings are primitives, so they do not count.
	// (typeof never yields 'array', so no separate check for that value is made.)
	return( !!arg && (typeof(arg) == 'object') && (typeof(arg.length) != 'undefined') );
};
var first_key = exports.firstKey = function first_key(hash) {
	// Return the first key that for..in yields for the hash, or null if there is none.
	// null / undefined hashes also yield null.
	for (var key in hash) return key;
	return null; // no keys in hash
};
601 var num_keys = exports.numKeys = function num_keys(hash) {
602 // count the number of keys in a hash
604 for (var a in hash) count++;