]> arthur.barton.de Git - netdata.git/blob - node.d/node_modules/pixl-xml.js
Merge remote-tracking branch 'upstream/master' into health
[netdata.git] / node.d / node_modules / pixl-xml.js
1 /*
2         JavaScript XML Library
3         Plus a bunch of object utility functions
4         
5         Usage:
6                 var XML = require('pixl-xml');
7                 var myxmlstring = '<?xml version="1.0"?><Document>' + 
8                         '<Simple>Hello</Simple>' + 
9                         '<Node Key="Value">Content</Node>' + 
10                         '</Document>';
11                 
12                 var tree = XML.parse( myxmlstring, { preserveAttributes: true });
13                 console.log( tree );
14                 
15                 tree.Simple = "Hello2";
16                 tree.Node._Attribs.Key = "Value2";
17                 tree.Node._Data = "Content2";
18                 tree.New = "I added this";
19                 
20                 console.log( XML.stringify( tree, 'Document' ) );
21         
22         Copyright (c) 2004 - 2015 Joseph Huckaby
23         Released under the MIT License
24         This version is for Node.JS, converted in 2012.
25 */
26
27 var fs = require('fs');
28
// Text repeated once per nesting level when the composer indents output
var indent_string = "\t";
// XML declaration written at the top of composed documents
var xml_header = '<?xml version="1.0"?>';
// NOTE(review): not referenced anywhere in the visible code -- presumably a leftover; confirm before removing
var sort_args = null;
// Hash keys must match this pattern for the composer to emit them as child elements
var re_valid_tag_name  = /^\w[\w\-\:]*$/;
33
var XML = exports.XML = function XML(args) {
	// XML parser class constructor.
	//   args: either an options hash (every property is copied onto the
	//         instance; the markup itself goes in `text`) or the text to parse.
	// Text may be a string or a Buffer. Text that does not start with "<"
	// (after optional whitespace) is treated as a file path and read from disk.
	// Parsing happens immediately; parse errors are thrown.
	var source = args ? args : '';
	
	if (isa_hash(source)) {
		for (var option in source) {
			this[option] = source[option];
		}
	}
	else {
		this.text = source;
	}
	
	// buffers are converted to strings before any matching is done
	if (this.text instanceof Buffer) this.text = this.text.toString();
	
	if (this.text.match(/^\s*</) === null) {
		// no leading tag, so interpret the text as a path to an XML file
		var path = this.text;
		this.text = fs.readFileSync(path, { encoding: 'utf8' });
		if (!this.text) {
			throw new Error("File not found: " + path);
		}
	}
	
	// fresh per-instance parser state
	this.tree = {};
	this.errors = [];
	this.piNodeList = [];
	this.dtdNodeList = [];
	this.documentNodeName = '';
	
	// in lower-case mode the special keys are lower-cased as well
	if (this.lowerCase) {
		this.attribsKey = this.attribsKey.toLowerCase();
		this.dataKey = this.dataKey.toLowerCase();
	}
	
	// patTag is a global regexp, so rewind it before starting a new document
	this.patTag.lastIndex = 0;
	if (this.text) {
		this.parse();
	}
};
69
// Default options (may be overridden per instance via the constructor's args hash)
XML.prototype.preserveAttributes = false; // true: attributes go into a sub-hash instead of the node itself
XML.prototype.lowerCase = false;          // true: element and attribute names are lower-cased

// Tokenizer patterns.  The patterns flagged "g" are stateful (lastIndex) and
// are shared through the prototype; the parser resets or syncs lastIndex itself.
// Captured tag bodies may span several lines, so "[^]" (any character,
// including newlines) is used instead of "." wherever a body is captured.
XML.prototype.patTag = /([^<]*?)<([^>]+)>/g;              // leading text + next tag
XML.prototype.patSpecialTag = /^\s*([\!\?])/;             // <! ...> or <? ...>
XML.prototype.patPITag = /^\s*\?/;
XML.prototype.patCommentTag = /^\s*\!--/;
XML.prototype.patDTDTag = /^\s*\!DOCTYPE/;
XML.prototype.patCDATATag = /^\s*\!\s*\[\s*CDATA/;
XML.prototype.patStandardTag = /^\s*(\/?)([\w\-\:\.]+)\s*([^]*)$/; // 1: "/" if closing, 2: name, 3: raw attributes
XML.prototype.patSelfClosing = /\/\s*$/;
XML.prototype.patAttrib = /([\w\-\:\.]+)\s*=\s*(["'])([^]*?)\2/g;  // 1: name, 2: quote char, 3: value up to the matching quote
XML.prototype.patPINode = /^\s*\?\s*([\w\-\:]+)\s*([^]*)$/;
XML.prototype.patEndComment = /--$/;
XML.prototype.patNextClose = /([^>]*?)>/g;                // text up to the next ">"
XML.prototype.patExternalDTDNode = new RegExp("^\\s*\\!DOCTYPE\\s+([\\w\\-\\:]+)\\s+(SYSTEM|PUBLIC)\\s+\\\"([^\\\"]+)\\\"");
XML.prototype.patInlineDTDNode = /^\s*\!DOCTYPE\s+([\w\-\:]+)\s+\[/;
XML.prototype.patEndDTD = /\]$/;
XML.prototype.patDTDNode = /^\s*\!DOCTYPE\s+([\w\-\:]+)\s+\[([^]*)\]/;
XML.prototype.patEndCDATA = /\]\]$/;
XML.prototype.patCDATANode = /^\s*\!\s*\[\s*CDATA\s*\[([^]*)\]\]/;

// Names of the special keys used in the parsed tree
XML.prototype.attribsKey = '_Attribs';
XML.prototype.dataKey = '_Data';
94
XML.prototype.parse = function(branch, name) {
	// Parse this.text into a tree, continuing from the current position of the
	// shared patTag regexp, and recurse for nested elements.
	//   branch: hash that receives the child nodes and text found at this
	//           level (defaults to this.tree, i.e. the document level)
	//   name:   name of the element whose content is being parsed; used to
	//           validate the closing tag (omitted for the document level)
	// Problems are reported through throwParseError(), which throws an Error.
	if (!branch) branch = this.tree;
	if (!name) name = null;
	
	var self = this;
	var hasOwn = Object.prototype.hasOwnProperty;
	var foundClosing = false;
	var matches = null;
	
	var appendText = function(text) {
		// add a chunk of character data to the current branch; chunks that
		// belong to the same element are joined with a single space
		var dataKey = self.dataKey;
		if (typeof(branch[dataKey]) != 'undefined') branch[dataKey] += ' ';
		else branch[dataKey] = '';
		branch[dataKey] += text;
	};
	
	// match each tag, plus preceding text
	while ( matches = this.patTag.exec(this.text) ) {
		var before = matches[1];
		var tag = matches[2];
		
		// text leading up to tag = content of parent node
		if (before.match(/\S/)) {
			appendText( trim(decode_entities(before)) );
		}
		
		if (tag.match(this.patSpecialTag)) {
			// special tag: processing instruction, comment, DTD or CDATA
			if (tag.match(this.patPITag)) tag = this.parsePINode(tag);
			else if (tag.match(this.patCommentTag)) tag = this.parseCommentNode(tag);
			else if (tag.match(this.patDTDTag)) tag = this.parseDTDNode(tag);
			else if (tag.match(this.patCDATATag)) {
				tag = this.parseCDATANode(tag);
				// CDATA content is literal character data, so entity
				// references inside it must NOT be decoded
				appendText( trim(tag) );
			}
			else {
				this.throwParseError( "Malformed special tag", tag );
				break;
			}
			
			if (tag == null) break;
			continue;
		}
		
		// Tag is standard, so parse name and attributes (if any)
		var tagParts = tag.match(this.patStandardTag);
		if (!tagParts) {
			this.throwParseError( "Malformed tag", tag );
			break;
		}
		
		var closing = tagParts[1];
		var nodeName = this.lowerCase ? tagParts[2].toLowerCase() : tagParts[2];
		var attribsRaw = tagParts[3];
		
		if (closing) {
			// a closing tag must match the element currently being parsed
			if (nodeName == (name || '')) foundClosing = true;
			else this.throwParseError( "Mismatched closing tag (expected </" + name + ">)", tag );
			break;
		}
		
		// Opening tag: parse attributes into a hash.  If the tag is
		// self-closing, no recursive parsing is needed.
		var selfClosing = !!attribsRaw.match(this.patSelfClosing);
		var leaf = {};
		var attribs = leaf;
		
		// preserve attributes means they go into a sub-hash (attribsKey);
		// the XML composer honors this for restoring the tree back into XML
		if (this.preserveAttributes) {
			leaf[this.attribsKey] = {};
			attribs = leaf[this.attribsKey];
		}
		
		var attribMatch = null;
		this.patAttrib.lastIndex = 0;
		while ( attribMatch = this.patAttrib.exec(attribsRaw) ) {
			var key = this.lowerCase ? attribMatch[1].toLowerCase() : attribMatch[1];
			attribs[ key ] = decode_entities( attribMatch[3] );
		}
		
		// if no attribs found, but we created the sub-hash, clean it up now
		if (this.preserveAttributes && !num_keys(attribs)) {
			delete leaf[this.attribsKey];
		}
		
		// Recurse for nested nodes
		if (!selfClosing) {
			this.parse( leaf, nodeName );
			if (this.error()) break;
		}
		
		// Compress into simple node if text only, or empty string if empty
		var num_leaf_keys = num_keys(leaf);
		if ((typeof(leaf[this.dataKey]) != 'undefined') && (num_leaf_keys == 1)) {
			leaf = leaf[this.dataKey];
		}
		else if (!num_leaf_keys) {
			leaf = '';
		}
		
		// Add leaf to parent branch.  Only OWN properties count as "already
		// seen": names such as "constructor" or "toString" exist on every
		// object through its prototype and must not be mistaken for siblings.
		if (hasOwn.call(branch, nodeName)) {
			// A strict array test is needed: an earlier sibling that has a
			// "length" child or attribute is still a single node, not a list.
			if (Array.isArray(branch[nodeName])) {
				branch[nodeName].push( leaf );
			}
			else {
				branch[nodeName] = [ branch[nodeName], leaf ];
			}
		}
		else {
			// defineProperty always creates a plain own property, even for
			// "__proto__" (plain assignment would replace the prototype)
			Object.defineProperty( branch, nodeName, {
				value: leaf,
				writable: true,
				enumerable: true,
				configurable: true
			} );
		}
		
		// the document level holds exactly one element, so stop after it
		if (this.error() || (branch == this.tree)) break;
	} // main reg exp
	
	// Make sure we found the closing tag
	if (name && !foundClosing) {
		this.throwParseError( "Missing closing tag (expected </" + name + ">)", name );
	}
	
	// If we are the master node, finish parsing and setup our doc node
	if (branch == this.tree) {
		// text outside of the document element is discarded
		if (typeof(this.tree[this.dataKey]) != 'undefined') delete this.tree[this.dataKey];
		
		if (num_keys(this.tree) > 1) {
			this.throwParseError( 'Only one top-level node is allowed in document', first_key(this.tree) );
			return;
		}
		
		// unwrap: the tree becomes the content of the document element
		this.documentNodeName = first_key(this.tree);
		if (this.documentNodeName) {
			this.tree = this.tree[this.documentNodeName];
		}
	}
};
235
XML.prototype.throwParseError = function(key, tag) {
	// Record a parse error and throw it.
	//   key: short description of the problem
	//   tag: content of the offending tag (without the angle brackets)
	// The line number is derived from how far patTag has advanced in the text.
	var consumed = this.text.substring(0, this.patTag.lastIndex);
	
	// N newlines consumed so far means we are on line N + 1 ...
	var lineNum = consumed.split("\n").length;
	
	// ... minus any newlines inside the tag itself, to land on its first line
	var tagBreaks = tag.match(/\n/g);
	if (tagBreaks) lineNum -= tagBreaks.length;
	
	var entry = {
		type: 'Parse',
		key: key,
		text: '<' + tag + '>',
		line: lineNum
	};
	this.errors.push(entry);
	
	// Throw actual error (callers must wrap parsing in try/catch)
	throw new Error( this.getLastError() );
};
253
XML.prototype.error = function() {
	// Number of errors recorded so far (0 when there are none)
	var recorded = this.errors;
	return recorded.length;
};
258
XML.prototype.getError = function(error) {
	// Format an error record (type, code, key, line, text) as a single line
	// of plain text.  Returns an empty string when no record is given.
	if (!error) return '';
	
	var label = error.type ? error.type : 'General';
	var message = label + ' Error';
	
	if (error.code) {
		message = message + ' ' + error.code;
	}
	message = message + ': ' + error.key;
	
	if (error.line) {
		message = message + ' on line ' + error.line;
	}
	if (error.text) {
		message = message + ': ' + error.text;
	}
	
	return message;
};
273
XML.prototype.getLastError = function() {
	// Plain-text form of the most recently recorded error, or '' if none
	if (this.error()) {
		var latest = this.errors[this.errors.length - 1];
		return this.getError( latest );
	}
	return '';
};
279
XML.prototype.parsePINode = function(tag) {
	// Handle a Processing Instruction node, e.g. <?xml version="1.0"?>.
	// A well-formed instruction is remembered in piNodeList (so compose() can
	// reproduce it) and returned; anything else raises a parse error.
	if (this.patPINode.test(tag)) {
		this.piNodeList.push( tag );
		return tag;
	}
	
	this.throwParseError( "Malformed processor instruction", tag );
	return null;
};
290
XML.prototype.parseCommentNode = function(tag) {
	// Handle a Comment node, e.g. <!-- hello -->.
	// The main tokenizer stops at the first ">", which may sit inside the
	// comment, so keep appending text up to each following ">" until the
	// accumulated tag ends with "--".  Returns the complete comment body.
	var chunk = null;
	this.patNextClose.lastIndex = this.patTag.lastIndex;
	
	while (!this.patEndComment.test(tag)) {
		chunk = this.patNextClose.exec(this.text);
		if (!chunk) {
			this.throwParseError( "Unclosed comment tag", tag );
			return null;
		}
		tag += '>' + chunk[1];
	}
	
	// let the main tokenizer resume right after the comment
	this.patTag.lastIndex = this.patNextClose.lastIndex;
	return tag;
};
309
XML.prototype.parseDTDNode = function(tag) {
	// Handle a Document Type Descriptor node, e.g. <!DOCTYPE ... >.
	// Accepted declarations are remembered in dtdNodeList (so compose() can
	// reproduce them) and returned; anything else raises a parse error.
	
	// external declaration (SYSTEM / PUBLIC): the tag is already complete
	if (this.patExternalDTDNode.test(tag)) {
		this.dtdNodeList.push( tag );
		return tag;
	}
	
	if (!this.patInlineDTDNode.test(tag)) {
		this.throwParseError( "Malformed DTD tag", tag );
		return null;
	}
	
	// inline declaration: it contains nested <...> nodes, so keep appending
	// text up to each following ">" until the accumulated tag ends with "]"
	var chunk = null;
	this.patNextClose.lastIndex = this.patTag.lastIndex;
	
	while (!this.patEndDTD.test(tag)) {
		chunk = this.patNextClose.exec(this.text);
		if (!chunk) {
			this.throwParseError( "Unclosed DTD tag", tag );
			return null;
		}
		tag += '>' + chunk[1];
	}
	
	// let the main tokenizer resume right after the declaration
	this.patTag.lastIndex = this.patNextClose.lastIndex;
	
	// make sure the complete tag is well-formed before keeping it
	if (!this.patDTDNode.test(tag)) {
		this.throwParseError( "Malformed DTD tag", tag );
		return null;
	}
	
	this.dtdNodeList.push( tag );
	return tag;
};
350
XML.prototype.parseCDATANode = function(tag) {
	// Handle a CDATA node, e.g. <![CDATA[Brooks & Shields]]>.
	// The content may contain ">" characters, so keep appending text up to
	// each following ">" until the accumulated tag ends with "]]".
	// Returns the text found between "[CDATA[" and "]]".
	var chunk = null;
	this.patNextClose.lastIndex = this.patTag.lastIndex;
	
	while (!this.patEndCDATA.test(tag)) {
		chunk = this.patNextClose.exec(this.text);
		if (!chunk) {
			this.throwParseError( "Unclosed CDATA tag", tag );
			return null;
		}
		tag += '>' + chunk[1];
	}
	
	// let the main tokenizer resume right after the CDATA section
	this.patTag.lastIndex = this.patNextClose.lastIndex;
	
	var parts = tag.match(this.patCDATANode);
	if (!parts) {
		this.throwParseError( "Malformed CDATA tag", tag );
		return null;
	}
	return parts[1];
};
376
XML.prototype.getTree = function() {
	// Hand back the parsed tree itself (a reference, not a copy)
	var parsed = this.tree;
	return parsed;
};
381
XML.prototype.compose = function() {
	// Turn the parsed tree back into XML text, re-using the processing
	// instructions and DTD declarations collected while parsing.
	var composed = compose_xml( this.tree, this.documentNodeName );
	var pieces = [];
	var idx, len;
	
	// header: the original processing instructions, or the default one
	if (this.piNodeList.length) {
		for (idx = 0, len = this.piNodeList.length; idx < len; idx++) {
			pieces.push( '<' + this.piNodeList[idx] + '>' + "\n" );
		}
	}
	else {
		pieces.push( xml_header + "\n" );
	}
	
	// DTD declarations, if the source document had any
	for (idx = 0, len = this.dtdNodeList.length; idx < len; idx++) {
		pieces.push( '<' + this.dtdNodeList[idx] + '>' + "\n" );
	}
	
	// compose_xml() emits its own header as the first line; drop it
	pieces.push( composed.substring( composed.indexOf("\n") + 1, composed.length ) );
	
	return pieces.join('');
};
406
407 //
408 // Static Utility Functions:
409 //
410
var parse_xml = exports.parse = function parse_xml(text, opts) {
	// Turn text into an XML tree quickly.
	//   text: XML markup (string or Buffer) or a path to an XML file
	//   opts: optional hash of parser options (e.g. preserveAttributes, lowerCase)
	// Returns the parsed tree.  Parse errors are thrown by the parser.
	
	// work on a copy so the caller's options object is never modified
	var args = {};
	if (opts) {
		for (var key in opts) args[key] = opts[key];
	}
	args.text = text;
	
	var parser = new XML(args);
	return parser.error() ? parser.getLastError() : parser.getTree();
};
418
var trim = exports.trim = function trim(text) {
	// Remove leading and trailing whitespace from a string.
	// null / undefined become ''; values without a replace() method
	// (numbers, booleans, ...) are returned untouched.
	if (text == null) return '';
	if (!text || !text.replace) return text;
	
	return text.replace(/^\s+/, "").replace(/\s+$/, "");
};
430
var encode_entities = exports.encodeEntities = function encode_entities(text) {
	// Simple entitize function for composing XML text content:
	// escapes "&", "<" and ">".  null / undefined become ''; values without
	// a replace() method are returned untouched.
	if (text == null) return '';
	if (!text || !text.replace) return text;
	
	var entities = { '&': "&amp;", '<': "&lt;", '>': "&gt;" };
	
	// single pass, so the "&" of a freshly written entity is never re-escaped
	return text.replace(/[&<>]/g, function(ch) {
		return entities[ch];
	});
};
443
var encode_attrib_entities = exports.encodeAttribEntities = function encode_attrib_entities(text) {
	// Simple entitize function for composing XML attribute values:
	// escapes "&", "<", ">" and both quote characters.  null / undefined
	// become ''; values without a replace() method are returned untouched.
	if (text == null) return '';
	if (!text || !text.replace) return text;
	
	var entities = {
		'&': "&amp;",
		'<': "&lt;",
		'>': "&gt;",
		'"': "&quot;",
		"'": "&apos;"
	};
	
	// single pass, so the "&" of a freshly written entity is never re-escaped
	return text.replace(/[&<>"']/g, function(ch) {
		return entities[ch];
	});
};
458
var decode_entities = exports.decodeEntities = function decode_entities(text) {
	// Decode XML entities into raw characters.
	// Handles the five predefined entities (&lt; &gt; &quot; &apos; &amp;)
	// and numeric character references (&#65; and &#x41;).  Unknown entities
	// and out-of-range references are left as they are.
	// null / undefined become ''; values without a replace() method are
	// returned untouched.
	if (text == null) return '';
	
	if (text && text.replace && text.match(/\&/)) {
		var named = { lt: "<", gt: ">", quot: '"', apos: "'", amp: "&" };
		
		// One pass over the text: every reference is decoded exactly once, so
		// decoded output (e.g. the "&" from &amp; or &#38;) is never re-read
		// as the start of another entity.
		text = text.replace(/\&(lt|gt|quot|apos|amp|\#x[0-9a-fA-F]+|\#[0-9]+)\;/g, function(match, ref) {
			if (ref.charAt(0) != '#') return named[ref];
			
			var code = (ref.charAt(1) == 'x') ? parseInt(ref.substring(2), 16) : parseInt(ref.substring(1), 10);
			if (code > 0x10FFFF) return match; // not a Unicode code point
			
			if (code > 0xFFFF) {
				// beyond the BMP: encode as a UTF-16 surrogate pair
				code -= 0x10000;
				return String.fromCharCode( 0xD800 + (code >> 10), 0xDC00 + (code & 0x3FF) );
			}
			return String.fromCharCode(code);
		});
	}
	
	return text;
};
473
var compose_xml = exports.stringify = function compose_xml(node, name, indent) {
	// Compose a node into XML text, including attributes; recurses for
	// child nodes.
	//   node:   string, hash or array of nodes to serialize
	//   name:   element name; when omitted at the root, the first key of
	//           node is used as the name and its value as the content
	//   indent: nesting depth; a falsy value means "root", which also emits
	//           the XML header line
	// Special hash keys: "_Attribs" (hash of attributes) and "_Data" (text
	// content; when present, child elements of the same node are not written).
	// Returns the composed XML string.
	var xml = "";
	
	// If this is the root node, set the indent to 0
	// and setup the XML header (PI node)
	if (!indent) {
		indent = 0;
		xml = xml_header + "\n";
		
		if (!name) {
			// no name provided, assume content is wrapped in it
			name = first_key(node);
			node = node[name];
		}
	}
	
	// Setup the indent text
	var indent_text = "";
	for (var k = 0; k < indent; k++) indent_text += indent_string;
	
	var idx, len, key;
	
	if (Array.isArray(node)) {
		// Repeated element: one element per entry, same name and indent.
		// A strict array test is required: a hash that merely has a "length"
		// child must be composed as a hash.  An empty array emits nothing.
		for (idx = 0; idx < node.length; idx++) {
			xml += compose_xml( node[idx], name, indent );
		}
	}
	else if ((typeof(node) == 'object') && (node != null)) {
		// node is hash
		xml += indent_text + "<" + name;
		
		// attributes, sorted by name for stable output
		var attribs = node["_Attribs"];
		if (attribs) {
			var attrib_names = hash_keys_to_array(attribs).sort();
			for (idx = 0, len = attrib_names.length; idx < len; idx++) {
				key = attrib_names[idx];
				xml += " " + key + "=\"" + encode_attrib_entities(attribs[key]) + "\"";
			}
		}
		
		// text content counts whenever it is set and non-empty (so 0 and
		// false are written as text, not mistaken for "no data")
		var data = node["_Data"];
		var has_data = (data != null) && (data !== '');
		
		// child elements: every key that is a valid tag name, except the
		// two special keys, sorted by name
		var child_names = [];
		var node_keys = hash_keys_to_array(node).sort();
		for (idx = 0, len = node_keys.length; idx < len; idx++) {
			key = node_keys[idx];
			if ((key != "_Attribs") && (key != "_Data") && key.match(re_valid_tag_name)) {
				child_names.push( key );
			}
		}
		
		if (has_data) {
			// simple text child node
			xml += ">" + encode_entities(data) + "</" + name + ">\n";
		}
		else if (child_names.length) {
			// real children: recurse with incremented indent value
			xml += ">\n";
			for (idx = 0, len = child_names.length; idx < len; idx++) {
				key = child_names[idx];
				xml += compose_xml( node[key], key, indent + 1 );
			}
			xml += indent_text + "</" + name + ">\n";
		}
		else {
			// nothing to put inside, so self-close
			xml += "/>\n";
		}
	}
	else {
		// node is simple string (or other primitive)
		xml += indent_text + "<" + name + ">" + encode_entities(node) + "</" + name + ">\n";
	}
	
	return xml;
};
558
var always_array = exports.alwaysArray = function always_array(obj, key) {
	// Make sure a value is an array.
	//   always_array(value):    returns value if it is an array, otherwise a
	//                           new one-element array containing value
	//   always_array(obj, key): converts obj[key] in place (a non-array value
	//                           is wrapped in a one-element array) and
	//                           returns null
	// Only real arrays count as arrays: null and hashes that merely have a
	// "length" key are wrapped like any other single value.
	if (key) {
		if (!Array.isArray(obj[key])) {
			var temp = obj[key];
			delete obj[key];
			obj[key] = [ temp ];
		}
		return null;
	}
	
	return Array.isArray(obj) ? obj : [ obj ];
};
576
var hash_keys_to_array = exports.hashKeysToArray = function hash_keys_to_array(hash) {
	// Collect the enumerable keys of a hash (inherited ones included) into
	// an array, in iteration order; the values are ignored.
	var keys = [];
	for (var name in hash) {
		keys[keys.length] = name;
	}
	return keys;
};
583
var isa_hash = exports.isaHash = function isa_hash(arg) {
	// A "hash" is any truthy object that has no length property
	if (!arg) return false;
	if (typeof(arg) != 'object') return false;
	return typeof(arg.length) == 'undefined';
};
588
var isa_array = exports.isaArray = function isa_array(arg) {
	// An "array" here is any truthy object that has a length property, so
	// array-like objects qualify too.  (typeof never yields 'array', so no
	// separate check for that is needed.)
	if (!arg) return false;
	if (typeof(arg) != 'object') return false;
	return typeof(arg.length) != 'undefined';
};
594
var first_key = exports.firstKey = function first_key(hash) {
	// First key produced by iterating the hash, or null if it has no keys
	var found = null;
	for (var name in hash) {
		found = name;
		break;
	}
	return found;
};
600
var num_keys = exports.numKeys = function num_keys(hash) {
	// Number of enumerable keys in a hash (inherited ones included)
	var total = 0;
	for (var name in hash) {
		total += 1;
	}
	return total;
};