# python.d/python_modules/pyyaml3/parser.py
# Bundled PyYAML (Python 3) parser, vendored for netdata's python.d plugin.

# The following YAML grammar is LL(1) and is parsed by a recursive descent
# parser.
#
# stream            ::= STREAM-START implicit_document? explicit_document* STREAM-END
# implicit_document ::= block_node DOCUMENT-END*
# explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END*
# block_node_or_indentless_sequence ::=
#                       ALIAS
#                       | properties (block_content | indentless_block_sequence)?
#                       | block_content
#                       | indentless_block_sequence
# block_node        ::= ALIAS
#                       | properties block_content?
#                       | block_content
# flow_node         ::= ALIAS
#                       | properties flow_content?
#                       | flow_content
# properties        ::= TAG ANCHOR? | ANCHOR TAG?
# block_content     ::= block_collection | flow_collection | SCALAR
# flow_content      ::= flow_collection | SCALAR
# block_collection  ::= block_sequence | block_mapping
# flow_collection   ::= flow_sequence | flow_mapping
# block_sequence    ::= BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* BLOCK-END
# indentless_sequence   ::= (BLOCK-ENTRY block_node?)+
# block_mapping     ::= BLOCK-MAPPING_START
#                       ((KEY block_node_or_indentless_sequence?)?
#                       (VALUE block_node_or_indentless_sequence?)?)*
#                       BLOCK-END
# flow_sequence     ::= FLOW-SEQUENCE-START
#                       (flow_sequence_entry FLOW-ENTRY)*
#                       flow_sequence_entry?
#                       FLOW-SEQUENCE-END
# flow_sequence_entry   ::= flow_node | KEY flow_node? (VALUE flow_node?)?
# flow_mapping      ::= FLOW-MAPPING-START
#                       (flow_mapping_entry FLOW-ENTRY)*
#                       flow_mapping_entry?
#                       FLOW-MAPPING-END
# flow_mapping_entry    ::= flow_node | KEY flow_node? (VALUE flow_node?)?
#
# FIRST sets:
#
# stream: { STREAM-START }
# explicit_document: { DIRECTIVE DOCUMENT-START }
# implicit_document: FIRST(block_node)
# block_node: { ALIAS TAG ANCHOR SCALAR BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START }
# flow_node: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START }
# block_content: { BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START SCALAR }
# flow_content: { FLOW-SEQUENCE-START FLOW-MAPPING-START SCALAR }
# block_collection: { BLOCK-SEQUENCE-START BLOCK-MAPPING-START }
# flow_collection: { FLOW-SEQUENCE-START FLOW-MAPPING-START }
# block_sequence: { BLOCK-SEQUENCE-START }
# block_mapping: { BLOCK-MAPPING-START }
# block_node_or_indentless_sequence: { ALIAS ANCHOR TAG SCALAR BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START BLOCK-ENTRY }
# indentless_sequence: { ENTRY }
# flow_collection: { FLOW-SEQUENCE-START FLOW-MAPPING-START }
# flow_sequence: { FLOW-SEQUENCE-START }
# flow_mapping: { FLOW-MAPPING-START }
# flow_sequence_entry: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START KEY }
# flow_mapping_entry: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START KEY }

__all__ = ['Parser', 'ParserError']

from .error import MarkedYAMLError
# Star imports are intentional: the parser references the full families of
# token and event classes, and mixes in the Scanner's token-access methods.
from .tokens import *
from .events import *
from .scanner import *

class ParserError(MarkedYAMLError):
    """Raised when the token stream violates the YAML grammar.

    Carries context and problem marks (via MarkedYAMLError) pointing at
    the offending location in the input stream.
    """

class Parser:
    """Turn the scanner's token stream into a stream of parsing events.

    Implements the LL(1) YAML grammar documented at the top of this module
    with a recursive-descent strategy.  Instead of actual recursion, the
    parser is driven lazily: ``self.state`` holds the next production to
    run and ``self.states`` is an explicit stack of pending continuations,
    so events are produced one at a time on demand.  ``self.marks`` tracks
    the start marks of open collections for error reporting.

    This class is designed to be mixed with Scanner, which supplies
    check_token()/peek_token()/get_token().
    """
    # Since writing a recursive-descendant parser is a straightforward task, we
    # do not give many comments here.

    # Fallback tag-handle resolutions used when a document declares no
    # %TAG directives of its own.
    DEFAULT_TAGS = {
        '!':   '!',
        '!!':  'tag:yaml.org,2002:',
    }

    def __init__(self):
        self.current_event = None
        self.yaml_version = None
        self.tag_handles = {}
        self.states = []
        self.marks = []
        self.state = self.parse_stream_start

    def dispose(self):
        """Reset the state attributes (to clear self-references)."""
        self.states = []
        self.state = None

    def check_event(self, *choices):
        """Check the type of the next event without consuming it.

        With no arguments, return True if any event is available; with
        event classes as arguments, return True only if the next event is
        an instance of one of them.
        """
        if self.current_event is None:
            if self.state:
                self.current_event = self.state()
        if self.current_event is not None:
            if not choices:
                return True
            for choice in choices:
                if isinstance(self.current_event, choice):
                    return True
        return False

    def peek_event(self):
        """Get the next event without consuming it."""
        if self.current_event is None:
            if self.state:
                self.current_event = self.state()
        return self.current_event

    def get_event(self):
        """Get the next event and proceed further."""
        if self.current_event is None:
            if self.state:
                self.current_event = self.state()
        value = self.current_event
        self.current_event = None
        return value

    # stream    ::= STREAM-START implicit_document? explicit_document* STREAM-END
    # implicit_document ::= block_node DOCUMENT-END*
    # explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END*

    def parse_stream_start(self):

        # Parse the stream start.
        token = self.get_token()
        event = StreamStartEvent(token.start_mark, token.end_mark,
                encoding=token.encoding)

        # Prepare the next state.
        self.state = self.parse_implicit_document_start

        return event

    def parse_implicit_document_start(self):

        # Parse an implicit document (one that begins with content rather
        # than '---' or a directive).
        if not self.check_token(DirectiveToken, DocumentStartToken,
                StreamEndToken):
            self.tag_handles = self.DEFAULT_TAGS
            token = self.peek_token()
            start_mark = end_mark = token.start_mark
            event = DocumentStartEvent(start_mark, end_mark,
                    explicit=False)

            # Prepare the next state.
            self.states.append(self.parse_document_end)
            self.state = self.parse_block_node

            return event

        else:
            return self.parse_document_start()

    def parse_document_start(self):

        # Parse any extra document end indicators.
        while self.check_token(DocumentEndToken):
            self.get_token()

        # Parse an explicit document.
        if not self.check_token(StreamEndToken):
            token = self.peek_token()
            start_mark = token.start_mark
            version, tags = self.process_directives()
            if not self.check_token(DocumentStartToken):
                raise ParserError(None, None,
                        "expected '<document start>', but found %r"
                        % self.peek_token().id,
                        self.peek_token().start_mark)
            token = self.get_token()
            end_mark = token.end_mark
            event = DocumentStartEvent(start_mark, end_mark,
                    explicit=True, version=version, tags=tags)
            self.states.append(self.parse_document_end)
            self.state = self.parse_document_content
        else:
            # Parse the end of the stream.
            token = self.get_token()
            event = StreamEndEvent(token.start_mark, token.end_mark)
            # All open collections and pending states must be resolved by now.
            assert not self.states
            assert not self.marks
            self.state = None
        return event

    def parse_document_end(self):

        # Parse the document end ('...' is optional, hence explicit flag).
        token = self.peek_token()
        start_mark = end_mark = token.start_mark
        explicit = False
        if self.check_token(DocumentEndToken):
            token = self.get_token()
            end_mark = token.end_mark
            explicit = True
        event = DocumentEndEvent(start_mark, end_mark,
                explicit=explicit)

        # Prepare the next state.
        self.state = self.parse_document_start

        return event

    def parse_document_content(self):
        # An explicit document with no content yields an empty scalar.
        if self.check_token(DirectiveToken,
                DocumentStartToken, DocumentEndToken, StreamEndToken):
            event = self.process_empty_scalar(self.peek_token().start_mark)
            self.state = self.states.pop()
            return event
        else:
            return self.parse_block_node()

    def process_directives(self):
        """Consume %YAML and %TAG directives; return (version, tags).

        ``tags`` is a copy of the explicitly declared handles, or None if
        there were none.  Afterwards self.tag_handles is completed with
        the default '!' and '!!' resolutions.
        """
        self.yaml_version = None
        self.tag_handles = {}
        while self.check_token(DirectiveToken):
            token = self.get_token()
            if token.name == 'YAML':
                if self.yaml_version is not None:
                    raise ParserError(None, None,
                            "found duplicate YAML directive", token.start_mark)
                major, minor = token.value
                if major != 1:
                    raise ParserError(None, None,
                            "found incompatible YAML document (version 1.* is required)",
                            token.start_mark)
                self.yaml_version = token.value
            elif token.name == 'TAG':
                handle, prefix = token.value
                if handle in self.tag_handles:
                    raise ParserError(None, None,
                            "duplicate tag handle %r" % handle,
                            token.start_mark)
                self.tag_handles[handle] = prefix
        if self.tag_handles:
            value = self.yaml_version, self.tag_handles.copy()
        else:
            value = self.yaml_version, None
        for key in self.DEFAULT_TAGS:
            if key not in self.tag_handles:
                self.tag_handles[key] = self.DEFAULT_TAGS[key]
        return value

    # block_node_or_indentless_sequence ::= ALIAS
    #               | properties (block_content | indentless_block_sequence)?
    #               | block_content
    #               | indentless_block_sequence
    # block_node    ::= ALIAS
    #                   | properties block_content?
    #                   | block_content
    # flow_node     ::= ALIAS
    #                   | properties flow_content?
    #                   | flow_content
    # properties    ::= TAG ANCHOR? | ANCHOR TAG?
    # block_content     ::= block_collection | flow_collection | SCALAR
    # flow_content      ::= flow_collection | SCALAR
    # block_collection  ::= block_sequence | block_mapping
    # flow_collection   ::= flow_sequence | flow_mapping

    def parse_block_node(self):
        return self.parse_node(block=True)

    def parse_flow_node(self):
        return self.parse_node()

    def parse_block_node_or_indentless_sequence(self):
        return self.parse_node(block=True, indentless_sequence=True)

    def parse_node(self, block=False, indentless_sequence=False):
        """Parse a node: an alias, or properties followed by content.

        ``block`` permits block collections; ``indentless_sequence``
        additionally permits a sequence of '-' entries with no extra
        indentation (used for block mapping values).
        """
        if self.check_token(AliasToken):
            token = self.get_token()
            event = AliasEvent(token.value, token.start_mark, token.end_mark)
            self.state = self.states.pop()
        else:
            anchor = None
            tag = None
            start_mark = end_mark = tag_mark = None
            # Properties may appear in either order: ANCHOR TAG? or TAG ANCHOR?.
            if self.check_token(AnchorToken):
                token = self.get_token()
                start_mark = token.start_mark
                end_mark = token.end_mark
                anchor = token.value
                if self.check_token(TagToken):
                    token = self.get_token()
                    tag_mark = token.start_mark
                    end_mark = token.end_mark
                    tag = token.value
            elif self.check_token(TagToken):
                token = self.get_token()
                start_mark = tag_mark = token.start_mark
                end_mark = token.end_mark
                tag = token.value
                if self.check_token(AnchorToken):
                    token = self.get_token()
                    end_mark = token.end_mark
                    anchor = token.value
            if tag is not None:
                # Resolve the (handle, suffix) pair into a full tag.
                handle, suffix = tag
                if handle is not None:
                    if handle not in self.tag_handles:
                        raise ParserError("while parsing a node", start_mark,
                                "found undefined tag handle %r" % handle,
                                tag_mark)
                    tag = self.tag_handles[handle]+suffix
                else:
                    tag = suffix
            #if tag == '!':
            #    raise ParserError("while parsing a node", start_mark,
            #            "found non-specific tag '!'", tag_mark,
            #            "Please check 'http://pyyaml.org/wiki/YAMLNonSpecificTag' and share your opinion.")
            if start_mark is None:
                start_mark = end_mark = self.peek_token().start_mark
            event = None
            implicit = (tag is None or tag == '!')
            if indentless_sequence and self.check_token(BlockEntryToken):
                end_mark = self.peek_token().end_mark
                event = SequenceStartEvent(anchor, tag, implicit,
                        start_mark, end_mark)
                self.state = self.parse_indentless_sequence_entry
            else:
                if self.check_token(ScalarToken):
                    token = self.get_token()
                    end_mark = token.end_mark
                    # (plain-implicit, non-plain-implicit) resolution flags.
                    if (token.plain and tag is None) or tag == '!':
                        implicit = (True, False)
                    elif tag is None:
                        implicit = (False, True)
                    else:
                        implicit = (False, False)
                    event = ScalarEvent(anchor, tag, implicit, token.value,
                            start_mark, end_mark, style=token.style)
                    self.state = self.states.pop()
                elif self.check_token(FlowSequenceStartToken):
                    end_mark = self.peek_token().end_mark
                    event = SequenceStartEvent(anchor, tag, implicit,
                            start_mark, end_mark, flow_style=True)
                    self.state = self.parse_flow_sequence_first_entry
                elif self.check_token(FlowMappingStartToken):
                    end_mark = self.peek_token().end_mark
                    event = MappingStartEvent(anchor, tag, implicit,
                            start_mark, end_mark, flow_style=True)
                    self.state = self.parse_flow_mapping_first_key
                elif block and self.check_token(BlockSequenceStartToken):
                    end_mark = self.peek_token().start_mark
                    event = SequenceStartEvent(anchor, tag, implicit,
                            start_mark, end_mark, flow_style=False)
                    self.state = self.parse_block_sequence_first_entry
                elif block and self.check_token(BlockMappingStartToken):
                    end_mark = self.peek_token().start_mark
                    event = MappingStartEvent(anchor, tag, implicit,
                            start_mark, end_mark, flow_style=False)
                    self.state = self.parse_block_mapping_first_key
                elif anchor is not None or tag is not None:
                    # Empty scalars are allowed even if a tag or an anchor is
                    # specified.
                    event = ScalarEvent(anchor, tag, (implicit, False), '',
                            start_mark, end_mark)
                    self.state = self.states.pop()
                else:
                    if block:
                        node = 'block'
                    else:
                        node = 'flow'
                    token = self.peek_token()
                    raise ParserError("while parsing a %s node" % node, start_mark,
                            "expected the node content, but found %r" % token.id,
                            token.start_mark)
        return event

    # block_sequence ::= BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* BLOCK-END

    def parse_block_sequence_first_entry(self):
        token = self.get_token()
        self.marks.append(token.start_mark)
        return self.parse_block_sequence_entry()

    def parse_block_sequence_entry(self):
        if self.check_token(BlockEntryToken):
            token = self.get_token()
            if not self.check_token(BlockEntryToken, BlockEndToken):
                self.states.append(self.parse_block_sequence_entry)
                return self.parse_block_node()
            else:
                # '-' with no content: produce an empty scalar entry.
                self.state = self.parse_block_sequence_entry
                return self.process_empty_scalar(token.end_mark)
        if not self.check_token(BlockEndToken):
            token = self.peek_token()
            raise ParserError("while parsing a block collection", self.marks[-1],
                    "expected <block end>, but found %r" % token.id, token.start_mark)
        token = self.get_token()
        event = SequenceEndEvent(token.start_mark, token.end_mark)
        self.state = self.states.pop()
        self.marks.pop()
        return event

    # indentless_sequence ::= (BLOCK-ENTRY block_node?)+

    def parse_indentless_sequence_entry(self):
        if self.check_token(BlockEntryToken):
            token = self.get_token()
            if not self.check_token(BlockEntryToken,
                    KeyToken, ValueToken, BlockEndToken):
                self.states.append(self.parse_indentless_sequence_entry)
                return self.parse_block_node()
            else:
                self.state = self.parse_indentless_sequence_entry
                return self.process_empty_scalar(token.end_mark)
        # No BLOCK-END token exists for indentless sequences; end at the
        # next token without consuming it.
        token = self.peek_token()
        event = SequenceEndEvent(token.start_mark, token.start_mark)
        self.state = self.states.pop()
        return event

    # block_mapping     ::= BLOCK-MAPPING_START
    #                       ((KEY block_node_or_indentless_sequence?)?
    #                       (VALUE block_node_or_indentless_sequence?)?)*
    #                       BLOCK-END

    def parse_block_mapping_first_key(self):
        token = self.get_token()
        self.marks.append(token.start_mark)
        return self.parse_block_mapping_key()

    def parse_block_mapping_key(self):
        if self.check_token(KeyToken):
            token = self.get_token()
            if not self.check_token(KeyToken, ValueToken, BlockEndToken):
                self.states.append(self.parse_block_mapping_value)
                return self.parse_block_node_or_indentless_sequence()
            else:
                # '?' with no key content: empty scalar key.
                self.state = self.parse_block_mapping_value
                return self.process_empty_scalar(token.end_mark)
        if not self.check_token(BlockEndToken):
            token = self.peek_token()
            raise ParserError("while parsing a block mapping", self.marks[-1],
                    "expected <block end>, but found %r" % token.id, token.start_mark)
        token = self.get_token()
        event = MappingEndEvent(token.start_mark, token.end_mark)
        self.state = self.states.pop()
        self.marks.pop()
        return event

    def parse_block_mapping_value(self):
        if self.check_token(ValueToken):
            token = self.get_token()
            if not self.check_token(KeyToken, ValueToken, BlockEndToken):
                self.states.append(self.parse_block_mapping_key)
                return self.parse_block_node_or_indentless_sequence()
            else:
                self.state = self.parse_block_mapping_key
                return self.process_empty_scalar(token.end_mark)
        else:
            # Missing ':' value: the value is an empty scalar.
            self.state = self.parse_block_mapping_key
            token = self.peek_token()
            return self.process_empty_scalar(token.start_mark)

    # flow_sequence     ::= FLOW-SEQUENCE-START
    #                       (flow_sequence_entry FLOW-ENTRY)*
    #                       flow_sequence_entry?
    #                       FLOW-SEQUENCE-END
    # flow_sequence_entry   ::= flow_node | KEY flow_node? (VALUE flow_node?)?
    #
    # Note that while production rules for both flow_sequence_entry and
    # flow_mapping_entry are equal, their interpretations are different.
    # For `flow_sequence_entry`, the part `KEY flow_node? (VALUE flow_node?)?`
    # generate an inline mapping (set syntax).

    def parse_flow_sequence_first_entry(self):
        token = self.get_token()
        self.marks.append(token.start_mark)
        return self.parse_flow_sequence_entry(first=True)

    def parse_flow_sequence_entry(self, first=False):
        if not self.check_token(FlowSequenceEndToken):
            if not first:
                # Entries after the first must be separated by ','.
                if self.check_token(FlowEntryToken):
                    self.get_token()
                else:
                    token = self.peek_token()
                    raise ParserError("while parsing a flow sequence", self.marks[-1],
                            "expected ',' or ']', but got %r" % token.id, token.start_mark)

            if self.check_token(KeyToken):
                # A '?' inside a flow sequence starts an inline single-pair
                # mapping.
                token = self.peek_token()
                event = MappingStartEvent(None, None, True,
                        token.start_mark, token.end_mark,
                        flow_style=True)
                self.state = self.parse_flow_sequence_entry_mapping_key
                return event
            elif not self.check_token(FlowSequenceEndToken):
                self.states.append(self.parse_flow_sequence_entry)
                return self.parse_flow_node()
        token = self.get_token()
        event = SequenceEndEvent(token.start_mark, token.end_mark)
        self.state = self.states.pop()
        self.marks.pop()
        return event

    def parse_flow_sequence_entry_mapping_key(self):
        token = self.get_token()
        if not self.check_token(ValueToken,
                FlowEntryToken, FlowSequenceEndToken):
            self.states.append(self.parse_flow_sequence_entry_mapping_value)
            return self.parse_flow_node()
        else:
            self.state = self.parse_flow_sequence_entry_mapping_value
            return self.process_empty_scalar(token.end_mark)

    def parse_flow_sequence_entry_mapping_value(self):
        if self.check_token(ValueToken):
            token = self.get_token()
            if not self.check_token(FlowEntryToken, FlowSequenceEndToken):
                self.states.append(self.parse_flow_sequence_entry_mapping_end)
                return self.parse_flow_node()
            else:
                self.state = self.parse_flow_sequence_entry_mapping_end
                return self.process_empty_scalar(token.end_mark)
        else:
            self.state = self.parse_flow_sequence_entry_mapping_end
            token = self.peek_token()
            return self.process_empty_scalar(token.start_mark)

    def parse_flow_sequence_entry_mapping_end(self):
        self.state = self.parse_flow_sequence_entry
        token = self.peek_token()
        return MappingEndEvent(token.start_mark, token.start_mark)

    # flow_mapping  ::= FLOW-MAPPING-START
    #                   (flow_mapping_entry FLOW-ENTRY)*
    #                   flow_mapping_entry?
    #                   FLOW-MAPPING-END
    # flow_mapping_entry    ::= flow_node | KEY flow_node? (VALUE flow_node?)?

    def parse_flow_mapping_first_key(self):
        token = self.get_token()
        self.marks.append(token.start_mark)
        return self.parse_flow_mapping_key(first=True)

    def parse_flow_mapping_key(self, first=False):
        if not self.check_token(FlowMappingEndToken):
            if not first:
                # Entries after the first must be separated by ','.
                if self.check_token(FlowEntryToken):
                    self.get_token()
                else:
                    token = self.peek_token()
                    raise ParserError("while parsing a flow mapping", self.marks[-1],
                            "expected ',' or '}', but got %r" % token.id, token.start_mark)
            if self.check_token(KeyToken):
                token = self.get_token()
                if not self.check_token(ValueToken,
                        FlowEntryToken, FlowMappingEndToken):
                    self.states.append(self.parse_flow_mapping_value)
                    return self.parse_flow_node()
                else:
                    self.state = self.parse_flow_mapping_value
                    return self.process_empty_scalar(token.end_mark)
            elif not self.check_token(FlowMappingEndToken):
                # A bare node used as a key; its value is an empty scalar.
                self.states.append(self.parse_flow_mapping_empty_value)
                return self.parse_flow_node()
        token = self.get_token()
        event = MappingEndEvent(token.start_mark, token.end_mark)
        self.state = self.states.pop()
        self.marks.pop()
        return event

    def parse_flow_mapping_value(self):
        if self.check_token(ValueToken):
            token = self.get_token()
            if not self.check_token(FlowEntryToken, FlowMappingEndToken):
                self.states.append(self.parse_flow_mapping_key)
                return self.parse_flow_node()
            else:
                self.state = self.parse_flow_mapping_key
                return self.process_empty_scalar(token.end_mark)
        else:
            self.state = self.parse_flow_mapping_key
            token = self.peek_token()
            return self.process_empty_scalar(token.start_mark)

    def parse_flow_mapping_empty_value(self):
        self.state = self.parse_flow_mapping_key
        return self.process_empty_scalar(self.peek_token().start_mark)

    def process_empty_scalar(self, mark):
        """Return a zero-width plain scalar event anchored at *mark*."""
        return ScalarEvent(None, None, (True, False), '', mark, mark)
