python.d/python_modules/pyyaml2/emitter.py

   1
   2 # Emitter expects events obeying the following grammar:
   3 # stream ::= STREAM-START document* STREAM-END
   4 # document ::= DOCUMENT-START node DOCUMENT-END
   5 # node ::= SCALAR | sequence | mapping
   6 # sequence ::= SEQUENCE-START node* SEQUENCE-END
   7 # mapping ::= MAPPING-START (node node)* MAPPING-END
   8
   9 __all__ = ['Emitter', 'EmitterError']
  10
  11 from error import YAMLError
  12 from events import *
  13
  14 class EmitterError(YAMLError):
  15     pass
  16
  17 class ScalarAnalysis(object):
  18     def __init__(self, scalar, empty, multiline,
  19             allow_flow_plain, allow_block_plain,
  20             allow_single_quoted, allow_double_quoted,
  21             allow_block):
  22         self.scalar = scalar
  23         self.empty = empty
  24         self.multiline = multiline
  25         self.allow_flow_plain = allow_flow_plain
  26         self.allow_block_plain = allow_block_plain
  27         self.allow_single_quoted = allow_single_quoted
  28         self.allow_double_quoted = allow_double_quoted
  29         self.allow_block = allow_block
  30
  31 class Emitter(object):
  32
  33     DEFAULT_TAG_PREFIXES = {
  34         u'!' : u'!',
  35         u'tag:yaml.org,2002:' : u'!!',
  36     }
  37
  38     def __init__(self, stream, canonical=None, indent=None, width=None,
  39             allow_unicode=None, line_break=None):
  40
  41         # The stream should have the methods `write` and possibly `flush`.
  42         self.stream = stream
  43
  44         # Encoding can be overriden by STREAM-START.
  45         self.encoding = None
  46
  47         # Emitter is a state machine with a stack of states to handle nested
  48         # structures.
  49         self.states = []
  50         self.state = self.expect_stream_start
  51
  52         # Current event and the event queue.
  53         self.events = []
  54         self.event = None
  55
  56         # The current indentation level and the stack of previous indents.
  57         self.indents = []
  58         self.indent = None
  59
  60         # Flow level.
  61         self.flow_level = 0
  62
  63         # Contexts.
  64         self.root_context = False
  65         self.sequence_context = False
  66         self.mapping_context = False
  67         self.simple_key_context = False
  68
  69         # Characteristics of the last emitted character:
  70         #  - current position.
  71         #  - is it a whitespace?
  72         #  - is it an indention character
  73         #    (indentation space, '-', '?', or ':')?
  74         self.line = 0
  75         self.column = 0
  76         self.whitespace = True
  77         self.indention = True
  78
  79         # Whether the document requires an explicit document indicator
  80         self.open_ended = False
  81
  82         # Formatting details.
  83         self.canonical = canonical
  84         self.allow_unicode = allow_unicode
  85         self.best_indent = 2
  86         if indent and 1 < indent < 10:
  87             self.best_indent = indent
  88         self.best_width = 80
  89         if width and width > self.best_indent*2:
  90             self.best_width = width
  91         self.best_line_break = u'\n'
  92         if line_break in [u'\r', u'\n', u'\r\n']:
  93             self.best_line_break = line_break
  94
  95         # Tag prefixes.
  96         self.tag_prefixes = None
  97
  98         # Prepared anchor and tag.
  99         self.prepared_anchor = None
 100         self.prepared_tag = None
 101
 102         # Scalar analysis and style.
 103         self.analysis = None
 104         self.style = None
 105
 106     def dispose(self):
 107         # Reset the state attributes (to clear self-references)
 108         self.states = []
 109         self.state = None
 110
 111     def emit(self, event):
 112         self.events.append(event)
 113         while not self.need_more_events():
 114             self.event = self.events.pop(0)
 115             self.state()
 116             self.event = None
 117
 118     # In some cases, we wait for a few next events before emitting.
 119
 120     def need_more_events(self):
 121         if not self.events:
 122             return True
 123         event = self.events[0]
 124         if isinstance(event, DocumentStartEvent):
 125             return self.need_events(1)
 126         elif isinstance(event, SequenceStartEvent):
 127             return self.need_events(2)
 128         elif isinstance(event, MappingStartEvent):
 129             return self.need_events(3)
 130         else:
 131             return False
 132
 133     def need_events(self, count):
 134         level = 0
 135         for event in self.events[1:]:
 136             if isinstance(event, (DocumentStartEvent, CollectionStartEvent)):
 137                 level += 1
 138             elif isinstance(event, (DocumentEndEvent, CollectionEndEvent)):
 139                 level -= 1
 140             elif isinstance(event, StreamEndEvent):
 141                 level = -1
 142             if level < 0:
 143                 return False
 144         return (len(self.events) < count+1)
 145
 146     def increase_indent(self, flow=False, indentless=False):
 147         self.indents.append(self.indent)
 148         if self.indent is None:
 149             if flow:
 150                 self.indent = self.best_indent
 151             else:
 152                 self.indent = 0
 153         elif not indentless:
 154             self.indent += self.best_indent
 155
 156     # States.
 157
 158     # Stream handlers.
 159
 160     def expect_stream_start(self):
 161         if isinstance(self.event, StreamStartEvent):
 162             if self.event.encoding and not getattr(self.stream, 'encoding', None):
 163                 self.encoding = self.event.encoding
 164             self.write_stream_start()
 165             self.state = self.expect_first_document_start
 166         else:
 167             raise EmitterError("expected StreamStartEvent, but got %s"
 168                     % self.event)
 169
 170     def expect_nothing(self):
 171         raise EmitterError("expected nothing, but got %s" % self.event)
 172
 173     # Document handlers.
 174
 175     def expect_first_document_start(self):
 176         return self.expect_document_start(first=True)
 177
 178     def expect_document_start(self, first=False):
 179         if isinstance(self.event, DocumentStartEvent):
 180             if (self.event.version or self.event.tags) and self.open_ended:
 181                 self.write_indicator(u'...', True)
 182                 self.write_indent()
 183             if self.event.version:
 184                 version_text = self.prepare_version(self.event.version)
 185                 self.write_version_directive(version_text)
 186             self.tag_prefixes = self.DEFAULT_TAG_PREFIXES.copy()
 187             if self.event.tags:
 188                 handles = self.event.tags.keys()
 189                 handles.sort()
 190                 for handle in handles:
 191                     prefix = self.event.tags[handle]
 192                     self.tag_prefixes[prefix] = handle
 193                     handle_text = self.prepare_tag_handle(handle)
 194                     prefix_text = self.prepare_tag_prefix(prefix)
 195                     self.write_tag_directive(handle_text, prefix_text)
 196             implicit = (first and not self.event.explicit and not self.canonical
 197                     and not self.event.version and not self.event.tags
 198                     and not self.check_empty_document())
 199             if not implicit:
 200                 self.write_indent()
 201                 self.write_indicator(u'---', True)
 202                 if self.canonical:
 203                     self.write_indent()
 204             self.state = self.expect_document_root
 205         elif isinstance(self.event, StreamEndEvent):
 206             if self.open_ended:
 207                 self.write_indicator(u'...', True)
 208                 self.write_indent()
 209             self.write_stream_end()
 210             self.state = self.expect_nothing
 211         else:
 212             raise EmitterError("expected DocumentStartEvent, but got %s"
 213                     % self.event)
 214
 215     def expect_document_end(self):
 216         if isinstance(self.event, DocumentEndEvent):
 217             self.write_indent()
 218             if self.event.explicit:
 219                 self.write_indicator(u'...', True)
 220                 self.write_indent()
 221             self.flush_stream()
 222             self.state = self.expect_document_start
 223         else:
 224             raise EmitterError("expected DocumentEndEvent, but got %s"
 225                     % self.event)
 226
 227     def expect_document_root(self):
 228         self.states.append(self.expect_document_end)
 229         self.expect_node(root=True)
 230
 231     # Node handlers.
 232
 233     def expect_node(self, root=False, sequence=False, mapping=False,
 234             simple_key=False):
 235         self.root_context = root
 236         self.sequence_context = sequence
 237         self.mapping_context = mapping
 238         self.simple_key_context = simple_key
 239         if isinstance(self.event, AliasEvent):
 240             self.expect_alias()
 241         elif isinstance(self.event, (ScalarEvent, CollectionStartEvent)):
 242             self.process_anchor(u'&')
 243             self.process_tag()
 244             if isinstance(self.event, ScalarEvent):
 245                 self.expect_scalar()
 246             elif isinstance(self.event, SequenceStartEvent):
 247                 if self.flow_level or self.canonical or self.event.flow_style   \
 248                         or self.check_empty_sequence():
 249                     self.expect_flow_sequence()
 250                 else:
 251                     self.expect_block_sequence()
 252             elif isinstance(self.event, MappingStartEvent):
 253                 if self.flow_level or self.canonical or self.event.flow_style   \
 254                         or self.check_empty_mapping():
 255                     self.expect_flow_mapping()
 256                 else:
 257                     self.expect_block_mapping()
 258         else:
 259             raise EmitterError("expected NodeEvent, but got %s" % self.event)
 260
 261     def expect_alias(self):
 262         if self.event.anchor is None:
 263             raise EmitterError("anchor is not specified for alias")
 264         self.process_anchor(u'*')
 265         self.state = self.states.pop()
 266
 267     def expect_scalar(self):
 268         self.increase_indent(flow=True)
 269         self.process_scalar()
 270         self.indent = self.indents.pop()
 271         self.state = self.states.pop()
 272
 273     # Flow sequence handlers.
 274
 275     def expect_flow_sequence(self):
 276         self.write_indicator(u'[', True, whitespace=True)
 277         self.flow_level += 1
 278         self.increase_indent(flow=True)
 279         self.state = self.expect_first_flow_sequence_item
 280
 281     def expect_first_flow_sequence_item(self):
 282         if isinstance(self.event, SequenceEndEvent):
 283             self.indent = self.indents.pop()
 284             self.flow_level -= 1
 285             self.write_indicator(u']', False)
 286             self.state = self.states.pop()
 287         else:
 288             if self.canonical or self.column > self.best_width:
 289                 self.write_indent()
 290             self.states.append(self.expect_flow_sequence_item)
 291             self.expect_node(sequence=True)
 292
 293     def expect_flow_sequence_item(self):
 294         if isinstance(self.event, SequenceEndEvent):
 295             self.indent = self.indents.pop()
 296             self.flow_level -= 1
 297             if self.canonical:
 298                 self.write_indicator(u',', False)
 299                 self.write_indent()
 300             self.write_indicator(u']', False)
 301             self.state = self.states.pop()
 302         else:
 303             self.write_indicator(u',', False)
 304             if self.canonical or self.column > self.best_width:
 305                 self.write_indent()
 306             self.states.append(self.expect_flow_sequence_item)
 307             self.expect_node(sequence=True)
 308
 309     # Flow mapping handlers.
 310
 311     def expect_flow_mapping(self):
 312         self.write_indicator(u'{', True, whitespace=True)
 313         self.flow_level += 1
 314         self.increase_indent(flow=True)
 315         self.state = self.expect_first_flow_mapping_key
 316
 317     def expect_first_flow_mapping_key(self):
 318         if isinstance(self.event, MappingEndEvent):
 319             self.indent = self.indents.pop()
 320             self.flow_level -= 1
 321             self.write_indicator(u'}', False)
 322             self.state = self.states.pop()
 323         else:
 324             if self.canonical or self.column > self.best_width:
 325                 self.write_indent()
 326             if not self.canonical and self.check_simple_key():
 327                 self.states.append(self.expect_flow_mapping_simple_value)
 328                 self.expect_node(mapping=True, simple_key=True)
 329             else:
 330                 self.write_indicator(u'?', True)
 331                 self.states.append(self.expect_flow_mapping_value)
 332                 self.expect_node(mapping=True)
 333
 334     def expect_flow_mapping_key(self):
 335         if isinstance(self.event, MappingEndEvent):
 336             self.indent = self.indents.pop()
 337             self.flow_level -= 1
 338             if self.canonical:
 339                 self.write_indicator(u',', False)
 340                 self.write_indent()
 341             self.write_indicator(u'}', False)
 342             self.state = self.states.pop()
 343         else:
 344             self.write_indicator(u',', False)
 345             if self.canonical or self.column > self.best_width:
 346                 self.write_indent()
 347             if not self.canonical and self.check_simple_key():
 348                 self.states.append(self.expect_flow_mapping_simple_value)
 349                 self.expect_node(mapping=True, simple_key=True)
 350             else:
 351                 self.write_indicator(u'?', True)
 352                 self.states.append(self.expect_flow_mapping_value)
 353                 self.expect_node(mapping=True)
 354
 355     def expect_flow_mapping_simple_value(self):
 356         self.write_indicator(u':', False)
 357         self.states.append(self.expect_flow_mapping_key)
 358         self.expect_node(mapping=True)
 359
 360     def expect_flow_mapping_value(self):
 361         if self.canonical or self.column > self.best_width:
 362             self.write_indent()
 363         self.write_indicator(u':', True)
 364         self.states.append(self.expect_flow_mapping_key)
 365         self.expect_node(mapping=True)
 366
 367     # Block sequence handlers.
 368
 369     def expect_block_sequence(self):
 370         indentless = (self.mapping_context and not self.indention)
 371         self.increase_indent(flow=False, indentless=indentless)
 372         self.state = self.expect_first_block_sequence_item
 373
 374     def expect_first_block_sequence_item(self):
 375         return self.expect_block_sequence_item(first=True)
 376
 377     def expect_block_sequence_item(self, first=False):
 378         if not first and isinstance(self.event, SequenceEndEvent):
 379             self.indent = self.indents.pop()
 380             self.state = self.states.pop()
 381         else:
 382             self.write_indent()
 383             self.write_indicator(u'-', True, indention=True)
 384             self.states.append(self.expect_block_sequence_item)
 385             self.expect_node(sequence=True)
 386
 387     # Block mapping handlers.
 388
 389     def expect_block_mapping(self):
 390         self.increase_indent(flow=False)
 391         self.state = self.expect_first_block_mapping_key
 392
 393     def expect_first_block_mapping_key(self):
 394         return self.expect_block_mapping_key(first=True)
 395
 396     def expect_block_mapping_key(self, first=False):
 397         if not first and isinstance(self.event, MappingEndEvent):
 398             self.indent = self.indents.pop()
 399             self.state = self.states.pop()
 400         else:
 401             self.write_indent()
 402             if self.check_simple_key():
 403                 self.states.append(self.expect_block_mapping_simple_value)
 404                 self.expect_node(mapping=True, simple_key=True)
 405             else:
 406                 self.write_indicator(u'?', True, indention=True)
 407                 self.states.append(self.expect_block_mapping_value)
 408                 self.expect_node(mapping=True)
 409
 410     def expect_block_mapping_simple_value(self):
 411         self.write_indicator(u':', False)
 412         self.states.append(self.expect_block_mapping_key)
 413         self.expect_node(mapping=True)
 414
 415     def expect_block_mapping_value(self):
 416         self.write_indent()
 417         self.write_indicator(u':', True, indention=True)
 418         self.states.append(self.expect_block_mapping_key)
 419         self.expect_node(mapping=True)
 420
 421     # Checkers.
 422
 423     def check_empty_sequence(self):
 424         return (isinstance(self.event, SequenceStartEvent) and self.events
 425                 and isinstance(self.events[0], SequenceEndEvent))
 426
 427     def check_empty_mapping(self):
 428         return (isinstance(self.event, MappingStartEvent) and self.events
 429                 and isinstance(self.events[0], MappingEndEvent))
 430
 431     def check_empty_document(self):
 432         if not isinstance(self.event, DocumentStartEvent) or not self.events:
 433             return False
 434         event = self.events[0]
 435         return (isinstance(event, ScalarEvent) and event.anchor is None
 436                 and event.tag is None and event.implicit and event.value == u'')
 437
 438     def check_simple_key(self):
 439         length = 0
 440         if isinstance(self.event, NodeEvent) and self.event.anchor is not None:
 441             if self.prepared_anchor is None:
 442                 self.prepared_anchor = self.prepare_anchor(self.event.anchor)
 443             length += len(self.prepared_anchor)
 444         if isinstance(self.event, (ScalarEvent, CollectionStartEvent))  \
 445                 and self.event.tag is not None:
 446             if self.prepared_tag is None:
 447                 self.prepared_tag = self.prepare_tag(self.event.tag)
 448             length += len(self.prepared_tag)
 449         if isinstance(self.event, ScalarEvent):
 450             if self.analysis is None:
 451                 self.analysis = self.analyze_scalar(self.event.value)
 452             length += len(self.analysis.scalar)
 453         return (length < 128 and (isinstance(self.event, AliasEvent)
 454             or (isinstance(self.event, ScalarEvent)
 455                     and not self.analysis.empty and not self.analysis.multiline)
 456             or self.check_empty_sequence() or self.check_empty_mapping()))
 457
 458     # Anchor, Tag, and Scalar processors.
 459
 460     def process_anchor(self, indicator):
 461         if self.event.anchor is None:
 462             self.prepared_anchor = None
 463             return
 464         if self.prepared_anchor is None:
 465             self.prepared_anchor = self.prepare_anchor(self.event.anchor)
 466         if self.prepared_anchor:
 467             self.write_indicator(indicator+self.prepared_anchor, True)
 468         self.prepared_anchor = None
 469
 470     def process_tag(self):
 471         tag = self.event.tag
 472         if isinstance(self.event, ScalarEvent):
 473             if self.style is None:
 474                 self.style = self.choose_scalar_style()
 475             if ((not self.canonical or tag is None) and
 476                 ((self.style == '' and self.event.implicit[0])
 477                         or (self.style != '' and self.event.implicit[1]))):
 478                 self.prepared_tag = None
 479                 return
 480             if self.event.implicit[0] and tag is None:
 481                 tag = u'!'
 482                 self.prepared_tag = None
 483         else:
 484             if (not self.canonical or tag is None) and self.event.implicit:
 485                 self.prepared_tag = None
 486                 return
 487         if tag is None:
 488             raise EmitterError("tag is not specified")
 489         if self.prepared_tag is None:
 490             self.prepared_tag = self.prepare_tag(tag)
 491         if self.prepared_tag:
 492             self.write_indicator(self.prepared_tag, True)
 493         self.prepared_tag = None
 494
 495     def choose_scalar_style(self):
 496         if self.analysis is None:
 497             self.analysis = self.analyze_scalar(self.event.value)
 498         if self.event.style == '"' or self.canonical:
 499             return '"'
 500         if not self.event.style and self.event.implicit[0]:
 501             if (not (self.simple_key_context and
 502                     (self.analysis.empty or self.analysis.multiline))
 503                 and (self.flow_level and self.analysis.allow_flow_plain
 504                     or (not self.flow_level and self.analysis.allow_block_plain))):
 505                 return ''
 506         if self.event.style and self.event.style in '|>':
 507             if (not self.flow_level and not self.simple_key_context
 508                     and self.analysis.allow_block):
 509                 return self.event.style
 510         if not self.event.style or self.event.style == '\'':
 511             if (self.analysis.allow_single_quoted and
 512                     not (self.simple_key_context and self.analysis.multiline)):
 513                 return '\''
 514         return '"'
 515
 516     def process_scalar(self):
 517         if self.analysis is None:
 518             self.analysis = self.analyze_scalar(self.event.value)
 519         if self.style is None:
 520             self.style = self.choose_scalar_style()
 521         split = (not self.simple_key_context)
 522         #if self.analysis.multiline and split    \
 523         #        and (not self.style or self.style in '\'\"'):
 524         #    self.write_indent()
 525         if self.style == '"':
 526             self.write_double_quoted(self.analysis.scalar, split)
 527         elif self.style == '\'':
 528             self.write_single_quoted(self.analysis.scalar, split)
 529         elif self.style == '>':
 530             self.write_folded(self.analysis.scalar)
 531         elif self.style == '|':
 532             self.write_literal(self.analysis.scalar)
 533         else:
 534             self.write_plain(self.analysis.scalar, split)
 535         self.analysis = None
 536         self.style = None
 537
 538     # Analyzers.
 539
 540     def prepare_version(self, version):
 541         major, minor = version
 542         if major != 1:
 543             raise EmitterError("unsupported YAML version: %d.%d" % (major, minor))
 544         return u'%d.%d' % (major, minor)
 545
 546     def prepare_tag_handle(self, handle):
 547         if not handle:
 548             raise EmitterError("tag handle must not be empty")
 549         if handle[0] != u'!' or handle[-1] != u'!':
 550             raise EmitterError("tag handle must start and end with '!': %r"
 551                     % (handle.encode('utf-8')))
 552         for ch in handle[1:-1]:
 553             if not (u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z'  \
 554                     or ch in u'-_'):
 555                 raise EmitterError("invalid character %r in the tag handle: %r"
 556                         % (ch.encode('utf-8'), handle.encode('utf-8')))
 557         return handle
 558
 559     def prepare_tag_prefix(self, prefix):
 560         if not prefix:
 561             raise EmitterError("tag prefix must not be empty")
 562         chunks = []
 563         start = end = 0
 564         if prefix[0] == u'!':
 565             end = 1
 566         while end < len(prefix):
 567             ch = prefix[end]
 568             if u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z'   \
 569                     or ch in u'-;/?!:@&=+$,_.~*\'()[]':
 570                 end += 1
 571             else:
 572                 if start < end:
 573                     chunks.append(prefix[start:end])
 574                 start = end = end+1
 575                 data = ch.encode('utf-8')
 576                 for ch in data:
 577                     chunks.append(u'%%%02X' % ord(ch))
 578         if start < end:
 579             chunks.append(prefix[start:end])
 580         return u''.join(chunks)
 581
 582     def prepare_tag(self, tag):
 583         if not tag:
 584             raise EmitterError("tag must not be empty")
 585         if tag == u'!':
 586             return tag
 587         handle = None
 588         suffix = tag
 589         prefixes = self.tag_prefixes.keys()
 590         prefixes.sort()
 591         for prefix in prefixes:
 592             if tag.startswith(prefix)   \
 593                     and (prefix == u'!' or len(prefix) < len(tag)):
 594                 handle = self.tag_prefixes[prefix]
 595                 suffix = tag[len(prefix):]
 596         chunks = []
 597         start = end = 0
 598         while end < len(suffix):
 599             ch = suffix[end]
 600             if u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z'   \
 601                     or ch in u'-;/?:@&=+$,_.~*\'()[]'   \
 602                     or (ch == u'!' and handle != u'!'):
 603                 end += 1
 604             else:
 605                 if start < end:
 606                     chunks.append(suffix[start:end])
 607                 start = end = end+1
 608                 data = ch.encode('utf-8')
 609                 for ch in data:
 610                     chunks.append(u'%%%02X' % ord(ch))
 611         if start < end:
 612             chunks.append(suffix[start:end])
 613         suffix_text = u''.join(chunks)
 614         if handle:
 615             return u'%s%s' % (handle, suffix_text)
 616         else:
 617             return u'!<%s>' % suffix_text
 618
 619     def prepare_anchor(self, anchor):
 620         if not anchor:
 621             raise EmitterError("anchor must not be empty")
 622         for ch in anchor:
 623             if not (u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z'  \
 624                     or ch in u'-_'):
 625                 raise EmitterError("invalid character %r in the anchor: %r"
 626                         % (ch.encode('utf-8'), anchor.encode('utf-8')))
 627         return anchor
 628
 629     def analyze_scalar(self, scalar):
 630
 631         # Empty scalar is a special case.
 632         if not scalar:
 633             return ScalarAnalysis(scalar=scalar, empty=True, multiline=False,
 634                     allow_flow_plain=False, allow_block_plain=True,
 635                     allow_single_quoted=True, allow_double_quoted=True,
 636                     allow_block=False)
 637
 638         # Indicators and special characters.
 639         block_indicators = False
 640         flow_indicators = False
 641         line_breaks = False
 642         special_characters = False
 643
 644         # Important whitespace combinations.
 645         leading_space = False
 646         leading_break = False
 647         trailing_space = False
 648         trailing_break = False
 649         break_space = False
 650         space_break = False
 651
 652         # Check document indicators.
 653         if scalar.startswith(u'---') or scalar.startswith(u'...'):
 654             block_indicators = True
 655             flow_indicators = True
 656
 657         # First character or preceded by a whitespace.
 658         preceeded_by_whitespace = True
 659
 660         # Last character or followed by a whitespace.
 661         followed_by_whitespace = (len(scalar) == 1 or
 662                 scalar[1] in u'\0 \t\r\n\x85\u2028\u2029')
 663
 664         # The previous character is a space.
 665         previous_space = False
 666
 667         # The previous character is a break.
 668         previous_break = False
 669
 670         index = 0
 671         while index < len(scalar):
 672             ch = scalar[index]
 673
 674             # Check for indicators.
 675             if index == 0:
 676                 # Leading indicators are special characters.
 677                 if ch in u'#,[]{}&*!|>\'\"%@`':
 678                     flow_indicators = True
 679                     block_indicators = True
 680                 if ch in u'?:':
 681                     flow_indicators = True
 682                     if followed_by_whitespace:
 683                         block_indicators = True
 684                 if ch == u'-' and followed_by_whitespace:
 685                     flow_indicators = True
 686                     block_indicators = True
 687             else:
 688                 # Some indicators cannot appear within a scalar as well.
 689                 if ch in u',?[]{}':
 690                     flow_indicators = True
 691                 if ch == u':':
 692                     flow_indicators = True
 693                     if followed_by_whitespace:
 694                         block_indicators = True
 695                 if ch == u'#' and preceeded_by_whitespace:
 696                     flow_indicators = True
 697                     block_indicators = True
 698
 699             # Check for line breaks, special, and unicode characters.
 700             if ch in u'\n\x85\u2028\u2029':
 701                 line_breaks = True
 702             if not (ch == u'\n' or u'\x20' <= ch <= u'\x7E'):
 703                 if (ch == u'\x85' or u'\xA0' <= ch <= u'\uD7FF'
 704                         or u'\uE000' <= ch <= u'\uFFFD') and ch != u'\uFEFF':
 705                     unicode_characters = True
 706                     if not self.allow_unicode:
 707                         special_characters = True
 708                 else:
 709                     special_characters = True
 710
 711             # Detect important whitespace combinations.
 712             if ch == u' ':
 713                 if index == 0:
 714                     leading_space = True
 715                 if index == len(scalar)-1:
 716                     trailing_space = True
 717                 if previous_break:
 718                     break_space = True
 719                 previous_space = True
 720                 previous_break = False
 721             elif ch in u'\n\x85\u2028\u2029':
 722                 if index == 0:
 723                     leading_break = True
 724                 if index == len(scalar)-1:
 725                     trailing_break = True
 726                 if previous_space:
 727                     space_break = True
 728                 previous_space = False
 729                 previous_break = True
 730             else:
 731                 previous_space = False
 732                 previous_break = False
 733
 734             # Prepare for the next character.
 735             index += 1
 736             preceeded_by_whitespace = (ch in u'\0 \t\r\n\x85\u2028\u2029')
 737             followed_by_whitespace = (index+1 >= len(scalar) or
 738                     scalar[index+1] in u'\0 \t\r\n\x85\u2028\u2029')
 739
 740         # Let's decide what styles are allowed.
 741         allow_flow_plain = True
 742         allow_block_plain = True
 743         allow_single_quoted = True
 744         allow_double_quoted = True
 745         allow_block = True
 746
 747         # Leading and trailing whitespaces are bad for plain scalars.
 748         if (leading_space or leading_break
 749                 or trailing_space or trailing_break):
 750             allow_flow_plain = allow_block_plain = False
 751
 752         # We do not permit trailing spaces for block scalars.
 753         if trailing_space:
 754             allow_block = False
 755
 756         # Spaces at the beginning of a new line are only acceptable for block
 757         # scalars.
 758         if break_space:
 759             allow_flow_plain = allow_block_plain = allow_single_quoted = False
 760
 761         # Spaces followed by breaks, as well as special character are only
 762         # allowed for double quoted scalars.
 763         if space_break or special_characters:
 764             allow_flow_plain = allow_block_plain =  \
 765             allow_single_quoted = allow_block = False
 766
 767         # Although the plain scalar writer supports breaks, we never emit
 768         # multiline plain scalars.
 769         if line_breaks:
 770             allow_flow_plain = allow_block_plain = False
 771
 772         # Flow indicators are forbidden for flow plain scalars.
 773         if flow_indicators:
 774             allow_flow_plain = False
 775
 776         # Block indicators are forbidden for block plain scalars.
 777         if block_indicators:
 778             allow_block_plain = False
 779
 780         return ScalarAnalysis(scalar=scalar,
 781                 empty=False, multiline=line_breaks,
 782                 allow_flow_plain=allow_flow_plain,
 783                 allow_block_plain=allow_block_plain,
 784                 allow_single_quoted=allow_single_quoted,
 785                 allow_double_quoted=allow_double_quoted,
 786                 allow_block=allow_block)
 787
 788     # Writers.
 789
 790     def flush_stream(self):
 791         if hasattr(self.stream, 'flush'):
 792             self.stream.flush()
 793
 794     def write_stream_start(self):
 795         # Write BOM if needed.
 796         if self.encoding and self.encoding.startswith('utf-16'):
 797             self.stream.write(u'\uFEFF'.encode(self.encoding))
 798
    def write_stream_end(self):
        # Nothing is written at the end of a stream; the only action is to
        # hand over to flush_stream().
        self.flush_stream()
 801
 802     def write_indicator(self, indicator, need_whitespace,
 803             whitespace=False, indention=False):
 804         if self.whitespace or not need_whitespace:
 805             data = indicator
 806         else:
 807             data = u' '+indicator
 808         self.whitespace = whitespace
 809         self.indention = self.indention and indention
 810         self.column += len(data)
 811         self.open_ended = False
 812         if self.encoding:
 813             data = data.encode(self.encoding)
 814         self.stream.write(data)
 815
 816     def write_indent(self):
 817         indent = self.indent or 0
 818         if not self.indention or self.column > indent   \
 819                 or (self.column == indent and not self.whitespace):
 820             self.write_line_break()
 821         if self.column < indent:
 822             self.whitespace = True
 823             data = u' '*(indent-self.column)
 824             self.column = indent
 825             if self.encoding:
 826                 data = data.encode(self.encoding)
 827             self.stream.write(data)
 828
 829     def write_line_break(self, data=None):
 830         if data is None:
 831             data = self.best_line_break
 832         self.whitespace = True
 833         self.indention = True
 834         self.line += 1
 835         self.column = 0
 836         if self.encoding:
 837             data = data.encode(self.encoding)
 838         self.stream.write(data)
 839
 840     def write_version_directive(self, version_text):
 841         data = u'%%YAML %s' % version_text
 842         if self.encoding:
 843             data = data.encode(self.encoding)
 844         self.stream.write(data)
 845         self.write_line_break()
 846
 847     def write_tag_directive(self, handle_text, prefix_text):
 848         data = u'%%TAG %s %s' % (handle_text, prefix_text)
 849         if self.encoding:
 850             data = data.encode(self.encoding)
 851         self.stream.write(data)
 852         self.write_line_break()
 853
 854     # Scalar streams.
 855
    def write_single_quoted(self, text, split=True):
        """Write `text` as a single-quoted scalar.

        The text is scanned one character at a time and written in runs of
        spaces, runs of line breaks and runs of other characters.  Quote
        characters are written doubled.  When `split` is true, a single
        space between two words may be replaced by a line break plus
        indentation once the column has passed `self.best_width`.
        """
        self.write_indicator(u'\'', True)
        # `spaces` / `breaks` record whether the previous character was a
        # space / a line break; text[start:end] is the run not yet written.
        spaces = False
        breaks = False
        start = end = 0
        while end <= len(text):
            # `ch` stays None on the extra final iteration (end == len(text)),
            # which exists to flush the last pending run.
            ch = None
            if end < len(text):
                ch = text[end]
            if spaces:
                # A run of spaces ends here.
                if ch is None or ch != u' ':
                    # Only a run of exactly one space, away from both ends of
                    # the text, may be replaced by a line break.
                    if start+1 == end and self.column > self.best_width and split   \
                            and start != 0 and end != len(text):
                        self.write_indent()
                    else:
                        data = text[start:end]
                        self.column += len(data)
                        if self.encoding:
                            data = data.encode(self.encoding)
                        self.stream.write(data)
                    start = end
            elif breaks:
                # A run of line breaks ends here.
                if ch is None or ch not in u'\n\x85\u2028\u2029':
                    # One additional break is written when the run starts
                    # with u'\n' (presumably because a reader folds a lone
                    # break inside a quoted scalar -- confirm against the
                    # YAML specification).
                    if text[start] == u'\n':
                        self.write_line_break()
                    for br in text[start:end]:
                        # u'\n' becomes the preferred line break; the other
                        # break characters are written as they are.
                        if br == u'\n':
                            self.write_line_break()
                        else:
                            self.write_line_break(br)
                    self.write_indent()
                    start = end
            else:
                # A run of ordinary characters ends at a space, a line break,
                # a quote or the end of the text.
                if ch is None or ch in u' \n\x85\u2028\u2029' or ch == u'\'':
                    if start < end:
                        data = text[start:end]
                        self.column += len(data)
                        if self.encoding:
                            data = data.encode(self.encoding)
                        self.stream.write(data)
                        start = end
            if ch == u'\'':
                # A quote is written doubled and left out of the next run.
                data = u'\'\''
                self.column += 2
                if self.encoding:
                    data = data.encode(self.encoding)
                self.stream.write(data)
                start = end + 1
            if ch is not None:
                spaces = (ch == u' ')
                breaks = (ch in u'\n\x85\u2028\u2029')
            end += 1
        self.write_indicator(u'\'', False)
 909
    # Characters that have a short escape in double-quoted scalars, mapped
    # to the character that write_double_quoted() writes after the
    # backslash.  Characters missing from this table are escaped there
    # numerically (\xXX, \uXXXX or \UXXXXXXXX).
    ESCAPE_REPLACEMENTS = {
        u'\0':      u'0',
        u'\x07':    u'a',
        u'\x08':    u'b',
        u'\x09':    u't',
        u'\x0A':    u'n',
        u'\x0B':    u'v',
        u'\x0C':    u'f',
        u'\x0D':    u'r',
        u'\x1B':    u'e',
        u'\"':      u'\"',
        u'\\':      u'\\',
        u'\x85':    u'N',   # next line (NEL)
        u'\xA0':    u'_',   # no-break space
        u'\u2028':  u'L',   # line separator
        u'\u2029':  u'P',   # paragraph separator
    }
 927
    def write_double_quoted(self, text, split=True):
        """Write `text` as a double-quoted scalar.

        Runs of characters that need no escaping are copied as they are;
        every other character is written as a backslash escape.  When
        `split` is true, a line that would pass `self.best_width` is ended
        with a backslash and continued on a new, indented line.
        """
        self.write_indicator(u'"', True)
        # text[start:end] is the run of characters not yet written.
        start = end = 0
        while end <= len(text):
            # `ch` stays None on the extra final iteration (end == len(text)),
            # which exists to flush the last pending run.
            ch = None
            if end < len(text):
                ch = text[end]
            # The run ends at the end of the text and at every character that
            # must be escaped: the quote, the backslash, the listed break
            # characters, U+FEFF, and anything outside printable ASCII (or,
            # with allow_unicode set, outside the accepted Unicode ranges).
            if ch is None or ch in u'"\\\x85\u2028\u2029\uFEFF' \
                    or not (u'\x20' <= ch <= u'\x7E'
                        or (self.allow_unicode
                            and (u'\xA0' <= ch <= u'\uD7FF'
                                or u'\uE000' <= ch <= u'\uFFFD'))):
                if start < end:
                    data = text[start:end]
                    self.column += len(data)
                    if self.encoding:
                        data = data.encode(self.encoding)
                    self.stream.write(data)
                    start = end
                if ch is not None:
                    # Use the short escape when the table has one, otherwise
                    # the shortest numeric form that fits the code point.
                    if ch in self.ESCAPE_REPLACEMENTS:
                        data = u'\\'+self.ESCAPE_REPLACEMENTS[ch]
                    elif ch <= u'\xFF':
                        data = u'\\x%02X' % ord(ch)
                    elif ch <= u'\uFFFF':
                        data = u'\\u%04X' % ord(ch)
                    else:
                        data = u'\\U%08X' % ord(ch)
                    self.column += len(data)
                    if self.encoding:
                        data = data.encode(self.encoding)
                    self.stream.write(data)
                    start = end+1
            # Line splitting: considered only at a space or right after an
            # escape (start >= end), never at the first or last character,
            # and only once the pending run would pass best_width.
            if 0 < end < len(text)-1 and (ch == u' ' or start >= end)   \
                    and self.column+(end-start) > self.best_width and split:
                # Write the pending run followed by a backslash that ends
                # the line.
                data = text[start:end]+u'\\'
                if start < end:
                    start = end
                self.column += len(data)
                if self.encoding:
                    data = data.encode(self.encoding)
                self.stream.write(data)
                self.write_indent()
                # The continuation line carries scalar content, so clear the
                # flags that the line break and indentation have just set.
                self.whitespace = False
                self.indention = False
                # A continuation that starts with a space gets a backslash
                # in front of it (presumably so the space is not read as
                # indentation -- confirm against the YAML specification).
                if text[start] == u' ':
                    data = u'\\'
                    self.column += len(data)
                    if self.encoding:
                        data = data.encode(self.encoding)
                    self.stream.write(data)
            end += 1
        self.write_indicator(u'"', False)
 981
 982     def determine_block_hints(self, text):
 983         hints = u''
 984         if text:
 985             if text[0] in u' \n\x85\u2028\u2029':
 986                 hints += unicode(self.best_indent)
 987             if text[-1] not in u'\n\x85\u2028\u2029':
 988                 hints += u'-'
 989             elif len(text) == 1 or text[-2] in u'\n\x85\u2028\u2029':
 990                 hints += u'+'
 991         return hints
 992
    def write_folded(self, text):
        """Write `text` as a folded block scalar (`>` plus hints).

        The text is scanned one character at a time and written in runs of
        line breaks, runs of spaces and runs of other characters.  A single
        space between two words may be replaced by a line break plus
        indentation once the column has passed `self.best_width`.
        """
        hints = self.determine_block_hints(text)
        self.write_indicator(u'>'+hints, True)
        # A trailing u'+' hint marks the output as open ended.
        if hints[-1:] == u'+':
            self.open_ended = True
        self.write_line_break()
        # `leading_space`: whether the line most recently started began with
        # a space (initially True).  `spaces` / `breaks`: whether the previous
        # character was a space / a line break.  `breaks` starts out True so
        # that the first character is handled as the start of a line.
        leading_space = True
        spaces = False
        breaks = True
        start = end = 0
        while end <= len(text):
            # `ch` stays None on the extra final iteration (end == len(text)),
            # which exists to flush the last pending run.
            ch = None
            if end < len(text):
                ch = text[end]
            if breaks:
                # A run of line breaks (possibly empty, at the very start)
                # ends here.
                if ch is None or ch not in u'\n\x85\u2028\u2029':
                    # One additional break is written when a run starting
                    # with u'\n' sits between two lines that both begin with
                    # a non-space character (presumably because a reader
                    # folds a lone break between such lines -- confirm
                    # against the YAML specification).
                    if not leading_space and ch is not None and ch != u' '  \
                            and text[start] == u'\n':
                        self.write_line_break()
                    leading_space = (ch == u' ')
                    for br in text[start:end]:
                        # u'\n' becomes the preferred line break; the other
                        # break characters are written as they are.
                        if br == u'\n':
                            self.write_line_break()
                        else:
                            self.write_line_break(br)
                    # Indent only when more text follows.
                    if ch is not None:
                        self.write_indent()
                    start = end
            elif spaces:
                # A run of spaces ends here.
                if ch != u' ':
                    # Only a run of exactly one space may be replaced by a
                    # line break.
                    if start+1 == end and self.column > self.best_width:
                        self.write_indent()
                    else:
                        data = text[start:end]
                        self.column += len(data)
                        if self.encoding:
                            data = data.encode(self.encoding)
                        self.stream.write(data)
                    start = end
            else:
                # A run of ordinary characters ends at a space, a line break
                # or the end of the text.
                if ch is None or ch in u' \n\x85\u2028\u2029':
                    data = text[start:end]
                    self.column += len(data)
                    if self.encoding:
                        data = data.encode(self.encoding)
                    self.stream.write(data)
                    # Text that does not end with a break still gets its
                    # last line terminated.
                    if ch is None:
                        self.write_line_break()
                    start = end
            if ch is not None:
                breaks = (ch in u'\n\x85\u2028\u2029')
                spaces = (ch == u' ')
            end += 1
1046
1047     def write_literal(self, text):
1048         hints = self.determine_block_hints(text)
1049         self.write_indicator(u'|'+hints, True)
1050         if hints[-1:] == u'+':
1051             self.open_ended = True
1052         self.write_line_break()
1053         breaks = True
1054         start = end = 0
1055         while end <= len(text):
1056             ch = None
1057             if end < len(text):
1058                 ch = text[end]
1059             if breaks:
1060                 if ch is None or ch not in u'\n\x85\u2028\u2029':
1061                     for br in text[start:end]:
1062                         if br == u'\n':
1063                             self.write_line_break()
1064                         else:
1065                             self.write_line_break(br)
1066                     if ch is not None:
1067                         self.write_indent()
1068                     start = end
1069             else:
1070                 if ch is None or ch in u'\n\x85\u2028\u2029':
1071                     data = text[start:end]
1072                     if self.encoding:
1073                         data = data.encode(self.encoding)
1074                     self.stream.write(data)
1075                     if ch is None:
1076                         self.write_line_break()
1077                     start = end
1078             if ch is not None:
1079                 breaks = (ch in u'\n\x85\u2028\u2029')
1080             end += 1
1081
1082     def write_plain(self, text, split=True):
1083         if self.root_context:
1084             self.open_ended = True
1085         if not text:
1086             return
1087         if not self.whitespace:
1088             data = u' '
1089             self.column += len(data)
1090             if self.encoding:
1091                 data = data.encode(self.encoding)
1092             self.stream.write(data)
1093         self.whitespace = False
1094         self.indention = False
1095         spaces = False
1096         breaks = False
1097         start = end = 0
1098         while end <= len(text):
1099             ch = None
1100             if end < len(text):
1101                 ch = text[end]
1102             if spaces:
1103                 if ch != u' ':
1104                     if start+1 == end and self.column > self.best_width and split:
1105                         self.write_indent()
1106                         self.whitespace = False
1107                         self.indention = False
1108                     else:
1109                         data = text[start:end]
1110                         self.column += len(data)
1111                         if self.encoding:
1112                             data = data.encode(self.encoding)
1113                         self.stream.write(data)
1114                     start = end
1115             elif breaks:
1116                 if ch not in u'\n\x85\u2028\u2029':
1117                     if text[start] == u'\n':
1118                         self.write_line_break()
1119                     for br in text[start:end]:
1120                         if br == u'\n':
1121                             self.write_line_break()
1122                         else:
1123                             self.write_line_break(br)
1124                     self.write_indent()
1125                     self.whitespace = False
1126                     self.indention = False
1127                     start = end
1128             else:
1129                 if ch is None or ch in u' \n\x85\u2028\u2029':
1130                     data = text[start:end]
1131                     self.column += len(data)
1132                     if self.encoding:
1133                         data = data.encode(self.encoding)
1134                     self.stream.write(data)
1135                     start = end
1136             if ch is not None:
1137                 spaces = (ch == u' ')
1138                 breaks = (ch in u'\n\x85\u2028\u2029')
1139             end += 1
1140