diff options
author | Colin Walters <walters@verbum.org> | 2010-06-24 16:39:35 -0400 |
---|---|---|
committer | Colin Walters <walters@verbum.org> | 2010-06-25 09:19:14 -0400 |
commit | 0a674061eb4b7307f62c330bb3ebec5ad36ab1c6 (patch) | |
tree | 6ff9e298011052ef2ecb08e55b6440b99566400d /giscanner/annotationparser.py | |
parent | 11cfe386c37ced44a8e3efb5556bde3a43a11171 (diff) |
Clean up annotation parsing, don't try to parse invalid annotations
The old parser tried to parse:
@foo: some text here (other text)
Reject this. We strictly require another colon at the end to parse
the text in between as (option) (other option).
Futher ensure we only attempt to do option parse if we find something
that matches the strict regexp ([A-Za-z]+). This could be tightened
further.
TODO: Have a warning for something that looks like an annotation,
but isn't.
https://bugzilla.gnome.org/show_bug.cgi?id=622659
Diffstat (limited to 'giscanner/annotationparser.py')
-rw-r--r-- | giscanner/annotationparser.py | 149 |
1 files changed, 87 insertions, 62 deletions
diff --git a/giscanner/annotationparser.py b/giscanner/annotationparser.py index 01cbb1a..715ffe4 100644 --- a/giscanner/annotationparser.py +++ b/giscanner/annotationparser.py @@ -21,7 +21,6 @@ # AnnotationParser - parses gtk-doc annotations import re -import sys from .ast import (Array, Bitfield, Callback, Class, Enum, Field, Function, Interface, List, Map, Parameter, Property, Record, Return, @@ -49,7 +48,6 @@ TAG_VFUNC = 'virtual' TAG_SINCE = 'since' TAG_DEPRECATED = 'deprecated' TAG_RETURNS = 'returns' -TAG_RETURNS_ALT = 'return value' TAG_ATTRIBUTES = 'attributes' TAG_RENAME_TO = 'rename to' TAG_TYPE = 'type' @@ -100,14 +98,7 @@ class DocBlock(object): return '<DocBlock %r %r>' % (self.name, self.options) def get(self, name): - if name == TAG_RETURNS: - value = self.tags.get(name) - if value is None: - return self.tags.get(TAG_RETURNS_ALT) - else: - return value - else: - return self.tags.get(name) + return self.tags.get(name) class DocTag(object): @@ -152,6 +143,10 @@ class Option(object): class AnnotationParser(object): + WHITESPACE_RE = re.compile(r'^\s*$') + ASCII_TEXT_RE = re.compile(r'\s*[A-Za-z]+') + OPTION_RE = re.compile(r'\([A-Za-z]+[^(]*\)') + RETURNS_RE = re.compile(r'^return(s?)( value)?:', re.IGNORECASE) def __init__(self, namespace, source_scanner, transformer): self._blocks = {} @@ -195,64 +190,97 @@ class AnnotationParser(object): cpos = block_header.find(': ') if cpos: block_name = block_header[:cpos] - block_options, rest = self._parse_options(block_header[cpos+2:]) - if rest: - return + block_options = self.parse_options(block_header[cpos+2:]) else: block_name, block_options = block_header, {} block = DocBlock(block_name, block_options) + debug = block_name == 'annotation_object_compute_sum_n' comment_lines = [] - parse_parameters = True - canon_name = '' + parsing_parameters = True + last_param_tag = None + + # Second phase: parse parameters, return values, Tag: format + # annotations. + # + # Valid lines look like: + # * @foo: some comment here + # * @baz: (inout): This has an annotation + # * @bar: (out) (allow-none): this is a long parameter comment + # * that gets wrapped to the next line. + # * + # * Some documentation for the function. + # * + # * Returns: (transfer none): A value for line in comment[pos+1:].split('\n'): line = line.lstrip() - line = line[2:].strip() # Skip ' *' - if not line: - if parse_parameters: - parse_parameters = False + if (not line.startswith('*') or + self.WHITESPACE_RE.match(line[1:])): + # As soon as we find a line that's just whitespace, + # we're done parsing the parameters. + parsing_parameters = False continue - if line.startswith('@'): - line = line[1:] - elif not ': ' in line: - if parse_parameters and line: - if canon_name != '' and canon_name in block.tags: - block.tags[canon_name].comment += ' ' + line + + line = line[1:].lstrip() + + # Look for a parameter or return value. Both of these can + # have parenthesized options. + first_colonspace_index = line.find(': ') + is_parameter = line.startswith('@') + is_return_value = self.RETURNS_RE.search(line) + if ((is_parameter or is_return_value) + and first_colonspace_index > 0): + if is_parameter: + argname = line[1:first_colonspace_index] + else: + argname = TAG_RETURNS + tag = DocTag(argname) + second_colon_index = line.rfind(':') + found_options = False + if second_colon_index > first_colonspace_index: + value_line = \ + line[first_colonspace_index+2:second_colon_index] + if self.OPTION_RE.search(value_line): + # The OPTION_RE is a little bit heuristic. If + # we found two colons, we scan inside for something + # that looks like (foo). + # *Ideally* we'd change the gtk-doc format to + # require double colons, and then there'd be + # no ambiguity. I.e.: + # @foo:: Some documentation here + # But that'd be a rather incompatible change. + found_options = True + tag.comment = line[second_colon_index+1:].strip() + tag.options = self.parse_options(value_line) + if not found_options: + # We didn't find any options, so just take the whole thing + # as documentation. + tag.comment = line[first_colonspace_index+2:].strip() + block.tags[argname] = tag + last_param_tag = tag + elif (not is_parameter) and parsing_parameters and last_param_tag: + # We need to handle continuation lines on parameters. The + # conditional above - if a line doesn't start with '@', we're + # not yet in the documentation block for the whole function, + # and we've seen at least one parameter. + last_param_tag.comment += (' ' + line.strip()) + elif first_colonspace_index > 0: + # The line is of the form "Tag: some value here", like: + # Since: 0.8 + tag_name = line[:first_colonspace_index] + if self.ASCII_TEXT_RE.match(tag_name): + tag_name = tag_name.lower() + tag = DocTag(tag_name) + tag.value = line[first_colonspace_index+2:] + block.tags[tag_name] = tag else: comment_lines.append(line) - continue - tag_name, value = self._split_tag_namevalue(line) - canon_name = tag_name.lower() - if canon_name in block.tags: - print >> sys.stderr, ( - "Symbol %s has multiple definition of tag %r" % ( - block_name, canon_name, )) - block.tags[canon_name] = self._create_tag(canon_name, value) + elif (not is_parameter): + comment_lines.append(line) block.comment = '\n'.join(comment_lines) self._blocks[block.name] = block - def _split_tag_namevalue(self, raw): - """Split a line into tag name and value""" - parts = raw.split(': ', 1) - if len(parts) == 1: - tag_name = parts[0] - value = '' - if tag_name.endswith(':'): - tag_name = tag_name[:-1] - else: - tag_name, value = parts - return (tag_name, value) - - def _create_tag(self, tag_name, value): - # Tag: bar - # Tag: bar opt1 opt2 - tag = DocTag(tag_name) - tag.value = value - options, rest = self._parse_options(tag.value) - tag.options = options - tag.comment = rest or '' - return tag - - def _parse_options(self, value): + @classmethod + def parse_options(cls, value): # (foo) # (bar opt1 opt2...) opened = -1 @@ -277,11 +305,7 @@ class AnnotationParser(object): last = i + 2 opened = -1 - if last is not None: - rest = value[last:].strip() - else: - rest = None - return options, rest + return options class AnnotationApplier(object): @@ -871,7 +895,8 @@ class AnnotationApplier(object): annos_tag = self._get_tag(block, TAG_ATTRIBUTES) if annos_tag is None: return - for key, value in annos_tag.options.iteritems(): + options = AnnotationParser.parse_options(annos_tag.value) + for key, value in options.iteritems(): if value: node.attributes.append((key, value.one())) |