Source code for moldesign.utils.docparsers.google

"""
    Routines for runtime docstring argument injection

    This file contains HEAVILY modified routines from sphinx.ext.napoleon, from version 1.4.4

    This has been vendored into MDT because the modification makes use of
    private functions which have already changed in the dev branch.

    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    :copyright: Copyright 2007-2016 by the Sphinx team, see sphinxlicense/AUTHORS.
    :license: BSD, see sphinxlicense/LICENSE for details.
"""

import collections
import re

import sys

_google_section_regex = re.compile(r'^(\s|\w)+:\s*$')
_google_typed_arg_regex = re.compile(r'\s*(.+?)\s*\(\s*(.+?)\s*\)')
_single_colon_regex = re.compile(r'(?<!:):(?!:)')
_xref_regex = re.compile(r'(:\w+:\S+:`.+?`|:\S+:`.+?`|`.+?`)')
_bullet_list_regex = re.compile(r'^(\*|\+|\-)(\s+\S|\s*$)')
_enumerated_list_regex = re.compile(
    r'^(?P<paren>\()?'
    r'(\d+|#|[ivxlcdm]+|[IVXLCDM]+|[a-zA-Z])'
    r'(?(paren)\)|\.)(\s+\S|\s*$)')


[docs]class GoogleDocArgumentInjector(object):

    SECTIONS = set('args arguments parameters'.split())

    def __init__(self, docstring, prepare=True):
        # this routine has been modified - it's been streamlined for the current purpose

        if prepare:
            if docstring is None:
                self.docstring = []
            else:
                self.docstring = prepare_docstring(docstring)

        elif isinstance(docstring, basestring):
            self.docstring = docstring.splitlines()

        else:
            self.docstring = docstring

        self.lines_before_args = []
        self.arg_section = []
        self.lines_after_args = []
        self.args = collections.OrderedDict()
        self.arg_indent = None
        self.arg_section_name = 'Args'  # default, can be overwritten by the actual section name


        self._what = 'function'
        self._lines = list(self.docstring)
        self._line_iter = modify_iter(self.docstring, modifier=lambda s: s.rstrip())
        self._parsed_lines = []
        self._is_in_section = False
        self._section_indent = 0

        self._sections = {
                'args': self._parse_parameters_section,
                'arguments': self._parse_parameters_section,
                'attributes': None,
                'example': None,
                'examples': None,
                'keyword args': None,
                'keyword arguments': None,
                'methods': None,
                'note': None,
                'notes': None,
                'other parameters': None,
                'parameters': self._parse_parameters_section,
                'return': None,
                'returns': None,
                'raises': None,
                'references': None,
                'see also': None,
                'todo': None,
                'warning': None,
                'warnings': None,
                'warns': None,
                'yield': None,
                'yields': None,
            }

        self.parse()

[docs]    def new_docstring(self):
        """ Create a new docstring with the current state of the argument list

        Returns:
            str: docstring with modified argument list
        """
        newlines = list(self.lines_before_args)
        if self.args:
            newlines.append(' '*self.arg_indent + self.arg_section_name + ':')
            newlines.extend(self._indent(self.args.values(), self.arg_indent+4))
            newlines.append('')
        newlines.extend(self.lines_after_args)

        return '\n'.join(newlines)

[docs]    def parse(self):
        """ This method is a modified version of GoogleDocstring._parse
        """
        self._parsed_lines = self._consume_empty()

        found_args = lines_are_args = False

        while self._line_iter.has_next():
            if self._is_section_header():
                try:
                    section = self._consume_section_header()
                    self._is_in_section = True
                    self._section_indent = self._get_current_indent()

                    lines = [section + ':']

                    if section.lower() in self.SECTIONS:
                        lines.extend(self._sections[section.lower()](section))
                        found_args = True
                        lines_are_args = True
                    else:
                        lines.extend(self._consume_to_next_section())

                finally:
                    self._is_in_section = False
                    self._section_indent = 0
            else:
                if not self._parsed_lines:
                    lines = self._consume_contiguous()+self._consume_empty()
                else:
                    lines = self._consume_to_next_section()

            if lines_are_args:
                lines_are_args = False
                self.arg_section.extend(lines)
            elif found_args:
                self.lines_after_args.extend(lines)
            else:
                self.lines_before_args.extend(lines)

            self._parsed_lines.extend(lines)
            if self.arg_indent is None:
                self.arg_indent = self._get_current_indent()


    def _parse_parameters_section(self, section):
        """ This method was heavily modified to store information instead of formatting it for rst
        """
        self.arg_section_name = section
        fields = self._consume_fields()
        num_indent = self._get_current_indent()
        self.arg_indent = num_indent
        lines = []

        for _name, _type, _desc in fields:
            _desc = self._strip_empty(_desc)
            if isinstance(_desc, list):
                _desc = '\n    '.join(_desc)

            if _type:
                line = '%s (%s): %s' % (_name, _type, _desc)
            else:
                line = '%s: %s' % (_name, _desc)
            self.args[_name.lstrip('\*')] = line

            lines.append(line)

        lines = self._indent(lines, num_indent+4)
        if lines[-1].strip():
            lines.append('')
        return lines

    def _indent(self, lines, n=4):
        # MDT: modified to include breaks within lines
        sp = ' ' * n
        return [sp + line.replace('\n', '\n'+sp) for line in lines]


    ######################################################
    ### All routines below are unmodified              ###
    ######################################################
[docs]    def lines(self):
        """Return the parsed lines of the docstring in reStructuredText format.

        Returns
        -------
        :obj:`list` of :obj:`str`
            The lines of the docstring in a list.

        """
        return self._parsed_lines

    def _consume_indented_block(self, indent=1):
        lines = []
        line = self._line_iter.peek()
        while(not self._is_section_break() and
              (not line or self._is_indented(line, indent))):
            lines.append(next(self._line_iter))
            line = self._line_iter.peek()
        return lines

    def _consume_contiguous(self):
        lines = []
        while (self._line_iter.has_next() and
               self._line_iter.peek() and
               not self._is_section_header()):
            lines.append(next(self._line_iter))
        return lines

    def _consume_empty(self):
        lines = []
        line = self._line_iter.peek()
        while self._line_iter.has_next() and not line:
            lines.append(next(self._line_iter))
            line = self._line_iter.peek()
        return lines

    def _consume_field(self, parse_type=True, prefer_type=False):
        line = next(self._line_iter)

        before, colon, after = self._partition_field_on_colon(line)
        _name, _type, _desc = before, '', after

        if parse_type:
            match = _google_typed_arg_regex.match(before)
            if match:
                _name = match.group(1)
                _type = match.group(2)

        _name = self._escape_args_and_kwargs(_name)

        if prefer_type and not _type:
            _type, _name = _name, _type
        indent = self._get_indent(line) + 1
        _desc = [_desc] + self._dedent(self._consume_indented_block(indent))
        _desc = self.__class__(_desc, prepare=False).lines()
        return _name, _type, _desc

    def _consume_fields(self, parse_type=True, prefer_type=False):
        self._consume_empty()
        fields = []
        while not self._is_section_break():
            _name, _type, _desc = self._consume_field(parse_type, prefer_type)
            if _name or _type or _desc:
                fields.append((_name, _type, _desc,))
        return fields

    def _consume_section_header(self):
        section = next(self._line_iter)
        stripped_section = section.strip(':')
        if stripped_section.lower() in self._sections:
            section = stripped_section
        return section

    def _consume_to_end(self):
        lines = []
        while self._line_iter.has_next():
            lines.append(next(self._line_iter))
        return lines

    def _consume_to_next_section(self):
        self._consume_empty()
        lines = []
        while not self._is_section_break():
            lines.append(next(self._line_iter))
        return lines + self._consume_empty()

    def _dedent(self, lines, full=False):
        if full:
            return [line.lstrip() for line in lines]
        else:
            min_indent = self._get_min_indent(lines)
            return [line[min_indent:] for line in lines]

    def _escape_args_and_kwargs(self, name):
        if name[:2] == '**':
            return r'\*\*' + name[2:]
        elif name[:1] == '*':
            return r'\*' + name[1:]
        else:
            return name

    def _fix_field_desc(self, desc):
        if self._is_list(desc):
            desc = [''] + desc
        elif desc[0].endswith('::'):
            desc_block = desc[1:]
            indent = self._get_indent(desc[0])
            block_indent = self._get_initial_indent(desc_block)
            if block_indent > indent:
                desc = [''] + desc
            else:
                desc = ['', desc[0]] + self._indent(desc_block, 4)
        return desc

    def _get_current_indent(self, peek_ahead=0):
        line = self._line_iter.peek(peek_ahead + 1)[peek_ahead]
        while line != self._line_iter.sentinel:
            if line:
                return self._get_indent(line)
            peek_ahead += 1
            line = self._line_iter.peek(peek_ahead + 1)[peek_ahead]
        return 0

    def _get_indent(self, line):
        for i, s in enumerate(line):
            if not s.isspace():
                return i
        return len(line)

    def _get_initial_indent(self, lines):
        for line in lines:
            if line:
                return self._get_indent(line)
        return 0

    def _get_min_indent(self, lines):
        min_indent = None
        for line in lines:
            if line:
                indent = self._get_indent(line)
                if min_indent is None:
                    min_indent = indent
                elif indent < min_indent:
                    min_indent = indent
        return min_indent or 0

    def _is_indented(self, line, indent=1):
        for i, s in enumerate(line):
            if i >= indent:
                return True
            elif not s.isspace():
                return False
        return False

    def _is_list(self, lines):
        if not lines:
            return False
        if _bullet_list_regex.match(lines[0]):
            return True
        if _enumerated_list_regex.match(lines[0]):
            return True
        if len(lines) < 2 or lines[0].endswith('::'):
            return False
        indent = self._get_indent(lines[0])
        next_indent = indent
        for line in lines[1:]:
            if line:
                next_indent = self._get_indent(line)
                break
        return next_indent > indent

    def _is_section_header(self):
        section = self._line_iter.peek().lower()
        match = _google_section_regex.match(section)
        if match and section.strip(':') in self._sections:
            header_indent = self._get_indent(section)
            section_indent = self._get_current_indent(peek_ahead=1)
            return section_indent > header_indent

        return False

    def _is_section_break(self):
        line = self._line_iter.peek()
        return (not self._line_iter.has_next() or
                self._is_section_header() or
                (self._is_in_section and
                    line and
                    not self._is_indented(line, self._section_indent)))

    def _partition_field_on_colon(self, line):
        before_colon = []
        after_colon = []
        colon = ''
        found_colon = False
        for i, source in enumerate(_xref_regex.split(line)):
            if found_colon:
                after_colon.append(source)
            else:
                m = _single_colon_regex.search(source)
                if (i % 2) == 0 and m:
                    found_colon = True
                    colon = source[m.start(): m.end()]
                    before_colon.append(source[:m.start()])
                    after_colon.append(source[m.end():])
                else:
                    before_colon.append(source)

        return ("".join(before_colon).strip(),
                colon,
                "".join(after_colon).strip())

    def _strip_empty(self, lines):
        if lines:
            start = -1
            for i, line in enumerate(lines):
                if line:
                    start = i
                    break
            if start == -1:
                lines = []
            end = -1
            for i in reversed(range(len(lines))):
                line = lines[i]
                if line:
                    end = i
                    break
            if start > 0 or end + 1 < len(lines):
                lines = lines[start:end + 1]
        return lines



[docs]class peek_iter(object):
    """An iterator object that supports peeking ahead.
    Parameters
    ----------
    o : iterable or callable
        `o` is interpreted very differently depending on the presence of
        `sentinel`.
        If `sentinel` is not given, then `o` must be a collection object
        which supports either the iteration protocol or the sequence protocol.
        If `sentinel` is given, then `o` must be a callable object.
    sentinel : any value, optional
        If given, the iterator will call `o` with no arguments for each
        call to its `next` method; if the value returned is equal to
        `sentinel`, :exc:`StopIteration` will be raised, otherwise the
        value will be returned.
    See Also
    --------
    `peek_iter` can operate as a drop in replacement for the built-in
    `iter <https://docs.python.org/2/library/functions.html#iter>`_ function.
    Attributes
    ----------
    sentinel
        The value used to indicate the iterator is exhausted. If `sentinel`
        was not given when the `peek_iter` was instantiated, then it will
        be set to a new object instance: ``object()``.
    """
    def __init__(self, *args):
        """__init__(o, sentinel=None)"""
        self._iterable = iter(*args)
        self._cache = collections.deque()
        if len(args) == 2:
            self.sentinel = args[1]
        else:
            self.sentinel = object()

    def __iter__(self):
        return self

    def __next__(self, n=None):
        # note: prevent 2to3 to transform self.next() in next(self) which
        # causes an infinite loop !
        return getattr(self, 'next')(n)

    def _fillcache(self, n):
        """Cache `n` items. If `n` is 0 or None, then 1 item is cached."""
        if not n:
            n = 1
        try:
            while len(self._cache) < n:
                self._cache.append(next(self._iterable))
        except StopIteration:
            while len(self._cache) < n:
                self._cache.append(self.sentinel)

[docs]    def has_next(self):
        """Determine if iterator is exhausted.
        Returns
        -------
        bool
            True if iterator has more items, False otherwise.
        Note
        ----
        Will never raise :exc:`StopIteration`.
        """
        return self.peek() != self.sentinel

[docs]    def next(self, n=None):
        """Get the next item or `n` items of the iterator.
        Parameters
        ----------
        n : int or None
            The number of items to retrieve. Defaults to None.
        Returns
        -------
        item or list of items
            The next item or `n` items of the iterator. If `n` is None, the
            item itself is returned. If `n` is an int, the items will be
            returned in a list. If `n` is 0, an empty list is returned.
        Raises
        ------
        StopIteration
            Raised if the iterator is exhausted, even if `n` is 0.
        """
        self._fillcache(n)
        if not n:
            if self._cache[0] == self.sentinel:
                raise StopIteration
            if n is None:
                result = self._cache.popleft()
            else:
                result = []
        else:
            if self._cache[n - 1] == self.sentinel:
                raise StopIteration
            result = [self._cache.popleft() for i in range(n)]
        return result

[docs]    def peek(self, n=None):
        """Preview the next item or `n` items of the iterator.
        The iterator is not advanced when peek is called.
        Returns
        -------
        item or list of items
            The next item or `n` items of the iterator. If `n` is None, the
            item itself is returned. If `n` is an int, the items will be
            returned in a list. If `n` is 0, an empty list is returned.
            If the iterator is exhausted, `peek_iter.sentinel` is returned,
            or placed as the last item in the returned list.
        Note
        ----
        Will never raise :exc:`StopIteration`.
        """
        self._fillcache(n)
        if n is None:
            result = self._cache[0]
        else:
            result = [self._cache[i] for i in range(n)]
        return result


[docs]class modify_iter(peek_iter):
    """An iterator object that supports modifying items as they are returned.
    Parameters
    ----------
    o : iterable or callable
        `o` is interpreted very differently depending on the presence of
        `sentinel`.
        If `sentinel` is not given, then `o` must be a collection object
        which supports either the iteration protocol or the sequence protocol.
        If `sentinel` is given, then `o` must be a callable object.
    sentinel : any value, optional
        If given, the iterator will call `o` with no arguments for each
        call to its `next` method; if the value returned is equal to
        `sentinel`, :exc:`StopIteration` will be raised, otherwise the
        value will be returned.
    modifier : callable, optional
        The function that will be used to modify each item returned by the
        iterator. `modifier` should take a single argument and return a
        single value. Defaults to ``lambda x: x``.
        If `sentinel` is not given, `modifier` must be passed as a keyword
        argument.
    Attributes
    ----------
    modifier : callable
        `modifier` is called with each item in `o` as it is iterated. The
        return value of `modifier` is returned in lieu of the item.
        Values returned by `peek` as well as `next` are affected by
        `modifier`. However, `modify_iter.sentinel` is never passed through
        `modifier`; it will always be returned from `peek` unmodified.
    Example
    -------
    >>> a = ["     A list    ",
    ...      "   of strings  ",
    ...      "      with     ",
    ...      "      extra    ",
    ...      "   whitespace. "]
    >>> modifier = lambda s: s.strip().replace('with', 'without')
    >>> for s in modify_iter(a, modifier=modifier):
    ...   print('"%s"' % s)
    "A list"
    "of strings"
    "without"
    "extra"
    "whitespace."
    """
    def __init__(self, *args, **kwargs):
        """__init__(o, sentinel=None, modifier=lambda x: x)"""
        if 'modifier' in kwargs:
            self.modifier = kwargs['modifier']
        elif len(args) > 2:
            self.modifier = args[2]
            args = args[:2]
        else:
            self.modifier = lambda x: x
        if not callable(self.modifier):
            raise TypeError('modify_iter(o, modifier): '
                            'modifier must be callable')
        super(modify_iter, self).__init__(*args)

    def _fillcache(self, n):
        """Cache `n` modified items. If `n` is 0 or None, 1 item is cached.
        Each item returned by the iterator is passed through the
        `modify_iter.modified` function before being cached.
        """
        if not n:
            n = 1
        try:
            while len(self._cache) < n:
                self._cache.append(self.modifier(next(self._iterable)))
        except StopIteration:
            while len(self._cache) < n:
                self._cache.append(self.sentinel)


[docs]def prepare_docstring(s, ignore=1):
    """Convert a docstring into lines of parseable reST.  Remove common leading
    indentation, where the indentation of a given number of lines (usually just
    one) is ignored.
    Return the docstring as a list of lines usable for inserting into a docutils
    ViewList (used as argument of nested_parse().)  An empty line is added to
    act as a separator between this docstring and following content.
    """
    lines = s.expandtabs().splitlines()
    # Find minimum indentation of any non-blank lines after ignored lines.
    margin = sys.maxsize
    for line in lines[ignore:]:
        content = len(line.lstrip())
        if content:
            indent = len(line) - content
            margin = min(margin, indent)
    # Remove indentation from ignored lines.
    for i in range(ignore):
        if i < len(lines):
            lines[i] = lines[i].lstrip()
    if margin < sys.maxsize:
        for i in range(ignore, len(lines)):
            lines[i] = lines[i][margin:]
    # Remove any leading blank lines.
    while lines and not lines[0]:
        lines.pop(0)
    # make sure there is an empty line at the end
    if lines and lines[-1]:
        lines.append('')
    return lines