Edit

kc3-lang/pkgconf/doc/extract.py

Branch :

  • Show log

    Commit

  • Author : William Pitcock
    Date : 2016-12-10 18:09:30
    Hash : 9640592a
    Message : doc: extract: further cleanups

  • doc/extract.py
  • # derived from https://github.com/jeanralphaviles/comment_parser/blob/master/comment_parser/parsers/c_parser.py
    # MIT license - https://github.com/jeanralphaviles/comment_parser/blob/master/LICENSE
    
    
    class Comment:
        """A single comment lifted out of a C-family source file.

        Attributes are plain data set by the constructor:
            comment:   raw comment text, without the comment delimiters.
            line:      line number associated with the comment.
            multiline: True for a ``/* ... */`` comment, False for ``// ...``.
        """

        def __init__(self, comment, line, multiline):
            self.comment = comment
            self.line = line
            self.multiline = multiline

        def __repr__(self):
            return "Comment(comment={!r}, line={!r}, multiline={!r})".format(
                self.comment, self.line, self.multiline)

        @property
        def clean_text(self):
            """Comment text with the per-line decoration removed.

            Single-line comments are simply stripped of surrounding whitespace.
            For multi-line comments, each line starting with ' * ' contributes
            the remainder of that line, each line that is exactly two characters
            long contributes an empty line, and every other line is dropped.
            """
            if not self.multiline:
                return self.comment.strip()

            kept = []
            for raw in self.comment.splitlines():
                if raw[:3] == ' * ':
                    kept.append(raw[3:])
                    continue
                if len(raw) == 2:
                    # Typically the bare ' *' separator between paragraphs.
                    kept.append('')
            return '\n'.join(kept)

        @property
        def doc_text(self):
            """Text following a leading '!doc' marker, or None if there is none.

            The marker and the single character after it are skipped.
            """
            cleaned = self.clean_text
            if not cleaned.startswith('!doc'):
                return None
            return cleaned[5:]
    
    
    class FileError(Exception):
        """Raised when a source file cannot be opened or read."""
    
    
    class UnterminatedCommentError(Exception):
        """Raised when a multi-line comment is still open at end of file."""
    
    
    def extract_comments(filename):
        """Extract every comment from a C-family source file.

        Two comment forms are recognised:
            - Single-line comments begin with '//' and run to the end of line.
            - Multi-line comments begin with '/*' and end with '*/' and may
              span several lines.
        Text inside double-quoted string literals is skipped, honouring
        backslash escapes, so comment markers inside strings are ignored.

        Known limitations: preprocessor directives are not interpreted, only
        '"' opens a literal (character literals are not tracked), and a '"'
        that directly follows a lone '/' is not treated as a string start.

        Args:
            filename: String name of the file to extract comments from.
        Returns:
            Python list of Comment objects (defined in this module) in the
            order they appear in the file. Each comment's text excludes its
            delimiters. A single-line comment records the line it is on; a
            multi-line comment records the line on which it starts.
        Raises:
            FileError: File was unable to be opened or read.
            UnterminatedCommentError: Encountered an unterminated multi-line
                comment.
        """
        # Scanner states. Compared with ==, never `is`: identity of ints is an
        # implementation detail and triggers SyntaxWarning on Python 3.8+.
        CODE = 0            # Outside any comment or string literal.
        SLASH = 1           # Saw '/', which may open a comment.
        LINE_COMMENT = 2    # Inside a '//' comment.
        BLOCK_COMMENT = 3   # Inside a '/* */' comment.
        BLOCK_STAR = 4      # Inside a '/* */' comment, just saw '*'.
        STRING = 5          # Inside a string literal.
        STRING_ESCAPE = 6   # Inside a string literal, just saw a backslash.

        # Only the file I/O can raise OSError, so keep the try block minimal.
        try:
            with open(filename, 'r') as source_file:
                source = source_file.read()
        except OSError as exception:
            raise FileError(str(exception)) from exception

        state = CODE
        current_comment = []    # Characters of the comment being collected.
        comments = []
        line_counter = 1
        comment_start = 1       # Line on which the open multi-line comment began.

        for char in source:
            if state == CODE:
                if char == '/':
                    state = SLASH
                elif char == '"':
                    state = STRING
            elif state == SLASH:
                # Classify the character after '/'.
                if char == '/':
                    state = LINE_COMMENT
                elif char == '*':
                    comment_start = line_counter
                    state = BLOCK_COMMENT
                else:
                    state = CODE
            elif state == LINE_COMMENT:
                if char == '\n':
                    # line_counter is bumped below, so it still names the
                    # line the comment is on.
                    comments.append(
                        Comment(''.join(current_comment), line_counter, False))
                    current_comment = []
                    state = CODE
                else:
                    current_comment.append(char)
            elif state == BLOCK_COMMENT:
                if char == '*':
                    state = BLOCK_STAR
                else:
                    current_comment.append(char)
            elif state == BLOCK_STAR:
                if char == '/':
                    comments.append(
                        Comment(''.join(current_comment), comment_start, True))
                    current_comment = []
                    state = CODE
                else:
                    # The pending '*' was ordinary comment text after all.
                    current_comment.append('*')
                    # A further '*' keeps us waiting for a closing '/'.
                    if char != '*':
                        current_comment.append(char)
                        state = BLOCK_COMMENT
            elif state == STRING:
                if char == '"':
                    state = CODE
                elif char == '\\':
                    state = STRING_ESCAPE
            elif state == STRING_ESCAPE:
                # The escaped character is consumed without inspection.
                state = STRING
            if char == '\n':
                line_counter += 1

        # End of input.
        if state in (BLOCK_COMMENT, BLOCK_STAR):
            raise UnterminatedCommentError()
        if state == LINE_COMMENT:
            # File ended without a trailing newline after a '//' comment.
            comments.append(
                Comment(''.join(current_comment), line_counter, False))
        return comments
    
    
    if __name__ == '__main__':
        import sys
        from pprint import pprint

        # Print the documentation text of every comment in the file named by
        # the first command-line argument, skipping comments that carry none.
        for found in extract_comments(sys.argv[1]):
            if found.doc_text:
                print(found.doc_text)