# Copyright (C) 2012
# See LICENSE.txt for details.

"""
General Python docstring wrapper
================================

Utility for wrapping docstrings in Python; specifically, docstrings in
Epytext or Sphinx reStructuredText format.

The wrapping herein generally adheres to all the conventions set forth by the
Twisted project U{http://twistedmatrix.com/}, but should be generally accurate
for most Python projects.
"""

from __future__ import unicode_literals

import argparse
import sys
import re
from io import StringIO
from uuid import uuid4

__all__ = [
    "wrapPythonDocstring"
]

# Text helpers that differ between Python 2 (byte-oriented stdio, separate
# ``unicode`` type) and Python 3.
if sys.version_info[0] <= 2:
    makeID = lambda: unicode(uuid4())  # noqa: F821 (Python 2 only)
    fromStdin = lambda s: s.decode("utf-8")
    toStdout = lambda s: s.encode("utf-8")
    PY2 = True
else:
    makeID = lambda: str(uuid4())
    fromStdin = lambda s: s
    toStdout = lambda s: s
    PY2 = False


def isUnderline(expr):
    """
    Is the given text a heading underline, i.e. made up entirely of C{=} or
    entirely of C{-} characters?
    """
    return bool(re.match("[=]+$", expr) or re.match("[-]+$", expr))


def startslist(x):
    """
    Does the given word start a list item?  That is, is it a lone C{-}, or a
    number (or C{#}) followed by a period?
    """
    return (x == '-' or
            (x.endswith(".") and (x[:-1].isdigit() or x[:-1] == '#')))


def isAcronym(word):
    """
    Is the given word an acronym (separated by periods, so it doesn't end a
    sentence)?  cf. lots of interesting acronyms, e.g. this is one.  solve for
    x.  a.b.c. is also one.  You might also want to give an example
    parenthetically (e.g. this one).
    """
    word = word.strip("(")
    # Every second character must be a period: "e.g.", "a.b.c.", ...
    return ((len(word) > 2 and word[1::2] == '.' * (len(word) // 2)) or
            word in ["cf.", "viz."])


def isSentenceEnd(prevWord):
    """
    Is the given word the end of a sentence?
    """
    if not prevWord:
        return False
    # Exclamation points and question marks generally end sentences.
    if prevWord[-1] in "?!":
        return True
    # Now, if it's not a period, it's probably not the end of a sentence.
    if prevWord[-1] != ".":
        return False
    if isAcronym(prevWord):
        return False
    return True


def beginsField(line):
    """
    Does the given (stripped) line begin an epytext or ReST field?
    """
    if line.startswith("@"):
        return True
    sphinxwords = """
    param params return type rtype summary var ivar cvar raises raise except
    exception
    """.split()
    return line.startswith(tuple(":" + word for word in sphinxwords))


class RegularParagraph(object):
    """
    A run of words that is re-flowed to the target width.  Paragraphs form a
    doubly linked list through C{more} / C{prev}.
    """

    # Extra indentation applied to continuation lines; overridden by
    # FieldParagraph.
    otherIndent = ""

    def __init__(self, pointTracker, fixedIndent="", hangIndent="",
                 followIndent="", originalIndent=0):
        self.words = []
        self.fixedIndent = fixedIndent
        self.hangIndent = hangIndent
        self.followIndent = followIndent
        self.more = None
        self.prev = None
        self.pointTracker = pointTracker
        # originalIndent is the width of the indentation of the line this
        # paragraph originally came from in the input text.
        self.originalIndent = originalIndent
        self._unwrappedLines = 0
        self._headingType = None
        self._headingPoints = []

    def matchesTag(self, other):
        """
        Regular paragraphs never group with a neighbouring paragraph.
        """
        return False

    # NOTE(review): only Python 2 consults __nonzero__; Python 3 looks for
    # __bool__, so there every paragraph object is truthy and all() below
    # yields empty paragraphs as well.  Left untouched on purpose, since
    # defining __bool__ would change the Python 3 output -- confirm which
    # behaviour is wanted before changing it.
    def __nonzero__(self):
        return bool(self.words)

    def all(self):
        """
        Iterate over this paragraph and every paragraph linked after it,
        skipping those that are falsy.
        """
        node = self
        while node is not None:
            if node:
                yield node
            node = node.more

    def setIsHeading(self, headingType):
        """
        Mark this paragraph as a heading underlined with C{headingType}.
        """
        self._headingType = headingType

    def isHeading(self):
        return bool(self._headingType)

    def connect(self, more):
        """
        Link C{more} after this paragraph and return it.
        """
        self.more = more
        more.prev = self
        return more

    def _firstWord(self):
        """
        Return the first real word of this paragraph with any point marker
        removed, or an empty string if there is none.
        """
        for word in self.pointTracker.filterWords(self.words):
            return word
        return ""

    def islist(self):
        """
        Is this paragraph a list item?  The check ignores the point marker so
        that the cursor position cannot change the answer.
        """
        return startslist(self._firstWord())

    def previousListPeer(self):
        """
        Find a previous paragraph that is also a list element, of the same
        indentation level if one exists.
        """
        previous = self.prev
        matched = None
        while previous:
            if not previous.words:
                previous = previous.prev
                continue
            if not previous.islist():
                break
            if previous.originalIndent <= self.originalIndent:
                return previous
            if previous.originalIndent > self.originalIndent:
                matched = previous
            previous = previous.prev
        if matched:
            return matched

    def add(self, line):
        """
        Feed one input line to this paragraph.

        @param line: a single line of the (point-annotated) docstring.

        @return: the paragraph that should receive the next line; either
            this one or a newly connected paragraph.
        """
        clean = self.pointTracker.peek(line)
        stripped = clean.strip()
        thisLineIndent = len(clean) - len(clean.lstrip())
        if stripped:
            self._unwrappedLines += 1
            active = self
            firstword = list(self.pointTracker.filterWords(line.split()))[0]
            if beginsField(stripped):
                fp = FieldParagraph(pointTracker=self.pointTracker,
                                    originalIndent=thisLineIndent)
                fp.words.extend(line.split())
                active = active.connect(fp)
            elif isUnderline(stripped) and self._unwrappedLines == 2:
                # This paragraph is actually a section heading.
                active.setIsHeading(stripped[0])
                self._headingPoints = self.pointTracker.extractPoints(line)
                # FIXME: should respect leading indentation.
                active = active.connect(
                    self.genRegular(originalIndent=thisLineIndent)
                )
            elif startslist(firstword):
                # Aesthetically I prefer a 2-space indent here, but the
                # convention in the codebase seems to be 4 spaces.
                LIST_INDENT = 4
                # FIXME: this also needs to respect leading indentation so it
                # can properly represent nested lists.
                hangIndent = self.pointTracker.lengthOf(firstword) + 1
                fi = self.fixedIndent
                if not self.islist():
                    fi += (" " * LIST_INDENT)
                fp = RegularParagraph(
                    pointTracker=self.pointTracker,
                    fixedIndent=fi,
                    hangIndent=" " * hangIndent,
                    followIndent=self.followIndent,
                    originalIndent=thisLineIndent,
                )
                fp.words.extend(line.split())
                fp.prev = self
                peer = fp.previousListPeer()
                if peer:
                    if peer.originalIndent >= fp.originalIndent:
                        fp.fixedIndent = peer.fixedIndent
                    else:
                        fp.fixedIndent = (peer.fixedIndent +
                                          (" " * LIST_INDENT))
                active = active.connect(fp)
            else:
                self.words.extend(line.split())
            if stripped.endswith("::"):
                active = active.connect(PreFormattedParagraph(
                    active,
                    indentBegins=thisLineIndent
                ))
            return active
        else:
            # A blank line; it may still carry the point marker, which has
            # to be kept so the cursor can be repositioned.
            rawstrip = line.strip()
            if rawstrip:
                self.words.append(rawstrip)
            if list(self.pointTracker.filterWords(self.words)):
                return self.connect(
                    self.genRegular(originalIndent=thisLineIndent)
                )
        return self

    def wrap(self, output, indentation, width, initialBlank, singleSpace):
        """
        Write this paragraph to C{output}, re-flowed to C{width}.

        @param output: file-like object supporting C{write} and C{tell}.
        @param indentation: the base indentation string for every line.
        @param width: maximum line width.
        @param initialBlank: if false, the first line starts without
            indentation and with a reduced width budget.
        @param singleSpace: if true, use a single space between sentences
            instead of two.
        """
        maxWidthThisLine = width
        if not self.words:
            return
        if initialBlank:
            thisLine = self.firstIndent(indentation)
        else:
            thisLine = ''
            maxWidthThisLine -= (3 + len(indentation))
        first = True
        prevWord = ''
        for num, word in enumerate(self.words):
            if not self.pointTracker.isWord(word):
                # A bare point marker: glue it on without any spacing.
                thisLine += word
                continue
            normalPrevWord = self.pointTracker.peek(prevWord)
            if num == 1 and startslist(normalPrevWord):
                spaces = 1
            elif isSentenceEnd(normalPrevWord) and not singleSpace:
                spaces = 2
            else:
                spaces = 1
            prevWord = word
            thisLineWidthWithThisWord = (
                self.pointTracker.lengthOf(thisLine) +
                self.pointTracker.lengthOf(word) + spaces
            )
            if thisLineWidthWithThisWord <= maxWidthThisLine or first:
                if first:
                    first = not first
                else:
                    thisLine += (" " * spaces)
                thisLine += word
            else:
                output.write(self.pointTracker.scan(thisLine, output.tell()))
                output.write("\n")
                maxWidthThisLine = width
                thisLine = self.restIndent(indentation) + word
        output.write(self.pointTracker.scan(thisLine, output.tell()))
        output.write("\n")
        if self.isHeading():
            # Re-emit the underline, as wide as the heading text itself.
            indentText = self.firstIndent(indentation)
            lineSize = self.pointTracker.lengthOf(thisLine) - len(indentText)
            output.write(self.pointTracker.scan(
                indentText + ''.join(self._headingPoints) +
                (self._headingType * lineSize),
                output.tell()
            ))
            output.write("\n")

    def firstIndent(self, indentation):
        """
        Indentation of the first line of this paragraph.
        """
        return indentation + self.fixedIndent

    def restIndent(self, indentation):
        """
        Indentation of every line after the first.
        """
        return (indentation + self.fixedIndent + self.hangIndent +
                self.otherIndent)

    def genRegular(self, originalIndent=0):
        """
        Create (but do not connect) a regular paragraph to follow this one.
        """
        return RegularParagraph(pointTracker=self.pointTracker,
                                fixedIndent=self.nextIndent(),
                                followIndent=self.nextIndent(),
                                originalIndent=originalIndent)

    def nextIndent(self):
        return self.followIndent


class FieldParagraph(RegularParagraph):
    """
    A paragraph that begins with an epytext (C{@param ...}) or Sphinx
    (C{:param ...:}) field.
    """

    @property
    def otherIndent(self):
        """
        Compute the other indent appropriate to the length of a sphinx field,
        if we're wrapping a sphinx field.
        """
        # Work on the marker-free words so that the cursor position cannot
        # influence the computed indentation.
        words = list(self.pointTracker.filterWords(self.words))
        if words and words[0].startswith(':'):
            accumulatedLength = 0
            for word in words:
                # Add the length of the word
                accumulatedLength += len(word)
                # Add the following space
                accumulatedLength += 1
                # If it gets too long then give up and go with the default.
                if accumulatedLength > 10:
                    break
                if word.endswith(":"):
                    return accumulatedLength * " "
        # NOTE(review): single-space literal reproduced as found; confirm it
        # was not meant to be a wider indent.
        return " "

    def nextIndent(self):
        # NOTE(review): single-space literal reproduced as found; confirm.
        return " "

    def matchesTag(self, other):
        """
        Should this field be grouped (no blank line) with C{other}, the
        paragraph emitted just before it?
        """
        if not isinstance(other, FieldParagraph):
            return False
        myWords = list(self.pointTracker.filterWords(self.words))
        theirWords = list(self.pointTracker.filterWords(other.words))
        tags = set([myWords[0], theirWords[0]])
        if tags == set(["@return:", "@rtype:"]):
            # matching @return and @rtype fields.
            return True
        elif myWords[0][0] == theirWords[0][0] == ':':
            # hack for sphinx: prevailing style seems to be 'group @params
            # together'
            if myWords[0] == theirWords[0]:
                return True
            elif tags == set([":return:", ":rtype:"]):
                return True
            elif (tags == set([":param", ":type"]) and
                  len(myWords) > 1 and len(theirWords) > 1 and
                  myWords[1] == theirWords[1]):
                # same as "matching @param and @type" below, but stricter;
                # FIXME: these should be merged.
                return True
            else:
                return False
        elif len(myWords) > 1 and len(theirWords) > 1:
            # matching @param and @type fields.
            return myWords[1] == theirWords[1]
        return False


class PreFormattedParagraph(object):
    """
    A literal block (introduced by a line ending in C{::}) whose lines are
    emitted without re-flowing.
    """

    def __init__(self, before, indentBegins):
        self.lines = []
        self.before = before
        pointTracker = before.pointTracker
        fixedIndent = (before.fixedIndent + before.hangIndent +
                       before.otherIndent)
        self.indentBegins = indentBegins
        self.fixedIndent = fixedIndent
        self.more = None
        self.prev = None
        self.pointTracker = pointTracker

    def islist(self):
        """
        It's not a list.
        """
        return False

    def connect(self, more):
        """
        Link C{more} after this paragraph and return it.
        """
        self.more = more
        more.prev = self
        return more

    @property
    def originalIndent(self):
        return self.indentBegins

    @property
    def words(self):
        """
        Used by wrapper below to see if there are any words in a given
        paragraph and whether it should be skipped.
        """
        return bool(self.lines)

    def matchesTag(self, other):
        return False

    def add(self, line):
        """
        Feed one input line to this block.  A non-blank line indented no
        deeper than the introducing line ends the block and is handed to a
        fresh regular paragraph.

        @return: the paragraph that should receive the next line.
        """
        actualLine = self.pointTracker.peek(line)
        if actualLine.strip():
            lineIndent = len(actualLine) - len(actualLine.lstrip())
            if lineIndent <= self.indentBegins:
                following = self.connect(self.before.genRegular())
                return following.add(line)
            self.lines.append(line.rstrip())
        else:
            self.lines.append(line.strip())
        return self

    def fixIndentation(self):
        """
        Drop leading/trailing blank lines and strip the indentation common
        to all remaining lines, keeping the point marker if present.
        """
        while self.lines and not self.lines[0].strip():
            self.lines.pop(0)
        while self.lines and not self.lines[-1].strip():
            self.lines.pop()
        if not self.lines:
            return
        cleanLines = list(map(self.pointTracker.peek, self.lines))
        commonLeadingIndent = min([len(x) - len(x.lstrip())
                                   for x in cleanLines if x.strip()] or [0])
        newLines = []
        for actualLine, line in zip(cleanLines, self.lines):
            if actualLine != line and line[:commonLeadingIndent].strip():
                # There's a marker, and it's in the leading whitespace.
                # Explicitly reposition the marker at the beginning of the
                # fixed indentation.
                line = (self.pointTracker.marker +
                        actualLine[commonLeadingIndent:])
            else:
                line = line.rstrip()[commonLeadingIndent:]
            newLines.append(line)
        self.lines = newLines

    def wrap(self, output, indentation, width, initialBlank, singleSpace):
        """
        Write the block's lines to C{output} verbatim (after normalising
        their indentation); C{width}, C{initialBlank} and C{singleSpace} are
        accepted for interface parity and not used.
        """
        # OK, now we know about all the lines we're going to know about.
        self.fixIndentation()
        for line in self.lines:
            if self.pointTracker.peek(line):
                # NOTE(review): single-space literal reproduced as found;
                # confirm.
                output.write(indentation + " " + self.fixedIndent)
            output.write(self.pointTracker.scan(line, output.tell()))
            output.write("\n")


class PointTracker(object):
    """
    Object for keeping track of where the insertion points are.
    """

    def __init__(self, point):
        self.point = point
        self.marker = "{" + makeID() + "}"
        self.outPoints = []

    def annotate(self, text):
        """
        Add point references to a block of text.
        """
        return text[:self.point] + self.marker + text[self.point:]

    def filterWords(self, words):
        """
        Yield each real word (see L{isWord}) with the marker removed.
        """
        for word in words:
            if self.isWord(word):
                yield self.peek(word)

    def isWord(self, text):
        """
        Is the given word actually a word, or just an artifact of the
        point-tracking process?  If it's just the point marker by itself,
        then no, it isn't, and don't insert additional whitespace after it.
        """
        return not (text == self.marker)

    def lengthOf(self, word):
        """
        How long would this word be if it didn't have any point-markers in
        it?
        """
        return len(self.peek(word))

    def peek(self, word):
        """
        What would this word look like if it didn't have any point-markers in
        it?
        """
        return word.replace(self.marker, "")

    def extractPoints(self, text):
        """
        Return a C{list} of all point markers contained in the text.
        """
        if self.marker in text:
            return [self.marker]
        return []

    def scan(self, text, offset):
        """
        Scan some text for point markers, remember them, and remove them.
        """
        idx = text.find(self.marker)
        if idx == -1:
            return text
        self.outPoints.append(idx + offset)
        return self.peek(text)


# NOTE(review): the docstring below says the indentation is always 4 spaces,
# but the default visible here is a single space -- confirm the literal.
def wrapPythonDocstring(docstring, output, indentation=" ", width=79,
                        point=0, initialBlank=True, singleSpace=False):
    """
    Wrap a given Python docstring.

    @param docstring: the docstring itself (just the stuff between the
        quotes).
    @type docstring: unicode

    @param output: The unicode output file to write the wrapped docstring to.
    @type output: L{file}-like (C{write} takes unicode.)

    @param indentation: a string (consisting only of spaces) indicating the
        amount of space to shift by.  Don't adjust this.  It's always 4
        spaces.  PEP8 says so.
    @type indentation: L{unicode}

    @param width: The maximum number of characters allowed in a wrapped line.
    @type width: L{int}

    @param point: The location of the cursor in the text, as an offset from
        the beginning of the docstring.  If this function is being used from
        within a graphical editor, this parameter can be used (in addition to
        the return value of this function) to reposition the cursor at the
        relative position which the user will expect.

    @param initialBlank: If true, a newline is written before the first
        paragraph and that paragraph is indented like the rest; if false, the
        first paragraph starts immediately and its first line gets a reduced
        width budget.

    @param singleSpace: If true, use a single space between sentences instead
        of two.

    @return: The new location of the cursor.
    """
    # TODO: multiple points; usable, for example, for start and end of a
    # currently active selection.
    pt = PointTracker(point)
    start = paragraph = RegularParagraph(pt)
    docstring = pt.annotate(docstring)
    for line in docstring.split("\n"):
        paragraph = paragraph.add(line)
    prevp = None
    for paragraph in start.all():
        if initialBlank:
            if paragraph.words and not paragraph.matchesTag(prevp):
                output.write("\n")
        prevp = paragraph
        paragraph.wrap(output, indentation, width, initialBlank, singleSpace)
        initialBlank = True
    output.write(indentation)
    return pt.outPoints[0] if pt.outPoints else 0


def indentHeuristic(lines, io):
    """
    Determine the indentation.

    @param lines: the lines of the docstring.
    @param io: unused.

    @return: a 2-tuple of (whether the first line is blank, the width of the
        first non-zero leading indentation found).
    """
    for num, line in enumerate(lines):
        if num == 0:
            initialBlank = not bool(line)
            if not initialBlank:
                continue
        indentation = (len(line) - len(line.lstrip()))
        if indentation:
            return (initialBlank, indentation)
    # TODO: investigate the case where this happens.
    return True, 0


def sampleDocstring():
    """This is a sample docstring where the last word is a little bit too long go go.

    This is another part of the docstring.
    """


def main(argv, indata):
    """
    Command-line entry point: wrap C{indata} according to the options in
    C{argv} and return the text to print.  When C{--offset} is given, the
    result is prefixed with the new cursor offset and a space.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--offset", type=int)
    parser.add_argument("--indent", type=int)
    parser.add_argument("--width", type=int, default=79)
    parser.add_argument("--linewise", action='store_true')
    # Passing --single-space requests one space between sentences.
    parser.add_argument("--single-space", action='store_true')
    namespace = parser.parse_args(argv[1:])

    io = StringIO()
    inlines = indata.split("\n")
    if namespace.linewise:
        inlines.insert(0, "")
    initialBlank, indentCount = indentHeuristic(inlines, io)
    point = 0
    width = namespace.width
    if namespace.offset is not None:
        point = namespace.offset
    if namespace.indent is not None:
        indentCount = namespace.indent
    offset = wrapPythonDocstring(
        indata, io,
        indentation=" " * indentCount,
        width=width,
        point=point,
        initialBlank=initialBlank,
        singleSpace=namespace.single_space
    )
    prefix = StringIO()
    if namespace.offset is not None:
        prefix.write("{:d}".format(offset))
        prefix.write(" ")
    output = prefix.getvalue() + io.getvalue()
    if namespace.linewise:
        output = "\n".join(output.split("\n")[1:-1])
    return output


if __name__ == '__main__':
    sys.stdout.write(
        toStdout(
            main(
                sys.argv,
                fromStdin(sys.stdin.read()),
            )
        )
    )
    sys.stdout.flush()