664 lines
20 KiB
Python
664 lines
20 KiB
Python
# Copyright (C) 2012
|
|
# See LICENSE.txt for details.
|
|
|
|
"""
|
|
General Python docstring wrapper
|
|
================================
|
|
|
|
Utility for wrapping docstrings in Python; specifically, docstrings in
|
|
U{Epytext <http://epydoc.sourceforge.net/manual-epytext.html>} or Sphinx
|
|
ReStructureText format.
|
|
|
|
The wrapping herein generally adheres to all the conventions set forth by the
|
|
Twisted project U{http://twistedmatrix.com/}, but should be generally accurate
|
|
for most Python projects.
|
|
"""
|
|
|
|
from __future__ import unicode_literals
|
|
|
|
import argparse
|
|
import sys
|
|
import re
|
|
|
|
from io import StringIO
|
|
from uuid import uuid4
|
|
|
|
|
|
__all__ = [
|
|
"wrapPythonDocstring"
|
|
]
|
|
|
|
|
|
if sys.version_info[0] <= 2:
|
|
makeID = lambda: unicode(uuid4())
|
|
fromStdin = lambda s: s.decode("utf-8")
|
|
toStdout = lambda s: s.encode("utf-8")
|
|
PY2 = True
|
|
else:
|
|
makeID = lambda: str(uuid4())
|
|
fromStdin = lambda s: s
|
|
toStdout = lambda s: s
|
|
PY2 = False
|
|
|
|
|
|
|
|
def isUnderline(expr):
|
|
return bool(re.match("[=]+$", expr) or re.match("[-]+$", expr))
|
|
|
|
|
|
|
|
def startslist(x):
|
|
return (x == '-' or (x.endswith(".") and (x[:-1].isdigit()
|
|
or x[:-1] == '#')))
|
|
|
|
|
|
|
|
def isAcronym(word):
|
|
"""
|
|
Is the given word an acronym (separated by periods, so it doesn't end a
|
|
sentence)? cf. lots of interesting acronyms, e.g. this is one. solve for
|
|
x. a.b.c. is also one. You might also want to give an example
|
|
parenthetically (e.g. this one).
|
|
"""
|
|
word = word.strip("(")
|
|
return ((len(word) > 2 and word[1::2] == '.' * int(len(word) / 2)) or
|
|
word in ["cf.", "viz."])
|
|
|
|
|
|
|
|
def isSentenceEnd(prevWord):
|
|
"""
|
|
Is the given word the end of a sentence?
|
|
"""
|
|
if not prevWord:
|
|
return False
|
|
# Exclamation points and question marks generally end sentences.
|
|
if prevWord[-1] in "?!":
|
|
return True
|
|
# Now, if it's not a period, it's probably not the end of a sentence.
|
|
if prevWord[-1] != ".":
|
|
return False
|
|
if isAcronym(prevWord):
|
|
return False
|
|
return True
|
|
|
|
|
|
|
|
def beginsField(line):
|
|
"""
|
|
Does the given (stripped) line begin an epytext or ReST field?
|
|
"""
|
|
if line.startswith("@"):
|
|
return True
|
|
sphinxwords = """
|
|
param params return type rtype summary var ivar cvar raises raise except
|
|
exception
|
|
""".split()
|
|
for word in sphinxwords:
|
|
if line.startswith(":" + word):
|
|
return True
|
|
return False
|
|
|
|
|
|
|
|
class RegularParagraph(object):
|
|
otherIndent = ""
|
|
|
|
def __init__(self, pointTracker, fixedIndent="", hangIndent="",
|
|
followIndent="", originalIndent=0):
|
|
self.words = []
|
|
self.fixedIndent = fixedIndent
|
|
self.hangIndent = hangIndent
|
|
self.followIndent = followIndent
|
|
self.more = None
|
|
self.prev = None
|
|
self.pointTracker = pointTracker
|
|
# originalIndent is the width of the indentation of the line this
|
|
# paragraph originally came from in the input text.
|
|
self.originalIndent = originalIndent
|
|
self._unwrappedLines = 0
|
|
self._headingType = None
|
|
self._headingPoints = []
|
|
|
|
|
|
def matchesTag(self, other):
|
|
return False
|
|
|
|
|
|
def __nonzero__(self):
|
|
return bool(self.words)
|
|
|
|
|
|
def all(self):
|
|
while self is not None:
|
|
#print self.__class__.__name__
|
|
if self:
|
|
yield self
|
|
self = self.more
|
|
|
|
|
|
def setIsHeading(self, headingType):
|
|
self._headingType = headingType
|
|
|
|
|
|
def isHeading(self):
|
|
return bool(self._headingType)
|
|
|
|
def connect(self, more):
|
|
self.more = more
|
|
more.prev = self
|
|
return more
|
|
|
|
def islist(self):
|
|
return self.words and startslist(self.words[0])
|
|
|
|
def previousListPeer(self):
|
|
"""
|
|
Find a previous paragraph that is also a list element, of the same
|
|
indentation level if one exists.
|
|
"""
|
|
previous = self.prev
|
|
matched = None
|
|
while previous:
|
|
if not previous.words:
|
|
previous = previous.prev
|
|
continue
|
|
if not previous.islist():
|
|
break
|
|
if previous.originalIndent <= self.originalIndent:
|
|
return previous
|
|
if previous.originalIndent > self.originalIndent:
|
|
matched = previous
|
|
previous = previous.prev
|
|
if matched:
|
|
return matched
|
|
|
|
def add(self, line):
|
|
clean = self.pointTracker.peek(line)
|
|
stripped = clean.strip()
|
|
thisLineIndent = len(clean) - len(clean.lstrip())
|
|
|
|
if stripped:
|
|
self._unwrappedLines += 1
|
|
active = self
|
|
firstword = list(self.pointTracker.filterWords(line.split()))[0]
|
|
if beginsField(stripped):
|
|
fp = FieldParagraph(pointTracker=self.pointTracker, originalIndent=thisLineIndent)
|
|
fp.words.extend(line.split())
|
|
active = active.connect(fp)
|
|
elif isUnderline(stripped) and self._unwrappedLines == 2:
|
|
# This paragraph is actually a section heading.
|
|
active.setIsHeading(stripped[0])
|
|
self._headingPoints = self.pointTracker.extractPoints(line)
|
|
# FIXME: should respect leading indentation.
|
|
active = active.connect(self.genRegular(originalIndent=thisLineIndent))
|
|
elif startslist(firstword):
|
|
# Aesthetically I prefer a 2-space indent here, but the
|
|
# convention in the codebase seems to be 4 spaces.
|
|
LIST_INDENT = 4
|
|
# FIXME: this also needs to respect leading indentation so it
|
|
# can properly represent nested lists.
|
|
hangIndent = self.pointTracker.lengthOf(firstword) + 1
|
|
fi = self.fixedIndent
|
|
if not (self.words and startslist(self.words[0])):
|
|
fi += (" " * LIST_INDENT)
|
|
fp = RegularParagraph(
|
|
pointTracker=self.pointTracker,
|
|
fixedIndent=fi,
|
|
hangIndent=" " * hangIndent,
|
|
followIndent=self.followIndent,
|
|
originalIndent=thisLineIndent,
|
|
)
|
|
fp.words.extend(line.split())
|
|
fp.prev = self
|
|
peer = fp.previousListPeer()
|
|
if peer:
|
|
if peer.originalIndent >= fp.originalIndent:
|
|
fp.fixedIndent = peer.fixedIndent
|
|
else:
|
|
fp.fixedIndent = peer.fixedIndent + (" " * LIST_INDENT)
|
|
active = active.connect(fp)
|
|
else:
|
|
self.words.extend(line.split())
|
|
if stripped.endswith("::"):
|
|
active = active.connect(PreFormattedParagraph(
|
|
active,
|
|
indentBegins=thisLineIndent
|
|
))
|
|
return active
|
|
else:
|
|
rawstrip = line.strip()
|
|
if rawstrip:
|
|
self.words.append(rawstrip)
|
|
if len(list(self.pointTracker.filterWords(self.words))):
|
|
return self.connect(self.genRegular(originalIndent=thisLineIndent))
|
|
return self
|
|
|
|
|
|
def wrap(self, output, indentation, width, initialBlank, singleSpace):
|
|
maxWidthThisLine = width
|
|
if not self.words:
|
|
return
|
|
if initialBlank:
|
|
thisLine = self.firstIndent(indentation)
|
|
else:
|
|
thisLine = ''
|
|
maxWidthThisLine -= (3 + len(indentation))
|
|
first = True
|
|
prevWord = ''
|
|
for num, word in enumerate(self.words):
|
|
if not self.pointTracker.isWord(word):
|
|
thisLine += word
|
|
continue
|
|
normalPrevWord = self.pointTracker.peek(prevWord)
|
|
if num == 1 and startslist(normalPrevWord):
|
|
spaces = 1
|
|
elif isSentenceEnd(normalPrevWord) and singleSpace:
|
|
spaces = 2
|
|
else:
|
|
spaces = 1
|
|
prevWord = word
|
|
thisLineWidthWithThisWord = (self.pointTracker.lengthOf(thisLine) +
|
|
self.pointTracker.lengthOf(word) +
|
|
spaces)
|
|
if thisLineWidthWithThisWord <= maxWidthThisLine or first:
|
|
if first:
|
|
first = not first
|
|
else:
|
|
thisLine += (" " * spaces)
|
|
thisLine += word
|
|
else:
|
|
output.write(self.pointTracker.scan(thisLine, output.tell()))
|
|
output.write("\n")
|
|
maxWidthThisLine = width
|
|
thisLine = self.restIndent(indentation) + word
|
|
output.write(self.pointTracker.scan(thisLine, output.tell()))
|
|
output.write("\n")
|
|
if self.isHeading():
|
|
indentText = self.firstIndent(indentation)
|
|
lineSize = self.pointTracker.lengthOf(thisLine) - len(indentText)
|
|
output.write(self.pointTracker.scan(
|
|
indentText + ''.join(self._headingPoints) +
|
|
(self._headingType * lineSize), output.tell()
|
|
))
|
|
output.write("\n")
|
|
|
|
|
|
def firstIndent(self, indentation):
|
|
return indentation + self.fixedIndent
|
|
|
|
|
|
def restIndent(self, indentation):
|
|
return (indentation + self.fixedIndent + self.hangIndent +
|
|
self.otherIndent)
|
|
|
|
|
|
def genRegular(self, originalIndent=0):
|
|
return RegularParagraph(pointTracker=self.pointTracker,
|
|
fixedIndent=self.nextIndent(),
|
|
followIndent=self.nextIndent(),
|
|
originalIndent=originalIndent)
|
|
|
|
|
|
def nextIndent(self):
|
|
return self.followIndent
|
|
|
|
|
|
|
|
class FieldParagraph(RegularParagraph):
|
|
|
|
@property
|
|
def otherIndent(self):
|
|
"""
|
|
Compute the other indent appropriate to the length of a sphinx field,
|
|
if we're wrapping a sphinx field.
|
|
"""
|
|
if self.words[0].startswith(':'):
|
|
accumulatedLength = 0
|
|
for word in self.words:
|
|
word = self.pointTracker.peek(word)
|
|
# Add the length of the word
|
|
accumulatedLength += len(word)
|
|
# Add the following space
|
|
accumulatedLength += 1
|
|
# If it gets too long then give up and go with the default.
|
|
if accumulatedLength > 10:
|
|
break
|
|
if word.endswith(":"):
|
|
return accumulatedLength * " "
|
|
return " "
|
|
|
|
|
|
def nextIndent(self):
|
|
return " "
|
|
|
|
|
|
def matchesTag(self, other):
|
|
if isinstance(other, FieldParagraph):
|
|
myWords = list(self.pointTracker.filterWords(self.words))
|
|
theirWords = list(self.pointTracker.filterWords(other.words))
|
|
if ( set([myWords[0], theirWords[0]]) ==
|
|
set(["@return:", "@rtype:"]) ):
|
|
# matching @return and @rtype fields.
|
|
return True
|
|
elif myWords[0][0] == theirWords[0][0] == ':':
|
|
# hack for sphinx: prevailing style seems to be 'group @params
|
|
# together'
|
|
if myWords[0] == theirWords[0]:
|
|
return True
|
|
elif ( set([myWords[0], theirWords[0]]) ==
|
|
set([":return:", ":rtype:"]) ):
|
|
return True
|
|
elif ( set([myWords[0], theirWords[0]]) ==
|
|
set([":param", ":type"]) and
|
|
len(myWords) > 1 and len(theirWords) > 1 and
|
|
myWords[1] == theirWords[1]):
|
|
# same as "matching @param and @type" below, but stricter;
|
|
# FIXME: these should be merged.
|
|
return True
|
|
else:
|
|
return False
|
|
elif len(myWords) > 1 and len(theirWords) > 1:
|
|
# matching @param and @type fields.
|
|
return myWords[1] == theirWords[1]
|
|
return False
|
|
else:
|
|
return False
|
|
|
|
|
|
|
|
class PreFormattedParagraph(object):
|
|
|
|
def __init__(self, before, indentBegins):
|
|
self.lines = []
|
|
self.before = before
|
|
|
|
pointTracker = before.pointTracker
|
|
|
|
fixedIndent = (before.fixedIndent + before.hangIndent +
|
|
before.otherIndent)
|
|
|
|
self.indentBegins = indentBegins
|
|
self.fixedIndent = fixedIndent
|
|
self.more = None
|
|
self.prev = None
|
|
self.pointTracker = pointTracker
|
|
|
|
|
|
def islist(self):
|
|
"""
|
|
It's not a list.
|
|
"""
|
|
return False
|
|
|
|
|
|
def connect(self, more):
|
|
self.more = more
|
|
more.prev = self
|
|
return more
|
|
|
|
|
|
@property
|
|
def originalIndent(self):
|
|
return self.indentBegins
|
|
|
|
|
|
@property
|
|
def words(self):
|
|
"""
|
|
Used by wrapper below to see if there are any words in a given
|
|
paragraph and whether it should be skipped.
|
|
"""
|
|
return bool(self.lines)
|
|
|
|
|
|
def matchesTag(self, other):
|
|
return False
|
|
|
|
|
|
def add(self, line):
|
|
actualLine = self.pointTracker.peek(line)
|
|
|
|
if actualLine.strip():
|
|
if len(actualLine) - len(actualLine.lstrip()) <= self.indentBegins:
|
|
next = self.connect(self.before.genRegular())
|
|
return next.add(line)
|
|
self.lines.append(line.rstrip())
|
|
else:
|
|
self.lines.append(line.strip())
|
|
return self
|
|
|
|
|
|
def fixIndentation(self):
|
|
while self.lines and not self.lines[0].strip():
|
|
self.lines.pop(0)
|
|
while self.lines and not self.lines[-1].strip():
|
|
self.lines.pop()
|
|
if not self.lines:
|
|
return
|
|
cleanLines = list(map(self.pointTracker.peek, self.lines))
|
|
commonLeadingIndent = min([len(x) - len(x.lstrip()) for x in cleanLines
|
|
if x.strip()] or [0])
|
|
newLines = []
|
|
for actualLine, line in zip(cleanLines, self.lines):
|
|
if actualLine != line and line[:commonLeadingIndent].strip():
|
|
# There's a marker, and it's in the leading whitespace.
|
|
# Explicitly reposition the marker at the beginning of the
|
|
# fixed indentation.
|
|
line = (self.pointTracker.marker +
|
|
actualLine[commonLeadingIndent:])
|
|
else:
|
|
line = line.rstrip()[commonLeadingIndent:]
|
|
newLines.append(line)
|
|
self.lines = newLines
|
|
|
|
|
|
def wrap(self, output, indentation, width, initialBlank, singleSpace):
|
|
# OK, now we know about all the lines we're going to know about.
|
|
self.fixIndentation()
|
|
for line in self.lines:
|
|
if self.pointTracker.peek(line):
|
|
output.write(indentation + " " + self.fixedIndent)
|
|
output.write(self.pointTracker.scan(line, output.tell()))
|
|
output.write("\n")
|
|
|
|
|
|
|
|
class PointTracker(object):
|
|
"""
|
|
Object for keeping track of where the insertion points are.
|
|
"""
|
|
|
|
def __init__(self, point):
|
|
self.point = point
|
|
self.marker = "{" + makeID() + "}"
|
|
self.outPoints = []
|
|
|
|
|
|
def annotate(self, text):
|
|
"""
|
|
Add point references to a block of text.
|
|
"""
|
|
return text[:self.point] + self.marker + text[self.point:]
|
|
|
|
|
|
def filterWords(self, words):
|
|
for word in words:
|
|
if self.isWord(word):
|
|
yield self.peek(word)
|
|
|
|
|
|
def isWord(self, text):
|
|
"""
|
|
Is the given word actually a word, or just an artifact of the
|
|
point-tracking process? If it's just the point marker by itself, then
|
|
no, it isn't, and don't insert additional whitespace after it.
|
|
"""
|
|
return not (text == self.marker)
|
|
|
|
|
|
def lengthOf(self, word):
|
|
"""
|
|
How long would this word be if it didn't have any point-markers in it?
|
|
"""
|
|
return len(self.peek(word))
|
|
|
|
|
|
def peek(self, word):
|
|
"""
|
|
What would this word look like if it didn't have any point-markers in
|
|
it?
|
|
"""
|
|
return word.replace(self.marker, "")
|
|
|
|
|
|
def extractPoints(self, text):
|
|
"""
|
|
Return a C{list} of all point markers contained in the text.
|
|
"""
|
|
if self.marker in text:
|
|
return [self.marker]
|
|
return []
|
|
|
|
|
|
def scan(self, text, offset):
|
|
"""
|
|
Scan some text for point markers, remember them, and remove them.
|
|
"""
|
|
idx = text.find(self.marker)
|
|
if idx == -1:
|
|
return text
|
|
self.outPoints.append(idx + offset)
|
|
return self.peek(text)
|
|
|
|
|
|
|
|
def wrapPythonDocstring(docstring, output, indentation=" ",
|
|
width=79, point=0, initialBlank=True,
|
|
singleSpace=False):
|
|
"""
|
|
Wrap a given Python docstring.
|
|
|
|
@param docstring: the docstring itself (just the stuff between the quotes).
|
|
@type docstring: unicode
|
|
|
|
@param output: The unicode output file to write the wrapped docstring to.
|
|
@type output: L{file}-like (C{write} takes unicode.)
|
|
|
|
@param indentation: a string (consisting only of spaces) indicating the
|
|
amount of space to shift by. Don't adjust this. It's always 4 spaces.
|
|
PEP8 says so.
|
|
@type indentation: L{unicode}
|
|
|
|
@param width: The maximum number of characters allowed in a wrapped line.
|
|
@type width: L{int}
|
|
|
|
@param point: The location of the cursor in the text, as an offset from the
|
|
beginning of the docstring. If this function is being used from within
|
|
a graphical editor, this parameter can be used (in addition to the
|
|
return value of this function) to reposition the cursor at the relative
|
|
position which the user will expect.
|
|
|
|
@param singleSpace: If true, use a single space between sentences instead
|
|
of two.
|
|
|
|
@return: The new location of the cursor.
|
|
"""
|
|
# TODO: multiple points; usable, for example, for start and end of a
|
|
# currently active selection.
|
|
pt = PointTracker(point)
|
|
start = paragraph = RegularParagraph(pt)
|
|
docstring = pt.annotate(docstring)
|
|
for line in docstring.split("\n"):
|
|
paragraph = paragraph.add(line)
|
|
prevp = None
|
|
# output.write("{}".format(initialBlank))
|
|
for paragraph in start.all():
|
|
if initialBlank:
|
|
if paragraph.words and not paragraph.matchesTag(prevp):
|
|
output.write("\n")
|
|
prevp = paragraph
|
|
paragraph.wrap(output, indentation, width, initialBlank, singleSpace)
|
|
initialBlank = True
|
|
output.write(indentation)
|
|
return pt.outPoints[0] if pt.outPoints else 0
|
|
|
|
|
|
|
|
def indentHeuristic(lines, io):
|
|
"""
|
|
Determine the indentation.
|
|
"""
|
|
for num, line in enumerate(lines):
|
|
if num == 0:
|
|
initialBlank = not bool(line)
|
|
if not initialBlank:
|
|
continue
|
|
indentation = (len(line) - len(line.lstrip()))
|
|
if indentation:
|
|
return (initialBlank, indentation)
|
|
# TODO: investigate the case where this happens.
|
|
return True, 0
|
|
|
|
|
|
|
|
def sampleDocstring():
|
|
"""This is a sample docstring where the last word is a little bit too long
|
|
go go.
|
|
|
|
This is another part of the docstring.
|
|
"""
|
|
|
|
|
|
def main(argv, indata):
|
|
parser = argparse.ArgumentParser()
|
|
parser.add_argument("--offset", type = int)
|
|
parser.add_argument("--indent", type = int)
|
|
parser.add_argument("--width", type = int, default = 79)
|
|
parser.add_argument("--linewise", action='store_true')
|
|
parser.add_argument("--single-space", action='store_false')
|
|
namespace = parser.parse_args(argv[1:])
|
|
|
|
io = StringIO()
|
|
inlines = indata.split("\n")
|
|
if namespace.linewise:
|
|
inlines.insert(0, "")
|
|
initialBlank, indentCount = indentHeuristic(inlines, io)
|
|
point = 0
|
|
width = namespace.width
|
|
|
|
if namespace.offset is not None:
|
|
point = namespace.offset
|
|
if namespace.indent is not None:
|
|
indentCount = namespace.indent
|
|
|
|
offset = wrapPythonDocstring(
|
|
indata, io,
|
|
indentation=" " * indentCount,
|
|
width=width,
|
|
point=point,
|
|
initialBlank=initialBlank,
|
|
singleSpace=namespace.single_space
|
|
)
|
|
prefix = StringIO()
|
|
if namespace.offset is not None:
|
|
prefix.write("{:d}".format(offset))
|
|
prefix.write(" ")
|
|
|
|
output = prefix.getvalue() + io.getvalue()
|
|
if namespace.linewise:
|
|
output = "\n".join(output.split("\n")[1:-1])
|
|
return output
|
|
|
|
|
|
if __name__ == '__main__':
|
|
sys.stdout.write(
|
|
toStdout(
|
|
main(
|
|
sys.argv,
|
|
fromStdin(sys.stdin.read()),
|
|
)
|
|
)
|
|
)
|
|
|
|
sys.stdout.flush()
|