emacs.d/elpa/python-docstring-20190716.921/docstring_wrap.py
2019-11-23 09:10:03 +01:00

664 lines
20 KiB
Python

# Copyright (C) 2012
# See LICENSE.txt for details.
"""
General Python docstring wrapper
================================
Utility for wrapping docstrings in Python; specifically, docstrings in
U{Epytext <http://epydoc.sourceforge.net/manual-epytext.html>} or Sphinx
reStructuredText format.
The wrapping herein generally adheres to all the conventions set forth by the
Twisted project U{http://twistedmatrix.com/}, but should be generally accurate
for most Python projects.
"""
from __future__ import unicode_literals
import argparse
import sys
import re
from io import StringIO
from uuid import uuid4
__all__ = [
"wrapPythonDocstring"
]
# Text-handling shims selected once at import time, keyed on the major
# version of the running interpreter.
if sys.version_info[0] <= 2:
    PY2 = True

    def makeID():
        """Return a fresh UUID rendered as a unicode string."""
        return unicode(uuid4())

    def fromStdin(s):
        """Decode bytes read from standard input as UTF-8."""
        return s.decode("utf-8")

    def toStdout(s):
        """Encode text as UTF-8 before it is written to standard output."""
        return s.encode("utf-8")
else:
    PY2 = False

    def makeID():
        """Return a fresh UUID rendered as a string."""
        return str(uuid4())

    def fromStdin(s):
        """Return C{s} unchanged; no decoding is applied on this branch."""
        return s

    def toStdout(s):
        """Return C{s} unchanged; no encoding is applied on this branch."""
        return s
def isUnderline(expr):
    """
    Does C{expr} consist solely of C{=} characters or solely of C{-}
    characters, as a section-heading underline does?

    @param expr: the (already stripped) line to test.
    @return: C{True} if the line is an underline, C{False} otherwise.
    """
    for pattern in ("[=]+$", "[-]+$"):
        if re.match(pattern, expr):
            return True
    return False
def startslist(x):
    """
    Is the word C{x} a list-item marker?

    Recognised markers are a lone C{-}, a run of digits followed by a period
    (C{1.}, C{12.}) and the auto-numbering marker C{#.}.

    @param x: a single whitespace-delimited word.
    @return: C{True} if the word introduces a list item.
    """
    if x == '-':
        return True
    if not x.endswith("."):
        return False
    label = x[:-1]
    return label.isdigit() or label == '#'
def isAcronym(word):
    """
    Is the given word an acronym (separated by periods, so it doesn't end a
    sentence)?  cf. lots of interesting acronyms, e.g. this is one.  solve
    for x.  a.b.c. is also one.  You might also want to give an example
    parenthetically (e.g. this one).

    @param word: the word to examine; leading and trailing C{(} characters
        are ignored.
    @return: C{True} if every second character is a period (and the word is
        longer than two characters), or the word is C{cf.} or C{viz.}.
    """
    core = word.strip("(")
    if core in ("cf.", "viz."):
        return True
    # Characters at odd positions must all be periods: "e.g." -> "..".
    return len(core) > 2 and core[1::2] == '.' * (len(core) // 2)
def isSentenceEnd(prevWord):
    """
    Is the given word the end of a sentence?

    @param prevWord: the word preceding the one about to be placed; may be
        empty.
    @return: C{True} for a word ending in C{?} or C{!}, or ending in a
        period without being an acronym; C{False} otherwise.
    """
    if not prevWord:
        return False
    final = prevWord[-1]
    # Exclamation points and question marks generally end sentences.
    if final in "?!":
        return True
    # Anything but a period is probably not the end of a sentence, and a
    # period that belongs to an acronym isn't one either.
    return final == "." and not isAcronym(prevWord)
def beginsField(line):
    """
    Does the given (stripped) line begin an epytext or ReST field?

    @param line: a line with its leading whitespace already removed.
    @return: C{True} if the line starts with C{@}, or with C{:} immediately
        followed by one of the recognised Sphinx field names.
    """
    sphinxwords = """
    param params return type rtype summary var ivar cvar raises raise except
    exception
    """.split()
    sphinxPrefixes = tuple(":" + word for word in sphinxwords)
    return line.startswith("@") or line.startswith(sphinxPrefixes)
class RegularParagraph(object):
    """
    A paragraph of ordinary words that is re-flowed when wrapped.

    Paragraphs form a doubly linked chain through C{more} (next) and C{prev}
    (previous).  C{add} consumes one input line at a time and returns the
    paragraph that should receive the following line; C{wrap} writes the
    re-flowed text to an output stream.
    """

    # Extra indentation applied to continuation lines.
    otherIndent = ""

    def __init__(self, pointTracker, fixedIndent="", hangIndent="",
                 followIndent="", originalIndent=0):
        """
        @param pointTracker: the object used to see through and record the
            cursor marker embedded in the text.
        @param fixedIndent: indentation applied to every line.
        @param hangIndent: additional indentation for every line after the
            first.
        @param followIndent: indentation handed to paragraphs created by
            C{genRegular}.
        @param originalIndent: the width of the indentation of the line this
            paragraph originally came from in the input text.
        """
        self.words = []
        self.fixedIndent = fixedIndent
        self.hangIndent = hangIndent
        self.followIndent = followIndent
        self.more = None
        self.prev = None
        self.pointTracker = pointTracker
        self.originalIndent = originalIndent
        # Number of non-blank input lines seen so far; a heading underline
        # is only recognised on the second one.
        self._unwrappedLines = 0
        self._headingType = None
        self._headingPoints = []

    def matchesTag(self, other):
        """
        A regular paragraph never pairs up with another paragraph.
        """
        return False

    def __bool__(self):
        """
        A paragraph is true only when it holds at least one word.

        C{all} relies on this to skip empty paragraphs, so the hook has to
        exist under the Python 3 name as well as the Python 2 one.
        """
        return bool(self.words)

    # Python 2 looks the truth-value hook up under this name.
    __nonzero__ = __bool__

    def all(self):
        """
        Iterate over this paragraph and every later paragraph in the chain,
        skipping those that evaluate as false.
        """
        node = self
        while node is not None:
            if node:
                yield node
            node = node.more

    def setIsHeading(self, headingType):
        """
        Mark this paragraph as a section heading.

        @param headingType: the underline character to emit beneath it.
        """
        self._headingType = headingType

    def isHeading(self):
        """
        Has this paragraph been marked as a section heading?
        """
        return bool(self._headingType)

    def connect(self, more):
        """
        Link C{more} in as the paragraph following this one.

        @return: C{more}, so calls can be chained.
        """
        self.more = more
        more.prev = self
        return more

    def islist(self):
        """
        Does this paragraph start with a list-item marker?
        """
        return bool(self.words and startslist(self.words[0]))

    def previousListPeer(self):
        """
        Find a previous paragraph that is also a list element, of the same
        indentation level if one exists.

        Walking backwards, paragraphs without words are skipped and the
        search stops at the first non-list paragraph.  A list paragraph
        indented no deeper than this one is returned immediately; otherwise
        the earliest more-deeply-indented list paragraph seen is returned.

        @return: the peer paragraph, or C{None} if there is none.
        """
        matched = None
        # Walk by identity rather than truthiness: empty paragraphs are
        # false, yet they have to be stepped over, not treated as the end.
        previous = self.prev
        while previous is not None:
            if not previous.words:
                previous = previous.prev
                continue
            if not previous.islist():
                break
            if previous.originalIndent <= self.originalIndent:
                return previous
            # Deeper than us: remember it and keep looking further back.
            matched = previous
            previous = previous.prev
        return matched

    def add(self, line):
        """
        Feed one input line to this paragraph.

        Depending on the line this either extends the current paragraph or
        starts a new one (field, list item, heading follower, preformatted
        block, or a fresh regular paragraph after a blank line).

        @param line: the raw line, possibly containing the cursor marker.
        @return: the paragraph that should receive the next line.
        """
        clean = self.pointTracker.peek(line)
        stripped = clean.strip()
        thisLineIndent = len(clean) - len(clean.lstrip())
        if not stripped:
            # Blank line.  Keep a marker that sits alone on it, and close
            # the paragraph if it already contains real words.
            rawstrip = line.strip()
            if rawstrip:
                self.words.append(rawstrip)
            if list(self.pointTracker.filterWords(self.words)):
                return self.connect(
                    self.genRegular(originalIndent=thisLineIndent))
            return self

        self._unwrappedLines += 1
        active = self
        firstword = list(self.pointTracker.filterWords(line.split()))[0]
        if beginsField(stripped):
            fp = FieldParagraph(pointTracker=self.pointTracker,
                                originalIndent=thisLineIndent)
            fp.words.extend(line.split())
            active = active.connect(fp)
        elif isUnderline(stripped) and self._unwrappedLines == 2:
            # This paragraph is actually a section heading.
            active.setIsHeading(stripped[0])
            self._headingPoints = self.pointTracker.extractPoints(line)
            # FIXME: should respect leading indentation.
            active = active.connect(
                self.genRegular(originalIndent=thisLineIndent))
        elif startslist(firstword):
            # Aesthetically I prefer a 2-space indent here, but the
            # convention in the codebase seems to be 4 spaces.
            LIST_INDENT = 4
            # FIXME: this also needs to respect leading indentation so it
            # can properly represent nested lists.
            hangIndent = self.pointTracker.lengthOf(firstword) + 1
            fi = self.fixedIndent
            if not (self.words and startslist(self.words[0])):
                # Not already inside a list item: indent one list level.
                fi += (" " * LIST_INDENT)
            fp = RegularParagraph(
                pointTracker=self.pointTracker,
                fixedIndent=fi,
                hangIndent=" " * hangIndent,
                followIndent=self.followIndent,
                originalIndent=thisLineIndent,
            )
            fp.words.extend(line.split())
            # Set prev before connecting so the peer search can run.
            fp.prev = self
            peer = fp.previousListPeer()
            if peer is not None:
                if peer.originalIndent >= fp.originalIndent:
                    fp.fixedIndent = peer.fixedIndent
                else:
                    # Nested beneath the peer: one list level deeper.
                    fp.fixedIndent = peer.fixedIndent + (" " * LIST_INDENT)
            active = active.connect(fp)
        else:
            self.words.extend(line.split())
        if stripped.endswith("::"):
            # A trailing "::" introduces a preformatted block.
            active = active.connect(PreFormattedParagraph(
                active,
                indentBegins=thisLineIndent
            ))
        return active

    def wrap(self, output, indentation, width, initialBlank, singleSpace):
        """
        Write this paragraph, re-flowed, to C{output}.

        @param output: a file-like object supporting C{write} and C{tell}.
        @param indentation: the base indentation string for every line.
        @param width: the maximum width of a wrapped line.
        @param initialBlank: if false, the first line is written without
            indentation and its budget is reduced by
            C{3 + len(indentation)}.
        @param singleSpace: as implemented here, a true value puts two
            spaces after a word that ends a sentence and a false value puts
            one.
        """
        maxWidthThisLine = width
        if not self.words:
            return
        if initialBlank:
            thisLine = self.firstIndent(indentation)
        else:
            thisLine = ''
            maxWidthThisLine -= (3 + len(indentation))
        first = True
        prevWord = ''
        for num, word in enumerate(self.words):
            if not self.pointTracker.isWord(word):
                # A bare cursor marker: append it without any spacing.
                thisLine += word
                continue
            normalPrevWord = self.pointTracker.peek(prevWord)
            if num == 1 and startslist(normalPrevWord):
                # Exactly one space after a list-item marker.
                spaces = 1
            elif isSentenceEnd(normalPrevWord) and singleSpace:
                spaces = 2
            else:
                spaces = 1
            prevWord = word
            thisLineWidthWithThisWord = (
                self.pointTracker.lengthOf(thisLine) +
                self.pointTracker.lengthOf(word) +
                spaces)
            if thisLineWidthWithThisWord <= maxWidthThisLine or first:
                # The first word always goes on the line, however long.
                if first:
                    first = False
                else:
                    thisLine += (" " * spaces)
                thisLine += word
            else:
                output.write(self.pointTracker.scan(thisLine, output.tell()))
                output.write("\n")
                maxWidthThisLine = width
                thisLine = self.restIndent(indentation) + word
        output.write(self.pointTracker.scan(thisLine, output.tell()))
        output.write("\n")
        if self.isHeading():
            # Re-emit the underline, sized to the heading's last line.
            indentText = self.firstIndent(indentation)
            lineSize = self.pointTracker.lengthOf(thisLine) - len(indentText)
            output.write(self.pointTracker.scan(
                indentText + ''.join(self._headingPoints) +
                (self._headingType * lineSize), output.tell()
            ))
            output.write("\n")

    def firstIndent(self, indentation):
        """
        Indentation for the first line of this paragraph.
        """
        return indentation + self.fixedIndent

    def restIndent(self, indentation):
        """
        Indentation for every line of this paragraph after the first.
        """
        return (indentation + self.fixedIndent + self.hangIndent +
                self.otherIndent)

    def genRegular(self, originalIndent=0):
        """
        Create the regular paragraph that follows this one.

        @param originalIndent: the indentation width of the input line that
            triggered the new paragraph.
        """
        return RegularParagraph(pointTracker=self.pointTracker,
                                fixedIndent=self.nextIndent(),
                                followIndent=self.nextIndent(),
                                originalIndent=originalIndent)

    def nextIndent(self):
        """
        Indentation that paragraphs following this one should use.
        """
        return self.followIndent
class FieldParagraph(RegularParagraph):
    """
    A paragraph that begins with an epytext (C{@param x:}) or Sphinx
    (C{:param x:}) field tag.
    """

    @property
    def otherIndent(self):
        """
        Compute the other indent appropriate to the length of a sphinx field,
        if we're wrapping a sphinx field.

        The cursor marker is filtered out first, so the result does not
        depend on where the cursor sits inside the field tag.

        @return: for a Sphinx field whose tag (up to and including the word
            ending in C{:}) plus trailing spaces is at most 10 characters
            wide, that many spaces; otherwise the default indent.
        """
        realWords = list(self.pointTracker.filterWords(self.words))
        if realWords and realWords[0].startswith(':'):
            accumulatedLength = 0
            for word in realWords:
                # The word itself plus the space that follows it.
                accumulatedLength += len(word) + 1
                # If it gets too long then give up and go with the default.
                if accumulatedLength > 10:
                    break
                if word.endswith(":"):
                    return accumulatedLength * " "
        return " "

    def nextIndent(self):
        """
        Indentation for regular paragraphs that follow this field.
        """
        return " "

    def matchesTag(self, other):
        """
        Should this field be grouped with C{other}, i.e. written directly
        after it?

        @param other: the previously written paragraph, or C{None}.
        @return: C{True} for a return/rtype pair in either markup, for two
            Sphinx fields with the same tag, for a Sphinx param/type pair
            naming the same thing, and for two epytext fields whose second
            words are equal; C{False} otherwise.
        """
        if not isinstance(other, FieldParagraph):
            return False
        myWords = list(self.pointTracker.filterWords(self.words))
        theirWords = list(self.pointTracker.filterWords(other.words))
        myTag, theirTag = myWords[0], theirWords[0]
        tags = set([myTag, theirTag])
        sameSubject = (len(myWords) > 1 and len(theirWords) > 1 and
                       myWords[1] == theirWords[1])
        if tags == set(["@return:", "@rtype:"]):
            # matching @return and @rtype fields.
            return True
        if myTag[0] == theirTag[0] == ':':
            # hack for sphinx: prevailing style seems to be 'group @params
            # together'
            if myTag == theirTag:
                return True
            if tags == set([":return:", ":rtype:"]):
                return True
            # same as "matching @param and @type" below, but stricter;
            # FIXME: these should be merged.
            return tags == set([":param", ":type"]) and sameSubject
        # matching @param and @type fields.
        return sameSubject
class PreFormattedParagraph(object):
    """
    A block of literal text whose lines are re-indented as a unit but never
    re-flowed.

    The block keeps collecting lines until one arrives that is indented no
    deeper than C{indentBegins}.
    """

    def __init__(self, before, indentBegins):
        """
        @param before: the paragraph that introduced this block; its tracker
            and indentation are inherited.
        @param indentBegins: indentation width of the introducing line.
        """
        self.lines = []
        self.before = before
        self.pointTracker = before.pointTracker
        self.indentBegins = indentBegins
        inherited = before.fixedIndent + before.hangIndent
        self.fixedIndent = inherited + before.otherIndent
        self.more = None
        self.prev = None

    def islist(self):
        """
        It's not a list.
        """
        return False

    def connect(self, more):
        """
        Link C{more} in as the paragraph following this one and return it.
        """
        self.more = more
        more.prev = self
        return more

    @property
    def originalIndent(self):
        """
        The indentation width of the line that introduced this block.
        """
        return self.indentBegins

    @property
    def words(self):
        """
        Used by wrapper below to see if there are any words in a given
        paragraph and whether it should be skipped.
        """
        return bool(self.lines)

    def matchesTag(self, other):
        """
        A preformatted block never pairs up with another paragraph.
        """
        return False

    def add(self, line):
        """
        Feed one input line to this block.

        @return: this block while the line belongs to it; otherwise
            whatever the newly started regular paragraph returns for it.
        """
        visible = self.pointTracker.peek(line)
        if not visible.strip():
            # Blank lines are kept (minus whitespace) as part of the block.
            self.lines.append(line.strip())
            return self
        leading = len(visible) - len(visible.lstrip())
        if leading <= self.indentBegins:
            # Dedented back out: the block is over.
            follower = self.connect(self.before.genRegular())
            return follower.add(line)
        self.lines.append(line.rstrip())
        return self

    def fixIndentation(self):
        """
        Drop blank lines at both ends and remove the indentation that all
        non-blank lines share.
        """
        lines = self.lines
        head = 0
        while head < len(lines) and not lines[head].strip():
            head += 1
        del lines[:head]
        while lines and not lines[-1].strip():
            lines.pop()
        if not lines:
            return
        peek = self.pointTracker.peek
        visibleLines = [peek(raw) for raw in lines]
        indents = [len(shown) - len(shown.lstrip())
                   for shown in visibleLines if shown.strip()]
        commonLeadingIndent = min(indents) if indents else 0
        reindented = []
        for shown, raw in zip(visibleLines, lines):
            if shown != raw and raw[:commonLeadingIndent].strip():
                # There's a marker, and it's in the leading whitespace.
                # Explicitly reposition the marker at the beginning of the
                # fixed indentation.
                reindented.append(self.pointTracker.marker +
                                  shown[commonLeadingIndent:])
            else:
                reindented.append(raw.rstrip()[commonLeadingIndent:])
        self.lines = reindented

    def wrap(self, output, indentation, width, initialBlank, singleSpace):
        """
        Write the block to C{output}, one input line per output line.

        C{width}, C{initialBlank} and C{singleSpace} are accepted for
        interface compatibility and not used.
        """
        # OK, now we know about all the lines we're going to know about.
        self.fixIndentation()
        prefix = indentation + " " + self.fixedIndent
        for line in self.lines:
            # Lines that are empty apart from the marker get no indentation.
            if self.pointTracker.peek(line):
                output.write(prefix)
            output.write(self.pointTracker.scan(line, output.tell()))
            output.write("\n")
class PointTracker(object):
    """
    Object for keeping track of where the insertion points are.

    The cursor position is represented by a unique marker string spliced
    into the text; the methods here let callers see through the marker and
    record where it ends up in the output.
    """

    def __init__(self, point):
        """
        @param point: offset of the cursor from the start of the text.
        """
        self.point = point
        self.marker = "{" + makeID() + "}"
        # Output offsets at which the marker was found by scan().
        self.outPoints = []

    def annotate(self, text):
        """
        Add point references to a block of text.
        """
        before, after = text[:self.point], text[self.point:]
        return before + self.marker + after

    def filterWords(self, words):
        """
        Yield the marker-free form of every entry in C{words} that is a real
        word rather than a bare marker.
        """
        return (self.peek(word) for word in words if self.isWord(word))

    def isWord(self, text):
        """
        Is the given word actually a word, or just an artifact of the
        point-tracking process?  If it's just the point marker by itself,
        then no, it isn't, and don't insert additional whitespace after it.
        """
        return text != self.marker

    def lengthOf(self, word):
        """
        How long would this word be if it didn't have any point-markers in it?
        """
        visible = self.peek(word)
        return len(visible)

    def peek(self, word):
        """
        What would this word look like if it didn't have any point-markers in
        it?
        """
        return word.replace(self.marker, "")

    def extractPoints(self, text):
        """
        Return a C{list} of all point markers contained in the text.
        """
        return [self.marker] if self.marker in text else []

    def scan(self, text, offset):
        """
        Scan some text for point markers, remember them, and remove them.

        @param text: the text about to be written.
        @param offset: the output position at which C{text} will start.
        @return: C{text} with the marker removed.
        """
        position = text.find(self.marker)
        if position != -1:
            self.outPoints.append(position + offset)
            return self.peek(text)
        return text
def wrapPythonDocstring(docstring, output, indentation=" ",
                        width=79, point=0, initialBlank=True,
                        singleSpace=False):
    """
    Wrap a given Python docstring.

    @param docstring: the docstring itself (just the stuff between the quotes).
    @type docstring: unicode

    @param output: The unicode output file to write the wrapped docstring to.
    @type output: L{file}-like (C{write} takes unicode.)

    @param indentation: a string (consisting only of spaces) indicating the
        amount of space to shift by.  NOTE(review): the default visible here
        is a single space although this parameter was previously documented
        as "always 4 spaces" -- confirm which is intended.
    @type indentation: L{unicode}

    @param width: The maximum number of characters allowed in a wrapped line.
    @type width: L{int}

    @param point: The location of the cursor in the text, as an offset from the
        beginning of the docstring.  If this function is being used from within
        a graphical editor, this parameter can be used (in addition to the
        return value of this function) to reposition the cursor at the relative
        position which the user will expect.

    @param initialBlank: whether the first paragraph starts on its own line;
        it is forced to true for every paragraph after the first.

    @param singleSpace: passed through to each paragraph's C{wrap}.
        NOTE(review): despite the name, C{RegularParagraph.wrap} puts two
        spaces after a sentence end when this is true and one when it is
        false; the command-line entry point compensates by declaring
        C{--single-space} with C{store_false}.

    @return: The new location of the cursor.
    """
    # TODO: multiple points; usable, for example, for start and end of a
    # currently active selection.
    tracker = PointTracker(point)
    head = RegularParagraph(tracker)
    tail = head
    for rawLine in tracker.annotate(docstring).split("\n"):
        tail = tail.add(rawLine)

    previous = None
    for current in head.all():
        # Separate paragraphs with a blank line unless this one pairs up
        # with the paragraph before it.
        if (initialBlank and current.words
                and not current.matchesTag(previous)):
            output.write("\n")
        previous = current
        current.wrap(output, indentation, width, initialBlank, singleSpace)
        initialBlank = True
    output.write(indentation)
    if tracker.outPoints:
        return tracker.outPoints[0]
    return 0
def indentHeuristic(lines, io):
    """
    Determine the indentation.

    The first line only decides whether the docstring opens with a blank
    line; the indentation width is taken from the first later line that has
    any leading whitespace.

    @param lines: the docstring split into lines.
    @param io: unused.
    @return: a C{(initialBlank, indentation)} pair, or C{(True, 0)} when no
        indented line is found.
    """
    remaining = iter(lines)
    try:
        opening = next(remaining)
    except StopIteration:
        return True, 0
    initialBlank = not opening
    for text in remaining:
        depth = len(text) - len(text.lstrip())
        if depth:
            return (initialBlank, depth)
    # TODO: investigate the case where this happens.
    return True, 0
# A function with no body other than its docstring; the docstring text is
# the sample.  Nothing in the visible part of this file calls it.
def sampleDocstring():
    """This is a sample docstring where the last word is a little bit too long
    go go.
    This is another part of the docstring.
    """
def main(argv, indata):
    """
    Command-line entry point: wrap C{indata} according to C{argv}.

    @param argv: the full argument vector, program name included.
    @param indata: the docstring text to wrap.
    @return: the wrapped text; when C{--offset} is given it is prefixed with
        the new cursor offset and a space.  With C{--linewise} the first and
        last lines of that result are dropped.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--offset", type=int)
    parser.add_argument("--indent", type=int)
    parser.add_argument("--width", type=int, default=79)
    parser.add_argument("--linewise", action='store_true')
    # store_false on purpose: the value defaults to True and the flag turns
    # it off, which matches how the wrapping code reads singleSpace (true
    # means two spaces after a sentence end).
    parser.add_argument("--single-space", action='store_false')
    options = parser.parse_args(argv[1:])

    wrapped = StringIO()
    inputLines = indata.split("\n")
    if options.linewise:
        # Make the indentation heuristic see a leading blank line.
        inputLines.insert(0, "")
    initialBlank, indentCount = indentHeuristic(inputLines, wrapped)
    point = 0 if options.offset is None else options.offset
    if options.indent is not None:
        indentCount = options.indent

    newPoint = wrapPythonDocstring(
        indata, wrapped,
        indentation=" " * indentCount,
        width=options.width,
        point=point,
        initialBlank=initialBlank,
        singleSpace=options.single_space
    )

    result = wrapped.getvalue()
    if options.offset is not None:
        result = "{:d}".format(newPoint) + " " + result
    if options.linewise:
        result = "\n".join(result.split("\n")[1:-1])
    return result
if __name__ == '__main__':
    # Read the whole docstring from stdin, wrap it, and emit the result.
    rawInput = sys.stdin.read()
    wrappedText = main(sys.argv, fromStdin(rawInput))
    sys.stdout.write(toStdout(wrappedText))
    sys.stdout.flush()