diff --git a/pep8.py b/pep8.py index c22e63ee..dbc58487 100755 --- a/pep8.py +++ b/pep8.py @@ -104,7 +104,6 @@ INDENT_REGEX = re.compile(r'([ \t]*)') RAISE_COMMA_REGEX = re.compile(r'raise\s+\w+\s*,') RERAISE_COMMA_REGEX = re.compile(r'raise\s+\w+\s*,.*,\s*\w+\s*$') -ERRORCODE_REGEX = re.compile(r'\b[A-Z]\d{3}\b') DOCSTRING_REGEX = re.compile(r'u?r?["\']') EXTRANEOUS_WHITESPACE_REGEX = re.compile(r'[[({] | []}),;:]') WHITESPACE_AFTER_COMMA_REGEX = re.compile(r'[,;:]\s*(?: |\t)') @@ -124,1991 +123,2015 @@ ############################################################################## -# Plugins (check functions) for physical lines +# Framework to run all checks ############################################################################## +thismodule = sys.modules[__name__] -def tabs_or_spaces(physical_line, indent_char): - r"""Never mix tabs and spaces. +for i in range(0, 999): + for c in range(ord('A'), ord('Z') + 1): + name_value = '%s%s' % (chr(c), i) + setattr(thismodule, name_value, name_value) - The most popular way of indenting Python is with spaces only. The - second-most popular way is with tabs only. Code indented with a mixture - of tabs and spaces should be converted to using spaces exclusively. When - invoking the Python command line interpreter with the -t option, it issues - warnings about code that illegally mixes tabs and spaces. When using -tt - these warnings become errors. These options are highly recommended! +_checks = {'physical_line': {}, 'logical_line': {}, 'tree': {}} - Okay: if a == 0:\n a = 1\n b = 1 - E101: if a == 0:\n a = 1\n\tb = 1 - """ - indent = INDENT_REGEX.match(physical_line).group(1) - for offset, char in enumerate(indent): - if char != indent_char: - return offset, "E101 indentation contains mixed spaces and tabs" +def _get_parameters(function): + if sys.version_info >= (3, 3): + return list(inspect.signature(function).parameters) + else: + return inspect.getargspec(function)[0] -def tabs_obsolete(physical_line): - r"""For new projects, spaces-only are strongly recommended over tabs. - Okay: if True:\n return - W191: if True:\n\treturn - """ - indent = INDENT_REGEX.match(physical_line).group(1) - if '\t' in indent: - return indent.index('\t'), "W191 indentation contains tabs" +if '' == ''.encode(): + # Python 2: implicit encoding. + def readlines(filename): + """Read the source code.""" + with open(filename, 'rU') as f: + return f.readlines() + isidentifier = re.compile(r'[a-zA-Z_]\w*$').match + stdin_get_value = sys.stdin.read +else: + # Python 3 + def readlines(filename): + """Read the source code.""" + try: + with open(filename, 'rb') as f: + (coding, lines) = tokenize.detect_encoding(f.readline) + f = TextIOWrapper(f, coding, line_buffering=True) + return [l.decode(coding) for l in lines] + f.readlines() + except (LookupError, SyntaxError, UnicodeError): + # Fall back if file encoding is improperly declared + with open(filename, encoding='latin-1') as f: + return f.readlines() + isidentifier = str.isidentifier + def stdin_get_value(): + return TextIOWrapper(sys.stdin.buffer, errors='ignore').read() +noqa = re.compile(r'# no(?:qa|pep8)\b', re.I).search -def trailing_whitespace(physical_line): - r"""Trailing whitespace is superfluous. - The warning returned varies on whether the line itself is blank, for easier - filtering for those who want to indent their blank lines. +def expand_indent(line): + r"""Return the amount of indentation. - Okay: spam(1)\n# - W291: spam(1) \n# - W293: class Foo(object):\n \n bang = 12 + Tabs are expanded to the next multiple of 8. + + >>> expand_indent(' ') + 4 + >>> expand_indent('\t') + 8 + >>> expand_indent(' \t') + 8 + >>> expand_indent(' \t') + 16 """ - physical_line = physical_line.rstrip('\n') # chr(10), newline - physical_line = physical_line.rstrip('\r') # chr(13), carriage return - physical_line = physical_line.rstrip('\x0c') # chr(12), form feed, ^L - stripped = physical_line.rstrip(' \t\v') - if physical_line != stripped: - if stripped: - return len(stripped), "W291 trailing whitespace" + if '\t' not in line: + return len(line) - len(line.lstrip()) + result = 0 + for char in line: + if char == '\t': + result = result // 8 * 8 + 8 + elif char == ' ': + result += 1 else: - return 0, "W293 blank line contains whitespace" - + break + return result -def trailing_blank_lines(physical_line, lines, line_number, total_lines): - r"""Trailing blank lines are superfluous. - Okay: spam(1) - W391: spam(1)\n +def mute_string(text): + """Replace contents with 'xxx' to prevent syntax matching. - However the last line should end with a new line (warning W292). + >>> mute_string('"abc"') + '"xxx"' + >>> mute_string("'''abc'''") + "'''xxx'''" + >>> mute_string("r'abc'") + "r'xxx'" """ - if line_number == total_lines: - stripped_last_line = physical_line.rstrip() - if not stripped_last_line: - return 0, "W391 blank line at end of file" - if stripped_last_line == physical_line: - return len(physical_line), "W292 no newline at end of file" + # String modifiers (e.g. u or r) + start = text.index(text[-1]) + 1 + end = len(text) - 1 + # Triple quotes + if text[-3:] in ('"""', "'''"): + start += 2 + end -= 2 + return text[:start] + 'x' * (end - start) + text[end:] -def maximum_line_length(physical_line, max_line_length, multiline): - r"""Limit all lines to a maximum of 79 characters. +def parse_udiff(diff, patterns=None, parent='.'): + """Return a dictionary of matching lines.""" + # For each file of the diff, the entry key is the filename, + # and the value is a set of row numbers to consider. + rv = {} + path = nrows = None + for line in diff.splitlines(): + if nrows: + if line[:1] != '-': + nrows -= 1 + continue + if line[:3] == '@@ ': + hunk_match = HUNK_REGEX.match(line) + (row, nrows) = [int(g or '1') for g in hunk_match.groups()] + rv[path].update(range(row, row + nrows)) + elif line[:3] == '+++': + path = line[4:].split('\t', 1)[0] + if path[:2] == 'b/': + path = path[2:] + rv[path] = set() + return dict([(os.path.join(parent, path), rows) + for (path, rows) in rv.items() + if rows and filename_match(path, patterns)]) - There are still many devices around that are limited to 80 character - lines; plus, limiting windows to 80 characters makes it possible to have - several windows side-by-side. The default wrapping on such devices looks - ugly. Therefore, please limit all lines to a maximum of 79 characters. - For flowing long blocks of text (docstrings or comments), limiting the - length to 72 characters is recommended. - Reports error E501. +def normalize_paths(value, parent=os.curdir): + """Parse a comma-separated list of paths. + + Return a list of absolute paths. """ - line = physical_line.rstrip() - length = len(line) - if length > max_line_length and not noqa(line): - # Special case for long URLs in multi-line docstrings or comments, - # but still report the error when the 72 first chars are whitespaces. - chunks = line.split() - if ((len(chunks) == 1 and multiline) or - (len(chunks) == 2 and chunks[0] == '#')) and \ - len(line) - len(chunks[-1]) < max_line_length - 7: - return - if hasattr(line, 'decode'): # Python 2 - # The line could contain multi-byte characters - try: - length = len(line.decode('utf-8')) - except UnicodeError: - pass - if length > max_line_length: - return (max_line_length, "E501 line too long " - "(%d > %d characters)" % (length, max_line_length)) + if not value: + return [] + if isinstance(value, list): + return value + paths = [] + for path in value.split(','): + path = path.strip() + if '/' in path: + path = os.path.abspath(os.path.join(parent, path)) + paths.append(path.rstrip('/')) + return paths -############################################################################## -# Plugins (check functions) for logical lines -############################################################################## +def filename_match(filename, patterns, default=True): + """Check if patterns contains a pattern that matches filename. + If patterns is unspecified, this always returns True. + """ + if not patterns: + return default + return any(fnmatch(filename, pattern) for pattern in patterns) -def blank_lines(logical_line, blank_lines, indent_level, line_number, - blank_before, previous_logical, previous_indent_level): - r"""Separate top-level function and class definitions with two blank lines. - Method definitions inside a class are separated by a single blank line. +def _is_eol_token(token): + return token[0] in NEWLINE or token[4][token[3][1]:].lstrip() == '\\\n' +if COMMENT_WITH_NL: + def _is_eol_token(token, _eol_token=_is_eol_token): + return _eol_token(token) or (token[0] == tokenize.COMMENT and + token[1] == token[4]) - Extra blank lines may be used (sparingly) to separate groups of related - functions. Blank lines may be omitted between a bunch of related - one-liners (e.g. a set of dummy implementations). - Use blank lines in functions, sparingly, to indicate logical sections. +class BaseReport(object): + """Collect the results of the checks.""" - Okay: def a():\n pass\n\n\ndef b():\n pass - Okay: def a():\n pass\n\n\n# Foo\n# Bar\n\ndef b():\n pass + print_filename = False - E301: class Foo:\n b = 0\n def bar():\n pass - E302: def a():\n pass\n\ndef b(n):\n pass - E303: def a():\n pass\n\n\n\ndef b(n):\n pass - E303: def a():\n\n\n\n pass - E304: @decorator\n\ndef a():\n pass - """ - if line_number < 3 and not previous_logical: - return # Don't expect blank lines before the first line - if previous_logical.startswith('@'): - if blank_lines: - yield 0, "E304 blank lines found after function decorator" - elif blank_lines > 2 or (indent_level and blank_lines == 2): - yield 0, "E303 too many blank lines (%d)" % blank_lines - elif logical_line.startswith(('def ', 'class ', '@')): - if indent_level: - if not (blank_before or previous_indent_level < indent_level or - DOCSTRING_REGEX.match(previous_logical)): - yield 0, "E301 expected 1 blank line, found 0" - elif blank_before != 2: - yield 0, "E302 expected 2 blank lines, found %d" % blank_before + def __init__(self, options): + self._benchmark_keys = options.benchmark_keys + self._ignore_code = options.ignore_code + # Results + self.elapsed = 0 + self.total_errors = 0 + self.counters = dict.fromkeys(self._benchmark_keys, 0) + self.messages = {} + def start(self): + """Start the timer.""" + self._start_time = time.time() -def extraneous_whitespace(logical_line): - r"""Avoid extraneous whitespace. + def stop(self): + """Stop the timer.""" + self.elapsed = time.time() - self._start_time - Avoid extraneous whitespace in these situations: - - Immediately inside parentheses, brackets or braces. - - Immediately before a comma, semicolon, or colon. + def init_file(self, filename, lines, expected, line_offset): + """Signal a new file.""" + self.filename = filename + self.lines = lines + self.expected = expected or () + self.line_offset = line_offset + self.file_errors = 0 + self.counters['files'] += 1 + self.counters['physical lines'] += len(lines) - Okay: spam(ham[1], {eggs: 2}) - E201: spam( ham[1], {eggs: 2}) - E201: spam(ham[ 1], {eggs: 2}) - E201: spam(ham[1], { eggs: 2}) - E202: spam(ham[1], {eggs: 2} ) - E202: spam(ham[1 ], {eggs: 2}) - E202: spam(ham[1], {eggs: 2 }) - - E203: if x == 4: print x, y; x, y = y , x - E203: if x == 4: print x, y ; x, y = y, x - E203: if x == 4 : print x, y; x, y = y, x - """ - line = logical_line - for match in EXTRANEOUS_WHITESPACE_REGEX.finditer(line): - text = match.group() - char = text.strip() - found = match.start() - if text == char + ' ': - # assert char in '([{' - yield found + 1, "E201 whitespace after '%s'" % char - elif line[found - 1] != ',': - code = ('E202' if char in '}])' else 'E203') # if char in ',;:' - yield found, "%s whitespace before '%s'" % (code, char) + def increment_logical_line(self): + """Signal a new logical line.""" + self.counters['logical lines'] += 1 + def error(self, line_number, offset, text, check): + """Report an error, according to options.""" + code = text[:4] + if self._ignore_code(code): + return + if code in self.counters: + self.counters[code] += 1 + else: + self.counters[code] = 1 + self.messages[code] = text[5:] + # Don't care about expected errors or warnings + if code in self.expected: + return + if self.print_filename and not self.file_errors: + print(self.filename) + self.file_errors += 1 + self.total_errors += 1 + return code -def whitespace_around_keywords(logical_line): - r"""Avoid extraneous whitespace around keywords. + def get_file_results(self): + """Return the count of errors and warnings for this file.""" + return self.file_errors - Okay: True and False - E271: True and False - E272: True and False - E273: True and\tFalse - E274: True\tand False - """ - for match in KEYWORD_REGEX.finditer(logical_line): - before, after = match.groups() + def get_count(self, prefix=''): + """Return the total count of errors and warnings.""" + return sum([self.counters[key] + for key in self.messages if key.startswith(prefix)]) - if '\t' in before: - yield match.start(1), "E274 tab before keyword" - elif len(before) > 1: - yield match.start(1), "E272 multiple spaces before keyword" + def get_statistics(self, prefix=''): + """Get statistics for message codes that start with the prefix. - if '\t' in after: - yield match.start(2), "E273 tab after keyword" - elif len(after) > 1: - yield match.start(2), "E271 multiple spaces after keyword" + prefix='' matches all errors and warnings + prefix='E' matches all errors + prefix='W' matches all warnings + prefix='E4' matches all errors that have to do with imports + """ + return ['%-7s %s %s' % (self.counters[key], key, self.messages[key]) + for key in sorted(self.messages) if key.startswith(prefix)] + def print_statistics(self, prefix=''): + """Print overall statistics (number of errors and warnings).""" + for line in self.get_statistics(prefix): + print(line) -def missing_whitespace(logical_line): - r"""Each comma, semicolon or colon should be followed by whitespace. + def print_benchmark(self): + """Print benchmark numbers.""" + print('%-7.2f %s' % (self.elapsed, 'seconds elapsed')) + if self.elapsed: + for key in self._benchmark_keys: + print('%-7d %s per second (%d total)' % + (self.counters[key] / self.elapsed, key, + self.counters[key])) - Okay: [a, b] - Okay: (3,) - Okay: a[1:4] - Okay: a[:4] - Okay: a[1:] - Okay: a[1:4:2] - E231: ['a','b'] - E231: foo(bar,baz) - E231: [{'a':'b'}] - """ - line = logical_line - for index in range(len(line) - 1): - char = line[index] - if char in ',;:' and line[index + 1] not in WHITESPACE: - before = line[:index] - if char == ':' and before.count('[') > before.count(']') and \ - before.rfind('{') < before.rfind('['): - continue # Slice syntax, no space required - if char == ',' and line[index + 1] == ')': - continue # Allow tuple with only one element: (3,) - yield index, "E231 missing whitespace after '%s'" % char +class FileReport(BaseReport): + """Collect the results of the checks and print only the filenames.""" + print_filename = True -def indentation(logical_line, previous_logical, indent_char, - indent_level, previous_indent_level): - r"""Use 4 spaces per indentation level. - For really old code that you don't want to mess up, you can continue to - use 8-space tabs. +class StandardReport(BaseReport): + """Collect and print the results of the checks.""" - Okay: a = 1 - Okay: if a == 0:\n a = 1 - E111: a = 1 - E114: # a = 1 + def __init__(self, options): + super(StandardReport, self).__init__(options) + self._fmt = REPORT_FORMAT.get(options.format.lower(), + options.format) + self._repeat = options.repeat + self._show_source = options.show_source + self._show_pep8 = options.show_pep8 - Okay: for item in items:\n pass - E112: for item in items:\npass - E115: for item in items:\n# Hi\n pass + def init_file(self, filename, lines, expected, line_offset): + """Signal a new file.""" + self._deferred_print = [] + return super(StandardReport, self).init_file( + filename, lines, expected, line_offset) - Okay: a = 1\nb = 2 - E113: a = 1\n b = 2 - E116: a = 1\n # b = 2 - """ - c = 0 if logical_line else 3 - tmpl = "E11%d %s" if logical_line else "E11%d %s (comment)" - if indent_level % 4: - yield 0, tmpl % (1 + c, "indentation is not a multiple of four") - indent_expect = previous_logical.endswith(':') - if indent_expect and indent_level <= previous_indent_level: - yield 0, tmpl % (2 + c, "expected an indented block") - elif not indent_expect and indent_level > previous_indent_level: - yield 0, tmpl % (3 + c, "unexpected indentation") + def error(self, line_number, offset, text, check): + """Report an error, according to options.""" + code = super(StandardReport, self).error(line_number, offset, + text, check) + if code and (self.counters[code] == 1 or self._repeat): + self._deferred_print.append( + (line_number, offset, code, text[5:], check.__doc__)) + return code + def get_file_results(self): + """Print the result and return the overall count for this file.""" + self._deferred_print.sort() + for line_number, offset, code, text, doc in self._deferred_print: + print(self._fmt % { + 'path': self.filename, + 'row': self.line_offset + line_number, 'col': offset + 1, + 'code': code, 'text': text, + }) + if self._show_source: + if line_number > len(self.lines): + line = '' + else: + line = self.lines[line_number - 1] + print(line.rstrip()) + print(re.sub(r'\S', ' ', line[:offset]) + '^') + if self._show_pep8 and doc: + print(' ' + doc.strip()) -def continued_indentation(logical_line, tokens, indent_level, hang_closing, - indent_char, noqa, verbose): - r"""Continuation lines indentation. + # stdout is block buffered when not stdout.isatty(). + # line can be broken where buffer boundary since other processes + # write to same file. + # flush() after print() to avoid buffer boundary. + # Typical buffer size is 8192. line written safely when + # len(line) < 8192. + sys.stdout.flush() + return self.file_errors - Continuation lines should align wrapped elements either vertically - using Python's implicit line joining inside parentheses, brackets - and braces, or using a hanging indent. - When using a hanging indent these considerations should be applied: - - there should be no arguments on the first line, and - - further indentation should be used to clearly distinguish itself as a - continuation line. +class DiffReport(StandardReport): + """Collect and print the results for the changed lines only.""" - Okay: a = (\n) - E123: a = (\n ) + def __init__(self, options): + super(DiffReport, self).__init__(options) + self._selected = options.selected_lines - Okay: a = (\n 42) - E121: a = (\n 42) - E122: a = (\n42) - E123: a = (\n 42\n ) - E124: a = (24,\n 42\n) - E125: if (\n b):\n pass - E126: a = (\n 42) - E127: a = (24,\n 42) - E128: a = (24,\n 42) - E129: if (a or\n b):\n pass - E131: a = (\n 42\n 24) - """ - first_row = tokens[0][2][0] - nrows = 1 + tokens[-1][2][0] - first_row - if noqa or nrows == 1: - return + def error(self, line_number, offset, text, check): + if line_number not in self._selected[self.filename]: + return + return super(DiffReport, self).error(line_number, offset, text, check) - # indent_next tells us whether the next block is indented; assuming - # that it is indented by 4 spaces, then we should not allow 4-space - # indents on the final continuation line; in turn, some other - # indents are allowed to have an extra 4 spaces. - indent_next = logical_line.endswith(':') - row = depth = 0 - valid_hangs = (4,) if indent_char != '\t' else (4, 8) - # remember how many brackets were opened on each line - parens = [0] * nrows - # relative indents of physical lines - rel_indent = [0] * nrows - # for each depth, collect a list of opening rows - open_rows = [[0]] - # for each depth, memorize the hanging indentation - hangs = [None] - # visual indents - indent_chances = {} - last_indent = tokens[0][2] - visual_indent = None - last_token_multiline = False - # for each depth, memorize the visual indent column - indent = [last_indent[1]] - if verbose >= 3: - print(">>> " + tokens[0][4].rstrip()) +def read_config(options, args, arglist, parser): + """Read and parse configurations - for token_type, text, start, end, line in tokens: + If a config file is specified on the command line with the "--config" + option, then only it is used for configuration. - newline = row < start[0] - first_row - if newline: - row = start[0] - first_row - newline = not last_token_multiline and token_type not in NEWLINE + Otherwise, the user configuration (~/.config/pep8) and any local + configurations in the current directory or above will be merged together + (in that order) using the read method of ConfigParser. + """ + config = RawConfigParser() - if newline: - # this is the beginning of a continuation line. - last_indent = start - if verbose >= 3: - print("... " + line.rstrip()) + cli_conf = options.config - # record the initial indent. - rel_indent[row] = expand_indent(line) - indent_level + local_dir = os.curdir - # identify closing bracket - close_bracket = (token_type == tokenize.OP and text in ']})') + if USER_CONFIG and os.path.isfile(USER_CONFIG): + if options.verbose: + print('user configuration: %s' % USER_CONFIG) + config.read(USER_CONFIG) - # is the indent relative to an opening bracket line? - for open_row in reversed(open_rows[depth]): - hang = rel_indent[row] - rel_indent[open_row] - hanging_indent = hang in valid_hangs - if hanging_indent: - break - if hangs[depth]: - hanging_indent = (hang == hangs[depth]) - # is there any chance of visual indent? - visual_indent = (not close_bracket and hang > 0 and - indent_chances.get(start[1])) + parent = tail = args and os.path.abspath(os.path.commonprefix(args)) + while tail: + if config.read(os.path.join(parent, fn) for fn in PROJECT_CONFIG): + local_dir = parent + if options.verbose: + print('local configuration: in %s' % parent) + break + (parent, tail) = os.path.split(parent) - if close_bracket and indent[depth]: - # closing bracket for visual indent - if start[1] != indent[depth]: - yield (start, "E124 closing bracket does not match " - "visual indentation") - elif close_bracket and not hang: - # closing bracket matches indentation of opening bracket's line - if hang_closing: - yield start, "E133 closing bracket is missing indentation" - elif indent[depth] and start[1] < indent[depth]: - if visual_indent is not True: - # visual indent is broken - yield (start, "E128 continuation line " - "under-indented for visual indent") - elif hanging_indent or (indent_next and rel_indent[row] == 8): - # hanging indent is verified - if close_bracket and not hang_closing: - yield (start, "E123 closing bracket does not match " - "indentation of opening bracket's line") - hangs[depth] = hang - elif visual_indent is True: - # visual indent is verified - indent[depth] = start[1] - elif visual_indent in (text, str): - # ignore token lined up with matching one from a previous line - pass - else: - # indent is broken - if hang <= 0: - error = "E122", "missing indentation or outdented" - elif indent[depth]: - error = "E127", "over-indented for visual indent" - elif not close_bracket and hangs[depth]: - error = "E131", "unaligned for hanging indent" - else: - hangs[depth] = hang - if hang > 4: - error = "E126", "over-indented for hanging indent" - else: - error = "E121", "under-indented for hanging indent" - yield start, "%s continuation line %s" % error + if cli_conf and os.path.isfile(cli_conf): + if options.verbose: + print('cli configuration: %s' % cli_conf) + config.read(cli_conf) - # look for visual indenting - if (parens[row] and - token_type not in (tokenize.NL, tokenize.COMMENT) and - not indent[depth]): - indent[depth] = start[1] - indent_chances[start[1]] = True - if verbose >= 4: - print("bracket depth %s indent to %s" % (depth, start[1])) - # deal with implicit string concatenation - elif (token_type in (tokenize.STRING, tokenize.COMMENT) or - text in ('u', 'ur', 'b', 'br')): - indent_chances[start[1]] = str - # special case for the "if" statement because len("if (") == 4 - elif not indent_chances and not row and not depth and text == 'if': - indent_chances[end[1] + 1] = True - elif text == ':' and line[end[1]:].isspace(): - open_rows[depth].append(row) + pep8_section = parser.prog + if config.has_section(pep8_section): + option_list = dict([(o.dest, o.type or o.action) + for o in parser.option_list]) - # keep track of bracket depth - if token_type == tokenize.OP: - if text in '([{': - depth += 1 - indent.append(0) - hangs.append(None) - if len(open_rows) == depth: - open_rows.append([]) - open_rows[depth].append(row) - parens[row] += 1 - if verbose >= 4: - print("bracket depth %s seen, col %s, visual min = %s" % - (depth, start[1], indent[depth])) - elif text in ')]}' and depth > 0: - # parent indents should not be more than this one - prev_indent = indent.pop() or last_indent[1] - hangs.pop() - for d in range(depth): - if indent[d] > prev_indent: - indent[d] = 0 - for ind in list(indent_chances): - if ind >= prev_indent: - del indent_chances[ind] - del open_rows[depth + 1:] - depth -= 1 - if depth: - indent_chances[indent[depth]] = True - for idx in range(row, -1, -1): - if parens[idx]: - parens[idx] -= 1 - break - assert len(indent) == depth + 1 - if start[1] not in indent_chances: - # allow to line up tokens - indent_chances[start[1]] = text + # First, read the default values + (new_options, __) = parser.parse_args([]) - last_token_multiline = (start[0] != end[0]) - if last_token_multiline: - rel_indent[end[0] - first_row] = rel_indent[row] + # Second, parse the configuration + for opt in config.options(pep8_section): + if opt.replace('_', '-') not in parser.config_options: + print(" unknown option '%s' ignored" % opt) + continue + if options.verbose > 1: + print(" %s = %s" % (opt, config.get(pep8_section, opt))) + normalized_opt = opt.replace('-', '_') + opt_type = option_list[normalized_opt] + if opt_type in ('int', 'count'): + value = config.getint(pep8_section, opt) + elif opt_type == 'string': + value = config.get(pep8_section, opt) + if normalized_opt == 'exclude': + value = normalize_paths(value, local_dir) + else: + assert opt_type in ('store_true', 'store_false') + value = config.getboolean(pep8_section, opt) + setattr(new_options, normalized_opt, value) - if indent_next and expand_indent(line) == indent_level + 4: - pos = (start[0], indent[0] + 4) - if visual_indent: - code = "E129 visually indented line" - else: - code = "E125 continuation line" - yield pos, "%s with same indent as next logical line" % code + # Third, overwrite with the command-line options + (options, __) = parser.parse_args(arglist, values=new_options) + options.doctest = options.testsuite = False + return options -def whitespace_before_parameters(logical_line, tokens): - r"""Avoid extraneous whitespace. +def _parse_multi_options(options, split_token=','): + r"""Split and strip and discard empties. - Avoid extraneous whitespace in the following situations: - - before the open parenthesis that starts the argument list of a - function call. - - before the open parenthesis that starts an indexing or slicing. + Turns the following: - Okay: spam(1) - E211: spam (1) + A, + B, - Okay: dict['key'] = list[index] - E211: dict ['key'] = list[index] - E211: dict['key'] = list [index] + into ["A", "B"] """ - prev_type, prev_text, __, prev_end, __ = tokens[0] - for index in range(1, len(tokens)): - token_type, text, start, end, __ = tokens[index] - if (token_type == tokenize.OP and - text in '([' and - start != prev_end and - (prev_type == tokenize.NAME or prev_text in '}])') and - # Syntax "class A (B):" is allowed, but avoid it - (index < 2 or tokens[index - 2][1] != 'class') and - # Allow "return (a.foo for a in range(5))" - not keyword.iskeyword(prev_text)): - yield prev_end, "E211 whitespace before '%s'" % text - prev_type = token_type - prev_text = text - prev_end = end + if options: + return [o.strip() for o in options.split(split_token) if o.strip()] + else: + return options -def whitespace_around_operator(logical_line): - r"""Avoid extraneous whitespace around an operator. +def get_parser(prog='pep8', version=__version__): + parser = OptionParser(prog=prog, version=version, + usage="%prog [options] input ...") + parser.config_options = [ + 'exclude', 'filename', 'select', 'ignore', 'max-line-length', + 'hang-closing', 'count', 'format', 'quiet', 'show-pep8', + 'show-source', 'statistics', 'verbose'] + parser.add_option('-v', '--verbose', default=0, action='count', + help="print status messages, or debug with -vv") + parser.add_option('-q', '--quiet', default=0, action='count', + help="report only file names, or nothing with -qq") + parser.add_option('-r', '--repeat', default=True, action='store_true', + help="(obsolete) show all occurrences of the same error") + parser.add_option('--first', action='store_false', dest='repeat', + help="show first occurrence of each error") + parser.add_option('--exclude', metavar='patterns', default=DEFAULT_EXCLUDE, + help="exclude files or directories which match these " + "comma separated patterns (default: %default)") + parser.add_option('--filename', metavar='patterns', default='*.py', + help="when parsing directories, only check filenames " + "matching these comma separated patterns " + "(default: %default)") + parser.add_option('--select', metavar='errors', default='', + help="select errors and warnings (e.g. E,W6)") + parser.add_option('--ignore', metavar='errors', default='', + help="skip errors and warnings (e.g. E4,W) " + "(default: %s)" % DEFAULT_IGNORE) + parser.add_option('--show-source', action='store_true', + help="show source code for each error") + parser.add_option('--show-pep8', action='store_true', + help="show text of PEP 8 for each error " + "(implies --first)") + parser.add_option('--statistics', action='store_true', + help="count errors and warnings") + parser.add_option('--count', action='store_true', + help="print total number of errors and warnings " + "to standard error and set exit code to 1 if " + "total is not null") + parser.add_option('--max-line-length', type='int', metavar='n', + default=MAX_LINE_LENGTH, + help="set maximum allowed line length " + "(default: %default)") + parser.add_option('--hang-closing', action='store_true', + help="hang closing bracket instead of matching " + "indentation of opening bracket's line") + parser.add_option('--format', metavar='format', default='default', + help="set the error format [default|pylint|]") + parser.add_option('--diff', action='store_true', + help="report changes only within line number ranges in " + "the unified diff received on STDIN") + group = parser.add_option_group("Testing Options") + if os.path.exists(TESTSUITE_PATH): + group.add_option('--testsuite', metavar='dir', + help="run regression tests from dir") + group.add_option('--doctest', action='store_true', + help="run doctest on myself") + group.add_option('--benchmark', action='store_true', + help="measure processing speed") + return parser - Okay: a = 12 + 3 - E221: a = 4 + 5 - E222: a = 4 + 5 - E223: a = 4\t+ 5 - E224: a = 4 +\t5 - """ - for match in OPERATOR_REGEX.finditer(logical_line): - before, after = match.groups() - if '\t' in before: - yield match.start(1), "E223 tab before operator" - elif len(before) > 1: - yield match.start(1), "E221 multiple spaces before operator" - - if '\t' in after: - yield match.start(2), "E224 tab after operator" - elif len(after) > 1: - yield match.start(2), "E222 multiple spaces after operator" +def process_options(arglist=None, parse_argv=False, config_file=None, + parser=None): + """Process options passed either via arglist or via command line args. + Passing in the ``config_file`` parameter allows other tools, such as flake8 + to specify their own options to be processed in pep8. + """ + if not parser: + parser = get_parser() + if not parser.has_option('--config'): + group = parser.add_option_group("Configuration", description=( + "The project options are read from the [%s] section of the " + "tox.ini file or the setup.cfg file located in any parent folder " + "of the path(s) being processed. Allowed options are: %s." % + (parser.prog, ', '.join(parser.config_options)))) + group.add_option('--config', metavar='path', default=config_file, + help="user config file location") + # Don't read the command line if the module is used as a library. + if not arglist and not parse_argv: + arglist = [] + # If parse_argv is True and arglist is None, arguments are + # parsed from the command line (sys.argv) + (options, args) = parser.parse_args(arglist) + options.reporter = None -def missing_whitespace_around_operator(logical_line, tokens): - r"""Surround operators with a single space on either side. + if options.ensure_value('testsuite', False): + args.append(options.testsuite) + elif not options.ensure_value('doctest', False): + if parse_argv and not args: + if options.diff or any(os.path.exists(name) + for name in PROJECT_CONFIG): + args = ['.'] + else: + parser.error('input not specified') + options = read_config(options, args, arglist, parser) + options.reporter = parse_argv and options.quiet == 1 and FileReport - - Always surround these binary operators with a single space on - either side: assignment (=), augmented assignment (+=, -= etc.), - comparisons (==, <, >, !=, <=, >=, in, not in, is, is not), - Booleans (and, or, not). + options.filename = _parse_multi_options(options.filename) + options.exclude = normalize_paths(options.exclude) + options.select = _parse_multi_options(options.select) + options.ignore = _parse_multi_options(options.ignore) - - If operators with different priorities are used, consider adding - whitespace around the operators with the lowest priorities. + if options.diff: + options.reporter = DiffReport + stdin = stdin_get_value() + options.selected_lines = parse_udiff(stdin, options.filename, args[0]) + args = sorted(options.selected_lines) - Okay: i = i + 1 - Okay: submitted += 1 - Okay: x = x * 2 - 1 - Okay: hypot2 = x * x + y * y - Okay: c = (a + b) * (a - b) - Okay: foo(bar, key='word', *args, **kwargs) - Okay: alpha[:-i] + return options, args - E225: i=i+1 - E225: submitted +=1 - E225: x = x /2 - 1 - E225: z = x **y - E226: c = (a+b) * (a-b) - E226: hypot2 = x*x + y*y - E227: c = a|b - E228: msg = fmt%(errno, errmsg) - """ - parens = 0 - need_space = False - prev_type = tokenize.OP - prev_text = prev_end = None - for token_type, text, start, end, line in tokens: - if token_type in SKIP_COMMENTS: - continue - if text in ('(', 'lambda'): - parens += 1 - elif text == ')': - parens -= 1 - if need_space: - if start != prev_end: - # Found a (probably) needed space - if need_space is not True and not need_space[1]: - yield (need_space[0], - "E225 missing whitespace around operator") - need_space = False - elif text == '>' and prev_text in ('<', '-'): - # Tolerate the "<>" operator, even if running Python 3 - # Deal with Python 3's annotated return value "->" - pass - else: - if need_space is True or need_space[1]: - # A needed trailing space was not found - yield prev_end, "E225 missing whitespace around operator" - elif prev_text != '**': - code, optype = 'E226', 'arithmetic' - if prev_text == '%': - code, optype = 'E228', 'modulo' - elif prev_text not in ARITHMETIC_OP: - code, optype = 'E227', 'bitwise or shift' - yield (need_space[0], "%s missing whitespace " - "around %s operator" % (code, optype)) - need_space = False - elif token_type == tokenize.OP and prev_end is not None: - if text == '=' and parens: - # Allow keyword args or defaults: foo(bar=None). - pass - elif text in WS_NEEDED_OPERATORS: - need_space = True - elif text in UNARY_OPERATORS: - # Check if the operator is being used as a binary operator - # Allow unary operators: -123, -x, +1. - # Allow argument unpacking: foo(*args, **kwargs). - if (prev_text in '}])' if prev_type == tokenize.OP - else prev_text not in KEYWORDS): - need_space = None - elif text in WS_OPTIONAL_OPERATORS: - need_space = None - if need_space is None: - # Surrounding space is optional, but ensure that - # trailing space matches opening space - need_space = (prev_end, start != prev_end) - elif need_space and start == prev_end: - # A needed opening space was not found - yield prev_end, "E225 missing whitespace around operator" - need_space = False - prev_type = token_type - prev_text = text - prev_end = end +class StyleGuide(object): + """Initialize a PEP-8 instance with few options.""" + def __init__(self, *args, **kwargs): + # build options from the command line + self.checker_class = kwargs.pop('checker_class', Checker) + parse_argv = kwargs.pop('parse_argv', False) + config_file = kwargs.pop('config_file', False) + parser = kwargs.pop('parser', None) + # build options from dict + options_dict = dict(*args, **kwargs) + arglist = None if parse_argv else options_dict.get('paths', None) + options, self.paths = process_options( + arglist, parse_argv, config_file, parser) + if options_dict: + options.__dict__.update(options_dict) + if 'paths' in options_dict: + self.paths = options_dict['paths'] -def whitespace_around_comma(logical_line): - r"""Avoid extraneous whitespace after a comma or a colon. + self.runner = self.input_file + self.options = options - Note: these checks are disabled by default + if not options.reporter: + options.reporter = BaseReport if options.quiet else StandardReport - Okay: a = (1, 2) - E241: a = (1, 2) - E242: a = (1,\t2) - """ - line = logical_line - for m in WHITESPACE_AFTER_COMMA_REGEX.finditer(line): - found = m.start() + 1 - if '\t' in m.group(): - yield found, "E242 tab after '%s'" % m.group()[0] + options.select = tuple(options.select or ()) + if not (options.select or options.ignore or + options.testsuite or options.doctest) and DEFAULT_IGNORE: + # The default choice: ignore controversial checks + options.ignore = tuple(DEFAULT_IGNORE.split(',')) else: - yield found, "E241 multiple spaces after '%s'" % m.group()[0] + # Ignore all checks which are not explicitly selected + options.ignore = ('',) if options.select else tuple(options.ignore) + options.benchmark_keys = BENCHMARK_KEYS[:] + options.ignore_code = self.ignore_code + options.physical_checks = self.get_checks('physical_line') + options.logical_checks = self.get_checks('logical_line') + options.ast_checks = self.get_checks('tree') + self.init_report() + def init_report(self, reporter=None): + """Initialize the report instance.""" + self.options.report = (reporter or self.options.reporter)(self.options) + return self.options.report -def whitespace_around_named_parameter_equals(logical_line, tokens): - r"""Don't use spaces around the '=' sign in function arguments. + def check_files(self, paths=None): + """Run all checks on the paths.""" + if paths is None: + paths = self.paths + report = self.options.report + runner = self.runner + report.start() + try: + for path in paths: + if os.path.isdir(path): + self.input_dir(path) + elif not self.excluded(path): + runner(path) + except KeyboardInterrupt: + print('... stopped') + report.stop() + return report - Don't use spaces around the '=' sign when used to indicate a - keyword argument or a default parameter value. + def input_file(self, filename, lines=None, expected=None, line_offset=0): + """Run all checks on a Python source file.""" + if self.options.verbose: + print('checking %s' % filename) + fchecker = self.checker_class( + filename, lines=lines, options=self.options) + return fchecker.check_all(expected=expected, line_offset=line_offset) - Okay: def complex(real, imag=0.0): - Okay: return magic(r=real, i=imag) - Okay: boolean(a == b) - Okay: boolean(a != b) - Okay: boolean(a <= b) - Okay: boolean(a >= b) - Okay: def foo(arg: int = 42): - - E251: def complex(real, imag = 0.0): - E251: return magic(r = real, i = imag) - """ - parens = 0 - no_space = False - prev_end = None - annotated_func_arg = False - in_def = logical_line.startswith('def') - message = "E251 unexpected spaces around keyword / parameter equals" - for token_type, text, start, end, line in tokens: - if token_type == tokenize.NL: - continue - if no_space: - no_space = False - if start != prev_end: - yield (prev_end, message) - if token_type == tokenize.OP: - if text == '(': - parens += 1 - elif text == ')': - parens -= 1 - elif in_def and text == ':' and parens == 1: - annotated_func_arg = True - elif parens and text == ',' and parens == 1: - annotated_func_arg = False - elif parens and text == '=' and not annotated_func_arg: - no_space = True - if start != prev_end: - yield (prev_end, message) - if not parens: - annotated_func_arg = False - - prev_end = end + def input_dir(self, dirname): + """Check all files in this directory and all subdirectories.""" + dirname = dirname.rstrip('/') + if self.excluded(dirname): + return 0 + counters = self.options.report.counters + verbose = self.options.verbose + filepatterns = self.options.filename + runner = self.runner + for root, dirs, files in os.walk(dirname): + if verbose: + print('directory ' + root) + counters['directories'] += 1 + for subdir in sorted(dirs): + if self.excluded(subdir, root): + dirs.remove(subdir) + for filename in sorted(files): + # contain a pattern that matches? + if ((filename_match(filename, filepatterns) and + not self.excluded(filename, root))): + runner(os.path.join(root, filename)) + def excluded(self, filename, parent=None): + """Check if the file should be excluded. -def whitespace_before_comment(logical_line, tokens): - r"""Separate inline comments by at least two spaces. + Check if 'options.exclude' contains a pattern that matches filename. + """ + if not self.options.exclude: + return False + basename = os.path.basename(filename) + if filename_match(basename, self.options.exclude): + return True + if parent: + filename = os.path.join(parent, filename) + filename = os.path.abspath(filename) + return filename_match(filename, self.options.exclude) - An inline comment is a comment on the same line as a statement. Inline - comments should be separated by at least two spaces from the statement. - They should start with a # and a single space. + def ignore_code(self, code): + """Check if the error code should be ignored. - Each line of a block comment starts with a # and a single space - (unless it is indented text inside the comment). + If 'options.select' contains a prefix of the error code, + return False. Else, if 'options.ignore' contains a prefix of + the error code, return True. + """ + if len(code) < 4 and any(s.startswith(code) + for s in self.options.select): + return False + return (code.startswith(self.options.ignore) and + not code.startswith(self.options.select)) - Okay: x = x + 1 # Increment x - Okay: x = x + 1 # Increment x - Okay: # Block comment - E261: x = x + 1 # Increment x - E262: x = x + 1 #Increment x - E262: x = x + 1 # Increment x - E265: #Block comment - E266: ### Block comment - """ - prev_end = (0, 0) - for token_type, text, start, end, line in tokens: - if token_type == tokenize.COMMENT: - inline_comment = line[:start[1]].strip() - if inline_comment: - if prev_end[0] == start[0] and start[1] < prev_end[1] + 2: - yield (prev_end, - "E261 at least two spaces before inline comment") - symbol, sp, comment = text.partition(' ') - bad_prefix = symbol not in '#:' and (symbol.lstrip('#')[:1] or '#') - if inline_comment: - if bad_prefix or comment[:1] in WHITESPACE: - yield start, "E262 inline comment should start with '# '" - elif bad_prefix and (bad_prefix != '!' or start[0] > 1): - if bad_prefix != '#': - yield start, "E265 block comment should start with '# '" - elif comment: - yield start, "E266 too many leading '#' for block comment" - elif token_type != tokenize.NL: - prev_end = end + def get_checks(self, argument_name): + """Get all the checks for this category. + Find all globally visible functions where the first argument name + starts with argument_name and which contain selected tests. + """ + checks = [] + for check, attrs in _checks[argument_name].items(): + (codes, args) = attrs + if any(not (code and self.ignore_code(code)) for code in codes): + checks.append((check.__name__, check, args)) + return sorted(checks) -def imports_on_separate_lines(logical_line): - r"""Imports should usually be on separate lines. - Okay: import os\nimport sys - E401: import sys, os +class Checker(object): + """Load a Python source file, tokenize it, check coding style.""" - Okay: from subprocess import Popen, PIPE - Okay: from myclas import MyClass - Okay: from foo.bar.yourclass import YourClass - Okay: import myclass - Okay: import foo.bar.yourclass - """ - line = logical_line - if line.startswith('import '): - found = line.find(',') - if -1 < found and ';' not in line[:found]: - yield found, "E401 multiple imports on one line" + def __init__(self, filename=None, lines=None, + options=None, report=None, **kwargs): + if options is None: + options = StyleGuide(kwargs).options + else: + assert not kwargs + self._io_error = None + self._physical_checks = options.physical_checks + self._logical_checks = options.logical_checks + self._ast_checks = options.ast_checks + self.max_line_length = options.max_line_length + self.multiline = False # in a multiline string? + self.hang_closing = options.hang_closing + self.verbose = options.verbose + self.filename = filename + # Dictionary where a checker can store its custom state. + self._checker_states = {} + if filename is None: + self.filename = 'stdin' + self.lines = lines or [] + elif filename == '-': + self.filename = 'stdin' + self.lines = stdin_get_value().splitlines(True) + elif lines is None: + try: + self.lines = readlines(filename) + except IOError: + (exc_type, exc) = sys.exc_info()[:2] + self._io_error = '%s: %s' % (exc_type.__name__, exc) + self.lines = [] + else: + self.lines = lines + if self.lines: + ord0 = ord(self.lines[0][0]) + if ord0 in (0xef, 0xfeff): # Strip the UTF-8 BOM + if ord0 == 0xfeff: + self.lines[0] = self.lines[0][1:] + elif self.lines[0][:3] == '\xef\xbb\xbf': + self.lines[0] = self.lines[0][3:] + self.report = report or options.report + self.report_error = self.report.error + self._checks = {'physical_line': {}, 'logical_line': {}, 'tree': {}} + def report_invalid_syntax(self): + """Check if the syntax is valid.""" + (exc_type, exc) = sys.exc_info()[:2] + if len(exc.args) > 1: + offset = exc.args[1] + if len(offset) > 2: + offset = offset[1:3] + else: + offset = (1, 0) + self.report_error(offset[0], offset[1] or 0, + '%s %s: %s' % (E901, exc_type.__name__, exc.args[0]), + self.report_invalid_syntax) -def module_imports_on_top_of_file( - logical_line, indent_level, checker_state, noqa): - r"""Imports are always put at the top of the file, just after any module - comments and docstrings, and before module globals and constants. + def readline(self): + """Get the next line from the input buffer.""" + if self.line_number >= self.total_lines: + return '' + line = self.lines[self.line_number] + self.line_number += 1 + if self.indent_char is None and line[:1] in WHITESPACE: + self.indent_char = line[0] + return line - Okay: import os - Okay: # this is a comment\nimport os - Okay: '''this is a module docstring'''\nimport os - Okay: r'''this is a module docstring'''\nimport os - Okay: try:\n import x\nexcept:\n pass\nelse:\n pass\nimport y - Okay: try:\n import x\nexcept:\n pass\nfinally:\n pass\nimport y - E402: a=1\nimport os - E402: 'One string'\n"Two string"\nimport os - E402: a=1\nfrom sys import x + def run_check(self, check, argument_names): + """Run a check plugin.""" + arguments = [] + for name in argument_names: + arguments.append(getattr(self, name)) + return check(*arguments) - Okay: if x:\n import os - """ - def is_string_literal(line): - if line[0] in 'uUbB': - line = line[1:] - if line and line[0] in 'rR': - line = line[1:] - return line and (line[0] == '"' or line[0] == "'") + def init_checker_state(self, name, argument_names): + """ Prepares a custom state for the specific checker plugin.""" + if 'checker_state' in argument_names: + self.checker_state = self._checker_states.setdefault(name, {}) - allowed_try_keywords = ('try', 'except', 'else', 'finally') + def check_physical(self, line): + """Run all physical checks on a raw input line.""" + self.line = line + for name, check, argument_names in self._physical_checks: + self.init_checker_state(name, argument_names) + result = self.run_check(check, argument_names) + if result is not None: + (offset, text) = result + self.report_error(self.line_number, offset, text, check) + if text[:4] == E101: + self.indent_char = line[0] - if indent_level: # Allow imports in conditional statements or functions - return - if not logical_line: # Allow empty lines or comments - return - if noqa: - return - line = logical_line - if line.startswith('import ') or line.startswith('from '): - if checker_state.get('seen_non_imports', False): - yield 0, "E402 module level import not at top of file" - elif any(line.startswith(kw) for kw in allowed_try_keywords): - # Allow try, except, else, finally keywords intermixed with imports in - # order to support conditional importing - return - elif is_string_literal(line): - # The first literal is a docstring, allow it. Otherwise, report error. - if checker_state.get('seen_docstring', False): - checker_state['seen_non_imports'] = True - else: - checker_state['seen_docstring'] = True - else: - checker_state['seen_non_imports'] = True + def build_tokens_line(self): + """Build a logical line from tokens.""" + logical = [] + comments = [] + length = 0 + prev_row = prev_col = mapping = None + for token_type, text, start, end, line in self.tokens: + if token_type in SKIP_TOKENS: + continue + if not mapping: + mapping = [(0, start)] + if token_type == tokenize.COMMENT: + comments.append(text) + continue + if token_type == tokenize.STRING: + text = mute_string(text) + if prev_row: + (start_row, start_col) = start + if prev_row != start_row: # different row + prev_text = self.lines[prev_row - 1][prev_col - 1] + if prev_text == ',' or (prev_text not in '{[(' and + text not in '}])'): + text = ' ' + text + elif prev_col != start_col: # different column + text = line[prev_col:start_col] + text + logical.append(text) + length += len(text) + mapping.append((length, end)) + (prev_row, prev_col) = end + self.line = ''.join(logical) + self.noqa = comments and noqa(''.join(comments)) + return mapping + def check_logical(self): + """Build a line from tokens and run all logical checks on it.""" + self.report.increment_logical_line() + mapping = self.build_tokens_line() -def compound_statements(logical_line): - r"""Compound statements (on the same line) are generally discouraged. + if not mapping: + return - While sometimes it's okay to put an if/for/while with a small body - on the same line, never do this for multi-clause statements. - Also avoid folding such long lines! + (start_row, start_col) = mapping[0][1] + start_line = self.lines[start_row - 1] + self.indent_level = expand_indent(start_line[:start_col]) + if self.blank_before < self.blank_lines: + self.blank_before = self.blank_lines + if self.verbose >= 2: + print(self.line[:80].rstrip()) + for name, check, argument_names in self._logical_checks: + if self.verbose >= 4: + print(' ' + name) + self.init_checker_state(name, argument_names) + for offset, text in self.run_check(check, argument_names) or (): + if not isinstance(offset, tuple): + for token_offset, pos in mapping: + if offset <= token_offset: + break + offset = (pos[0], pos[1] + offset - token_offset) + self.report_error(offset[0], offset[1], text, check) + if self.line: + self.previous_indent_level = self.indent_level + self.previous_line = self.line + self.blank_lines = 0 + self.tokens = [] - Always use a def statement instead of an assignment statement that - binds a lambda expression directly to a name. + def check_ast(self): + """Build the file's AST and run all AST checks.""" + try: + tree = compile(''.join(self.lines), '', 'exec', PyCF_ONLY_AST) + except (ValueError, SyntaxError, TypeError): + return self.report_invalid_syntax() + for name, cls, __ in self._ast_checks: + checker = cls(tree, self.filename) + for lineno, offset, text, check in checker.run(): + if not self.lines or not noqa(self.lines[lineno - 1]): + self.report_error(lineno, offset, text, check) - Okay: if foo == 'blah':\n do_blah_thing() - Okay: do_one() - Okay: do_two() - Okay: do_three() + def generate_tokens(self): + """Tokenize the file, run physical line checks and yield tokens.""" + if self._io_error: + self.report_error(1, 0, 'E902 %s' % self._io_error, readlines) + tokengen = tokenize.generate_tokens(self.readline) + try: + for token in tokengen: + if token[2][0] > self.total_lines: + return + self.maybe_check_physical(token) + yield token + except (SyntaxError, tokenize.TokenError): + self.report_invalid_syntax() - E701: if foo == 'blah': do_blah_thing() - E701: for x in lst: total += x - E701: while t < 10: t = delay() - E701: if foo == 'blah': do_blah_thing() - E701: else: do_non_blah_thing() - E701: try: something() - E701: finally: cleanup() - E701: if foo == 'blah': one(); two(); three() - E702: do_one(); do_two(); do_three() - E703: do_four(); # useless semicolon - E704: def f(x): return 2*x - E731: f = lambda x: 2*x - """ - line = logical_line - last_char = len(line) - 1 - found = line.find(':') - while -1 < found < last_char: - before = line[:found] - if ((before.count('{') <= before.count('}') and # {'a': 1} (dict) - before.count('[') <= before.count(']') and # [1:2] (slice) - before.count('(') <= before.count(')'))): # (annotation) - lambda_kw = LAMBDA_REGEX.search(before) - if lambda_kw: - before = line[:lambda_kw.start()].rstrip() - if before[-1:] == '=' and isidentifier(before[:-1].strip()): - yield 0, ("E731 do not assign a lambda expression, use a " - "def") - break - if before.startswith('def '): - yield 0, "E704 multiple statements on one line (def)" - else: - yield found, "E701 multiple statements on one line (colon)" - found = line.find(':', found + 1) - found = line.find(';') - while -1 < found: - if found < last_char: - yield found, "E702 multiple statements on one line (semicolon)" + def maybe_check_physical(self, token): + """If appropriate (based on token), check current physical line(s).""" + # Called after every token, but act only on end of line. + if _is_eol_token(token): + # Obviously, a newline token ends a single physical line. + self.check_physical(token[4]) + elif token[0] == tokenize.STRING and '\n' in token[1]: + # Less obviously, a string that contains newlines is a + # multiline string, either triple-quoted or with internal + # newlines backslash-escaped. Check every physical line in the + # string *except* for the last one: its newline is outside of + # the multiline string, so we consider it a regular physical + # line, and will check it like any other physical line. + # + # Subtleties: + # - we don't *completely* ignore the last line; if it contains + # the magical "# noqa" comment, we disable all physical + # checks for the entire multiline string + # - have to wind self.line_number back because initially it + # points to the last line of the string, and we want + # check_physical() to give accurate feedback + if noqa(token[4]): + return + self.multiline = True + self.line_number = token[2][0] + for line in token[1].split('\n')[:-1]: + self.check_physical(line + '\n') + self.line_number += 1 + self.multiline = False + + def check_all(self, expected=None, line_offset=0): + """Run all checks on the input file.""" + self.report.init_file(self.filename, self.lines, expected, line_offset) + self.total_lines = len(self.lines) + if self._ast_checks: + self.check_ast() + self.line_number = 0 + self.indent_char = None + self.indent_level = self.previous_indent_level = 0 + self.previous_line = '' + self.tokens = [] + self.blank_lines = self.blank_before = 0 + parens = 0 + for token in self.generate_tokens(): + self.tokens.append(token) + token_type, text = token[0:2] + if self.verbose >= 3: + if token[2][0] == token[3][0]: + pos = '[%s:%s]' % (token[2][1] or '', token[3][1]) + else: + pos = 'l.%s' % token[3][0] + print('l.%s\t%s\t%s\t%r' % + (token[2][0], pos, tokenize.tok_name[token[0]], text)) + if token_type == tokenize.OP: + if text in '([{': + parens += 1 + elif text in '}])': + parens -= 1 + elif not parens: + if token_type in NEWLINE: + if token_type == tokenize.NEWLINE: + self.check_logical() + self.blank_before = 0 + elif len(self.tokens) == 1: + # The physical line contains only this token. + self.blank_lines += 1 + del self.tokens[0] + else: + self.check_logical() + elif COMMENT_WITH_NL and token_type == tokenize.COMMENT: + if len(self.tokens) == 1: + # The comment also ends a physical line + token = list(token) + token[1] = text.rstrip('\r\n') + token[3] = (token[2][0], token[2][1] + len(token[1])) + self.tokens = [tuple(token)] + self.check_logical() + if self.tokens: + self.check_physical(self.lines[-1]) + self.check_logical() + return self.report.get_file_results() + + def _add_check(self, check, kind, codes, args): + if check in self._checks[kind]: + self._checks[kind][check][0].extend(codes or []) else: - yield found, "E703 statement ends with a semicolon" - found = line.find(';', found + 1) + self._checks[kind][check] = (codes or [''], args) + + def physical(self, codes): + def decorator(func): + # def wrapper(*args, **kwargs): + # return func(*args, **kwargs) + args = _get_parameters(func) + self._add_check(func, 'physical_line', codes, args) + return func + return decorator + + def logical(self, codes): + def decorator(func): + # def wrapper(*args, **kwargs): + # return func(*args, **kwargs) + args = _get_parameters(func) + self._add_check(func, 'logical_line', codes, args) + return func + return decorator + + +check = Checker() +_checks = check._checks +############################################################################## +# Plugins (check functions) for physical lines +############################################################################## -def explicit_line_join(logical_line, tokens): - r"""Avoid explicit line join between brackets. - The preferred way of wrapping long lines is by using Python's implied line - continuation inside parentheses, brackets and braces. Long lines can be - broken over multiple lines by wrapping expressions in parentheses. These - should be used in preference to using a backslash for line continuation. +@check.physical(codes=(E101)) +def tabs_or_spaces(line, indent_char): + r"""Never mix tabs and spaces. - E502: aaa = [123, \\n 123] - E502: aaa = ("bbb " \\n "ccc") + The most popular way of indenting Python is with spaces only. The + second-most popular way is with tabs only. Code indented with a mixture + of tabs and spaces should be converted to using spaces exclusively. When + invoking the Python command line interpreter with the -t option, it issues + warnings about code that illegally mixes tabs and spaces. When using -tt + these warnings become errors. These options are highly recommended! - Okay: aaa = [123,\n 123] - Okay: aaa = ("bbb "\n "ccc") - Okay: aaa = "bbb " \\n "ccc" - Okay: aaa = 123 # \\ + Okay: if a == 0:\n a = 1\n b = 1 + E101: if a == 0:\n a = 1\n\tb = 1 """ - prev_start = prev_end = parens = 0 - comment = False - backslash = None - for token_type, text, start, end, line in tokens: - if token_type == tokenize.COMMENT: - comment = True - if start[0] != prev_start and parens and backslash and not comment: - yield backslash, "E502 the backslash is redundant between brackets" - if end[0] != prev_end: - if line.rstrip('\r\n').endswith('\\'): - backslash = (end[0], len(line.splitlines()[-1]) - 1) - else: - backslash = None - prev_start = prev_end = end[0] - else: - prev_start = start[0] - if token_type == tokenize.OP: - if text in '([{': - parens += 1 - elif text in ')]}': - parens -= 1 + indent = INDENT_REGEX.match(line).group(1) + for offset, char in enumerate(indent): + if char != indent_char: + return offset, "E101 indentation contains mixed spaces and tabs" -def break_around_binary_operator(logical_line, tokens): - r""" - Avoid breaks before binary operators. +@check.physical(codes=(W191)) +def tabs_obsolete(line): + r"""For new projects, spaces-only are strongly recommended over tabs. - The preferred place to break around a binary operator is after the - operator, not before it. + Okay: if True:\n return + W191: if True:\n\treturn + """ + indent = INDENT_REGEX.match(line).group(1) + if '\t' in indent: + return indent.index('\t'), "W191 indentation contains tabs" - W503: (width == 0\n + height == 0) - W503: (width == 0\n and height == 0) - Okay: (width == 0 +\n height == 0) - Okay: foo(\n -x) - Okay: foo(x\n []) - Okay: x = '''\n''' + '' - Okay: foo(x,\n -y) - Okay: foo(x, # comment\n -y) +@check.physical(codes=(W291, W293)) +def trailing_whitespace(line): + r"""Trailing whitespace is superfluous. + + The warning returned varies on whether the line itself is blank, for easier + filtering for those who want to indent their blank lines. + + Okay: spam(1)\n# + W291: spam(1) \n# + W293: class Foo(object):\n \n bang = 12 """ - def is_binary_operator(token_type, text): - # The % character is strictly speaking a binary operator, but the - # common usage seems to be to put it next to the format parameters, - # after a line break. - return ((token_type == tokenize.OP or text in ['and', 'or']) and - text not in "()[]{},:.;@=%") - - line_break = False - unary_context = True - for token_type, text, start, end, line in tokens: - if token_type == tokenize.COMMENT: - continue - if ('\n' in text or '\r' in text) and token_type != tokenize.STRING: - line_break = True - else: - if (is_binary_operator(token_type, text) and line_break and - not unary_context): - yield start, "W503 line break before binary operator" - unary_context = text in '([{,;' - line_break = False + line = line.rstrip('\n') # chr(10), newline + line = line.rstrip('\r') # chr(13), carriage return + line = line.rstrip('\x0c') # chr(12), form feed, ^L + stripped = line.rstrip(' \t\v') + if line != stripped: + if stripped: + return len(stripped), "W291 trailing whitespace" + else: + return 0, "W293 blank line contains whitespace" -def comparison_to_singleton(logical_line, noqa): - r"""Comparison to singletons should use "is" or "is not". - - Comparisons to singletons like None should always be done - with "is" or "is not", never the equality operators. +@check.physical(codes=(W391, W292)) +def trailing_blank_lines(line, lines, line_number, total_lines): + r"""Trailing blank lines are superfluous. - Okay: if arg is not None: - E711: if arg != None: - E711: if None == arg: - E712: if arg == True: - E712: if False == arg: + Okay: spam(1) + W391: spam(1)\n - Also, beware of writing if x when you really mean if x is not None -- - e.g. when testing whether a variable or argument that defaults to None was - set to some other value. The other value might have a type (such as a - container) that could be false in a boolean context! + However the last line should end with a new line (warning W292). """ - match = not noqa and COMPARE_SINGLETON_REGEX.search(logical_line) - if match: - singleton = match.group(1) or match.group(3) - same = (match.group(2) == '==') + if line_number == total_lines: + stripped_last_line = line.rstrip() + if not stripped_last_line: + return 0, "W391 blank line at end of file" + if stripped_last_line == line: + return len(line), "W292 no newline at end of file" - msg = "'if cond is %s:'" % (('' if same else 'not ') + singleton) - if singleton in ('None',): - code = 'E711' - else: - code = 'E712' - nonzero = ((singleton == 'True' and same) or - (singleton == 'False' and not same)) - msg += " or 'if %scond:'" % ('' if nonzero else 'not ') - yield match.start(2), ("%s comparison to %s should be %s" % - (code, singleton, msg)) +@check.physical(codes=(E501)) +def maximum_line_length(line, max_line_length, multiline): + r"""Limit all lines to a maximum of 79 characters. -def comparison_negative(logical_line): - r"""Negative comparison should be done using "not in" and "is not". + There are still many devices around that are limited to 80 character + lines; plus, limiting windows to 80 characters makes it possible to have + several windows side-by-side. The default wrapping on such devices looks + ugly. Therefore, please limit all lines to a maximum of 79 characters. + For flowing long blocks of text (docstrings or comments), limiting the + length to 72 characters is recommended. - Okay: if x not in y:\n pass - Okay: assert (X in Y or X is Z) - Okay: if not (X in Y):\n pass - Okay: zz = x is not y - E713: Z = not X in Y - E713: if not X.B in Y:\n pass - E714: if not X is Y:\n pass - E714: Z = not X.B is Y + Reports error E501. """ - match = COMPARE_NEGATIVE_REGEX.search(logical_line) - if match: - pos = match.start(1) - if match.group(2) == 'in': - yield pos, "E713 test for membership should be 'not in'" - else: - yield pos, "E714 test for object identity should be 'is not'" + line = line.rstrip() + length = len(line) + if length > max_line_length and not noqa(line): + # Special case for long URLs in multi-line docstrings or comments, + # but still report the error when the 72 first chars are whitespaces. + chunks = line.split() + if ((len(chunks) == 1 and multiline) or + (len(chunks) == 2 and chunks[0] == '#')) and \ + len(line) - len(chunks[-1]) < max_line_length - 7: + return + if hasattr(line, 'decode'): # Python 2 + # The line could contain multi-byte characters + try: + length = len(line.decode('utf-8')) + except UnicodeError: + pass + if length > max_line_length: + return (max_line_length, "E501 line too long " + "(%d > %d characters)" % (length, max_line_length)) -def comparison_type(logical_line, noqa): - r"""Object type comparisons should always use isinstance(). +############################################################################## +# Plugins (check functions) for logical lines +############################################################################## - Do not compare types directly. - Okay: if isinstance(obj, int): - E721: if type(obj) is type(1): +@check.logical(codes=(E301, E302, E303, E304)) +def blank_lines(line, blank_lines, indent_level, line_number, + blank_before, previous_line, previous_indent_level): + r"""Separate top-level function and class definitions with two blank lines. - When checking if an object is a string, keep in mind that it might be a - unicode string too! In Python 2.3, str and unicode have a common base - class, basestring, so you can do: + Method definitions inside a class are separated by a single blank line. - Okay: if isinstance(obj, basestring): - Okay: if type(a1) is type(b1): - """ - match = COMPARE_TYPE_REGEX.search(logical_line) - if match and not noqa: - inst = match.group(1) - if inst and isidentifier(inst) and inst not in SINGLETONS: - return # Allow comparison for types which are not obvious - yield match.start(), "E721 do not compare types, use 'isinstance()'" + Extra blank lines may be used (sparingly) to separate groups of related + functions. Blank lines may be omitted between a bunch of related + one-liners (e.g. a set of dummy implementations). + Use blank lines in functions, sparingly, to indicate logical sections. -def python_3000_has_key(logical_line, noqa): - r"""The {}.has_key() method is removed in Python 3: use the 'in' operator. + Okay: def a():\n pass\n\n\ndef b():\n pass + Okay: def a():\n pass\n\n\n# Foo\n# Bar\n\ndef b():\n pass - Okay: if "alph" in d:\n print d["alph"] - W601: assert d.has_key('alph') + E301: class Foo:\n b = 0\n def bar():\n pass + E302: def a():\n pass\n\ndef b(n):\n pass + E303: def a():\n pass\n\n\n\ndef b(n):\n pass + E303: def a():\n\n\n\n pass + E304: @decorator\n\ndef a():\n pass """ - pos = logical_line.find('.has_key(') - if pos > -1 and not noqa: - yield pos, "W601 .has_key() is deprecated, use 'in'" + if line_number < 3 and not previous_line: + return # Don't expect blank lines before the first line + if previous_line.startswith('@'): + if blank_lines: + yield 0, "E304 blank lines found after function decorator" + elif blank_lines > 2 or (indent_level and blank_lines == 2): + yield 0, "E303 too many blank lines (%d)" % blank_lines + elif line.startswith(('def ', 'class ', '@')): + if indent_level: + if not (blank_before or previous_indent_level < indent_level or + DOCSTRING_REGEX.match(previous_line)): + yield 0, "E301 expected 1 blank line, found 0" + elif blank_before != 2: + yield 0, "E302 expected 2 blank lines, found %d" % blank_before -def python_3000_raise_comma(logical_line): - r"""When raising an exception, use "raise ValueError('message')". +@check.logical(codes=(E201, E202, E203)) +def extraneous_whitespace(line): + r"""Avoid extraneous whitespace. - The older form is removed in Python 3. + Avoid extraneous whitespace in these situations: + - Immediately inside parentheses, brackets or braces. + - Immediately before a comma, semicolon, or colon. - Okay: raise DummyError("Message") - W602: raise DummyError, "Message" - """ - match = RAISE_COMMA_REGEX.match(logical_line) - if match and not RERAISE_COMMA_REGEX.match(logical_line): - yield match.end() - 1, "W602 deprecated form of raising exception" + Okay: spam(ham[1], {eggs: 2}) + E201: spam( ham[1], {eggs: 2}) + E201: spam(ham[ 1], {eggs: 2}) + E201: spam(ham[1], { eggs: 2}) + E202: spam(ham[1], {eggs: 2} ) + E202: spam(ham[1 ], {eggs: 2}) + E202: spam(ham[1], {eggs: 2 }) + E203: if x == 4: print x, y; x, y = y , x + E203: if x == 4: print x, y ; x, y = y, x + E203: if x == 4 : print x, y; x, y = y, x + """ + for match in EXTRANEOUS_WHITESPACE_REGEX.finditer(line): + text = match.group() + char = text.strip() + found = match.start() + if text == char + ' ': + # assert char in '([{' + yield found + 1, "E201 whitespace after '%s'" % char + elif line[found - 1] != ',': + code = (E202 if char in '}])' else E203) # if char in ',;:' + yield found, "%s whitespace before '%s'" % (code, char) -def python_3000_not_equal(logical_line): - r"""New code should always use != instead of <>. - The older syntax is removed in Python 3. +@check.logical(codes=(E271, E272, E273, E274)) +def whitespace_around_keywords(line): + r"""Avoid extraneous whitespace around keywords. - Okay: if a != 'no': - W603: if a <> 'no': + Okay: True and False + E271: True and False + E272: True and False + E273: True and\tFalse + E274: True\tand False """ - pos = logical_line.find('<>') - if pos > -1: - yield pos, "W603 '<>' is deprecated, use '!='" - + for match in KEYWORD_REGEX.finditer(line): + before, after = match.groups() -def python_3000_backticks(logical_line): - r"""Backticks are removed in Python 3: use repr() instead. + if '\t' in before: + yield match.start(1), "E274 tab before keyword" + elif len(before) > 1: + yield match.start(1), "E272 multiple spaces before keyword" - Okay: val = repr(1 + 2) - W604: val = `1 + 2` - """ - pos = logical_line.find('`') - if pos > -1: - yield pos, "W604 backticks are deprecated, use 'repr()'" + if '\t' in after: + yield match.start(2), "E273 tab after keyword" + elif len(after) > 1: + yield match.start(2), "E271 multiple spaces after keyword" -############################################################################## -# Helper functions -############################################################################## +@check.logical(codes=(E231)) +def missing_whitespace(line): + r"""Each comma, semicolon or colon should be followed by whitespace. + Okay: [a, b] + Okay: (3,) + Okay: a[1:4] + Okay: a[:4] + Okay: a[1:] + Okay: a[1:4:2] + E231: ['a','b'] + E231: foo(bar,baz) + E231: [{'a':'b'}] + """ + for index in range(len(line) - 1): + char = line[index] + if char in ',;:' and line[index + 1] not in WHITESPACE: + before = line[:index] + if char == ':' and before.count('[') > before.count(']') and \ + before.rfind('{') < before.rfind('['): + continue # Slice syntax, no space required + if char == ',' and line[index + 1] == ')': + continue # Allow tuple with only one element: (3,) + yield index, "E231 missing whitespace after '%s'" % char -if '' == ''.encode(): - # Python 2: implicit encoding. - def readlines(filename): - """Read the source code.""" - with open(filename, 'rU') as f: - return f.readlines() - isidentifier = re.compile(r'[a-zA-Z_]\w*$').match - stdin_get_value = sys.stdin.read -else: - # Python 3 - def readlines(filename): - """Read the source code.""" - try: - with open(filename, 'rb') as f: - (coding, lines) = tokenize.detect_encoding(f.readline) - f = TextIOWrapper(f, coding, line_buffering=True) - return [l.decode(coding) for l in lines] + f.readlines() - except (LookupError, SyntaxError, UnicodeError): - # Fall back if file encoding is improperly declared - with open(filename, encoding='latin-1') as f: - return f.readlines() - isidentifier = str.isidentifier - def stdin_get_value(): - return TextIOWrapper(sys.stdin.buffer, errors='ignore').read() -noqa = re.compile(r'# no(?:qa|pep8)\b', re.I).search +@check.logical(codes=(E111, E112, E113, E114, E115, E116)) +def indentation(line, previous_line, indent_char, + indent_level, previous_indent_level): + r"""Use 4 spaces per indentation level. + For really old code that you don't want to mess up, you can continue to + use 8-space tabs. -def expand_indent(line): - r"""Return the amount of indentation. + Okay: a = 1 + Okay: if a == 0:\n a = 1 + E111: a = 1 + E114: # a = 1 - Tabs are expanded to the next multiple of 8. + Okay: for item in items:\n pass + E112: for item in items:\npass + E115: for item in items:\n# Hi\n pass - >>> expand_indent(' ') - 4 - >>> expand_indent('\t') - 8 - >>> expand_indent(' \t') - 8 - >>> expand_indent(' \t') - 16 - """ - if '\t' not in line: - return len(line) - len(line.lstrip()) - result = 0 - for char in line: - if char == '\t': - result = result // 8 * 8 + 8 - elif char == ' ': - result += 1 - else: - break - return result - - -def mute_string(text): - """Replace contents with 'xxx' to prevent syntax matching. - - >>> mute_string('"abc"') - '"xxx"' - >>> mute_string("'''abc'''") - "'''xxx'''" - >>> mute_string("r'abc'") - "r'xxx'" + Okay: a = 1\nb = 2 + E113: a = 1\n b = 2 + E116: a = 1\n # b = 2 """ - # String modifiers (e.g. u or r) - start = text.index(text[-1]) + 1 - end = len(text) - 1 - # Triple quotes - if text[-3:] in ('"""', "'''"): - start += 2 - end -= 2 - return text[:start] + 'x' * (end - start) + text[end:] - - -def parse_udiff(diff, patterns=None, parent='.'): - """Return a dictionary of matching lines.""" - # For each file of the diff, the entry key is the filename, - # and the value is a set of row numbers to consider. - rv = {} - path = nrows = None - for line in diff.splitlines(): - if nrows: - if line[:1] != '-': - nrows -= 1 - continue - if line[:3] == '@@ ': - hunk_match = HUNK_REGEX.match(line) - (row, nrows) = [int(g or '1') for g in hunk_match.groups()] - rv[path].update(range(row, row + nrows)) - elif line[:3] == '+++': - path = line[4:].split('\t', 1)[0] - if path[:2] == 'b/': - path = path[2:] - rv[path] = set() - return dict([(os.path.join(parent, path), rows) - for (path, rows) in rv.items() - if rows and filename_match(path, patterns)]) + c = 0 if line else 3 + tmpl = "E11%d %s" if line else "E11%d %s (comment)" + if indent_level % 4: + yield 0, tmpl % (1 + c, "indentation is not a multiple of four") + indent_expect = previous_line.endswith(':') + if indent_expect and indent_level <= previous_indent_level: + yield 0, tmpl % (2 + c, "expected an indented block") + elif not indent_expect and indent_level > previous_indent_level: + yield 0, tmpl % (3 + c, "unexpected indentation") -def normalize_paths(value, parent=os.curdir): - """Parse a comma-separated list of paths. +@check.logical(codes=(E121, E122, E123, E124, E125, + E126, E127, E128, E129, E131)) +def continued_indentation(line, tokens, indent_level, hang_closing, + indent_char, noqa, verbose): + r"""Continuation lines indentation. - Return a list of absolute paths. - """ - if not value: - return [] - if isinstance(value, list): - return value - paths = [] - for path in value.split(','): - path = path.strip() - if '/' in path: - path = os.path.abspath(os.path.join(parent, path)) - paths.append(path.rstrip('/')) - return paths + Continuation lines should align wrapped elements either vertically + using Python's implicit line joining inside parentheses, brackets + and braces, or using a hanging indent. + When using a hanging indent these considerations should be applied: + - there should be no arguments on the first line, and + - further indentation should be used to clearly distinguish itself as a + continuation line. -def filename_match(filename, patterns, default=True): - """Check if patterns contains a pattern that matches filename. + Okay: a = (\n) + E123: a = (\n ) - If patterns is unspecified, this always returns True. + Okay: a = (\n 42) + E121: a = (\n 42) + E122: a = (\n42) + E123: a = (\n 42\n ) + E124: a = (24,\n 42\n) + E125: if (\n b):\n pass + E126: a = (\n 42) + E127: a = (24,\n 42) + E128: a = (24,\n 42) + E129: if (a or\n b):\n pass + E131: a = (\n 42\n 24) """ - if not patterns: - return default - return any(fnmatch(filename, pattern) for pattern in patterns) - + first_row = tokens[0][2][0] + nrows = 1 + tokens[-1][2][0] - first_row + if noqa or nrows == 1: + return -def _is_eol_token(token): - return token[0] in NEWLINE or token[4][token[3][1]:].lstrip() == '\\\n' -if COMMENT_WITH_NL: - def _is_eol_token(token, _eol_token=_is_eol_token): - return _eol_token(token) or (token[0] == tokenize.COMMENT and - token[1] == token[4]) + # indent_next tells us whether the next block is indented; assuming + # that it is indented by 4 spaces, then we should not allow 4-space + # indents on the final continuation line; in turn, some other + # indents are allowed to have an extra 4 spaces. + indent_next = line.endswith(':') -############################################################################## -# Framework to run all checks -############################################################################## + row = depth = 0 + valid_hangs = (4,) if indent_char != '\t' else (4, 8) + # remember how many brackets were opened on each line + parens = [0] * nrows + # relative indents of physical lines + rel_indent = [0] * nrows + # for each depth, collect a list of opening rows + open_rows = [[0]] + # for each depth, memorize the hanging indentation + hangs = [None] + # visual indents + indent_chances = {} + last_indent = tokens[0][2] + visual_indent = None + last_token_multiline = False + # for each depth, memorize the visual indent column + indent = [last_indent[1]] + if verbose >= 3: + print(">>> " + tokens[0][4].rstrip()) + for token_type, text, start, end, line in tokens: -_checks = {'physical_line': {}, 'logical_line': {}, 'tree': {}} + newline = row < start[0] - first_row + if newline: + row = start[0] - first_row + newline = not last_token_multiline and token_type not in NEWLINE + if newline: + # this is the beginning of a continuation line. + last_indent = start + if verbose >= 3: + print("... " + line.rstrip()) -def _get_parameters(function): - if sys.version_info >= (3, 3): - return list(inspect.signature(function).parameters) - else: - return inspect.getargspec(function)[0] + # record the initial indent. + rel_indent[row] = expand_indent(line) - indent_level + # identify closing bracket + close_bracket = (token_type == tokenize.OP and text in ']})') -def register_check(check, codes=None): - """Register a new check object.""" - def _add_check(check, kind, codes, args): - if check in _checks[kind]: - _checks[kind][check][0].extend(codes or []) - else: - _checks[kind][check] = (codes or [''], args) - if inspect.isfunction(check): - args = _get_parameters(check) - if args and args[0] in ('physical_line', 'logical_line'): - if codes is None: - codes = ERRORCODE_REGEX.findall(check.__doc__ or '') - _add_check(check, args[0], codes, args) - elif inspect.isclass(check): - if _get_parameters(check.__init__)[:2] == ['self', 'tree']: - _add_check(check, 'tree', codes, None) - - -def init_checks_registry(): - """Register all globally visible functions. - - The first argument name is either 'physical_line' or 'logical_line'. - """ - mod = inspect.getmodule(register_check) - for (name, function) in inspect.getmembers(mod, inspect.isfunction): - register_check(function) -init_checks_registry() + # is the indent relative to an opening bracket line? + for open_row in reversed(open_rows[depth]): + hang = rel_indent[row] - rel_indent[open_row] + hanging_indent = hang in valid_hangs + if hanging_indent: + break + if hangs[depth]: + hanging_indent = (hang == hangs[depth]) + # is there any chance of visual indent? + visual_indent = (not close_bracket and hang > 0 and + indent_chances.get(start[1])) + if close_bracket and indent[depth]: + # closing bracket for visual indent + if start[1] != indent[depth]: + yield (start, "E124 closing bracket does not match " + "visual indentation") + elif close_bracket and not hang: + # closing bracket matches indentation of opening bracket's line + if hang_closing: + yield start, "E133 closing bracket is missing indentation" + elif indent[depth] and start[1] < indent[depth]: + if visual_indent is not True: + # visual indent is broken + yield (start, "E128 continuation line " + "under-indented for visual indent") + elif hanging_indent or (indent_next and rel_indent[row] == 8): + # hanging indent is verified + if close_bracket and not hang_closing: + yield (start, "E123 closing bracket does not match " + "indentation of opening bracket's line") + hangs[depth] = hang + elif visual_indent is True: + # visual indent is verified + indent[depth] = start[1] + elif visual_indent in (text, str): + # ignore token lined up with matching one from a previous line + pass + else: + # indent is broken + if hang <= 0: + error = "E122", "missing indentation or outdented" + elif indent[depth]: + error = "E127", "over-indented for visual indent" + elif not close_bracket and hangs[depth]: + error = "E131", "unaligned for hanging indent" + else: + hangs[depth] = hang + if hang > 4: + error = "E126", "over-indented for hanging indent" + else: + error = "E121", "under-indented for hanging indent" + yield start, "%s continuation line %s" % error -class Checker(object): - """Load a Python source file, tokenize it, check coding style.""" + # look for visual indenting + if (parens[row] and + token_type not in (tokenize.NL, tokenize.COMMENT) and + not indent[depth]): + indent[depth] = start[1] + indent_chances[start[1]] = True + if verbose >= 4: + print("bracket depth %s indent to %s" % (depth, start[1])) + # deal with implicit string concatenation + elif (token_type in (tokenize.STRING, tokenize.COMMENT) or + text in ('u', 'ur', 'b', 'br')): + indent_chances[start[1]] = str + # special case for the "if" statement because len("if (") == 4 + elif not indent_chances and not row and not depth and text == 'if': + indent_chances[end[1] + 1] = True + elif text == ':' and line[end[1]:].isspace(): + open_rows[depth].append(row) - def __init__(self, filename=None, lines=None, - options=None, report=None, **kwargs): - if options is None: - options = StyleGuide(kwargs).options - else: - assert not kwargs - self._io_error = None - self._physical_checks = options.physical_checks - self._logical_checks = options.logical_checks - self._ast_checks = options.ast_checks - self.max_line_length = options.max_line_length - self.multiline = False # in a multiline string? - self.hang_closing = options.hang_closing - self.verbose = options.verbose - self.filename = filename - # Dictionary where a checker can store its custom state. - self._checker_states = {} - if filename is None: - self.filename = 'stdin' - self.lines = lines or [] - elif filename == '-': - self.filename = 'stdin' - self.lines = stdin_get_value().splitlines(True) - elif lines is None: - try: - self.lines = readlines(filename) - except IOError: - (exc_type, exc) = sys.exc_info()[:2] - self._io_error = '%s: %s' % (exc_type.__name__, exc) - self.lines = [] - else: - self.lines = lines - if self.lines: - ord0 = ord(self.lines[0][0]) - if ord0 in (0xef, 0xfeff): # Strip the UTF-8 BOM - if ord0 == 0xfeff: - self.lines[0] = self.lines[0][1:] - elif self.lines[0][:3] == '\xef\xbb\xbf': - self.lines[0] = self.lines[0][3:] - self.report = report or options.report - self.report_error = self.report.error + # keep track of bracket depth + if token_type == tokenize.OP: + if text in '([{': + depth += 1 + indent.append(0) + hangs.append(None) + if len(open_rows) == depth: + open_rows.append([]) + open_rows[depth].append(row) + parens[row] += 1 + if verbose >= 4: + print("bracket depth %s seen, col %s, visual min = %s" % + (depth, start[1], indent[depth])) + elif text in ')]}' and depth > 0: + # parent indents should not be more than this one + prev_indent = indent.pop() or last_indent[1] + hangs.pop() + for d in range(depth): + if indent[d] > prev_indent: + indent[d] = 0 + for ind in list(indent_chances): + if ind >= prev_indent: + del indent_chances[ind] + del open_rows[depth + 1:] + depth -= 1 + if depth: + indent_chances[indent[depth]] = True + for idx in range(row, -1, -1): + if parens[idx]: + parens[idx] -= 1 + break + assert len(indent) == depth + 1 + if start[1] not in indent_chances: + # allow to line up tokens + indent_chances[start[1]] = text - def report_invalid_syntax(self): - """Check if the syntax is valid.""" - (exc_type, exc) = sys.exc_info()[:2] - if len(exc.args) > 1: - offset = exc.args[1] - if len(offset) > 2: - offset = offset[1:3] + last_token_multiline = (start[0] != end[0]) + if last_token_multiline: + rel_indent[end[0] - first_row] = rel_indent[row] + + if indent_next and expand_indent(line) == indent_level + 4: + pos = (start[0], indent[0] + 4) + if visual_indent: + code = "E129 visually indented line" else: - offset = (1, 0) - self.report_error(offset[0], offset[1] or 0, - 'E901 %s: %s' % (exc_type.__name__, exc.args[0]), - self.report_invalid_syntax) + code = "E125 continuation line" + yield pos, "%s with same indent as next logical line" % code - def readline(self): - """Get the next line from the input buffer.""" - if self.line_number >= self.total_lines: - return '' - line = self.lines[self.line_number] - self.line_number += 1 - if self.indent_char is None and line[:1] in WHITESPACE: - self.indent_char = line[0] - return line - def run_check(self, check, argument_names): - """Run a check plugin.""" - arguments = [] - for name in argument_names: - arguments.append(getattr(self, name)) - return check(*arguments) +@check.logical(codes=(E211)) +def whitespace_before_parameters(line, tokens): + r"""Avoid extraneous whitespace. - def init_checker_state(self, name, argument_names): - """ Prepares a custom state for the specific checker plugin.""" - if 'checker_state' in argument_names: - self.checker_state = self._checker_states.setdefault(name, {}) + Avoid extraneous whitespace in the following situations: + - before the open parenthesis that starts the argument list of a + function call. + - before the open parenthesis that starts an indexing or slicing. - def check_physical(self, line): - """Run all physical checks on a raw input line.""" - self.physical_line = line - for name, check, argument_names in self._physical_checks: - self.init_checker_state(name, argument_names) - result = self.run_check(check, argument_names) - if result is not None: - (offset, text) = result - self.report_error(self.line_number, offset, text, check) - if text[:4] == 'E101': - self.indent_char = line[0] + Okay: spam(1) + E211: spam (1) - def build_tokens_line(self): - """Build a logical line from tokens.""" - logical = [] - comments = [] - length = 0 - prev_row = prev_col = mapping = None - for token_type, text, start, end, line in self.tokens: - if token_type in SKIP_TOKENS: - continue - if not mapping: - mapping = [(0, start)] - if token_type == tokenize.COMMENT: - comments.append(text) - continue - if token_type == tokenize.STRING: - text = mute_string(text) - if prev_row: - (start_row, start_col) = start - if prev_row != start_row: # different row - prev_text = self.lines[prev_row - 1][prev_col - 1] - if prev_text == ',' or (prev_text not in '{[(' and - text not in '}])'): - text = ' ' + text - elif prev_col != start_col: # different column - text = line[prev_col:start_col] + text - logical.append(text) - length += len(text) - mapping.append((length, end)) - (prev_row, prev_col) = end - self.logical_line = ''.join(logical) - self.noqa = comments and noqa(''.join(comments)) - return mapping + Okay: dict['key'] = list[index] + E211: dict ['key'] = list[index] + E211: dict['key'] = list [index] + """ + prev_type, prev_text, __, prev_end, __ = tokens[0] + for index in range(1, len(tokens)): + token_type, text, start, end, __ = tokens[index] + if (token_type == tokenize.OP and + text in '([' and + start != prev_end and + (prev_type == tokenize.NAME or prev_text in '}])') and + # Syntax "class A (B):" is allowed, but avoid it + (index < 2 or tokens[index - 2][1] != 'class') and + # Allow "return (a.foo for a in range(5))" + not keyword.iskeyword(prev_text)): + yield prev_end, "E211 whitespace before '%s'" % text + prev_type = token_type + prev_text = text + prev_end = end - def check_logical(self): - """Build a line from tokens and run all logical checks on it.""" - self.report.increment_logical_line() - mapping = self.build_tokens_line() - if not mapping: - return +@check.logical(codes=(E221, E222, E223, E224)) +def whitespace_around_operator(line): + r"""Avoid extraneous whitespace around an operator. - (start_row, start_col) = mapping[0][1] - start_line = self.lines[start_row - 1] - self.indent_level = expand_indent(start_line[:start_col]) - if self.blank_before < self.blank_lines: - self.blank_before = self.blank_lines - if self.verbose >= 2: - print(self.logical_line[:80].rstrip()) - for name, check, argument_names in self._logical_checks: - if self.verbose >= 4: - print(' ' + name) - self.init_checker_state(name, argument_names) - for offset, text in self.run_check(check, argument_names) or (): - if not isinstance(offset, tuple): - for token_offset, pos in mapping: - if offset <= token_offset: - break - offset = (pos[0], pos[1] + offset - token_offset) - self.report_error(offset[0], offset[1], text, check) - if self.logical_line: - self.previous_indent_level = self.indent_level - self.previous_logical = self.logical_line - self.blank_lines = 0 - self.tokens = [] + Okay: a = 12 + 3 + E221: a = 4 + 5 + E222: a = 4 + 5 + E223: a = 4\t+ 5 + E224: a = 4 +\t5 + """ + for match in OPERATOR_REGEX.finditer(line): + before, after = match.groups() - def check_ast(self): - """Build the file's AST and run all AST checks.""" - try: - tree = compile(''.join(self.lines), '', 'exec', PyCF_ONLY_AST) - except (ValueError, SyntaxError, TypeError): - return self.report_invalid_syntax() - for name, cls, __ in self._ast_checks: - checker = cls(tree, self.filename) - for lineno, offset, text, check in checker.run(): - if not self.lines or not noqa(self.lines[lineno - 1]): - self.report_error(lineno, offset, text, check) + if '\t' in before: + yield match.start(1), "E223 tab before operator" + elif len(before) > 1: + yield match.start(1), "E221 multiple spaces before operator" - def generate_tokens(self): - """Tokenize the file, run physical line checks and yield tokens.""" - if self._io_error: - self.report_error(1, 0, 'E902 %s' % self._io_error, readlines) - tokengen = tokenize.generate_tokens(self.readline) - try: - for token in tokengen: - if token[2][0] > self.total_lines: - return - self.maybe_check_physical(token) - yield token - except (SyntaxError, tokenize.TokenError): - self.report_invalid_syntax() + if '\t' in after: + yield match.start(2), "E224 tab after operator" + elif len(after) > 1: + yield match.start(2), "E222 multiple spaces after operator" - def maybe_check_physical(self, token): - """If appropriate (based on token), check current physical line(s).""" - # Called after every token, but act only on end of line. - if _is_eol_token(token): - # Obviously, a newline token ends a single physical line. - self.check_physical(token[4]) - elif token[0] == tokenize.STRING and '\n' in token[1]: - # Less obviously, a string that contains newlines is a - # multiline string, either triple-quoted or with internal - # newlines backslash-escaped. Check every physical line in the - # string *except* for the last one: its newline is outside of - # the multiline string, so we consider it a regular physical - # line, and will check it like any other physical line. - # - # Subtleties: - # - we don't *completely* ignore the last line; if it contains - # the magical "# noqa" comment, we disable all physical - # checks for the entire multiline string - # - have to wind self.line_number back because initially it - # points to the last line of the string, and we want - # check_physical() to give accurate feedback - if noqa(token[4]): - return - self.multiline = True - self.line_number = token[2][0] - for line in token[1].split('\n')[:-1]: - self.check_physical(line + '\n') - self.line_number += 1 - self.multiline = False - def check_all(self, expected=None, line_offset=0): - """Run all checks on the input file.""" - self.report.init_file(self.filename, self.lines, expected, line_offset) - self.total_lines = len(self.lines) - if self._ast_checks: - self.check_ast() - self.line_number = 0 - self.indent_char = None - self.indent_level = self.previous_indent_level = 0 - self.previous_logical = '' - self.tokens = [] - self.blank_lines = self.blank_before = 0 - parens = 0 - for token in self.generate_tokens(): - self.tokens.append(token) - token_type, text = token[0:2] - if self.verbose >= 3: - if token[2][0] == token[3][0]: - pos = '[%s:%s]' % (token[2][1] or '', token[3][1]) - else: - pos = 'l.%s' % token[3][0] - print('l.%s\t%s\t%s\t%r' % - (token[2][0], pos, tokenize.tok_name[token[0]], text)) - if token_type == tokenize.OP: - if text in '([{': - parens += 1 - elif text in '}])': - parens -= 1 - elif not parens: - if token_type in NEWLINE: - if token_type == tokenize.NEWLINE: - self.check_logical() - self.blank_before = 0 - elif len(self.tokens) == 1: - # The physical line contains only this token. - self.blank_lines += 1 - del self.tokens[0] - else: - self.check_logical() - elif COMMENT_WITH_NL and token_type == tokenize.COMMENT: - if len(self.tokens) == 1: - # The comment also ends a physical line - token = list(token) - token[1] = text.rstrip('\r\n') - token[3] = (token[2][0], token[2][1] + len(token[1])) - self.tokens = [tuple(token)] - self.check_logical() - if self.tokens: - self.check_physical(self.lines[-1]) - self.check_logical() - return self.report.get_file_results() +@check.logical(codes=(E225, E226, E227, E228)) +def missing_whitespace_around_operator(line, tokens): + r"""Surround operators with a single space on either side. + + - Always surround these binary operators with a single space on + either side: assignment (=), augmented assignment (+=, -= etc.), + comparisons (==, <, >, !=, <=, >=, in, not in, is, is not), + Booleans (and, or, not). + - If operators with different priorities are used, consider adding + whitespace around the operators with the lowest priorities. -class BaseReport(object): - """Collect the results of the checks.""" + Okay: i = i + 1 + Okay: submitted += 1 + Okay: x = x * 2 - 1 + Okay: hypot2 = x * x + y * y + Okay: c = (a + b) * (a - b) + Okay: foo(bar, key='word', *args, **kwargs) + Okay: alpha[:-i] + + E225: i=i+1 + E225: submitted +=1 + E225: x = x /2 - 1 + E225: z = x **y + E226: c = (a+b) * (a-b) + E226: hypot2 = x*x + y*y + E227: c = a|b + E228: msg = fmt%(errno, errmsg) + """ + parens = 0 + need_space = False + prev_type = tokenize.OP + prev_text = prev_end = None + for token_type, text, start, end, line in tokens: + if token_type in SKIP_COMMENTS: + continue + if text in ('(', 'lambda'): + parens += 1 + elif text == ')': + parens -= 1 + if need_space: + if start != prev_end: + # Found a (probably) needed space + if need_space is not True and not need_space[1]: + yield (need_space[0], + "E225 missing whitespace around operator") + need_space = False + elif text == '>' and prev_text in ('<', '-'): + # Tolerate the "<>" operator, even if running Python 3 + # Deal with Python 3's annotated return value "->" + pass + else: + if need_space is True or need_space[1]: + # A needed trailing space was not found + yield prev_end, "E225 missing whitespace around operator" + elif prev_text != '**': + code, optype = E226, 'arithmetic' + if prev_text == '%': + code, optype = E228, 'modulo' + elif prev_text not in ARITHMETIC_OP: + code, optype = E227, 'bitwise or shift' + yield (need_space[0], "%s missing whitespace " + "around %s operator" % (code, optype)) + need_space = False + elif token_type == tokenize.OP and prev_end is not None: + if text == '=' and parens: + # Allow keyword args or defaults: foo(bar=None). + pass + elif text in WS_NEEDED_OPERATORS: + need_space = True + elif text in UNARY_OPERATORS: + # Check if the operator is being used as a binary operator + # Allow unary operators: -123, -x, +1. + # Allow argument unpacking: foo(*args, **kwargs). + if (prev_text in '}])' if prev_type == tokenize.OP + else prev_text not in KEYWORDS): + need_space = None + elif text in WS_OPTIONAL_OPERATORS: + need_space = None - print_filename = False + if need_space is None: + # Surrounding space is optional, but ensure that + # trailing space matches opening space + need_space = (prev_end, start != prev_end) + elif need_space and start == prev_end: + # A needed opening space was not found + yield prev_end, "E225 missing whitespace around operator" + need_space = False + prev_type = token_type + prev_text = text + prev_end = end - def __init__(self, options): - self._benchmark_keys = options.benchmark_keys - self._ignore_code = options.ignore_code - # Results - self.elapsed = 0 - self.total_errors = 0 - self.counters = dict.fromkeys(self._benchmark_keys, 0) - self.messages = {} - def start(self): - """Start the timer.""" - self._start_time = time.time() +@check.logical(codes=(E241, E242)) +def whitespace_around_comma(line): + r"""Avoid extraneous whitespace after a comma or a colon. - def stop(self): - """Stop the timer.""" - self.elapsed = time.time() - self._start_time + Note: these checks are disabled by default - def init_file(self, filename, lines, expected, line_offset): - """Signal a new file.""" - self.filename = filename - self.lines = lines - self.expected = expected or () - self.line_offset = line_offset - self.file_errors = 0 - self.counters['files'] += 1 - self.counters['physical lines'] += len(lines) + Okay: a = (1, 2) + E241: a = (1, 2) + E242: a = (1,\t2) + """ + for m in WHITESPACE_AFTER_COMMA_REGEX.finditer(line): + found = m.start() + 1 + if '\t' in m.group(): + yield found, "E242 tab after '%s'" % m.group()[0] + else: + yield found, "E241 multiple spaces after '%s'" % m.group()[0] - def increment_logical_line(self): - """Signal a new logical line.""" - self.counters['logical lines'] += 1 - def error(self, line_number, offset, text, check): - """Report an error, according to options.""" - code = text[:4] - if self._ignore_code(code): - return - if code in self.counters: - self.counters[code] += 1 - else: - self.counters[code] = 1 - self.messages[code] = text[5:] - # Don't care about expected errors or warnings - if code in self.expected: - return - if self.print_filename and not self.file_errors: - print(self.filename) - self.file_errors += 1 - self.total_errors += 1 - return code +@check.logical(codes=(E251)) +def whitespace_around_named_parameter_equals(line, tokens): + r"""Don't use spaces around the '=' sign in function arguments. - def get_file_results(self): - """Return the count of errors and warnings for this file.""" - return self.file_errors + Don't use spaces around the '=' sign when used to indicate a + keyword argument or a default parameter value. - def get_count(self, prefix=''): - """Return the total count of errors and warnings.""" - return sum([self.counters[key] - for key in self.messages if key.startswith(prefix)]) + Okay: def complex(real, imag=0.0): + Okay: return magic(r=real, i=imag) + Okay: boolean(a == b) + Okay: boolean(a != b) + Okay: boolean(a <= b) + Okay: boolean(a >= b) + Okay: def foo(arg: int = 42): - def get_statistics(self, prefix=''): - """Get statistics for message codes that start with the prefix. + E251: def complex(real, imag = 0.0): + E251: return magic(r = real, i = imag) + """ + parens = 0 + no_space = False + prev_end = None + annotated_func_arg = False + in_def = line.startswith('def') + message = "E251 unexpected spaces around keyword / parameter equals" + for token_type, text, start, end, line in tokens: + if token_type == tokenize.NL: + continue + if no_space: + no_space = False + if start != prev_end: + yield (prev_end, message) + if token_type == tokenize.OP: + if text == '(': + parens += 1 + elif text == ')': + parens -= 1 + elif in_def and text == ':' and parens == 1: + annotated_func_arg = True + elif parens and text == ',' and parens == 1: + annotated_func_arg = False + elif parens and text == '=' and not annotated_func_arg: + no_space = True + if start != prev_end: + yield (prev_end, message) + if not parens: + annotated_func_arg = False - prefix='' matches all errors and warnings - prefix='E' matches all errors - prefix='W' matches all warnings - prefix='E4' matches all errors that have to do with imports - """ - return ['%-7s %s %s' % (self.counters[key], key, self.messages[key]) - for key in sorted(self.messages) if key.startswith(prefix)] + prev_end = end - def print_statistics(self, prefix=''): - """Print overall statistics (number of errors and warnings).""" - for line in self.get_statistics(prefix): - print(line) - def print_benchmark(self): - """Print benchmark numbers.""" - print('%-7.2f %s' % (self.elapsed, 'seconds elapsed')) - if self.elapsed: - for key in self._benchmark_keys: - print('%-7d %s per second (%d total)' % - (self.counters[key] / self.elapsed, key, - self.counters[key])) +@check.logical(codes=(E261, E262, E265, E266)) +def whitespace_before_comment(line, tokens): + r"""Separate inline comments by at least two spaces. + An inline comment is a comment on the same line as a statement. Inline + comments should be separated by at least two spaces from the statement. + They should start with a # and a single space. -class FileReport(BaseReport): - """Collect the results of the checks and print only the filenames.""" - print_filename = True + Each line of a block comment starts with a # and a single space + (unless it is indented text inside the comment). + Okay: x = x + 1 # Increment x + Okay: x = x + 1 # Increment x + Okay: # Block comment + E261: x = x + 1 # Increment x + E262: x = x + 1 #Increment x + E262: x = x + 1 # Increment x + E265: #Block comment + E266: ### Block comment + """ + prev_end = (0, 0) + for token_type, text, start, end, line in tokens: + if token_type == tokenize.COMMENT: + inline_comment = line[:start[1]].strip() + if inline_comment: + if prev_end[0] == start[0] and start[1] < prev_end[1] + 2: + yield (prev_end, + "E261 at least two spaces before inline comment") + symbol, sp, comment = text.partition(' ') + bad_prefix = symbol not in '#:' and (symbol.lstrip('#')[:1] or '#') + if inline_comment: + if bad_prefix or comment[:1] in WHITESPACE: + yield start, "E262 inline comment should start with '# '" + elif bad_prefix and (bad_prefix != '!' or start[0] > 1): + if bad_prefix != '#': + yield start, "E265 block comment should start with '# '" + elif comment: + yield start, "E266 too many leading '#' for block comment" + elif token_type != tokenize.NL: + prev_end = end -class StandardReport(BaseReport): - """Collect and print the results of the checks.""" - def __init__(self, options): - super(StandardReport, self).__init__(options) - self._fmt = REPORT_FORMAT.get(options.format.lower(), - options.format) - self._repeat = options.repeat - self._show_source = options.show_source - self._show_pep8 = options.show_pep8 +@check.logical(codes=(E401)) +def imports_on_separate_lines(line): + r"""Imports should usually be on separate lines. - def init_file(self, filename, lines, expected, line_offset): - """Signal a new file.""" - self._deferred_print = [] - return super(StandardReport, self).init_file( - filename, lines, expected, line_offset) + Okay: import os\nimport sys + E401: import sys, os - def error(self, line_number, offset, text, check): - """Report an error, according to options.""" - code = super(StandardReport, self).error(line_number, offset, - text, check) - if code and (self.counters[code] == 1 or self._repeat): - self._deferred_print.append( - (line_number, offset, code, text[5:], check.__doc__)) - return code + Okay: from subprocess import Popen, PIPE + Okay: from myclas import MyClass + Okay: from foo.bar.yourclass import YourClass + Okay: import myclass + Okay: import foo.bar.yourclass + """ + if line.startswith('import '): + found = line.find(',') + if -1 < found and ';' not in line[:found]: + yield found, "E401 multiple imports on one line" - def get_file_results(self): - """Print the result and return the overall count for this file.""" - self._deferred_print.sort() - for line_number, offset, code, text, doc in self._deferred_print: - print(self._fmt % { - 'path': self.filename, - 'row': self.line_offset + line_number, 'col': offset + 1, - 'code': code, 'text': text, - }) - if self._show_source: - if line_number > len(self.lines): - line = '' - else: - line = self.lines[line_number - 1] - print(line.rstrip()) - print(re.sub(r'\S', ' ', line[:offset]) + '^') - if self._show_pep8 and doc: - print(' ' + doc.strip()) - # stdout is block buffered when not stdout.isatty(). - # line can be broken where buffer boundary since other processes - # write to same file. - # flush() after print() to avoid buffer boundary. - # Typical buffer size is 8192. line written safely when - # len(line) < 8192. - sys.stdout.flush() - return self.file_errors +@check.logical(codes=(E402)) +def module_imports_on_top_of_file(line, indent_level, checker_state, noqa): + r"""Imports are always put at the top of the file, just after any module + comments and docstrings, and before module globals and constants. + Okay: import os + Okay: # this is a comment\nimport os + Okay: '''this is a module docstring'''\nimport os + Okay: r'''this is a module docstring'''\nimport os + Okay: try:\n import x\nexcept:\n pass\nelse:\n pass\nimport y + Okay: try:\n import x\nexcept:\n pass\nfinally:\n pass\nimport y + E402: a=1\nimport os + E402: 'One string'\n"Two string"\nimport os + E402: a=1\nfrom sys import x -class DiffReport(StandardReport): - """Collect and print the results for the changed lines only.""" + Okay: if x:\n import os + """ + def is_string_literal(line): + if line[0] in 'uUbB': + line = line[1:] + if line and line[0] in 'rR': + line = line[1:] + return line and (line[0] == '"' or line[0] == "'") - def __init__(self, options): - super(DiffReport, self).__init__(options) - self._selected = options.selected_lines + allowed_try_keywords = ('try', 'except', 'else', 'finally') - def error(self, line_number, offset, text, check): - if line_number not in self._selected[self.filename]: - return - return super(DiffReport, self).error(line_number, offset, text, check) + if indent_level: # Allow imports in conditional statements or functions + return + if not line: # Allow empty lines or comments + return + if noqa: + return + if line.startswith('import ') or line.startswith('from '): + if checker_state.get('seen_non_imports', False): + yield 0, "E402 module level import not at top of file" + elif any(line.startswith(kw) for kw in allowed_try_keywords): + # Allow try, except, else, finally keywords intermixed with imports in + # order to support conditional importing + return + elif is_string_literal(line): + # The first literal is a docstring, allow it. Otherwise, report error. + if checker_state.get('seen_docstring', False): + checker_state['seen_non_imports'] = True + else: + checker_state['seen_docstring'] = True + else: + checker_state['seen_non_imports'] = True -class StyleGuide(object): - """Initialize a PEP-8 instance with few options.""" +@check.logical(codes=(E701, E702, E703, E704, E731)) +def compound_statements(line): + r"""Compound statements (on the same line) are generally discouraged. - def __init__(self, *args, **kwargs): - # build options from the command line - self.checker_class = kwargs.pop('checker_class', Checker) - parse_argv = kwargs.pop('parse_argv', False) - config_file = kwargs.pop('config_file', False) - parser = kwargs.pop('parser', None) - # build options from dict - options_dict = dict(*args, **kwargs) - arglist = None if parse_argv else options_dict.get('paths', None) - options, self.paths = process_options( - arglist, parse_argv, config_file, parser) - if options_dict: - options.__dict__.update(options_dict) - if 'paths' in options_dict: - self.paths = options_dict['paths'] + While sometimes it's okay to put an if/for/while with a small body + on the same line, never do this for multi-clause statements. + Also avoid folding such long lines! - self.runner = self.input_file - self.options = options + Always use a def statement instead of an assignment statement that + binds a lambda expression directly to a name. - if not options.reporter: - options.reporter = BaseReport if options.quiet else StandardReport + Okay: if foo == 'blah':\n do_blah_thing() + Okay: do_one() + Okay: do_two() + Okay: do_three() - options.select = tuple(options.select or ()) - if not (options.select or options.ignore or - options.testsuite or options.doctest) and DEFAULT_IGNORE: - # The default choice: ignore controversial checks - options.ignore = tuple(DEFAULT_IGNORE.split(',')) + E701: if foo == 'blah': do_blah_thing() + E701: for x in lst: total += x + E701: while t < 10: t = delay() + E701: if foo == 'blah': do_blah_thing() + E701: else: do_non_blah_thing() + E701: try: something() + E701: finally: cleanup() + E701: if foo == 'blah': one(); two(); three() + E702: do_one(); do_two(); do_three() + E703: do_four(); # useless semicolon + E704: def f(x): return 2*x + E731: f = lambda x: 2*x + """ + last_char = len(line) - 1 + found = line.find(':') + while -1 < found < last_char: + before = line[:found] + if ((before.count('{') <= before.count('}') and # {'a': 1} (dict) + before.count('[') <= before.count(']') and # [1:2] (slice) + before.count('(') <= before.count(')'))): # (annotation) + lambda_kw = LAMBDA_REGEX.search(before) + if lambda_kw: + before = line[:lambda_kw.start()].rstrip() + if before[-1:] == '=' and isidentifier(before[:-1].strip()): + yield 0, ("E731 do not assign a lambda expression, use a " + "def") + break + if before.startswith('def '): + yield 0, "E704 multiple statements on one line (def)" + else: + yield found, "E701 multiple statements on one line (colon)" + found = line.find(':', found + 1) + found = line.find(';') + while -1 < found: + if found < last_char: + yield found, "E702 multiple statements on one line (semicolon)" else: - # Ignore all checks which are not explicitly selected - options.ignore = ('',) if options.select else tuple(options.ignore) - options.benchmark_keys = BENCHMARK_KEYS[:] - options.ignore_code = self.ignore_code - options.physical_checks = self.get_checks('physical_line') - options.logical_checks = self.get_checks('logical_line') - options.ast_checks = self.get_checks('tree') - self.init_report() + yield found, "E703 statement ends with a semicolon" + found = line.find(';', found + 1) - def init_report(self, reporter=None): - """Initialize the report instance.""" - self.options.report = (reporter or self.options.reporter)(self.options) - return self.options.report - def check_files(self, paths=None): - """Run all checks on the paths.""" - if paths is None: - paths = self.paths - report = self.options.report - runner = self.runner - report.start() - try: - for path in paths: - if os.path.isdir(path): - self.input_dir(path) - elif not self.excluded(path): - runner(path) - except KeyboardInterrupt: - print('... stopped') - report.stop() - return report +@check.logical(codes=(E502)) +def explicit_line_join(line, tokens): + r"""Avoid explicit line join between brackets. - def input_file(self, filename, lines=None, expected=None, line_offset=0): - """Run all checks on a Python source file.""" - if self.options.verbose: - print('checking %s' % filename) - fchecker = self.checker_class( - filename, lines=lines, options=self.options) - return fchecker.check_all(expected=expected, line_offset=line_offset) + The preferred way of wrapping long lines is by using Python's implied line + continuation inside parentheses, brackets and braces. Long lines can be + broken over multiple lines by wrapping expressions in parentheses. These + should be used in preference to using a backslash for line continuation. - def input_dir(self, dirname): - """Check all files in this directory and all subdirectories.""" - dirname = dirname.rstrip('/') - if self.excluded(dirname): - return 0 - counters = self.options.report.counters - verbose = self.options.verbose - filepatterns = self.options.filename - runner = self.runner - for root, dirs, files in os.walk(dirname): - if verbose: - print('directory ' + root) - counters['directories'] += 1 - for subdir in sorted(dirs): - if self.excluded(subdir, root): - dirs.remove(subdir) - for filename in sorted(files): - # contain a pattern that matches? - if ((filename_match(filename, filepatterns) and - not self.excluded(filename, root))): - runner(os.path.join(root, filename)) + E502: aaa = [123, \\n 123] + E502: aaa = ("bbb " \\n "ccc") - def excluded(self, filename, parent=None): - """Check if the file should be excluded. + Okay: aaa = [123,\n 123] + Okay: aaa = ("bbb "\n "ccc") + Okay: aaa = "bbb " \\n "ccc" + Okay: aaa = 123 # \\ + """ + prev_start = prev_end = parens = 0 + comment = False + backslash = None + for token_type, text, start, end, line in tokens: + if token_type == tokenize.COMMENT: + comment = True + if start[0] != prev_start and parens and backslash and not comment: + yield backslash, "E502 the backslash is redundant between brackets" + if end[0] != prev_end: + if line.rstrip('\r\n').endswith('\\'): + backslash = (end[0], len(line.splitlines()[-1]) - 1) + else: + backslash = None + prev_start = prev_end = end[0] + else: + prev_start = start[0] + if token_type == tokenize.OP: + if text in '([{': + parens += 1 + elif text in ')]}': + parens -= 1 - Check if 'options.exclude' contains a pattern that matches filename. - """ - if not self.options.exclude: - return False - basename = os.path.basename(filename) - if filename_match(basename, self.options.exclude): - return True - if parent: - filename = os.path.join(parent, filename) - filename = os.path.abspath(filename) - return filename_match(filename, self.options.exclude) - def ignore_code(self, code): - """Check if the error code should be ignored. +@check.logical(codes=(E503)) +def break_around_binary_operator(line, tokens): + r""" + Avoid breaks before binary operators. - If 'options.select' contains a prefix of the error code, - return False. Else, if 'options.ignore' contains a prefix of - the error code, return True. - """ - if len(code) < 4 and any(s.startswith(code) - for s in self.options.select): - return False - return (code.startswith(self.options.ignore) and - not code.startswith(self.options.select)) + The preferred place to break around a binary operator is after the + operator, not before it. - def get_checks(self, argument_name): - """Get all the checks for this category. + W503: (width == 0\n + height == 0) + W503: (width == 0\n and height == 0) - Find all globally visible functions where the first argument name - starts with argument_name and which contain selected tests. - """ - checks = [] - for check, attrs in _checks[argument_name].items(): - (codes, args) = attrs - if any(not (code and self.ignore_code(code)) for code in codes): - checks.append((check.__name__, check, args)) - return sorted(checks) + Okay: (width == 0 +\n height == 0) + Okay: foo(\n -x) + Okay: foo(x\n []) + Okay: x = '''\n''' + '' + Okay: foo(x,\n -y) + Okay: foo(x, # comment\n -y) + """ + def is_binary_operator(token_type, text): + # The % character is strictly speaking a binary operator, but the + # common usage seems to be to put it next to the format parameters, + # after a line break. + return ((token_type == tokenize.OP or text in ['and', 'or']) and + text not in "()[]{},:.;@=%") + line_break = False + unary_context = True + for token_type, text, start, end, line in tokens: + if token_type == tokenize.COMMENT: + continue + if ('\n' in text or '\r' in text) and token_type != tokenize.STRING: + line_break = True + else: + if (is_binary_operator(token_type, text) and line_break and + not unary_context): + yield start, "W503 line break before binary operator" + unary_context = text in '([{,;' + line_break = False -def get_parser(prog='pep8', version=__version__): - parser = OptionParser(prog=prog, version=version, - usage="%prog [options] input ...") - parser.config_options = [ - 'exclude', 'filename', 'select', 'ignore', 'max-line-length', - 'hang-closing', 'count', 'format', 'quiet', 'show-pep8', - 'show-source', 'statistics', 'verbose'] - parser.add_option('-v', '--verbose', default=0, action='count', - help="print status messages, or debug with -vv") - parser.add_option('-q', '--quiet', default=0, action='count', - help="report only file names, or nothing with -qq") - parser.add_option('-r', '--repeat', default=True, action='store_true', - help="(obsolete) show all occurrences of the same error") - parser.add_option('--first', action='store_false', dest='repeat', - help="show first occurrence of each error") - parser.add_option('--exclude', metavar='patterns', default=DEFAULT_EXCLUDE, - help="exclude files or directories which match these " - "comma separated patterns (default: %default)") - parser.add_option('--filename', metavar='patterns', default='*.py', - help="when parsing directories, only check filenames " - "matching these comma separated patterns " - "(default: %default)") - parser.add_option('--select', metavar='errors', default='', - help="select errors and warnings (e.g. E,W6)") - parser.add_option('--ignore', metavar='errors', default='', - help="skip errors and warnings (e.g. E4,W) " - "(default: %s)" % DEFAULT_IGNORE) - parser.add_option('--show-source', action='store_true', - help="show source code for each error") - parser.add_option('--show-pep8', action='store_true', - help="show text of PEP 8 for each error " - "(implies --first)") - parser.add_option('--statistics', action='store_true', - help="count errors and warnings") - parser.add_option('--count', action='store_true', - help="print total number of errors and warnings " - "to standard error and set exit code to 1 if " - "total is not null") - parser.add_option('--max-line-length', type='int', metavar='n', - default=MAX_LINE_LENGTH, - help="set maximum allowed line length " - "(default: %default)") - parser.add_option('--hang-closing', action='store_true', - help="hang closing bracket instead of matching " - "indentation of opening bracket's line") - parser.add_option('--format', metavar='format', default='default', - help="set the error format [default|pylint|]") - parser.add_option('--diff', action='store_true', - help="report changes only within line number ranges in " - "the unified diff received on STDIN") - group = parser.add_option_group("Testing Options") - if os.path.exists(TESTSUITE_PATH): - group.add_option('--testsuite', metavar='dir', - help="run regression tests from dir") - group.add_option('--doctest', action='store_true', - help="run doctest on myself") - group.add_option('--benchmark', action='store_true', - help="measure processing speed") - return parser +@check.logical(codes=(E711, E712)) +def comparison_to_singleton(line, noqa): + r"""Comparison to singletons should use "is" or "is not". -def read_config(options, args, arglist, parser): - """Read and parse configurations + Comparisons to singletons like None should always be done + with "is" or "is not", never the equality operators. - If a config file is specified on the command line with the "--config" - option, then only it is used for configuration. + Okay: if arg is not None: + E711: if arg != None: + E711: if None == arg: + E712: if arg == True: + E712: if False == arg: - Otherwise, the user configuration (~/.config/pep8) and any local - configurations in the current directory or above will be merged together - (in that order) using the read method of ConfigParser. + Also, beware of writing if x when you really mean if x is not None -- + e.g. when testing whether a variable or argument that defaults to None was + set to some other value. The other value might have a type (such as a + container) that could be false in a boolean context! """ - config = RawConfigParser() + match = not noqa and COMPARE_SINGLETON_REGEX.search(line) + if match: + singleton = match.group(1) or match.group(3) + same = (match.group(2) == '==') - cli_conf = options.config + msg = "'if cond is %s:'" % (('' if same else 'not ') + singleton) + if singleton in ('None',): + code = E711 + else: + code = E712 + nonzero = ((singleton == 'True' and same) or + (singleton == 'False' and not same)) + msg += " or 'if %scond:'" % ('' if nonzero else 'not ') + yield match.start(2), ("%s comparison to %s should be %s" % + (code, singleton, msg)) - local_dir = os.curdir - if USER_CONFIG and os.path.isfile(USER_CONFIG): - if options.verbose: - print('user configuration: %s' % USER_CONFIG) - config.read(USER_CONFIG) +@check.logical(codes=(E713, E714)) +def comparison_negative(line): + r"""Negative comparison should be done using "not in" and "is not". - parent = tail = args and os.path.abspath(os.path.commonprefix(args)) - while tail: - if config.read(os.path.join(parent, fn) for fn in PROJECT_CONFIG): - local_dir = parent - if options.verbose: - print('local configuration: in %s' % parent) - break - (parent, tail) = os.path.split(parent) + Okay: if x not in y:\n pass + Okay: assert (X in Y or X is Z) + Okay: if not (X in Y):\n pass + Okay: zz = x is not y + E713: Z = not X in Y + E713: if not X.B in Y:\n pass + E714: if not X is Y:\n pass + E714: Z = not X.B is Y + """ + match = COMPARE_NEGATIVE_REGEX.search(line) + if match: + pos = match.start(1) + if match.group(2) == 'in': + yield pos, "E713 test for membership should be 'not in'" + else: + yield pos, "E714 test for object identity should be 'is not'" - if cli_conf and os.path.isfile(cli_conf): - if options.verbose: - print('cli configuration: %s' % cli_conf) - config.read(cli_conf) - pep8_section = parser.prog - if config.has_section(pep8_section): - option_list = dict([(o.dest, o.type or o.action) - for o in parser.option_list]) +@check.logical(codes=(E721)) +def comparison_type(line, noqa): + r"""Object type comparisons should always use isinstance(). - # First, read the default values - (new_options, __) = parser.parse_args([]) + Do not compare types directly. - # Second, parse the configuration - for opt in config.options(pep8_section): - if opt.replace('_', '-') not in parser.config_options: - print(" unknown option '%s' ignored" % opt) - continue - if options.verbose > 1: - print(" %s = %s" % (opt, config.get(pep8_section, opt))) - normalized_opt = opt.replace('-', '_') - opt_type = option_list[normalized_opt] - if opt_type in ('int', 'count'): - value = config.getint(pep8_section, opt) - elif opt_type == 'string': - value = config.get(pep8_section, opt) - if normalized_opt == 'exclude': - value = normalize_paths(value, local_dir) - else: - assert opt_type in ('store_true', 'store_false') - value = config.getboolean(pep8_section, opt) - setattr(new_options, normalized_opt, value) + Okay: if isinstance(obj, int): + E721: if type(obj) is type(1): - # Third, overwrite with the command-line options - (options, __) = parser.parse_args(arglist, values=new_options) - options.doctest = options.testsuite = False - return options + When checking if an object is a string, keep in mind that it might be a + unicode string too! In Python 2.3, str and unicode have a common base + class, basestring, so you can do: + + Okay: if isinstance(obj, basestring): + Okay: if type(a1) is type(b1): + """ + match = COMPARE_TYPE_REGEX.search(line) + if match and not noqa: + inst = match.group(1) + if inst and isidentifier(inst) and inst not in SINGLETONS: + return # Allow comparison for types which are not obvious + yield match.start(), "E721 do not compare types, use 'isinstance()'" -def process_options(arglist=None, parse_argv=False, config_file=None, - parser=None): - """Process options passed either via arglist or via command line args. +@check.logical(codes=(W601)) +def python_3000_has_key(line, noqa): + r"""The {}.has_key() method is removed in Python 3: use the 'in' operator. - Passing in the ``config_file`` parameter allows other tools, such as flake8 - to specify their own options to be processed in pep8. + Okay: if "alph" in d:\n print d["alph"] + W601: assert d.has_key('alph') """ - if not parser: - parser = get_parser() - if not parser.has_option('--config'): - group = parser.add_option_group("Configuration", description=( - "The project options are read from the [%s] section of the " - "tox.ini file or the setup.cfg file located in any parent folder " - "of the path(s) being processed. Allowed options are: %s." % - (parser.prog, ', '.join(parser.config_options)))) - group.add_option('--config', metavar='path', default=config_file, - help="user config file location") - # Don't read the command line if the module is used as a library. - if not arglist and not parse_argv: - arglist = [] - # If parse_argv is True and arglist is None, arguments are - # parsed from the command line (sys.argv) - (options, args) = parser.parse_args(arglist) - options.reporter = None + pos = line.find('.has_key(') + if pos > -1 and not noqa: + yield pos, "W601 .has_key() is deprecated, use 'in'" - if options.ensure_value('testsuite', False): - args.append(options.testsuite) - elif not options.ensure_value('doctest', False): - if parse_argv and not args: - if options.diff or any(os.path.exists(name) - for name in PROJECT_CONFIG): - args = ['.'] - else: - parser.error('input not specified') - options = read_config(options, args, arglist, parser) - options.reporter = parse_argv and options.quiet == 1 and FileReport - options.filename = _parse_multi_options(options.filename) - options.exclude = normalize_paths(options.exclude) - options.select = _parse_multi_options(options.select) - options.ignore = _parse_multi_options(options.ignore) +@check.logical(codes=(W602)) +def python_3000_raise_comma(line): + r"""When raising an exception, use "raise ValueError('message')". - if options.diff: - options.reporter = DiffReport - stdin = stdin_get_value() - options.selected_lines = parse_udiff(stdin, options.filename, args[0]) - args = sorted(options.selected_lines) + The older form is removed in Python 3. - return options, args + Okay: raise DummyError("Message") + W602: raise DummyError, "Message" + """ + match = RAISE_COMMA_REGEX.match(line) + if match and not RERAISE_COMMA_REGEX.match(line): + yield match.end() - 1, "W602 deprecated form of raising exception" -def _parse_multi_options(options, split_token=','): - r"""Split and strip and discard empties. +@check.logical(codes=(W603)) +def python_3000_not_equal(line): + r"""New code should always use != instead of <>. - Turns the following: + The older syntax is removed in Python 3. - A, - B, + Okay: if a != 'no': + W603: if a <> 'no': + """ + pos = line.find('<>') + if pos > -1: + yield pos, "W603 '<>' is deprecated, use '!='" - into ["A", "B"] + +@check.logical(codes=(W604)) +def python_3000_backticks(line): + r"""Backticks are removed in Python 3: use repr() instead. + + Okay: val = repr(1 + 2) + W604: val = `1 + 2` """ - if options: - return [o.strip() for o in options.split(split_token) if o.strip()] - else: - return options + pos = line.find('`') + if pos > -1: + yield pos, "W604 backticks are deprecated, use 'repr()'" def _main():