diff --git a/flake8_rst/application.py b/flake8_rst/application.py index 1ea509e..0a187e1 100644 --- a/flake8_rst/application.py +++ b/flake8_rst/application.py @@ -21,6 +21,16 @@ def __init__(self, program='flake8-rst', version=__version__): '--default-groupnames', default="*.rst->*: default", parse_from_config=True, help='Set default group names.', type='string', ) + self.option_manager.add_option( + '--check-languages', default=["python3", "py3", "pycon", "python", "py"], parse_from_config=True, + help='List of highlight-languages which are checked when using literal code-blocks (::).', type='string', + comma_separated_list=True, + ) + self.option_manager.add_option( + '--highlight-language', default="python3", parse_from_config=True, + help='Default language when no `.. highlight::` directive was used yet. Corresponds to ' + '`highlight-language` config variable of sphinx.', type='string', + ) options.register_default_options(self.option_manager) def make_file_checker_manager(self): diff --git a/flake8_rst/rst.py b/flake8_rst/rst.py index 445e569..1bcb631 100644 --- a/flake8_rst/rst.py +++ b/flake8_rst/rst.py @@ -1,3 +1,5 @@ +import itertools + import re from fnmatch import fnmatch from functools import wraps @@ -16,15 +18,26 @@ re.MULTILINE, ) +MARKER_RE = re.compile( + r'(?P<before>' + r'^(?P<indent> *)^(?!\.{2} ).*::$' + r'(?P<roles>(^(?P=indent) +:\S+:.*\n)*)' + r'\n*' + r')' + r'(?P<code>(^((?P=indent) {3} *.*)?\n)+(^(?P=indent) {3} *.*(\n)?))', + re.MULTILINE, +) + DOCSTRING_RE = re.compile( r'(?P<before>\n?)' r'^(?P<code>((?P<indent> *)\"{3}.*\n(?:(?:(?P=indent).+)?\n)*(?P=indent)\"{3}))', re.MULTILINE, ) +HIGHLIGHT_RE = re.compile(r'^\.\.
(?P<directive>highlight):: (?P<language>\w+)$', re.MULTILINE) -def merge_by_group(func): +def merge_by_group(func): @wraps(func) def func_wrapper(*args, **kwargs): blocks = {} @@ -79,18 +92,27 @@ def func_wrapper(filename, options, *args, **kwargs): return func_wrapper +def should_check_for_literal_blocks(source_block, highlight_languages): + return source_block.directive == 'highlight' and source_block.language in highlight_languages + + @apply_directive_specific_options @merge_by_group @apply_default_groupnames def find_sourcecode(filename, options, src): contains_python_code = filename.split('.')[-1].startswith('py') - source = SourceBlock.from_source(options.bootstrap, src) - source_blocks = source.find_blocks(DOCSTRING_RE) if contains_python_code else [source] + source = SourceBlock.from_source(options.bootstrap, src, language=options.highlight_language) + source_blocks = source.find_blocks(DOCSTRING_RE) if contains_python_code else source.split_by(HIGHLIGHT_RE) + + highlight_languages = options.check_languages for source_block in source_blocks: - inner_blocks = source_block.find_blocks(RST_RE) + search_expression = [RST_RE] + if should_check_for_literal_blocks(source_block, highlight_languages): + search_expression.append(MARKER_RE) + found_inner_block = False - for inner_block in inner_blocks: + for inner_block in itertools.chain.from_iterable((source_block.find_blocks(exp) for exp in search_expression)): found_inner_block = True inner_block.clean() yield inner_block diff --git a/flake8_rst/sourceblock.py b/flake8_rst/sourceblock.py index 435ac13..f8b055c 100644 --- a/flake8_rst/sourceblock.py +++ b/flake8_rst/sourceblock.py @@ -81,7 +81,7 @@ def merge(cls, source_blocks): return cls(boot_lines, source_lines, directive=main_block.directive, language=main_block.language, roles=main_block.roles) - def __init__(self, boot_lines, source_lines, directive='', language='', roles=None): + def __init__(self, boot_lines, source_lines, directive='highlight', language='python3', roles=None):
self._boot_lines = boot_lines self._source_lines = source_lines self.directive = directive @@ -134,6 +134,23 @@ def find_blocks(self, expression): source_block.remove_indentation() yield source_block + def split_by(self, expression): + src = self._source_lines + lines = [] + directive, language = self.directive, self.language + for line in src: + match = re.match(expression, line[SOURCE]) + if match: + if lines: + yield SourceBlock(self._boot_lines, lines, directive=directive, language=language, roles=self.roles) + lines = [] + directive = match.group('directive') + language = match.group('language') + else: + lines.append(line) + if lines: + yield SourceBlock(self._boot_lines, lines, directive=directive, language=language, roles=self.roles) + def remove_indentation(self): indentation = min(INDENT_RE.findall(self.source_block)) if indentation: diff --git a/tests/data/example_14.rst b/tests/data/example_14.rst new file mode 100644 index 0000000..e6559ff --- /dev/null +++ b/tests/data/example_14.rst @@ -0,0 +1,22 @@ +.. highlight:: sh + +Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia +deserunt mollit anim id est laborum:: + + cd /bin/lib/ + +.. highlight:: py + +Intermediate output:: + + # extract 100 LDA topics, using default parameters + ldb = LdbModel(corpus=mm, id2word=id2word, num_topics=100, distributed=True) + +Final output :: + + # extract 100 LDA topics, using default parameters + lda = LdaModel(corpus=mm, id2word=id2word, + num_topics=100, distributed=distribution_required) + +Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia +deserunt mollit anim id est laborum.
\ No newline at end of file diff --git a/tests/result_py2/result_14.py b/tests/result_py2/result_14.py new file mode 100644 index 0000000..7681c5e --- /dev/null +++ b/tests/result_py2/result_14.py @@ -0,0 +1,25 @@ +('test_precisely', + [('F821', + 13, + 9, + "undefined name 'LdbModel'", + u' ldb = LdbModel(corpus=mm, id2word=id2word, num_topics=100, distributed=True)\n'), + ('F821', + 13, + 25, + "undefined name 'mm'", + u' ldb = LdbModel(corpus=mm, id2word=id2word, num_topics=100, distributed=True)\n'), + ('F821', + 13, + 37, + "undefined name 'id2word'", + u' ldb = LdbModel(corpus=mm, id2word=id2word, num_topics=100, distributed=True)\n'), + ('F821', 18, 10, "undefined name 'LdaModel'", u' lda = LdaModel(corpus=mm, id2word=id2word,\n'), + ('F821', 18, 26, "undefined name 'mm'", u' lda = LdaModel(corpus=mm, id2word=id2word,\n'), + ('F821', 18, 38, "undefined name 'id2word'", u' lda = LdaModel(corpus=mm, id2word=id2word,\n'), + ('F821', + 19, + 47, + "undefined name 'distribution_required'", + u' num_topics=100, distributed=distribution_required)\n')], + {'logical lines': 4, 'physical lines': 5, 'tokens': 47}) diff --git a/tests/result_py3/result_14.py b/tests/result_py3/result_14.py new file mode 100644 index 0000000..87394b7 --- /dev/null +++ b/tests/result_py3/result_14.py @@ -0,0 +1,25 @@ +('test_precisely', + [('F821', + 13, + 9, + "undefined name 'LdbModel'", + ' ldb = LdbModel(corpus=mm, id2word=id2word, num_topics=100, distributed=True)\n'), + ('F821', + 13, + 25, + "undefined name 'mm'", + ' ldb = LdbModel(corpus=mm, id2word=id2word, num_topics=100, distributed=True)\n'), + ('F821', + 13, + 37, + "undefined name 'id2word'", + ' ldb = LdbModel(corpus=mm, id2word=id2word, num_topics=100, distributed=True)\n'), + ('F821', 18, 10, "undefined name 'LdaModel'", ' lda = LdaModel(corpus=mm, id2word=id2word,\n'), + ('F821', 18, 26, "undefined name 'mm'", ' lda = LdaModel(corpus=mm, id2word=id2word,\n'), + ('F821', 18, 38, "undefined name 'id2word'", ' lda = 
LdaModel(corpus=mm, id2word=id2word,\n'), + ('F821', + 19, + 47, + "undefined name 'distribution_required'", + ' num_topics=100, distributed=distribution_required)\n')], + {'logical lines': 4, 'physical lines': 5, 'tokens': 47}) diff --git a/tests/summary_py2/summary_14.txt b/tests/summary_py2/summary_14.txt new file mode 100644 index 0000000..9be520c --- /dev/null +++ b/tests/summary_py2/summary_14.txt @@ -0,0 +1,21 @@ +./data/example_14.rst:13:10: F821 undefined name 'LdbModel' + ldb = LdbModel(corpus=mm, id2word=id2word, num_topics=100, distributed=True) + ^ +./data/example_14.rst:13:26: F821 undefined name 'mm' + ldb = LdbModel(corpus=mm, id2word=id2word, num_topics=100, distributed=True) + ^ +./data/example_14.rst:13:38: F821 undefined name 'id2word' + ldb = LdbModel(corpus=mm, id2word=id2word, num_topics=100, distributed=True) + ^ +./data/example_14.rst:18:11: F821 undefined name 'LdaModel' + lda = LdaModel(corpus=mm, id2word=id2word, + ^ +./data/example_14.rst:18:27: F821 undefined name 'mm' + lda = LdaModel(corpus=mm, id2word=id2word, + ^ +./data/example_14.rst:18:39: F821 undefined name 'id2word' + lda = LdaModel(corpus=mm, id2word=id2word, + ^ +./data/example_14.rst:19:48: F821 undefined name 'distribution_required' + num_topics=100, distributed=distribution_required) + ^ diff --git a/tests/summary_py3/summary_14.txt b/tests/summary_py3/summary_14.txt new file mode 100644 index 0000000..9be520c --- /dev/null +++ b/tests/summary_py3/summary_14.txt @@ -0,0 +1,21 @@ +./data/example_14.rst:13:10: F821 undefined name 'LdbModel' + ldb = LdbModel(corpus=mm, id2word=id2word, num_topics=100, distributed=True) + ^ +./data/example_14.rst:13:26: F821 undefined name 'mm' + ldb = LdbModel(corpus=mm, id2word=id2word, num_topics=100, distributed=True) + ^ +./data/example_14.rst:13:38: F821 undefined name 'id2word' + ldb = LdbModel(corpus=mm, id2word=id2word, num_topics=100, distributed=True) + ^ +./data/example_14.rst:18:11: F821 undefined name 'LdaModel' + lda = 
LdaModel(corpus=mm, id2word=id2word, + ^ +./data/example_14.rst:18:27: F821 undefined name 'mm' + lda = LdaModel(corpus=mm, id2word=id2word, + ^ +./data/example_14.rst:18:39: F821 undefined name 'id2word' + lda = LdaModel(corpus=mm, id2word=id2word, + ^ +./data/example_14.rst:19:48: F821 undefined name 'distribution_required' + num_topics=100, distributed=distribution_required) + ^ diff --git a/tests/test_precisely.py b/tests/test_precisely.py index 23a63ea..3f68b2c 100644 --- a/tests/test_precisely.py +++ b/tests/test_precisely.py @@ -7,8 +7,8 @@ @pytest.fixture() def options(mocker): - return mocker.Mock(max_line_length=80, verbose=0, hang_closing=False, - ignore=[], bootstrap=None, default_groupnames='*.rst->*: default') + return mocker.Mock(max_line_length=80, verbose=0, hang_closing=False, highlight_language='python3', + ignore=[], bootstrap=None, default_groupnames='*.rst->*: default', check_languages=['py']) @pytest.fixture() diff --git a/tests/test_source_block.py b/tests/test_source_block.py index 6518c7c..fbe34cd 100644 --- a/tests/test_source_block.py +++ b/tests/test_source_block.py @@ -7,7 +7,8 @@ except ImportError: import pathlib2 as pathlib -from flake8_rst.rst import RST_RE, apply_default_groupnames, apply_directive_specific_options, merge_by_group +from flake8_rst.rst import RST_RE, apply_default_groupnames, apply_directive_specific_options, merge_by_group, \ + HIGHLIGHT_RE from flake8_rst.sourceblock import SourceBlock, _extract_roles from hypothesis import assume, given, note, example from hypothesis import strategies as st @@ -15,7 +16,7 @@ ROOT_DIR = pathlib.Path(__file__).parent DATA_DIR = ROOT_DIR / 'data' -code_strategy = st.characters(blacklist_categories=['Cc']) +code_strategy = st.characters(whitelist_categories=['Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl', 'Mn', 'Mc', 'Nd', 'Pc']) @given(code_strategy, code_strategy) @@ -52,6 +53,36 @@ def test_find_block(): assert block.source_block == origin_code +highlight_language = 
st.sampled_from(('python3', 'sh', 'pytb')) + + +@given(st.lists(st.tuples(highlight_language, code_strategy), min_size=1)) +def test_split_block(blocks): + src = u''.join((u'.. highlight:: {}\n{}\n'.format(language, source) for language, source in blocks)) + + code_block = SourceBlock.from_source('', src) + code_blocks = list(code_block.split_by(HIGHLIGHT_RE)) + + assert len(code_blocks) == len(blocks) + for (language, source), block in zip(blocks, code_blocks): + assume('highlight' not in source) + assert block.language == language + assert block.directive == 'highlight' + assert block.source_block == source + '\n' + + +@given(highlight_language, st.lists(code_strategy, min_size=1)) +def test_split_with_default_block(language, blocks): + src = u'\n.. highlight:: {}\n'.format(language).join(blocks) + '\n' + code_block = SourceBlock.from_source('', src, language=language) + code_blocks = list(code_block.split_by(HIGHLIGHT_RE)) + assert len(code_blocks) == len(blocks) + for source, block in zip(blocks, code_blocks): + assert block.language == language + assert block.directive == 'highlight' + assert block.source_block == source + '\n' + + def test_clean_doctest(): example = DATA_DIR / 'example_1.rst' src = example.open().read()