From c4c6aa6ed7608d52ce49d978f6d932c58518b781 Mon Sep 17 00:00:00 2001 From: Fabian Haase Date: Fri, 11 Jan 2019 21:14:28 +0100 Subject: [PATCH 1/5] Add regex Signed-off-by: Fabian Haase --- flake8_rst/rst.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/flake8_rst/rst.py b/flake8_rst/rst.py index aff5002..c585628 100644 --- a/flake8_rst/rst.py +++ b/flake8_rst/rst.py @@ -1,3 +1,5 @@ +import itertools + import re from fnmatch import fnmatch from functools import wraps @@ -16,12 +18,32 @@ re.MULTILINE, ) +RST_RE = re.compile( + r'(?P' + r'^(?P *)^(?!\.{2} ).*::$' + r'(?P(^(?P=indent) +:\S+:.*\n)*)' + r'\n*' + r')' + r'(?P(^((?P=indent) {3} *.*)?\n)+(^(?P=indent) {3} *.*(\n)?))', + re.MULTILINE, +) + + DOCSTRING_RE = re.compile( r'(?P\n?)' r'^(?P((?P *)\"{3}.*\n(?:(?:(?P=indent).+)?\n)*(?P=indent)\"{3}))', re.MULTILINE, ) +HIGHLIGHT_RE = re.compile( + r'(?P' + r'^\.\. (?Phighlight):: (?P.*)\n' + r'( .*\n)*' + r')' + r'(?P(.*\n)*)', + re.MULTILINE, +) + def merge_by_group(func): @@ -87,6 +109,8 @@ def find_sourcecode(filename, options, src): for source_block in source_blocks: inner_blocks = source_block.find_blocks(RST_RE) + if source_block.directive == 'highlight' and source_block.language in ['python3']: + inner_blocks = itertools.chain(inner_blocks, source_block.find_blocks(MARKER_RE)) found_inner_block = False for inner_block in inner_blocks: found_inner_block = True From 4d254d7e79f15e736f6f67a1a3f97acabc63ab93 Mon Sep 17 00:00:00 2001 From: Fabian Haase Date: Sun, 13 Jan 2019 19:11:33 +0100 Subject: [PATCH 2/5] split source_blocks by pattern Signed-off-by: Fabian Haase --- flake8_rst/rst.py | 15 +++------------ flake8_rst/sourceblock.py | 15 ++++++++++++++- tests/test_source_block.py | 30 +++++++++++++++++++++++++++++- 3 files changed, 46 insertions(+), 14 deletions(-) diff --git a/flake8_rst/rst.py b/flake8_rst/rst.py index 416b58f..202d02a 100644 --- a/flake8_rst/rst.py +++ b/flake8_rst/rst.py @@ -18,7 +18,7 @@ re.MULTILINE, ) -RST_RE = re.compile( +MARKER_RE = re.compile( r'(?P' r'^(?P *)^(?!\.{2} ).*::$' r'(?P(^(?P=indent) +:\S+:.*\n)*)' @@ -28,25 +28,16 @@ re.MULTILINE, ) - DOCSTRING_RE = re.compile( r'(?P\n?)' r'^(?P((?P *)\"{3}.*\n(?:(?:(?P=indent).+)?\n)*(?P=indent)\"{3}))', re.MULTILINE, ) -HIGHLIGHT_RE = re.compile( - r'(?P' - r'^\.\. (?Phighlight):: (?P.*)\n' - r'( .*\n)*' - r')' - r'(?P(.*\n)*)', - re.MULTILINE, -) +HIGHLIGHT_RE = re.compile(r'^\.\. (?Phighlight):: (?P.*)$', re.MULTILINE) def merge_by_group(func): - @wraps(func) def func_wrapper(*args, **kwargs): blocks = {} @@ -107,7 +98,7 @@ def func_wrapper(filename, options, *args, **kwargs): def find_sourcecode(filename, options, src): contains_python_code = filename.split('.')[-1].startswith('py') source = SourceBlock.from_source(options.bootstrap, src) - source_blocks = source.find_blocks(DOCSTRING_RE) if contains_python_code else [source] + source_blocks = source.find_blocks(DOCSTRING_RE) if contains_python_code else source.split_by(HIGHLIGHT_RE) for source_block in source_blocks: inner_blocks = source_block.find_blocks(RST_RE) diff --git a/flake8_rst/sourceblock.py b/flake8_rst/sourceblock.py index 435ac13..5e46f56 100644 --- a/flake8_rst/sourceblock.py +++ b/flake8_rst/sourceblock.py @@ -81,7 +81,7 @@ def merge(cls, source_blocks): return cls(boot_lines, source_lines, directive=main_block.directive, language=main_block.language, roles=main_block.roles) - def __init__(self, boot_lines, source_lines, directive='', language='', roles=None): + def __init__(self, boot_lines, source_lines, directive='highlight', language='python3', roles=None): self._boot_lines = boot_lines self._source_lines = source_lines self.directive = directive @@ -134,6 +134,19 @@ def find_blocks(self, expression): source_block.remove_indentation() yield source_block + def split_by(self, expression): + src = self._source_lines + lines = [] + directive, language = self.directive, self.language + for line in src: + match = re.match(expression, line[SOURCE]) + if match: + if lines: + yield SourceBlock(self._boot_lines, lines, directive=directive, language=language, roles=self.roles) + lines.clear() + directive = match.group('directive') + language = match.group('language') + def remove_indentation(self): indentation = min(INDENT_RE.findall(self.source_block)) if indentation: diff --git a/tests/test_source_block.py b/tests/test_source_block.py index 6518c7c..7b291d9 100644 --- a/tests/test_source_block.py +++ b/tests/test_source_block.py @@ -7,7 +7,8 @@ except ImportError: import pathlib2 as pathlib -from flake8_rst.rst import RST_RE, apply_default_groupnames, apply_directive_specific_options, merge_by_group +from flake8_rst.rst import RST_RE, apply_default_groupnames, apply_directive_specific_options, merge_by_group, \ + HIGHLIGHT_RE from flake8_rst.sourceblock import SourceBlock, _extract_roles from hypothesis import assume, given, note, example from hypothesis import strategies as st @@ -52,6 +53,33 @@ def test_find_block(): assert block.source_block == origin_code +@given(st.lists(st.tuples(code_strategy, code_strategy), min_size=1)) +def test_split_block(blocks): + src = '\n'.join(('.. highlight:: {}\n\n{}'.format(language, source) for language, source in blocks)) + note(src) + code_block = SourceBlock.from_source('', src) + code_blocks = list(code_block.split_by(HIGHLIGHT_RE)) + + for (language, source), block in zip(blocks, code_blocks): + assume('highlight' not in source) + assert block.language == language + assert block.directive == 'highlight' + assert block.source_block == source + + +@given(st.lists(code_strategy, min_size=1)) +def test_split_with_default_block(blocks): + src = '.. highlight:: python3\n\n'.join(blocks) + note(src) + code_block = SourceBlock.from_source('', src) + code_blocks = list(code_block.split_by(HIGHLIGHT_RE)) + + for source, block in zip(blocks, code_blocks): + assert block.language == 'python3' + assert block.directive == 'highlight' + assert block.source_block == source + + def test_clean_doctest(): example = DATA_DIR / 'example_1.rst' src = example.open().read() From ea6ac7edb2fc1890b659d6f8589bec0cdc416eb7 Mon Sep 17 00:00:00 2001 From: Fabian Haase Date: Mon, 14 Jan 2019 00:56:31 +0100 Subject: [PATCH 3/5] split source_blocks by pattern Signed-off-by: Fabian Haase --- flake8_rst/sourceblock.py | 6 +++++- tests/test_source_block.py | 29 ++++++++++++++++------------- 2 files changed, 21 insertions(+), 14 deletions(-) diff --git a/flake8_rst/sourceblock.py b/flake8_rst/sourceblock.py index 5e46f56..f8b055c 100644 --- a/flake8_rst/sourceblock.py +++ b/flake8_rst/sourceblock.py @@ -143,9 +143,13 @@ def split_by(self, expression): if match: if lines: yield SourceBlock(self._boot_lines, lines, directive=directive, language=language, roles=self.roles) - lines.clear() + lines = [] directive = match.group('directive') language = match.group('language') + else: + lines.append(line) + if lines: + yield SourceBlock(self._boot_lines, lines, directive=directive, language=language, roles=self.roles) def remove_indentation(self): indentation = min(INDENT_RE.findall(self.source_block)) diff --git a/tests/test_source_block.py b/tests/test_source_block.py index 7b291d9..fbe34cd 100644 --- a/tests/test_source_block.py +++ b/tests/test_source_block.py @@ -16,7 +16,7 @@ ROOT_DIR = pathlib.Path(__file__).parent DATA_DIR = ROOT_DIR / 'data' -code_strategy = st.characters(blacklist_categories=['Cc']) +code_strategy = st.characters(whitelist_categories=['Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl', 'Mn', 'Mc', 'Nd', 'Pc']) @given(code_strategy, code_strategy) @@ -53,31 +53,34 @@ def test_find_block(): assert block.source_block == origin_code -@given(st.lists(st.tuples(code_strategy, code_strategy), min_size=1)) +highlight_language = st.sampled_from(('python3', 'sh', 'pytb')) + + +@given(st.lists(st.tuples(highlight_language, code_strategy), min_size=1)) def test_split_block(blocks): - src = '\n'.join(('.. highlight:: {}\n\n{}'.format(language, source) for language, source in blocks)) - note(src) + src = u''.join((u'.. highlight:: {}\n{}\n'.format(language, source) for language, source in blocks)) + code_block = SourceBlock.from_source('', src) code_blocks = list(code_block.split_by(HIGHLIGHT_RE)) + assert len(code_blocks) == len(blocks) for (language, source), block in zip(blocks, code_blocks): assume('highlight' not in source) assert block.language == language assert block.directive == 'highlight' - assert block.source_block == source + assert block.source_block == source + '\n' -@given(st.lists(code_strategy, min_size=1)) -def test_split_with_default_block(blocks): - src = '.. highlight:: python3\n\n'.join(blocks) - note(src) - code_block = SourceBlock.from_source('', src) +@given(highlight_language, st.lists(code_strategy, min_size=1)) +def test_split_with_default_block(language, blocks): + src = u'\n.. highlight:: {}\n'.format(language).join(blocks) + '\n' + code_block = SourceBlock.from_source('', src, language=language) code_blocks = list(code_block.split_by(HIGHLIGHT_RE)) - + assert len(code_blocks) == len(blocks) for source, block in zip(blocks, code_blocks): - assert block.language == 'python3' + assert block.language == language assert block.directive == 'highlight' - assert block.source_block == source + assert block.source_block == source + '\n' def test_clean_doctest(): From 4c4c76e3fb9fb33d67cb3fd77bb77513ffc93812 Mon Sep 17 00:00:00 2001 From: Fabian Haase Date: Mon, 14 Jan 2019 02:49:21 +0100 Subject: [PATCH 4/5] Check for simple code-blocks Signed-off-by: Fabian Haase --- flake8_rst/application.py | 4 ++++ flake8_rst/rst.py | 13 ++++++++----- tests/data/example_14.rst | 22 ++++++++++++++++++++++ tests/result_py2/result_14.py | 25 +++++++++++++++++++++++++ tests/result_py3/result_14.py | 25 +++++++++++++++++++++++++ tests/summary_py2/summary_14.txt | 21 +++++++++++++++++++++ tests/summary_py3/summary_14.txt | 21 +++++++++++++++++++++ tests/test_precisely.py | 2 +- 8 files changed, 127 insertions(+), 6 deletions(-) create mode 100644 tests/data/example_14.rst create mode 100644 tests/result_py2/result_14.py create mode 100644 tests/result_py3/result_14.py create mode 100644 tests/summary_py2/summary_14.txt create mode 100644 tests/summary_py3/summary_14.txt diff --git a/flake8_rst/application.py b/flake8_rst/application.py index 1ea509e..8202a17 100644 --- a/flake8_rst/application.py +++ b/flake8_rst/application.py @@ -21,6 +21,10 @@ def __init__(self, program='flake8-rst', version=__version__): '--default-groupnames', default="*.rst->*: default", parse_from_config=True, help='Set default group names.', type='string', ) + self.option_manager.add_option( + '--highlight-languages', default=["python3", "py3", "pycon", "python", "py"], parse_from_config=True, + help='', type='string', + ) options.register_default_options(self.option_manager) def make_file_checker_manager(self): diff --git a/flake8_rst/rst.py b/flake8_rst/rst.py index 202d02a..855fe2d 100644 --- a/flake8_rst/rst.py +++ b/flake8_rst/rst.py @@ -34,7 +34,7 @@ re.MULTILINE, ) -HIGHLIGHT_RE = re.compile(r'^\.\. (?Phighlight):: (?P.*)$', re.MULTILINE) +HIGHLIGHT_RE = re.compile(r'^\.\. (?Phighlight):: (?P\w+)$', re.MULTILINE) def merge_by_group(func): @@ -100,12 +100,15 @@ def find_sourcecode(filename, options, src): source = SourceBlock.from_source(options.bootstrap, src) source_blocks = source.find_blocks(DOCSTRING_RE) if contains_python_code else source.split_by(HIGHLIGHT_RE) + highlight_languages = options.highlight_languages + for source_block in source_blocks: - inner_blocks = source_block.find_blocks(RST_RE) - if source_block.directive == 'highlight' and source_block.language in ['python3']: - inner_blocks = itertools.chain(inner_blocks, source_block.find_blocks(MARKER_RE)) + search_expression = {RST_RE} + if source_block.directive == 'highlight' and source_block.language in highlight_languages: + search_expression.add(MARKER_RE) + found_inner_block = False - for inner_block in inner_blocks: + for inner_block in itertools.chain.from_iterable((source_block.find_blocks(exp) for exp in search_expression)): found_inner_block = True inner_block.clean() yield inner_block diff --git a/tests/data/example_14.rst b/tests/data/example_14.rst new file mode 100644 index 0000000..e6559ff --- /dev/null +++ b/tests/data/example_14.rst @@ -0,0 +1,22 @@ +.. highlight:: sh + +Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia +deserunt mollit anim id est laborum:: + + cd /bin/lib/ + +.. highlight:: py + +Intermediate output:: + + # extract 100 LDA topics, using default parameters + ldb = LdbModel(corpus=mm, id2word=id2word, num_topics=100, distributed=True) + +Final output :: + + # extract 100 LDA topics, using default parameters + lda = LdaModel(corpus=mm, id2word=id2word, + num_topics=100, distributed=distribution_required) + +Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia +deserunt mollit anim id est laborum. \ No newline at end of file diff --git a/tests/result_py2/result_14.py b/tests/result_py2/result_14.py new file mode 100644 index 0000000..7681c5e --- /dev/null +++ b/tests/result_py2/result_14.py @@ -0,0 +1,25 @@ +('test_precisely', + [('F821', + 13, + 9, + "undefined name 'LdbModel'", + u' ldb = LdbModel(corpus=mm, id2word=id2word, num_topics=100, distributed=True)\n'), + ('F821', + 13, + 25, + "undefined name 'mm'", + u' ldb = LdbModel(corpus=mm, id2word=id2word, num_topics=100, distributed=True)\n'), + ('F821', + 13, + 37, + "undefined name 'id2word'", + u' ldb = LdbModel(corpus=mm, id2word=id2word, num_topics=100, distributed=True)\n'), + ('F821', 18, 10, "undefined name 'LdaModel'", u' lda = LdaModel(corpus=mm, id2word=id2word,\n'), + ('F821', 18, 26, "undefined name 'mm'", u' lda = LdaModel(corpus=mm, id2word=id2word,\n'), + ('F821', 18, 38, "undefined name 'id2word'", u' lda = LdaModel(corpus=mm, id2word=id2word,\n'), + ('F821', + 19, + 47, + "undefined name 'distribution_required'", + u' num_topics=100, distributed=distribution_required)\n')], + {'logical lines': 4, 'physical lines': 5, 'tokens': 47}) diff --git a/tests/result_py3/result_14.py b/tests/result_py3/result_14.py new file mode 100644 index 0000000..87394b7 --- /dev/null +++ b/tests/result_py3/result_14.py @@ -0,0 +1,25 @@ +('test_precisely', + [('F821', + 13, + 9, + "undefined name 'LdbModel'", + ' ldb = LdbModel(corpus=mm, id2word=id2word, num_topics=100, distributed=True)\n'), + ('F821', + 13, + 25, + "undefined name 'mm'", + ' ldb = LdbModel(corpus=mm, id2word=id2word, num_topics=100, distributed=True)\n'), + ('F821', + 13, + 37, + "undefined name 'id2word'", + ' ldb = LdbModel(corpus=mm, id2word=id2word, num_topics=100, distributed=True)\n'), + ('F821', 18, 10, "undefined name 'LdaModel'", ' lda = LdaModel(corpus=mm, id2word=id2word,\n'), + ('F821', 18, 26, "undefined name 'mm'", ' lda = LdaModel(corpus=mm, id2word=id2word,\n'), + ('F821', 18, 38, "undefined name 'id2word'", ' lda = LdaModel(corpus=mm, id2word=id2word,\n'), + ('F821', + 19, + 47, + "undefined name 'distribution_required'", + ' num_topics=100, distributed=distribution_required)\n')], + {'logical lines': 4, 'physical lines': 5, 'tokens': 47}) diff --git a/tests/summary_py2/summary_14.txt b/tests/summary_py2/summary_14.txt new file mode 100644 index 0000000..9be520c --- /dev/null +++ b/tests/summary_py2/summary_14.txt @@ -0,0 +1,21 @@ +./data/example_14.rst:13:10: F821 undefined name 'LdbModel' + ldb = LdbModel(corpus=mm, id2word=id2word, num_topics=100, distributed=True) + ^ +./data/example_14.rst:13:26: F821 undefined name 'mm' + ldb = LdbModel(corpus=mm, id2word=id2word, num_topics=100, distributed=True) + ^ +./data/example_14.rst:13:38: F821 undefined name 'id2word' + ldb = LdbModel(corpus=mm, id2word=id2word, num_topics=100, distributed=True) + ^ +./data/example_14.rst:18:11: F821 undefined name 'LdaModel' + lda = LdaModel(corpus=mm, id2word=id2word, + ^ +./data/example_14.rst:18:27: F821 undefined name 'mm' + lda = LdaModel(corpus=mm, id2word=id2word, + ^ +./data/example_14.rst:18:39: F821 undefined name 'id2word' + lda = LdaModel(corpus=mm, id2word=id2word, + ^ +./data/example_14.rst:19:48: F821 undefined name 'distribution_required' + num_topics=100, distributed=distribution_required) + ^ diff --git a/tests/summary_py3/summary_14.txt b/tests/summary_py3/summary_14.txt new file mode 100644 index 0000000..9be520c --- /dev/null +++ b/tests/summary_py3/summary_14.txt @@ -0,0 +1,21 @@ +./data/example_14.rst:13:10: F821 undefined name 'LdbModel' + ldb = LdbModel(corpus=mm, id2word=id2word, num_topics=100, distributed=True) + ^ +./data/example_14.rst:13:26: F821 undefined name 'mm' + ldb = LdbModel(corpus=mm, id2word=id2word, num_topics=100, distributed=True) + ^ +./data/example_14.rst:13:38: F821 undefined name 'id2word' + ldb = LdbModel(corpus=mm, id2word=id2word, num_topics=100, distributed=True) + ^ +./data/example_14.rst:18:11: F821 undefined name 'LdaModel' + lda = LdaModel(corpus=mm, id2word=id2word, + ^ +./data/example_14.rst:18:27: F821 undefined name 'mm' + lda = LdaModel(corpus=mm, id2word=id2word, + ^ +./data/example_14.rst:18:39: F821 undefined name 'id2word' + lda = LdaModel(corpus=mm, id2word=id2word, + ^ +./data/example_14.rst:19:48: F821 undefined name 'distribution_required' + num_topics=100, distributed=distribution_required) + ^ diff --git a/tests/test_precisely.py b/tests/test_precisely.py index 23a63ea..f7cfb7d 100644 --- a/tests/test_precisely.py +++ b/tests/test_precisely.py @@ -8,7 +8,7 @@ @pytest.fixture() def options(mocker): return mocker.Mock(max_line_length=80, verbose=0, hang_closing=False, - ignore=[], bootstrap=None, default_groupnames='*.rst->*: default') + ignore=[], bootstrap=None, default_groupnames='*.rst->*: default', highlight_languages=['py']) @pytest.fixture() From 0b3e1a47340d44d3b8226607dbc02a884a28acee Mon Sep 17 00:00:00 2001 From: Fabian Haase Date: Mon, 14 Jan 2019 03:12:31 +0100 Subject: [PATCH 5/5] Add options to customize literal-block check Signed-off-by: Fabian Haase --- flake8_rst/application.py | 9 +++++++-- flake8_rst/rst.py | 10 +++++++--- tests/test_precisely.py | 4 ++-- 3 files changed, 16 insertions(+), 7 deletions(-) diff --git a/flake8_rst/application.py b/flake8_rst/application.py index 8202a17..0a187e1 100644 --- a/flake8_rst/application.py +++ b/flake8_rst/application.py @@ -22,8 +22,13 @@ def __init__(self, program='flake8-rst', version=__version__): help='Set default group names.', type='string', ) self.option_manager.add_option( - '--highlight-languages', default=["python3", "py3", "pycon", "python", "py"], parse_from_config=True, - help='', type='string', + '--check-languages', default=["python3", "py3", "pycon", "python", "py"], parse_from_config=True, + help='List of highlight-languages which are checked when using literal code-blocks (::).', type='string', + ) + self.option_manager.add_option( + '--highlight-language', default="python3", parse_from_config=True, + help='Default language when no `.. highlight::` directive was used yet. Corresponds to' + '`highlight-language` config variable of sphinx.', type='string', ) options.register_default_options(self.option_manager) diff --git a/flake8_rst/rst.py b/flake8_rst/rst.py index 855fe2d..1bcb631 100644 --- a/flake8_rst/rst.py +++ b/flake8_rst/rst.py @@ -92,19 +92,23 @@ def func_wrapper(filename, options, *args, **kwargs): return func_wrapper +def should_check_for_literal_blocks(source_block, highlight_languages): + return source_block.directive == 'highlight' and source_block.language in highlight_languages + + @apply_directive_specific_options @merge_by_group @apply_default_groupnames def find_sourcecode(filename, options, src): contains_python_code = filename.split('.')[-1].startswith('py') - source = SourceBlock.from_source(options.bootstrap, src) + source = SourceBlock.from_source(options.bootstrap, src, language=options.highlight_language) source_blocks = source.find_blocks(DOCSTRING_RE) if contains_python_code else source.split_by(HIGHLIGHT_RE) - highlight_languages = options.highlight_languages + highlight_languages = options.check_languages for source_block in source_blocks: search_expression = {RST_RE} - if source_block.directive == 'highlight' and source_block.language in highlight_languages: + if should_check_for_literal_blocks(source_block, highlight_languages): search_expression.add(MARKER_RE) found_inner_block = False diff --git a/tests/test_precisely.py b/tests/test_precisely.py index f7cfb7d..3f68b2c 100644 --- a/tests/test_precisely.py +++ b/tests/test_precisely.py @@ -7,8 +7,8 @@ @pytest.fixture() def options(mocker): - return mocker.Mock(max_line_length=80, verbose=0, hang_closing=False, - ignore=[], bootstrap=None, default_groupnames='*.rst->*: default', highlight_languages=['py']) + return mocker.Mock(max_line_length=80, verbose=0, hang_closing=False, highlight_language='python3', + ignore=[], bootstrap=None, default_groupnames='*.rst->*: default', check_languages=['py']) @pytest.fixture()