From 1e3f293f0910c0721d873349c834f40f5a0d6f70 Mon Sep 17 00:00:00 2001 From: baydakov-georgiy Date: Fri, 18 Jul 2025 16:01:21 +0500 Subject: [PATCH 1/8] added was/were check for reports and presentations --- .../checks/presentation_checks/__init__.py | 1 + .../presentation_checks/was_were_check.py | 31 +++++++++++++++++ app/main/checks/report_checks/__init__.py | 1 + .../checks/report_checks/was_were_check.py | 34 +++++++++++++++++++ app/nlp/is_passive_was_were_sentence.py | 26 ++++++++++++++ 5 files changed, 93 insertions(+) create mode 100644 app/main/checks/presentation_checks/was_were_check.py create mode 100644 app/main/checks/report_checks/was_were_check.py create mode 100644 app/nlp/is_passive_was_were_sentence.py diff --git a/app/main/checks/presentation_checks/__init__.py b/app/main/checks/presentation_checks/__init__.py index 52bd5f73..2d2b9459 100644 --- a/app/main/checks/presentation_checks/__init__.py +++ b/app/main/checks/presentation_checks/__init__.py @@ -17,3 +17,4 @@ from .name_of_image_check import PresImageCaptureCheck from .task_tracker import TaskTracker from .overview_in_tasks import OverviewInTasks +from .was_were_check import PresWasWereCheck diff --git a/app/main/checks/presentation_checks/was_were_check.py b/app/main/checks/presentation_checks/was_were_check.py new file mode 100644 index 00000000..50b8efe3 --- /dev/null +++ b/app/main/checks/presentation_checks/was_were_check.py @@ -0,0 +1,31 @@ +import re +from ..base_check import BasePresCriterion, answer +from app.nlp.is_passive_was_were_sentence import is_passive_was_were_sentece + +class PresWasWereCheck(BasePresCriterion): + label = 'Проверка на пассивные конструкции, начинающиеся с Был/Была/Было/Были, которые можно убрать без потери смысла' + description = '' + id = 'pres_was_were_check' + + def __init__(self, file_info): + super().__init__(file_info) + + def check(self): + detected = {} + for slide_index, slide_text in enumerate(self.file.get_text_from_slides()): + mock_slide_text = "Было проведено исследование. Было бы здорово. Как бы было здорово. Была проделана работа. Были сделаны шаги..." + sentences = re.split(r'(?<=[.!?…])\s+', mock_slide_text) + for sentence_index, sentence in enumerate(sentences): + if is_passive_was_were_sentece(sentence): + if slide_index not in detected: + detected[slide_index] = [] + detected[slide_index].append(f'{sentence_index+1}: {sentence}') + if len(detected): + result_str = 'Обнаружены конструкции (Был/Была/Было/Были), которые можно удалить без потери смысла:

' + for slide_index, messages in detected.items(): + result_str += f'Слайд №{slide_index+1}:
' + '
'.join(messages) + '

' + result_score = 0 + else: + result_str = 'Пройдена!' + result_score = 1 + return answer(result_score, result_str) \ No newline at end of file diff --git a/app/main/checks/report_checks/__init__.py b/app/main/checks/report_checks/__init__.py index 0ed2a8dc..74b46890 100644 --- a/app/main/checks/report_checks/__init__.py +++ b/app/main/checks/report_checks/__init__.py @@ -32,3 +32,4 @@ from .sw_section_size import SWSectionSizeCheck from .sw_keywords_check import SWKeywordsCheck from .task_tracker import ReportTaskTracker +from .was_were_check import ReportWasWereCheck \ No newline at end of file diff --git a/app/main/checks/report_checks/was_were_check.py b/app/main/checks/report_checks/was_were_check.py new file mode 100644 index 00000000..b19d6ab6 --- /dev/null +++ b/app/main/checks/report_checks/was_were_check.py @@ -0,0 +1,34 @@ +import re +from ..base_check import BaseReportCriterion, answer +from app.nlp.is_passive_was_were_sentence import is_passive_was_were_sentece + +class ReportWasWereCheck(BaseReportCriterion): + label = 'Проверка на пассивные конструкции, начинающиеся с Был/Была/Было/Были, которые можно убрать без потери смысла' + description = '' + id = 'report_was_were_check' + + def __init__(self, file_info): + super().__init__(file_info) + + def check(self): + if self.file.page_counter() < 4: + return answer(False, 'В отчёте недостаточно страниц. Нечего проверять.') + detected = {} + for page_index, page_text in self.file.pdf_file.get_text_on_page().items(): + sentences = re.split(r'(?<=[.!?…])\s+', page_text) + for sentence_index, sentence in enumerate(sentences): + if is_passive_was_were_sentece(sentence): + if page_index not in detected: + detected[page_index] = [] + detected[page_index].append(f'{sentence_index+1}: {sentence}') + if len(detected): + result_str = 'Обнаружены конструкции (Был/Была/Было/Были), которые можно удалить без потери смысла:

' + for page_index, messages in detected.items(): + result_str += f'Страница №{page_index+1}:
' + '
'.join(messages) + '

' + print(f'Страница №{page_index+1}:
' + '
'.join(messages) + '

') + print() + result_score = 0 + else: + result_str = 'Пройдена!' + result_score = 1 + return answer(result_score, result_str) \ No newline at end of file diff --git a/app/nlp/is_passive_was_were_sentence.py b/app/nlp/is_passive_was_were_sentence.py new file mode 100644 index 00000000..8180358e --- /dev/null +++ b/app/nlp/is_passive_was_were_sentence.py @@ -0,0 +1,26 @@ +import re +import pymorphy2 +import string + +morph = pymorphy2.MorphAnalyzer() + +def clean_word(word): + punct = string.punctuation.replace('-', '') + return word.translate(str.maketrans('', '', punct)) + +def is_passive_was_were_sentece(sentence): + first_words = re.split(r'\s+', sentence.strip(), maxsplit=2) + if len(first_words) < 2: + return False + + first_word = clean_word(first_words[0]) + second_word = clean_word(first_words[1]) + + parsed = morph.parse(first_word)[0] + if (parsed.normal_form == 'быть' and + 'past' in parsed.tag and + parsed.tag.POS == 'VERB'): + second_word_parsed = morph.parse(second_word)[0] + return ('PRTS' in second_word_parsed.tag and + 'pssv' in second_word_parsed.tag) + return False \ No newline at end of file From b663c402c71a47797c166859e13b66e936d2836a Mon Sep 17 00:00:00 2001 From: baydakov-georgiy Date: Thu, 14 Aug 2025 14:53:38 +0500 Subject: [PATCH 2/8] took out general logic and added a threshold value --- .../presentation_checks/was_were_check.py | 22 ++----- .../checks/report_checks/was_were_check.py | 23 ++----- app/nlp/is_passive_was_were_sentence.py | 62 ++++++++++++++++++- 3 files changed, 73 insertions(+), 34 deletions(-) diff --git a/app/main/checks/presentation_checks/was_were_check.py b/app/main/checks/presentation_checks/was_were_check.py index 50b8efe3..e9f8a026 100644 --- a/app/main/checks/presentation_checks/was_were_check.py +++ b/app/main/checks/presentation_checks/was_were_check.py @@ -1,29 +1,19 @@ -import re from ..base_check import BasePresCriterion, answer -from app.nlp.is_passive_was_were_sentence import is_passive_was_were_sentece +from app.nlp.is_passive_was_were_sentence import CritreriaType, generate_output_text, get_was_were_sentences class PresWasWereCheck(BasePresCriterion): label = 'Проверка на пассивные конструкции, начинающиеся с Был/Была/Было/Были, которые можно убрать без потери смысла' description = '' id = 'pres_was_were_check' - def __init__(self, file_info): + def __init__(self, file_info, threshold=3): super().__init__(file_info) + self.threshold = threshold def check(self): - detected = {} - for slide_index, slide_text in enumerate(self.file.get_text_from_slides()): - mock_slide_text = "Было проведено исследование. Было бы здорово. Как бы было здорово. Была проделана работа. Были сделаны шаги..." - sentences = re.split(r'(?<=[.!?…])\s+', mock_slide_text) - for sentence_index, sentence in enumerate(sentences): - if is_passive_was_were_sentece(sentence): - if slide_index not in detected: - detected[slide_index] = [] - detected[slide_index].append(f'{sentence_index+1}: {sentence}') - if len(detected): - result_str = 'Обнаружены конструкции (Был/Была/Было/Были), которые можно удалить без потери смысла:

' - for slide_index, messages in detected.items(): - result_str += f'Слайд №{slide_index+1}:
' + '
'.join(messages) + '

' + detected_sentences, total_sentences = get_was_were_sentences(self.file, CritreriaType.PRESENTATION) + if total_sentences > self.threshold: + result_str = generate_output_text(detected_sentences, CritreriaType.PRESENTATION) result_score = 0 else: result_str = 'Пройдена!' diff --git a/app/main/checks/report_checks/was_were_check.py b/app/main/checks/report_checks/was_were_check.py index b19d6ab6..9adac096 100644 --- a/app/main/checks/report_checks/was_were_check.py +++ b/app/main/checks/report_checks/was_were_check.py @@ -1,32 +1,21 @@ -import re from ..base_check import BaseReportCriterion, answer -from app.nlp.is_passive_was_were_sentence import is_passive_was_were_sentece +from app.nlp.is_passive_was_were_sentence import CritreriaType, generate_output_text, get_was_were_sentences class ReportWasWereCheck(BaseReportCriterion): label = 'Проверка на пассивные конструкции, начинающиеся с Был/Была/Было/Были, которые можно убрать без потери смысла' description = '' id = 'report_was_were_check' - def __init__(self, file_info): + def __init__(self, file_info, threshold=3): super().__init__(file_info) + self.threshold = threshold def check(self): if self.file.page_counter() < 4: return answer(False, 'В отчёте недостаточно страниц. Нечего проверять.') - detected = {} - for page_index, page_text in self.file.pdf_file.get_text_on_page().items(): - sentences = re.split(r'(?<=[.!?…])\s+', page_text) - for sentence_index, sentence in enumerate(sentences): - if is_passive_was_were_sentece(sentence): - if page_index not in detected: - detected[page_index] = [] - detected[page_index].append(f'{sentence_index+1}: {sentence}') - if len(detected): - result_str = 'Обнаружены конструкции (Был/Была/Было/Были), которые можно удалить без потери смысла:

' - for page_index, messages in detected.items(): - result_str += f'Страница №{page_index+1}:
' + '
'.join(messages) + '

' - print(f'Страница №{page_index+1}:
' + '
'.join(messages) + '

') - print() + detected, total_sentences = get_was_were_sentences(self.file, CritreriaType.REPORT) + if total_sentences > self.threshold: + result_str = generate_output_text(detected, CritreriaType.REPORT) result_score = 0 else: result_str = 'Пройдена!' diff --git a/app/nlp/is_passive_was_were_sentence.py b/app/nlp/is_passive_was_were_sentence.py index 8180358e..ded15d96 100644 --- a/app/nlp/is_passive_was_were_sentence.py +++ b/app/nlp/is_passive_was_were_sentence.py @@ -1,14 +1,51 @@ import re import pymorphy2 import string +from enum import Enum morph = pymorphy2.MorphAnalyzer() + +class CritreriaType(Enum): + REPORT=0 + PRESENTATION=1 + + +def criteria_type_to_str(type: CritreriaType): + if type == CritreriaType.REPORT: + return "Страница" + elif type == CritreriaType.PRESENTATION: + return "Слайд" + else: + return "Элемент" + +def get_content_by_file(file, type: CritreriaType): + if type == CritreriaType.REPORT: + return file.pdf_file.get_text_on_page().items() + elif type == CritreriaType.PRESENTATION: + return enumerate(file.get_text_from_slides()) + def clean_word(word): punct = string.punctuation.replace('-', '') return word.translate(str.maketrans('', '', punct)) + def is_passive_was_were_sentece(sentence): + """ + Примеры плохих предложений (пассивные конструкции с "Был*" - можно убрать): + - Был проведен анализ данных + - Была выполнена работа по исследованию + - Было принято решение о внедрении + - Были получены следующие результаты + - Была создана база данных + + Примеры хороших предложений ("Был*" нельзя убрать): + - Было бы здорово получить новые данные + - Был сильный скачок напряжения + - Были времена, когда это казалось невозможным + - Был студентом университета три года назад + - Была программистом до выхода на пенсию + """ first_words = re.split(r'\s+', sentence.strip(), maxsplit=2) if len(first_words) < 2: return False @@ -23,4 +60,27 @@ def is_passive_was_were_sentece(sentence): second_word_parsed = morph.parse(second_word)[0] return ('PRTS' in second_word_parsed.tag and 'pssv' in second_word_parsed.tag) - return False \ No newline at end of file + return False + + +def generate_output_text(detected_senteces, type: CritreriaType): + output = 'Обнаружены конструкции (Был/Была/Было/Были), которые можно удалить без потери смысла:

' + for index, messages in detected_senteces.items(): + output_type = criteria_type_to_str(type) + output += f'{output_type} №{index + 1}:
' + '
'.join(messages) + '

' + return output + + +def get_was_were_sentences(file, type: CritreriaType): + detected = {} + total_sentences = 0 + for page_index, page_text in get_content_by_file(file, type): + sentences = re.split(r'(?<=[.!?…])\s+', page_text) + for sentence_index, sentence in enumerate(sentences): + if is_passive_was_were_sentece(sentence): + total_sentences += 1 + if page_index not in detected: + detected[page_index] = [] + truncated_sentence = sentence[:30] + '...' if len(sentence) > 30 else sentence + detected[page_index].append(f'{sentence_index+1}: {truncated_sentence}') + return detected, total_sentences \ No newline at end of file From 20f1842721ef476607081d29c1952442b1d2bbb0 Mon Sep 17 00:00:00 2001 From: baydakov-georgiy Date: Sat, 15 Nov 2025 20:28:50 +0300 Subject: [PATCH 3/8] fixed report_was_were_check --- .../checks/report_checks/was_were_check.py | 2 +- app/nlp/is_passive_was_were_sentence.py | 39 +++++++++++++------ 2 files changed, 28 insertions(+), 13 deletions(-) diff --git a/app/main/checks/report_checks/was_were_check.py b/app/main/checks/report_checks/was_were_check.py index 9adac096..fbb1a9f8 100644 --- a/app/main/checks/report_checks/was_were_check.py +++ b/app/main/checks/report_checks/was_were_check.py @@ -15,7 +15,7 @@ def check(self): return answer(False, 'В отчёте недостаточно страниц. Нечего проверять.') detected, total_sentences = get_was_were_sentences(self.file, CritreriaType.REPORT) if total_sentences > self.threshold: - result_str = generate_output_text(detected, CritreriaType.REPORT) + result_str = generate_output_text(detected, CritreriaType.REPORT, self.format_page_link) result_score = 0 else: result_str = 'Пройдена!' diff --git a/app/nlp/is_passive_was_were_sentence.py b/app/nlp/is_passive_was_were_sentence.py index ded15d96..3c4206ae 100644 --- a/app/nlp/is_passive_was_were_sentence.py +++ b/app/nlp/is_passive_was_were_sentence.py @@ -7,8 +7,8 @@ class CritreriaType(Enum): - REPORT=0 - PRESENTATION=1 + REPORT = 'report' + PRESENTATION = 'pres' def criteria_type_to_str(type: CritreriaType): @@ -63,11 +63,14 @@ def is_passive_was_were_sentece(sentence): return False -def generate_output_text(detected_senteces, type: CritreriaType): +def generate_output_text(detected_senteces, type: CritreriaType, format_page_link_fn=None): output = 'Обнаружены конструкции (Был/Была/Было/Были), которые можно удалить без потери смысла:

' for index, messages in detected_senteces.items(): output_type = criteria_type_to_str(type) - output += f'{output_type} №{index + 1}:
' + '
'.join(messages) + '

' + if format_page_link_fn: + output += f'{output_type} {format_page_link_fn([index])}:
' + '
'.join(messages) + '

' + else: + output += f'{output_type} №{index}:
' + '
'.join(messages) + '

' return output @@ -75,12 +78,24 @@ def get_was_were_sentences(file, type: CritreriaType): detected = {} total_sentences = 0 for page_index, page_text in get_content_by_file(file, type): - sentences = re.split(r'(?<=[.!?…])\s+', page_text) - for sentence_index, sentence in enumerate(sentences): - if is_passive_was_were_sentece(sentence): - total_sentences += 1 - if page_index not in detected: - detected[page_index] = [] - truncated_sentence = sentence[:30] + '...' if len(sentence) > 30 else sentence - detected[page_index].append(f'{sentence_index+1}: {truncated_sentence}') + lines = page_text.split('\n') + for line_index, line in enumerate(lines): + line = line.strip() + if not line: + continue + + sentences = re.split(r'[.!?…]+\s*', line) + + for sentence in sentences: + sentence = sentence.strip() + if not sentence: + continue + + if is_passive_was_were_sentece(sentence): + total_sentences += 1 + if page_index not in detected: + detected[page_index] = [] + truncated_sentence = sentence[:50] + '...' if len(sentence) > 50 else sentence + detected[page_index].append(f'Строка {line_index+1}: {truncated_sentence}') + return detected, total_sentences \ No newline at end of file From 9087e34f1b15bbb08880927ec86901daf28b4fb9 Mon Sep 17 00:00:00 2001 From: baydakov-georgiy Date: Sat, 15 Nov 2025 22:14:44 +0300 Subject: [PATCH 4/8] fixed pres_was_were_check --- .../presentation_checks/was_were_check.py | 2 +- app/nlp/is_passive_was_were_sentence.py | 22 ++++++++++++++----- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/app/main/checks/presentation_checks/was_were_check.py b/app/main/checks/presentation_checks/was_were_check.py index e9f8a026..58caa450 100644 --- a/app/main/checks/presentation_checks/was_were_check.py +++ b/app/main/checks/presentation_checks/was_were_check.py @@ -13,7 +13,7 @@ def __init__(self, file_info, threshold=3): def check(self): detected_sentences, total_sentences = get_was_were_sentences(self.file, CritreriaType.PRESENTATION) if total_sentences > self.threshold: - result_str = generate_output_text(detected_sentences, CritreriaType.PRESENTATION) + result_str = generate_output_text(detected_sentences, CritreriaType.PRESENTATION, self.format_page_link) result_score = 0 else: result_str = 'Пройдена!' diff --git a/app/nlp/is_passive_was_were_sentence.py b/app/nlp/is_passive_was_were_sentence.py index 3c4206ae..4a64fb09 100644 --- a/app/nlp/is_passive_was_were_sentence.py +++ b/app/nlp/is_passive_was_were_sentence.py @@ -65,12 +65,17 @@ def is_passive_was_were_sentece(sentence): def generate_output_text(detected_senteces, type: CritreriaType, format_page_link_fn=None): output = 'Обнаружены конструкции (Был/Была/Было/Были), которые можно удалить без потери смысла:

' + if type == CritreriaType.REPORT: + offset_index = 0 + elif type == CritreriaType.PRESENTATION: + offset_index = 1 for index, messages in detected_senteces.items(): + display_index = index + offset_index output_type = criteria_type_to_str(type) if format_page_link_fn: - output += f'{output_type} {format_page_link_fn([index])}:
' + '
'.join(messages) + '

' + output += f'{output_type} {format_page_link_fn([display_index])}:
' + '
'.join(messages) + '

' else: - output += f'{output_type} №{index}:
' + '
'.join(messages) + '

' + output += f'{output_type} №{display_index}:
' + '
'.join(messages) + '

' return output @@ -78,12 +83,15 @@ def get_was_were_sentences(file, type: CritreriaType): detected = {} total_sentences = 0 for page_index, page_text in get_content_by_file(file, type): - lines = page_text.split('\n') + lines = re.split(r'\n', page_text) + non_empty_line_counter = 0 for line_index, line in enumerate(lines): + print(line_index, line) line = line.strip() if not line: continue - + + non_empty_line_counter += 1 sentences = re.split(r'[.!?…]+\s*', line) for sentence in sentences: @@ -96,6 +104,10 @@ def get_was_were_sentences(file, type: CritreriaType): if page_index not in detected: detected[page_index] = [] truncated_sentence = sentence[:50] + '...' if len(sentence) > 50 else sentence - detected[page_index].append(f'Строка {line_index+1}: {truncated_sentence}') + if type == CritreriaType.PRESENTATION: + err_str = f'Строка {non_empty_line_counter}: {truncated_sentence}' + elif type == CritreriaType.REPORT: + err_str = f'Строка {line_index+1}: {truncated_sentence}' + detected[page_index].append(err_str) return detected, total_sentences \ No newline at end of file From 879b90af5e62e17fa7cd7aca9e55762c490bb56c Mon Sep 17 00:00:00 2001 From: baydakov-georgiy Date: Thu, 29 Jan 2026 22:26:12 +0300 Subject: [PATCH 5/8] fixed: usage pymorphy3 and write detected sentences for case when test passed --- app/main/checks/presentation_checks/was_were_check.py | 8 +++++++- app/main/checks/report_checks/was_were_check.py | 8 +++++++- app/nlp/is_passive_was_were_sentence.py | 4 ++-- 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/app/main/checks/presentation_checks/was_were_check.py b/app/main/checks/presentation_checks/was_were_check.py index 58caa450..9d99e830 100644 --- a/app/main/checks/presentation_checks/was_were_check.py +++ b/app/main/checks/presentation_checks/was_were_check.py @@ -16,6 +16,12 @@ def check(self): result_str = generate_output_text(detected_sentences, CritreriaType.PRESENTATION, self.format_page_link) result_score = 0 else: - result_str = 'Пройдена!' + if total_sentences != 0: + result_str = ( + generate_output_text(detected_sentences, CritreriaType.PRESENTATION, self.format_page_link) + + 'Пройдена! (но найдены конструкции, которые можно убрать без потери смысла)' + ) + else: + result_str = 'Пройдена!' result_score = 1 return answer(result_score, result_str) \ No newline at end of file diff --git a/app/main/checks/report_checks/was_were_check.py b/app/main/checks/report_checks/was_were_check.py index fbb1a9f8..dbd6ce1a 100644 --- a/app/main/checks/report_checks/was_were_check.py +++ b/app/main/checks/report_checks/was_were_check.py @@ -18,6 +18,12 @@ def check(self): result_str = generate_output_text(detected, CritreriaType.REPORT, self.format_page_link) result_score = 0 else: - result_str = 'Пройдена!' + if total_sentences != 0: + result_str = ( + generate_output_text(detected, CritreriaType.REPORT, self.format_page_link) + + 'Пройдена! (но найдены конструкции, которые можно убрать без потери смысла)' + ) + else: + result_str = 'Пройдена!' result_score = 1 return answer(result_score, result_str) \ No newline at end of file diff --git a/app/nlp/is_passive_was_were_sentence.py b/app/nlp/is_passive_was_were_sentence.py index 4a64fb09..e7f0f6be 100644 --- a/app/nlp/is_passive_was_were_sentence.py +++ b/app/nlp/is_passive_was_were_sentence.py @@ -1,9 +1,9 @@ import re -import pymorphy2 +import pymorphy3 import string from enum import Enum -morph = pymorphy2.MorphAnalyzer() +morph = pymorphy3.MorphAnalyzer() class CritreriaType(Enum): From 9a758ab96c013688d74aaa3b7894b4e3d2b1936e Mon Sep 17 00:00:00 2001 From: baydakov-georgiy Date: Mon, 23 Mar 2026 01:06:24 +0300 Subject: [PATCH 6/8] fixed --- .../presentation_checks/was_were_check.py | 29 ++---- .../checks/report_checks/was_were_check.py | 34 +++---- app/nlp/is_passive_was_were_sentence.py | 91 +++---------------- 3 files changed, 33 insertions(+), 121 deletions(-) diff --git a/app/main/checks/presentation_checks/was_were_check.py b/app/main/checks/presentation_checks/was_were_check.py index 9d99e830..acb39c60 100644 --- a/app/main/checks/presentation_checks/was_were_check.py +++ b/app/main/checks/presentation_checks/was_were_check.py @@ -1,27 +1,18 @@ from ..base_check import BasePresCriterion, answer -from app.nlp.is_passive_was_were_sentence import CritreriaType, generate_output_text, get_was_were_sentences +from app.utils.was_were_check import WasWereChecker + class PresWasWereCheck(BasePresCriterion): - label = 'Проверка на пассивные конструкции, начинающиеся с Был/Была/Было/Были, которые можно убрать без потери смысла' - description = '' - id = 'pres_was_were_check' + label = "Проверка на пассивные конструкции, начинающиеся с Был/Была/Было/Были, которые можно убрать без потери смысла" + id = "pres_was_were_check" def __init__(self, file_info, threshold=3): super().__init__(file_info) self.threshold = threshold - + self.checker = WasWereChecker(file_info, threshold) + def check(self): - detected_sentences, total_sentences = get_was_were_sentences(self.file, CritreriaType.PRESENTATION) - if total_sentences > self.threshold: - result_str = generate_output_text(detected_sentences, CritreriaType.PRESENTATION, self.format_page_link) - result_score = 0 - else: - if total_sentences != 0: - result_str = ( - generate_output_text(detected_sentences, CritreriaType.PRESENTATION, self.format_page_link) - + 'Пройдена! (но найдены конструкции, которые можно убрать без потери смысла)' - ) - else: - result_str = 'Пройдена!' - result_score = 1 - return answer(result_score, result_str) \ No newline at end of file + message, score = self.checker.get_result_msg_and_score( + self.file, self.format_page_link + ) + return answer(score, message) diff --git a/app/main/checks/report_checks/was_were_check.py b/app/main/checks/report_checks/was_were_check.py index dbd6ce1a..28c3abd8 100644 --- a/app/main/checks/report_checks/was_were_check.py +++ b/app/main/checks/report_checks/was_were_check.py @@ -1,29 +1,19 @@ from ..base_check import BaseReportCriterion, answer -from app.nlp.is_passive_was_were_sentence import CritreriaType, generate_output_text, get_was_were_sentences +from app.utils.was_were_check import WasWereChecker + class ReportWasWereCheck(BaseReportCriterion): - label = 'Проверка на пассивные конструкции, начинающиеся с Был/Была/Было/Были, которые можно убрать без потери смысла' - description = '' - id = 'report_was_were_check' + label = "Проверка на пассивные конструкции, начинающиеся с Был/Была/Было/Были, которые можно убрать без потери смысла" + _description = "Предложения начинающиеся с Был/Была/Было/Были, которые можно убрать без потери смысла" + id = "report_was_were_check" def __init__(self, file_info, threshold=3): super().__init__(file_info) - self.threshold = threshold - + self.checker = WasWereChecker(file_info, threshold) + def check(self): - if self.file.page_counter() < 4: - return answer(False, 'В отчёте недостаточно страниц. Нечего проверять.') - detected, total_sentences = get_was_were_sentences(self.file, CritreriaType.REPORT) - if total_sentences > self.threshold: - result_str = generate_output_text(detected, CritreriaType.REPORT, self.format_page_link) - result_score = 0 - else: - if total_sentences != 0: - result_str = ( - generate_output_text(detected, CritreriaType.REPORT, self.format_page_link) - + 'Пройдена! (но найдены конструкции, которые можно убрать без потери смысла)' - ) - else: - result_str = 'Пройдена!' - result_score = 1 - return answer(result_score, result_str) \ No newline at end of file + message, score = self.checker.get_result_msg_and_score( + self.file, self.format_page_link + ) + return answer(score, message) + diff --git a/app/nlp/is_passive_was_were_sentence.py b/app/nlp/is_passive_was_were_sentence.py index e7f0f6be..9878e5b4 100644 --- a/app/nlp/is_passive_was_were_sentence.py +++ b/app/nlp/is_passive_was_were_sentence.py @@ -1,35 +1,10 @@ import re import pymorphy3 import string -from enum import Enum morph = pymorphy3.MorphAnalyzer() -class CritreriaType(Enum): - REPORT = 'report' - PRESENTATION = 'pres' - - -def criteria_type_to_str(type: CritreriaType): - if type == CritreriaType.REPORT: - return "Страница" - elif type == CritreriaType.PRESENTATION: - return "Слайд" - else: - return "Элемент" - -def get_content_by_file(file, type: CritreriaType): - if type == CritreriaType.REPORT: - return file.pdf_file.get_text_on_page().items() - elif type == CritreriaType.PRESENTATION: - return enumerate(file.get_text_from_slides()) - -def clean_word(word): - punct = string.punctuation.replace('-', '') - return word.translate(str.maketrans('', '', punct)) - - def is_passive_was_were_sentece(sentence): """ Примеры плохих предложений (пассивные конструкции с "Был*" - можно убрать): @@ -38,7 +13,7 @@ def is_passive_was_were_sentece(sentence): - Было принято решение о внедрении - Были получены следующие результаты - Была создана база данных - + Примеры хороших предложений ("Был*" нельзя убрать): - Было бы здорово получить новые данные - Был сильный скачок напряжения @@ -46,7 +21,7 @@ def is_passive_was_were_sentece(sentence): - Был студентом университета три года назад - Была программистом до выхода на пенсию """ - first_words = re.split(r'\s+', sentence.strip(), maxsplit=2) + first_words = re.split(r"\s+", sentence.strip(), maxsplit=2) if len(first_words) < 2: return False @@ -54,60 +29,16 @@ def is_passive_was_were_sentece(sentence): second_word = clean_word(first_words[1]) parsed = morph.parse(first_word)[0] - if (parsed.normal_form == 'быть' and - 'past' in parsed.tag and - parsed.tag.POS == 'VERB'): + if ( + parsed.normal_form == "быть" + and "past" in parsed.tag + and parsed.tag.POS == "VERB" + ): second_word_parsed = morph.parse(second_word)[0] - return ('PRTS' in second_word_parsed.tag and - 'pssv' in second_word_parsed.tag) + return "PRTS" in second_word_parsed.tag and "pssv" in second_word_parsed.tag return False -def generate_output_text(detected_senteces, type: CritreriaType, format_page_link_fn=None): - output = 'Обнаружены конструкции (Был/Была/Было/Были), которые можно удалить без потери смысла:

' - if type == CritreriaType.REPORT: - offset_index = 0 - elif type == CritreriaType.PRESENTATION: - offset_index = 1 - for index, messages in detected_senteces.items(): - display_index = index + offset_index - output_type = criteria_type_to_str(type) - if format_page_link_fn: - output += f'{output_type} {format_page_link_fn([display_index])}:
' + '
'.join(messages) + '

' - else: - output += f'{output_type} №{display_index}:
' + '
'.join(messages) + '

' - return output - - -def get_was_were_sentences(file, type: CritreriaType): - detected = {} - total_sentences = 0 - for page_index, page_text in get_content_by_file(file, type): - lines = re.split(r'\n', page_text) - non_empty_line_counter = 0 - for line_index, line in enumerate(lines): - print(line_index, line) - line = line.strip() - if not line: - continue - - non_empty_line_counter += 1 - sentences = re.split(r'[.!?…]+\s*', line) - - for sentence in sentences: - sentence = sentence.strip() - if not sentence: - continue - - if is_passive_was_were_sentece(sentence): - total_sentences += 1 - if page_index not in detected: - detected[page_index] = [] - truncated_sentence = sentence[:50] + '...' if len(sentence) > 50 else sentence - if type == CritreriaType.PRESENTATION: - err_str = f'Строка {non_empty_line_counter}: {truncated_sentence}' - elif type == CritreriaType.REPORT: - err_str = f'Строка {line_index+1}: {truncated_sentence}' - detected[page_index].append(err_str) - - return detected, total_sentences \ No newline at end of file +def clean_word(word): + punct = string.punctuation.replace("-", "") + return word.translate(str.maketrans("", "", punct)) From 86b2ddce13ec1990aa907b466b2191f82759cea6 Mon Sep 17 00:00:00 2001 From: baydakov-georgiy Date: Mon, 23 Mar 2026 20:00:30 +0300 Subject: [PATCH 7/8] brought out the general logic --- app/main/check_packs/pack_config.py | 21 +---- .../checks/report_checks/was_were_check.py | 1 - app/settings.py | 45 ---------- app/utils/was_were_check.py | 88 +++++++++++++++++++ 4 files changed, 89 insertions(+), 66 deletions(-) delete mode 100644 app/settings.py create mode 100644 app/utils/was_were_check.py diff --git a/app/main/check_packs/pack_config.py b/app/main/check_packs/pack_config.py index d2579f6d..ae25c4e5 100644 --- a/app/main/check_packs/pack_config.py +++ b/app/main/check_packs/pack_config.py @@ -2,26 +2,7 @@ BASE_PRES_CRITERION = [ ['template_name'], - ['slides_number', {'slides_number': [10, 12], 'detect_additional': True}], - ['slides_enum'], - ['slides_headers'], - ['find_slides', {'key_slide': 'Цель и задачи'}], - ['find_slides', {'key_slide': 'Апробация'}], - ['find_on_slide', {'key_slide': ['Актуальность', 'Актуальности', 'Актуальностью']}], - ['find_slides', {'key_slide': 'Заключение'}], - ['slide_every_task', {'min_percent': 70}], - ['conclusion_actual', {'min_percent': 70}], - ['pres_right_words'], - ['pres_image_share'], - ['future_dev'], - ['pres_banned_words_check'], - ['pres_empty_slide'], - ['theme_in_pres_check'], - ['verify_git_link'], - ["slide_headers_duplication_check"], - ['pres_image_capture'], - ['task_tracker'], - ['overview_in_tasks'], + ['pres_aspect_ratio_check', {'correct_ratios': ['4:3', '16:9']}], ] BASE_REPORT_CRITERION = [ ["simple_check"], diff --git a/app/main/checks/report_checks/was_were_check.py b/app/main/checks/report_checks/was_were_check.py index 28c3abd8..c4177343 100644 --- a/app/main/checks/report_checks/was_were_check.py +++ b/app/main/checks/report_checks/was_were_check.py @@ -16,4 +16,3 @@ def check(self): self.file, self.format_page_link ) return answer(score, message) - diff --git a/app/settings.py b/app/settings.py deleted file mode 100644 index 1a54230b..00000000 --- a/app/settings.py +++ /dev/null @@ -1,45 +0,0 @@ -import configparser -import json -import os - -from lti_session_passback.lti.utils import parse_consumer_info - -# read ini file -current_ph = os.path.dirname(os.path.abspath(__file__)) -ini_file = os.path.join(current_ph, 'config.ini') -config = configparser.ConfigParser() -config.read(ini_file) - -# read version file -project_ph = os.path.dirname(current_ph) -version_file = os.path.join(project_ph, "app", "VERSION.json") -try: - with open(version_file) as vfp: - json_string = vfp.read() - VERSION_DATA = json.loads(json_string) -except json.decoder.JSONDecodeError as error: - VERSION_DATA = { - "error": str(error), - "data": error.doc - } -except IOError as error: - VERSION_DATA = {"error": f"{error.strerror}: {error.filename}"} -except Exception as error: - VERSION_DATA = {"error": repr(error)} - -# setup variables -ADMIN_PASSWORD = os.environ.get('ADMIN_PASSWORD', '') - - -SECRET_KEY = os.environ.get('SECRET_KEY', '') -SIGNUP_PAGE_ENABLED = os.environ.get('SIGNUP_PAGE_ENABLED', 'True') == 'True' - -MAX_CONTENT_LENGTH = config.getint('consts', 'MAX_CONTENT_LENGTH') * 1024 * 1024 -MAX_SYSTEM_STORAGE = config.getint('consts', 'MAX_SYSTEM_STORAGE') * 1024 * 1024 - -DEBUG_AUTH = False -consumer_keys = os.environ.get('CONSUMER_KEY', '') -consumer_secrets = os.environ.get('CONSUMER_SECRET', '') -if consumer_keys == '' or consumer_secrets == '': - raise Exception('Required CONSUMER_KEY or CONSUMER_SECRET missing') -LTI_CONSUMERS = parse_consumer_info(consumer_keys, consumer_secrets) diff --git a/app/utils/was_were_check.py b/app/utils/was_were_check.py new file mode 100644 index 00000000..8d810a1d --- /dev/null +++ b/app/utils/was_were_check.py @@ -0,0 +1,88 @@ +import re +from ..nlp.is_passive_was_were_sentence import is_passive_was_were_sentece + + +class WasWereChecker: + def __init__(self, file_info, threshold): + self.file_type = file_info["file_type"]["type"] + self.threshold = threshold + + def get_content_by_file(self, file): + if self.file_type == "report": + return file.pdf_file.get_text_on_page().items() + elif self.file_type == "pres": + return enumerate(file.get_text_from_slides()) + + def generate_output_text(self, detected_senteces, format_page_link_fn=None): + output = "Обнаружены конструкции (Был/Была/Было/Были), которые можно удалить без потери смысла:

" + offset_index = 0 + if self.file_type == "pres": + offset_index = 1 + + for index, messages in detected_senteces.items(): + display_index = index + offset_index + if format_page_link_fn: + output += ( + f"Страница {format_page_link_fn([display_index])}:
" + + "
".join(messages) + + "

" + ) + else: + output += ( + f"Страница №{display_index}:
" + + "
".join(messages) + + "

" + ) + return output + + def get_was_were_sentences(self, file): + detected = {} + total_sentences = 0 + for page_index, page_text in self.get_content_by_file(file): + lines = re.split(r"\n", page_text) + non_empty_line_counter = 0 + for line_index, line in enumerate(lines): + print(line_index, line) + line = line.strip() + if not line: + continue + + non_empty_line_counter += 1 + sentences = re.split(r"[.!?…]+\s*", line) + + for sentence in sentences: + sentence = sentence.strip() + if not sentence: + continue + + if is_passive_was_were_sentece(sentence): + total_sentences += 1 + if page_index not in detected: + detected[page_index] = [] + truncated_sentence = ( + sentence[:50] + "..." if len(sentence) > 50 else sentence + ) + if self.file_type == "pres": + err_str = ( + f"Строка {non_empty_line_counter}: {truncated_sentence}" + ) + elif self.file_type == "report": + err_str = f"Строка {line_index + 1}: {truncated_sentence}" + detected[page_index].append(err_str) + + return detected, total_sentences + + def get_result_msg_and_score(self, file, format_page_link): + detected, total_sentences = self.get_was_were_sentences(file) + result_msg = "" + result_score = 1 + if total_sentences == 0: + result_msg = "Пройдена!" + else: + result_msg = self.generate_output_text(detected, format_page_link) + if total_sentences > self.threshold: + result_msg = "Не пройдена!
" + result_msg + result_score = 0 + else: + result_msg = "Пройдена!
" + result_msg + return result_msg, result_score From 46a797d03d9eee4d9f3b000981e7dfcf8955cb41 Mon Sep 17 00:00:00 2001 From: baydakov-georgiy Date: Mon, 23 Mar 2026 20:12:03 +0300 Subject: [PATCH 8/8] restore other files --- app/main/check_packs/pack_config.py | 23 ++++++++++++++- app/settings.py | 45 +++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+), 1 deletion(-) create mode 100644 app/settings.py diff --git a/app/main/check_packs/pack_config.py b/app/main/check_packs/pack_config.py index ae25c4e5..4794e311 100644 --- a/app/main/check_packs/pack_config.py +++ b/app/main/check_packs/pack_config.py @@ -2,7 +2,27 @@ BASE_PRES_CRITERION = [ ['template_name'], - ['pres_aspect_ratio_check', {'correct_ratios': ['4:3', '16:9']}], + ['slides_number', {'slides_number': [10, 12], 'detect_additional': True}], + ['slides_enum'], + ['slides_headers'], + ['find_slides', {'key_slide': 'Цель и задачи'}], + ['find_slides', {'key_slide': 'Апробация'}], + ['find_on_slide', {'key_slide': ['Актуальность', 'Актуальности', 'Актуальностью']}], + ['find_slides', {'key_slide': 'Заключение'}], + ['slide_every_task', {'min_percent': 70}], + ['conclusion_actual', {'min_percent': 70}], + ['pres_right_words'], + ['pres_image_share'], + ['future_dev'], + ['pres_banned_words_check'], + ['pres_empty_slide'], + ['theme_in_pres_check'], + ['verify_git_link'], + ["slide_headers_duplication_check"], + ['pres_image_capture'], + ['task_tracker'], + ['overview_in_tasks'], + ['pres_was_were_check'], ] BASE_REPORT_CRITERION = [ ["simple_check"], @@ -32,6 +52,7 @@ ["empty_task_page_check"], ["water_in_the_text_check"], ["report_task_tracker"], + ["report_was_were_check"], ] DEFAULT_TYPE = 'pres' diff --git a/app/settings.py b/app/settings.py new file mode 100644 index 00000000..1a54230b --- /dev/null +++ b/app/settings.py @@ -0,0 +1,45 @@ +import configparser +import json +import os + +from lti_session_passback.lti.utils import parse_consumer_info + +# read ini file +current_ph = os.path.dirname(os.path.abspath(__file__)) +ini_file = os.path.join(current_ph, 'config.ini') +config = configparser.ConfigParser() +config.read(ini_file) + +# read version file +project_ph = os.path.dirname(current_ph) +version_file = os.path.join(project_ph, "app", "VERSION.json") +try: + with open(version_file) as vfp: + json_string = vfp.read() + VERSION_DATA = json.loads(json_string) +except json.decoder.JSONDecodeError as error: + VERSION_DATA = { + "error": str(error), + "data": error.doc + } +except IOError as error: + VERSION_DATA = {"error": f"{error.strerror}: {error.filename}"} +except Exception as error: + VERSION_DATA = {"error": repr(error)} + +# setup variables +ADMIN_PASSWORD = os.environ.get('ADMIN_PASSWORD', '') + + +SECRET_KEY = os.environ.get('SECRET_KEY', '') +SIGNUP_PAGE_ENABLED = os.environ.get('SIGNUP_PAGE_ENABLED', 'True') == 'True' + +MAX_CONTENT_LENGTH = config.getint('consts', 'MAX_CONTENT_LENGTH') * 1024 * 1024 +MAX_SYSTEM_STORAGE = config.getint('consts', 'MAX_SYSTEM_STORAGE') * 1024 * 1024 + +DEBUG_AUTH = False +consumer_keys = os.environ.get('CONSUMER_KEY', '') +consumer_secrets = os.environ.get('CONSUMER_SECRET', '') +if consumer_keys == '' or consumer_secrets == '': + raise Exception('Required CONSUMER_KEY or CONSUMER_SECRET missing') +LTI_CONSUMERS = parse_consumer_info(consumer_keys, consumer_secrets)