Skip to content
2 changes: 2 additions & 0 deletions app/main/check_packs/pack_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
['pres_image_capture'],
['task_tracker'],
['overview_in_tasks'],
['pres_was_were_check'],
]
BASE_REPORT_CRITERION = [
["simple_check"],
Expand Down Expand Up @@ -51,6 +52,7 @@
["empty_task_page_check"],
["water_in_the_text_check"],
["report_task_tracker"],
["report_was_were_check"],
]

DEFAULT_TYPE = 'pres'
Expand Down
2 changes: 1 addition & 1 deletion app/main/checks/presentation_checks/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,4 @@
module_globals=globals(),
base_class=BaseCriterion,
current_file=__file__
)
)
18 changes: 18 additions & 0 deletions app/main/checks/presentation_checks/was_were_check.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
from ..base_check import BasePresCriterion, answer
from app.utils.was_were_check import WasWereChecker


class PresWasWereCheck(BasePresCriterion):
    """Presentation criterion reporting removable passive "Был/Была/Было/Были" openers.

    The detection and message formatting are delegated to ``WasWereChecker``;
    this class only wires the checker into the criterion interface.
    """

    label = "Проверка на пассивные конструкции, начинающиеся с Был/Была/Было/Были, которые можно убрать без потери смысла"
    id = "pres_was_were_check"

    def __init__(self, file_info, threshold=3):
        """Store the threshold and build the shared checker.

        Args:
            file_info: Passed to the base criterion and to ``WasWereChecker``.
            threshold: Forwarded to ``WasWereChecker``; kept on the instance too.
        """
        super().__init__(file_info)
        self.threshold = threshold
        self.checker = WasWereChecker(file_info, threshold)

    def check(self):
        """Run the checker and wrap its (message, score) pair with ``answer``."""
        # self.file / self.format_page_link are presumably supplied by the
        # base criterion -- they are not defined in this class.
        report_text, verdict = self.checker.get_result_msg_and_score(
            self.file, self.format_page_link
        )
        return answer(verdict, report_text)
18 changes: 18 additions & 0 deletions app/main/checks/report_checks/was_were_check.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
from ..base_check import BaseReportCriterion, answer
from app.utils.was_were_check import WasWereChecker


class ReportWasWereCheck(BaseReportCriterion):
    """Report criterion reporting removable passive "Был/Была/Было/Были" openers.

    The detection and message formatting are delegated to ``WasWereChecker``;
    this class only wires the checker into the criterion interface.
    """

    label = "Проверка на пассивные конструкции, начинающиеся с Был/Была/Было/Были, которые можно убрать без потери смысла"
    _description = "Предложения начинающиеся с Был/Была/Было/Были, которые можно убрать без потери смысла"
    id = "report_was_were_check"

    def __init__(self, file_info, threshold=3):
        """Build the shared checker.

        Args:
            file_info: Passed to the base criterion and to ``WasWereChecker``.
            threshold: Forwarded to ``WasWereChecker``.
        """
        super().__init__(file_info)
        self.checker = WasWereChecker(file_info, threshold)

    def check(self):
        """Run the checker and wrap its (message, score) pair with ``answer``."""
        # self.file / self.format_page_link are presumably supplied by the
        # base criterion -- they are not defined in this class.
        report_text, verdict = self.checker.get_result_msg_and_score(
            self.file, self.format_page_link
        )
        return answer(verdict, report_text)
44 changes: 44 additions & 0 deletions app/nlp/is_passive_was_were_sentence.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
import re
import pymorphy3
import string

morph = pymorphy3.MorphAnalyzer()


def is_passive_was_were_sentece(sentence):
    """Return True when the sentence opens with a removable passive "был*" form.

    Only the first two whitespace-separated words are inspected: the first
    must be a past-tense verb form of "быть" and the second must carry both
    the ``PRTS`` and ``pssv`` grammemes (short passive participle) in its
    top-ranked morphological parse.

    Flagged examples (the leading "был*" can be dropped):
        - "Был проведен анализ данных"
        - "Была выполнена работа по исследованию"
        - "Было принято решение о внедрении"
        - "Были получены следующие результаты"
        - "Была создана база данных"

    Not flagged (the leading "был*" is required):
        - "Было бы здорово получить новые данные"
        - "Был сильный скачок напряжения"
        - "Были времена, когда это казалось невозможным"
        - "Был студентом университета три года назад"
        - "Была программистом до выхода на пенсию"

    Args:
        sentence: A single sentence as a string.

    Returns:
        bool: True for a passive "был* + short participle" opener, else False.
        Sentences with fewer than two words always yield False.
    """
    tokens = re.split(r"\s+", sentence.strip(), maxsplit=2)
    if len(tokens) < 2:
        return False

    head, follower = clean_word(tokens[0]), clean_word(tokens[1])

    # Only the highest-ranked parse of each word is considered.
    head_parse = morph.parse(head)[0]
    opens_with_past_byt = (
        head_parse.normal_form == "быть"
        and "past" in head_parse.tag
        and head_parse.tag.POS == "VERB"
    )
    if not opens_with_past_byt:
        return False

    follower_tag = morph.parse(follower)[0].tag
    return "PRTS" in follower_tag and "pssv" in follower_tag


def clean_word(word):
    """Strip punctuation from a word while keeping the ASCII hyphen.

    Removes every character of ``string.punctuation`` except ``-`` (so
    hyphenated words such as "кто-то" stay intact) and, additionally, every
    Unicode punctuation character (general category ``P*``) such as
    «», „“, — and …, which are common in Russian text and would otherwise
    stay attached to the word.

    Args:
        word: The token to clean.

    Returns:
        str: The token without punctuation; may be empty.
    """
    # Local import keeps the block self-contained without touching file imports.
    import unicodedata

    ascii_punct = set(string.punctuation) - {"-"}
    return "".join(
        ch
        for ch in word
        if ch == "-"
        or (ch not in ascii_punct and not unicodedata.category(ch).startswith("P"))
    )
88 changes: 88 additions & 0 deletions app/utils/was_were_check.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
import re
from ..nlp.is_passive_was_were_sentence import is_passive_was_were_sentece


class WasWereChecker:
    """Finds sentences opening with a removable passive "Был/Была/Было/Были" form.

    Shared by the presentation and report criteria: it extracts per-page text
    from the file, splits it into sentences, collects the offending ones and
    renders an HTML message together with a pass/fail score.
    """

    # One or more sentence terminators followed by optional whitespace.
    _SENTENCE_SPLIT_RE = re.compile(r"[.!?…]+\s*")
    # Longer sentences are cut to this many characters in the report.
    _PREVIEW_LENGTH = 50

    def __init__(self, file_info, threshold):
        """Remember the file type and the allowed number of findings.

        Args:
            file_info: Mapping read as ``file_info["file_type"]["type"]``.
            threshold: Maximum number of detected sentences that still passes.
        """
        self.file_type = file_info["file_type"]["type"]
        self.threshold = threshold

    def get_content_by_file(self, file):
        """Return an iterable of ``(page_index, page_text)`` pairs.

        For "report" the pairs come from ``file.pdf_file.get_text_on_page()``
        (a mapping); for "pres" slides are enumerated from 0. Any other file
        type yields ``None``.
        """
        if self.file_type == "report":
            return file.pdf_file.get_text_on_page().items()
        elif self.file_type == "pres":
            return enumerate(file.get_text_from_slides())
        return None

    def generate_output_text(self, detected_senteces, format_page_link_fn=None):
        """Render the findings as an HTML fragment.

        Args:
            detected_senteces: Mapping of page index to a list of messages.
            format_page_link_fn: Optional callable taking a list with one page
                number and returning the text to show for that page; when
                omitted, a plain "№<page>" reference is used.

        Returns:
            str: Header followed by one section per page.
        """
        # Local import keeps the block self-contained without touching file imports.
        import html

        # Slides are enumerated from 0, so shift them to 1-based for display.
        offset_index = 1 if self.file_type == "pres" else 0

        parts = [
            "Обнаружены конструкции (Был/Была/Было/Были), которые можно удалить без потери смысла:<br><br>"
        ]
        for index, messages in detected_senteces.items():
            display_index = index + offset_index
            if format_page_link_fn:
                page_ref = f"{format_page_link_fn([display_index])}"
            else:
                page_ref = f"№{display_index}"
            # Messages contain text taken from the checked document, so they
            # are escaped before being placed into HTML.
            body = "<br>".join(html.escape(msg, quote=False) for msg in messages)
            parts.append(f"<b>Страница {page_ref}:</b> <br>{body}<br><br>")
        return "".join(parts)

    def get_was_were_sentences(self, file):
        """Collect offending sentences grouped by page.

        Args:
            file: Parsed file object accepted by ``get_content_by_file``.

        Returns:
            tuple: ``(detected, total_sentences)`` where ``detected`` maps a
            page index to a list of "Строка N: <sentence>" messages and
            ``total_sentences`` is the overall number of findings.
        """
        detected = {}
        total_sentences = 0
        for page_index, page_text in self.get_content_by_file(file):
            non_empty_line_counter = 0
            for line_index, line in enumerate(page_text.split("\n")):
                line = line.strip()
                if not line:
                    continue
                non_empty_line_counter += 1

                # Presentations number only non-empty lines; other file types
                # use the raw 1-based line position on the page.
                if self.file_type == "pres":
                    line_number = non_empty_line_counter
                else:
                    line_number = line_index + 1

                for sentence in self._SENTENCE_SPLIT_RE.split(line):
                    sentence = sentence.strip()
                    if not sentence or not is_passive_was_were_sentece(sentence):
                        continue

                    total_sentences += 1
                    if len(sentence) > self._PREVIEW_LENGTH:
                        preview = sentence[: self._PREVIEW_LENGTH] + "..."
                    else:
                        preview = sentence
                    detected.setdefault(page_index, []).append(
                        f"Строка {line_number}: {preview}"
                    )

        return detected, total_sentences

    def get_result_msg_and_score(self, file, format_page_link):
        """Produce the final message and score for the file.

        Args:
            file: Parsed file object accepted by ``get_content_by_file``.
            format_page_link: Forwarded to ``generate_output_text``.

        Returns:
            tuple: ``(message, score)``; score is 0 only when the number of
            findings exceeds ``self.threshold``, otherwise 1.
        """
        detected, total_sentences = self.get_was_were_sentences(file)
        if total_sentences == 0:
            return "Пройдена!", 1

        details = self.generate_output_text(detected, format_page_link)
        if total_sentences > self.threshold:
            return "Не пройдена!<br/>" + details, 0
        # Findings exist but stay within the allowed threshold.
        return "Пройдена!<br/>" + details, 1
Loading