Skip to content

Commit e15733c

Browse files
committed
[WIP][FEATURE] Add readability calculation to DeepL Overlay
1 parent f9874d0 commit e15733c

16 files changed

Lines changed: 472 additions & 5 deletions
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace WebVision\DeeplWrite\Controller;
6+
7+
use Psr\Http\Message\ResponseInterface;
8+
use Psr\Http\Message\ServerRequestInterface;
9+
use TYPO3\CMS\Core\Http\JsonResponse;
10+
use WebVision\DeeplWrite\Readability\ReadabilityCalculatorFactory;
11+
12+
/**
 * HTTP endpoint that calculates readability metrics for a submitted text.
 *
 * The parsed request body is expected to carry the keys "language" and "text".
 * The answer is always JSON: the serialized readability result on success, or
 * an object with an "error" key and HTTP status 400 when the input is unusable.
 */
final class ReadabilityController
{
    public function __construct(private readonly ReadabilityCalculatorFactory $factory)
    {
    }

    /**
     * Calculates the readability of the posted text for the posted language.
     *
     * @param ServerRequestInterface $request Request whose parsed body holds "language" and "text".
     * @return ResponseInterface JSON response with the metrics, or a 400 JSON error response.
     */
    public function calculate(ServerRequestInterface $request): ResponseInterface
    {
        // getParsedBody() may yield null or an object; only an array can be read by key here.
        $data = $request->getParsedBody();
        $language = is_array($data) ? ($data['language'] ?? null) : null;
        $text = is_array($data) ? ($data['text'] ?? '') : '';

        if (!is_string($language) || $language === '' || !is_string($text)) {
            return new JsonResponse(
                ['error' => 'The parameters "language" and "text" must be given as strings.'],
                400
            );
        }

        try {
            $readabilityCalculator = $this->factory->fromLanguage($language);
            $readabilityResult = $readabilityCalculator->calculateReadability(strip_tags($text));
        } catch (\InvalidArgumentException $exception) {
            // Raised for an unsupported language and for texts without any countable word;
            // both are client input problems, not server failures.
            return new JsonResponse(['error' => $exception->getMessage()], 400);
        }

        return new JsonResponse($readabilityResult->jsonSerialize());
    }
}
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace WebVision\DeeplWrite\Readability\Calculator;
6+
7+
use Org\Heigl\Hyphenator\Hyphenator;
8+
use WebVision\DeeplWrite\Readability\ReadabilityCalculatorInterface;
9+
10+
/**
 * Base class providing the text statistics (sentences, words, syllables,
 * characters) that concrete readability calculators feed into their formula.
 *
 * Subclasses override LANGUAGE with the language identifier they handle.
 */
abstract class AbstractReadabilityCalculator implements ReadabilityCalculatorInterface
{
    protected const LANGUAGE = 'not-supported';
    protected const SENTENCE_SPLIT = '/([!\.\?] )/';
    protected const HYPHENATED_SPLIT = '/([(\s)+!\.\?|])/';

    /**
     * Counts the sentences of the text by splitting at "!", "." or "?" followed by a space.
     *
     * Fragments that are empty or consist only of whitespace (for example the
     * remainder after a trailing ". ") are not counted.
     *
     * @return int Number of sentences; 0 for empty text or when the split fails.
     */
    final protected function countSentences(string $text): int
    {
        $sentences = preg_split(self::SENTENCE_SPLIT, $text);
        if ($sentences === false) {
            return 0;
        }

        return count(array_filter(
            $sentences,
            static fn (string $sentence): bool => trim($sentence) !== ''
        ));
    }

    /**
     * Counts the words of the text.
     *
     * A word starts with a Unicode letter and may continue with letters,
     * combining marks, apostrophes and hyphens, so words containing umlauts or
     * accented letters count as one word. If the text is not valid UTF-8 the
     * byte based str_word_count() is used as a fallback.
     */
    protected function countWords(string $text): int
    {
        $words = preg_match_all('/\p{L}[\p{L}\p{M}\'\-]*/u', $text);
        if ($words === false) {
            return str_word_count($text);
        }

        return $words;
    }

    /**
     * Counts syllables by hyphenating the text with "|" as hyphen and counting
     * the fragments between hyphens, whitespace and sentence punctuation.
     *
     * NOTE(review): the hyphenator is used with its default options except for
     * the hyphen character; static::LANGUAGE is not handed to it. Confirm that
     * the default hyphenation patterns are appropriate for every calculator.
     *
     * @return int Number of non-empty fragments; 0 when the split fails.
     */
    final protected function countSyllables(string $text): int
    {
        $hyphenator = new Hyphenator();
        $hyphenator->getOptions()->setHyphen('|');
        $result = $hyphenator->hyphenate($text);

        // Without PREG_SPLIT_NO_EMPTY every pair of adjacent delimiters (e.g. ". ")
        // would contribute an empty fragment that is wrongly counted as a syllable.
        $fragments = preg_split(self::HYPHENATED_SPLIT, $result, -1, PREG_SPLIT_NO_EMPTY);
        if ($fragments === false) {
            return 0;
        }

        return count($fragments);
    }

    /**
     * Returns the length of the text in characters (multibyte aware).
     */
    protected function countCharacters(string $text): int
    {
        return mb_strlen($text);
    }

    /**
     * Returns the LANGUAGE constant of the concrete calculator class.
     */
    public function getLanguage(): string
    {
        return static::LANGUAGE;
    }
}
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace WebVision\DeeplWrite\Readability\Calculator;
6+
7+
use Symfony\Component\DependencyInjection\Attribute\AsTaggedItem;
8+
use WebVision\DeeplWrite\Readability\Result\ReadabilityResult;
9+
10+
/**
 * Calculates the Flesch Reading Ease (FRE) score for English text:
 *
 * FRE = 206.835 - (1.015 * ASL) - (84.6 * ASW)
 *
 * ASL = average sentence length  = (number of words) / (number of sentences)
 * ASW = average syllables per word = (number of syllables) / (number of words)
 *
 * A higher score means the text is easier to read; the returned value is
 * capped at 100.
 *
 * For an overview of the different scoring levels,
 * @see https://en.wikipedia.org/wiki/Flesch%E2%80%93Kincaid_readability_tests#Flesch_reading_ease
 */
// NOTE(review): AsTaggedItem's first argument appears to be the index used inside tagged
// iterators rather than a tag name - confirm the service tag itself is assigned elsewhere.
#[AsTaggedItem('deepl.readability')]
final class FleschKincaidEnglish extends AbstractReadabilityCalculator
{
    protected const LANGUAGE = 'en-us';

    /**
     * Gathers the text statistics and the resulting score in one result object.
     *
     * @throws \InvalidArgumentException when the text contains no countable word
     */
    public function calculateReadability(string $text): ReadabilityResult
    {
        $sentenceCount = $this->countSentences($text);
        $wordCount = $this->countWords($text);
        $syllableCount = $this->countSyllables($text);
        $characterCount = $this->countCharacters($text);
        $score = $this->calculateScore($wordCount, $sentenceCount, $syllableCount);

        return new ReadabilityResult(
            $text,
            $sentenceCount,
            $wordCount,
            $syllableCount,
            $characterCount,
            $score
        );
    }

    /**
     * Applies the English Flesch Reading Ease formula.
     *
     * A sentence count of zero or less is treated as one sentence.
     *
     * @throws \InvalidArgumentException when $words is zero or negative
     */
    private function calculateScore(
        int $words,
        int $sentences,
        int $syllables
    ): float {
        if ($words <= 0) {
            throw new \InvalidArgumentException(
                'The number of words can not be negative or zero!',
                1757680362
            );
        }

        $sentenceCount = max(1, $sentences);
        $averageSentenceLength = $words / $sentenceCount;
        $score = 206.835 - 1.015 * $averageSentenceLength - (84.6 * $syllables / $words);

        // Very short, simple texts can produce a value above 100. This is a known
        // property of the formula; as 100 already means "very easy to read", the
        // value is capped there.
        return min($score, 100.0);
    }
}
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace WebVision\DeeplWrite\Readability\Calculator;
6+
7+
use Symfony\Component\DependencyInjection\Attribute\AsTaggedItem;
8+
use WebVision\DeeplWrite\Readability\Result\ReadabilityResult;
9+
10+
/**
 * Calculates the Flesch Reading Ease (FRE) score in its variant for German text:
 *
 * FRE = 180 - ASL - (58.5 * ASW)
 *
 * ASL = average sentence length  = (number of words) / (number of sentences)
 * ASW = average syllables per word = (number of syllables) / (number of words)
 *
 * A higher score means the text is easier to read; the returned value is
 * capped at 100. The formula has no lower bound, so very long sentences or
 * words can yield values below 0.
 *
 * For an overview of the different scoring levels,
 * @see https://en.wikipedia.org/wiki/Flesch%E2%80%93Kincaid_readability_tests#Flesch_reading_ease
 *
 * For the German calculation,
 * @see https://de.wikipedia.org/wiki/Lesbarkeitsindex#F%C3%BCr_Deutsch
 */
// NOTE(review): AsTaggedItem's first argument appears to be the index used inside tagged
// iterators rather than a tag name - confirm the service tag itself is assigned elsewhere.
#[AsTaggedItem('deepl.readability')]
final class FleschKincaidGerman extends AbstractReadabilityCalculator
{
    protected const LANGUAGE = 'de';

    /**
     * Gathers the text statistics and the resulting score in one result object.
     *
     * @throws \InvalidArgumentException when the text contains no countable word
     */
    public function calculateReadability(string $text): ReadabilityResult
    {
        $sentences = $this->countSentences($text);
        $words = $this->countWords($text);
        $syllables = $this->countSyllables($text);
        $characters = $this->countCharacters($text);

        return new ReadabilityResult(
            $text,
            $sentences,
            $words,
            $syllables,
            $characters,
            $this->calculateScore($words, $sentences, $syllables)
        );
    }

    /**
     * Applies the German Flesch Reading Ease formula.
     *
     * A sentence count of zero or less is treated as one sentence.
     *
     * @throws \InvalidArgumentException when $words is zero or negative
     */
    private function calculateScore(
        int $words,
        int $sentences,
        int $syllables
    ): float {
        if ($sentences <= 0) {
            $sentences = 1;
        }
        if ($words <= 0) {
            throw new \InvalidArgumentException(
                'The number of words can not be negative or zero!',
                1757679534
            );
        }

        $fleschReadingEase = 180 - ($words / $sentences) - (58.5 * $syllables / $words);

        // Short, simple texts can exceed 100 (a single one-syllable word gives 120.5).
        // Cap the value at 100, which already means "very easy to read", so the
        // result matches the behaviour of the English calculator.
        return min($fleschReadingEase, 100.0);
    }
}
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace WebVision\DeeplWrite\Readability;
6+
7+
/**
 * Hands out the readability calculator for a language by delegating the
 * lookup to the injected registry.
 */
final class ReadabilityCalculatorFactory
{
    public function __construct(
        private readonly ReadabilityCalculatorRegistryInterface $registry
    ) {
    }

    /**
     * Returns whatever calculator the registry finds for the given language.
     *
     * @param string $language Language identifier passed unchanged to the registry.
     */
    public function fromLanguage(string $language): ReadabilityCalculatorInterface
    {
        $calculator = $this->registry->findByLanguage($language);

        return $calculator;
    }
}
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace WebVision\DeeplWrite\Readability;
6+
7+
use WebVision\DeeplWrite\Readability\Result\ReadabilityResult;
8+
9+
/**
 * Contract for a calculator that rates the readability of a text in one language.
 */
interface ReadabilityCalculatorInterface
{
    /**
     * Returns the identifier of the language this calculator handles.
     */
    public function getLanguage(): string;

    /**
     * Analyses the given text and returns its statistics together with the score.
     */
    public function calculateReadability(string $text): ReadabilityResult;
}
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace WebVision\DeeplWrite\Readability;
6+
7+
use Psr\Container\ContainerInterface;
8+
9+
/**
 * Holds all available readability calculators and finds the one that is
 * responsible for a given language.
 */
final class ReadabilityCalculatorRegistry implements ReadabilityCalculatorRegistryInterface
{
    /**
     * Initialised with an empty list so that a lookup on a registry without any
     * calculator ends in the "not found" exception instead of an
     * uninitialised-property error.
     *
     * @var list<ReadabilityCalculatorInterface>
     */
    private array $services = [];

    /**
     * @param iterable<ReadabilityCalculatorInterface> $calculators Calculators to register.
     */
    public function __construct(iterable $calculators)
    {
        foreach ($calculators as $calculator) {
            $this->services[] = $calculator;
        }
    }

    /**
     * Returns the first registered calculator whose language equals $language exactly.
     *
     * @throws \InvalidArgumentException when no registered calculator matches
     */
    public function findByLanguage(string $language): ReadabilityCalculatorInterface
    {
        foreach ($this->services as $service) {
            if ($service->getLanguage() === $language) {
                return $service;
            }
        }

        throw new \InvalidArgumentException(
            sprintf('No service found for language "%s"', $language),
            1757686580
        );
    }
}
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace WebVision\DeeplWrite\Readability;
6+
7+
/**
 * Contract for looking up a readability calculator by language.
 */
interface ReadabilityCalculatorRegistryInterface
{
    /**
     * Returns the calculator registered for the given language identifier.
     */
    public function findByLanguage(
        string $language
    ): ReadabilityCalculatorInterface;
}
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace WebVision\DeeplWrite\Readability\Result;
6+
7+
/**
 * Represents the result of a readability analysis performed on a given text.
 * It provides metrics such as sentence, word, syllable, and character counts,
 * as well as a calculated readability score and averages per sentence or word.
 */
final class ReadabilityResult implements \JsonSerializable
{
    public function __construct(
        public readonly string $text,
        public readonly int $sentences,
        public readonly int $words,
        public readonly int $syllables,
        public readonly int $characters,
        public readonly float $score
    ) {
    }

    /**
     * Average number of words per sentence, rounded to two decimals.
     *
     * @return float 0.0 when the result holds no sentences (avoids a division by zero).
     */
    public function getAverageWordsPerSentence(): float
    {
        if ($this->sentences === 0) {
            return 0.0;
        }

        return round($this->words / $this->sentences, 2);
    }

    /**
     * Average number of syllables per word, rounded to two decimals.
     *
     * @return float 0.0 when the result holds no words (avoids a division by zero).
     */
    public function getAverageSyllablesPerWord(): float
    {
        if ($this->words === 0) {
            return 0.0;
        }

        return round($this->syllables / $this->words, 2);
    }

    /**
     * Returns the counts, both averages and the score; the analysed text itself
     * is not part of the serialized data.
     *
     * @return array{sentences: int, words: int, syllables: int, characters: int, avgSyllables: float, avgWords: float, score: float}
     */
    public function jsonSerialize(): array
    {
        return [
            'sentences' => $this->sentences,
            'words' => $this->words,
            'syllables' => $this->syllables,
            'characters' => $this->characters,
            'avgSyllables' => $this->getAverageSyllablesPerWord(),
            'avgWords' => $this->getAverageWordsPerSentence(),
            'score' => $this->score,
        ];
    }
}
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace WebVision\DeeplWrite\Service;
6+
7+
/**
 * Placeholder service: the class currently declares no properties or methods.
 */
final class ReadingEaseService {}

0 commit comments

Comments
 (0)