Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions lib/AppInfo/Application.php
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@
use OCA\Assistant\Reference\Text2StickerProvider;
use OCA\Assistant\TaskProcessing\AudioToAudioChatProvider;
use OCA\Assistant\TaskProcessing\ContextAgentAudioInteractionProvider;
use OCA\Assistant\TaskProcessing\ImageToTextTranslateProvider;
use OCA\Assistant\TaskProcessing\ImageToTextTranslateTaskType;
use OCA\Assistant\TaskProcessing\TextToStickerProvider;
use OCA\Assistant\TaskProcessing\TextToStickerTaskType;
use OCA\Files\Event\LoadAdditionalScriptsEvent;
Expand Down Expand Up @@ -111,6 +113,9 @@ public function register(IRegistrationContext $context): void {
$context->registerTaskProcessingTaskType(TextToStickerTaskType::class);
$context->registerTaskProcessingProvider(TextToStickerProvider::class);
$context->registerReferenceProvider(Text2StickerProvider::class);

$context->registerTaskProcessingTaskType(ImageToTextTranslateTaskType::class);
$context->registerTaskProcessingProvider(ImageToTextTranslateProvider::class);
}

public function boot(IBootContext $context): void {
Expand Down
5 changes: 5 additions & 0 deletions lib/Service/TaskProcessingService.php
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
use OCP\TaskProcessing\Exception\UnauthorizedException;
use OCP\TaskProcessing\Exception\ValidationException;
use OCP\TaskProcessing\IManager;
use OCP\TaskProcessing\IProvider;
use OCP\TaskProcessing\Task;
use OCP\TaskProcessing\TaskTypes\AudioToText;
use OCP\TaskProcessing\TaskTypes\TextToTextSummary;
Expand All @@ -35,6 +36,10 @@ public function __construct(
) {
}

/**
 * Resolve the provider that the task processing manager prefers for a task type.
 *
 * This is a plain delegation to the injected task processing manager.
 * NOTE(review): the manager presumably throws when no provider is available
 * for the task type; confirm against the IManager contract before relying on it.
 *
 * @param string $taskTypeId ID of the task type to look up
 * @return IProvider the provider returned by the manager
 */
public function getPreferredProvider(string $taskTypeId): IProvider {
	$preferredProvider = $this->taskProcessingManager->getPreferredProvider($taskTypeId);

	return $preferredProvider;
}

/**
* @param Task $task
* @return array
Expand Down
151 changes: 151 additions & 0 deletions lib/TaskProcessing/ImageToTextTranslateProvider.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
<?php

declare(strict_types=1);

/**
* SPDX-FileCopyrightText: 2025 Nextcloud GmbH and Nextcloud contributors
* SPDX-License-Identifier: AGPL-3.0-or-later
*/

namespace OCA\Assistant\TaskProcessing;

use Exception;
use OCA\Assistant\AppInfo\Application;
use OCA\Assistant\Service\TaskProcessingService;
use OCP\Files\File;
use OCP\IL10N;
use OCP\TaskProcessing\ISynchronousProvider;
use OCP\TaskProcessing\Task;
use OCP\TaskProcessing\TaskTypes\ImageToTextOpticalCharacterRecognition;
use OCP\TaskProcessing\TaskTypes\TextToTextTranslate;
use Psr\Log\LoggerInterface;
use RuntimeException;

/**
 * Synchronous provider that translates the text contained in images.
 *
 * Processing chains two sub tasks through the TaskProcessingService: one OCR
 * task over all input files, then one translation task per OCR result.
 * The language enum values and defaults are borrowed from the preferred
 * text translation provider.
 */
class ImageToTextTranslateProvider implements ISynchronousProvider {

	public function __construct(
		private IL10N $l,
		private TaskProcessingService $taskProcessingService,
		private LoggerInterface $logger,
	) {
	}

	/**
	 * @return string the unique ID of this provider
	 */
	public function getId(): string {
		return Application::APP_ID . '-image2text:translate';
	}

	/**
	 * @return string the localized display name of this provider
	 */
	public function getName(): string {
		return $this->l->t('Assistant');
	}

	/**
	 * @return string the ID of the task type this provider implements
	 */
	public function getTaskTypeId(): string {
		return ImageToTextTranslateTaskType::ID;
	}

	/**
	 * @return int the expected runtime (presumably in seconds; confirm against IProvider)
	 */
	public function getExpectedRuntime(): int {
		return 60;
	}

	/**
	 * Expose the language choices of the preferred text translation provider.
	 *
	 * The translation provider is queried once; a language key it does not
	 * define yields an empty list instead of an undefined-index warning.
	 * NOTE(review): resolving the preferred provider presumably fails when no
	 * translation provider is installed; confirm how callers handle that.
	 *
	 * @return array enum values keyed by 'origin_language' and 'target_language'
	 */
	public function getInputShapeEnumValues(): array {
		$translateProvider = $this->taskProcessingService->getPreferredProvider(TextToTextTranslate::ID);
		$enumValues = $translateProvider->getInputShapeEnumValues();

		return [
			'origin_language' => $enumValues['origin_language'] ?? [],
			'target_language' => $enumValues['target_language'] ?? [],
		];
	}

	/**
	 * Expose the default origin language of the preferred text translation provider.
	 *
	 * When the translation provider declares no default origin language, no
	 * default is reported at all (rather than a null default plus a warning).
	 *
	 * @return array defaults keyed by input name; at most 'origin_language'
	 */
	public function getInputShapeDefaults(): array {
		$translateProvider = $this->taskProcessingService->getPreferredProvider(TextToTextTranslate::ID);
		$defaults = $translateProvider->getInputShapeDefaults();

		if (!isset($defaults['origin_language'])) {
			return [];
		}

		return [
			'origin_language' => $defaults['origin_language'],
		];
	}

	/**
	 * @return array this provider has no optional inputs
	 */
	public function getOptionalInputShape(): array {
		return [];
	}

	/**
	 * @return array this provider has no optional inputs, hence no enum values
	 */
	public function getOptionalInputShapeEnumValues(): array {
		return [];
	}

	/**
	 * @return array this provider has no optional inputs, hence no defaults
	 */
	public function getOptionalInputShapeDefaults(): array {
		return [];
	}

	/**
	 * @return array the output has no enum fields
	 */
	public function getOutputShapeEnumValues(): array {
		return [];
	}

	/**
	 * @return array this provider has no optional outputs
	 */
	public function getOptionalOutputShape(): array {
		return [];
	}

	/**
	 * @return array this provider has no optional outputs, hence no enum values
	 */
	public function getOptionalOutputShapeEnumValues(): array {
		return [];
	}

	/**
	 * Run OCR on the input images, then translate every extracted text.
	 *
	 * @param string|null $userId user on whose behalf the sub tasks are created
	 * @param array $input expects 'input' (array of readable File objects),
	 *                     'origin_language' (string) and 'target_language' (string)
	 * @param callable $reportProgress progress callback (not used by this provider)
	 * @return array ['output' => list of translated texts, one per OCR result]
	 * @throws RuntimeException if the input is invalid or a sub task fails; for
	 *                          sub task failures the original exception is
	 *                          attached as the previous exception
	 */
	public function process(?string $userId, array $input, callable $reportProgress): array {
		if (!isset($input['input']) || !is_array($input['input'])) {
			throw new RuntimeException('Invalid input');
		}
		foreach ($input['input'] as $inputImage) {
			if (!($inputImage instanceof File) || !$inputImage->isReadable()) {
				throw new RuntimeException('Invalid input images');
			}
		}

		if (!isset($input['origin_language']) || !is_string($input['origin_language'])) {
			throw new RuntimeException('Invalid origin_language input');
		}
		if (!isset($input['target_language']) || !is_string($input['target_language'])) {
			throw new RuntimeException('Invalid target_language input');
		}

		// OCR: the sub task receives file IDs rather than File objects
		$ocrInputs = array_map(static function (File $file) {
			return $file->getId();
		}, $input['input']);
		try {
			$task = new Task(
				ImageToTextOpticalCharacterRecognition::ID,
				['input' => $ocrInputs],
				Application::APP_ID . ':internal',
				$userId,
			);
			$taskOutput = $this->taskProcessingService->runTaskProcessingTask($task);
		} catch (Exception $e) {
			$this->logger->warning('OCR sub task failed with: ' . $e->getMessage(), ['exception' => $e]);
			// chain the original exception so the root cause stays available to callers
			throw new RuntimeException('OCR sub task failed with: ' . $e->getMessage(), 0, $e);
		}

		// Checked outside the try block so this error is not re-wrapped as a sub task failure
		$ocrOutputs = $taskOutput['output'] ?? null;
		if (!is_array($ocrOutputs)) {
			throw new RuntimeException('OCR sub task returned an invalid output');
		}

		// Translation: one sub task per extracted text
		$translatedOutputs = [];
		foreach ($ocrOutputs as $ocrOutput) {
			try {
				$task = new Task(
					TextToTextTranslate::ID,
					[
						'input' => $ocrOutput,
						'origin_language' => $input['origin_language'],
						'target_language' => $input['target_language'],
					],
					Application::APP_ID . ':internal',
					$userId,
				);
				$taskOutput = $this->taskProcessingService->runTaskProcessingTask($task);
				$translatedOutputs[] = $taskOutput['output'];
			} catch (Exception $e) {
				$this->logger->warning('Translation sub task failed with: ' . $e->getMessage(), ['exception' => $e]);
				throw new RuntimeException('Translation sub task failed with: ' . $e->getMessage(), 0, $e);
			}
		}

		return [
			'output' => $translatedOutputs,
		];
	}
}
82 changes: 82 additions & 0 deletions lib/TaskProcessing/ImageToTextTranslateTaskType.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
<?php

declare(strict_types=1);

/**
* SPDX-FileCopyrightText: 2025 Nextcloud GmbH and Nextcloud contributors
* SPDX-License-Identifier: AGPL-3.0-or-later
*/

namespace OCA\Assistant\TaskProcessing;

use OCA\Assistant\AppInfo\Application;
use OCP\IL10N;
use OCP\TaskProcessing\EShapeType;
use OCP\TaskProcessing\ITaskType;
use OCP\TaskProcessing\ShapeDescriptor;

/**
 * Task type describing "translate the text contained in images".
 *
 * Inputs are a list of files plus an origin and a target language (both enums);
 * the output is a list of texts.
 */
class ImageToTextTranslateTaskType implements ITaskType {
	public const ID = Application::APP_ID . ':image2text:translate';

	public function __construct(
		private IL10N $l,
	) {
	}

	/**
	 * @return string the localized display name of this task type
	 */
	public function getName(): string {
		return $this->l->t('Translate image');
	}

	/**
	 * @return string the localized description of this task type
	 */
	public function getDescription(): string {
		return $this->l->t('Translate the text content of an image');
	}

	/**
	 * @return string the unique ID of this task type
	 */
	public function getId(): string {
		return self::ID;
	}

	/**
	 * Describe the required inputs: the files and the two languages.
	 *
	 * @return ShapeDescriptor[] descriptors keyed by 'input', 'origin_language' and 'target_language'
	 */
	public function getInputShape(): array {
		return [
			'input' => new ShapeDescriptor(
				$this->l->t('Input files'),
				$this->l->t('The files to extract text from'),
				EShapeType::ListOfFiles
			),
			'origin_language' => new ShapeDescriptor(
				$this->l->t('Origin language'),
				$this->l->t('The language of the origin text'),
				EShapeType::Enum
			),
			'target_language' => new ShapeDescriptor(
				$this->l->t('Target language'),
				$this->l->t('The desired language to translate the origin text in'),
				EShapeType::Enum
			),
		];
	}

	/**
	 * Describe the output: the list of texts produced for the input files.
	 *
	 * @return ShapeDescriptor[] descriptors keyed by 'output'
	 */
	public function getOutputShape(): array {
		return [
			'output' => new ShapeDescriptor(
				$this->l->t('Output texts'),
				// This task returns translations, not the raw extracted text
				$this->l->t('The translated texts that were extracted from the files'),
				EShapeType::ListOfTexts
			),
		];
	}
}