diff --git a/packages/service/core/evaluation/target/index.ts b/packages/service/core/evaluation/target/index.ts index 0fdf4d1b1c3e..96e7bb9c3122 100644 --- a/packages/service/core/evaluation/target/index.ts +++ b/packages/service/core/evaluation/target/index.ts @@ -82,8 +82,6 @@ export class WorkflowTarget extends EvaluationTarget { } async execute(input: TargetInput): Promise { - const startTime = Date.now(); - // Get application information const appData = await MongoApp.findById(this.config.appId); if (!appData) { @@ -211,7 +209,7 @@ export class WorkflowTarget extends EvaluationTarget { actualOutput: response, retrievalContext: extractRetrievalContext(flowResponses), usage: flowUsages, - responseTime: Date.now() - startTime, + responseTime: durationSeconds, chatId, aiChatItemDataId }; diff --git a/packages/service/core/evaluation/task/index.ts b/packages/service/core/evaluation/task/index.ts index de2b9ff2e779..d12b4f730bde 100644 --- a/packages/service/core/evaluation/task/index.ts +++ b/packages/service/core/evaluation/task/index.ts @@ -797,7 +797,11 @@ export class EvaluationTaskService { } }); commonPipeline.push({ - $match: { hasFailedEvaluator: true } + $match: { + hasFailedEvaluator: true, + 'metadata.status': EvaluationStatusEnum.completed, // Only completed items + evaluatorOutputs: { $exists: true, $ne: null, $not: { $size: 0 } } // Valid evaluator outputs + } }); } } @@ -913,6 +917,17 @@ export class EvaluationTaskService { // Get the updated item to determine the evalId const updatedItem = await MongoEvalItem.findById(itemId, 'evalId'); if (updatedItem) { + const cleanupResult = await removeEvaluationItemJobsByItemId(itemId, { + forceCleanActiveJobs: true, + retryAttempts: 3, + retryDelay: 200 + }); + + addLog.debug('Queue cleanup completed for evaluation item deletion', { + itemId, + cleanup: cleanupResult + }); + // Reset results and re-queue const evaluatorOutputs = evaluation.evaluators.map((evaluator) => ({ metricName: evaluator.metric.name @@ 
-976,7 +991,7 @@ export class EvaluationTaskService { // Retry the job directly (active event will clear error state automatically) await job.retry(); - addLog.info('Evaluation item retried successfully', { + addLog.debug('Evaluation item retried successfully', { itemId, evalId: item.evalId, teamId @@ -1016,7 +1031,7 @@ export class EvaluationTaskService { } } - addLog.info('All failed evaluation items retry completed', { + addLog.debug('All failed evaluation items retry completed', { evalId, teamId, totalFailedJobs: evaluationFailedJobs.length, diff --git a/packages/service/core/evaluation/task/statusCalculator.ts b/packages/service/core/evaluation/task/statusCalculator.ts index 6994a2f75851..f2a899411346 100644 --- a/packages/service/core/evaluation/task/statusCalculator.ts +++ b/packages/service/core/evaluation/task/statusCalculator.ts @@ -45,7 +45,6 @@ export async function getEvaluationTaskStatus(evalId: string): Promise job.data.evalItemId === evalItemId); } catch (error) { - addLog.error('Error checking evaluation item job active status:', { evalItemId, error }); return false; } } diff --git a/packages/web/i18n/en/admin.json b/packages/web/i18n/en/admin.json index 77c70e2b847b..1544f44257b1 100644 --- a/packages/web/i18n/en/admin.json +++ b/packages/web/i18n/en/admin.json @@ -298,6 +298,19 @@ "custom_pdf_parse_timeout": "Custom PDF parsing timeout", "doc2x_pdf_parse_key": "Doc2x PDF parsing key (lower priority than custom PDF parsing)", "custom_pdf_parse_price": "Custom PDF parsing price (n points/page)", + "eval_config": "Evaluation Configuration", + "task_concurrency": "Task Concurrency", + "task_concurrency_desc": "Number of evaluation tasks running simultaneously", + "item_concurrency": "Item Concurrency", + "item_concurrency_desc": "Number of items processed concurrently within a single task", + "item_max_retry": "Max Retry Count", + "item_max_retry_desc": "Maximum retry attempts when evaluation items fail", + "default_threshold": "Default Threshold", + 
"default_threshold_desc": "Default threshold for evaluation (between 0-1)", + "summary_concurrency": "Summary Concurrency", + "data_quality_concurrency": "Data Quality Concurrency", + "dataset_synthesize_concurrency": "Dataset Synthesis Concurrency", + "smart_generate_concurrency": "Smart Generation Concurrency", "max_upload_files_per_time": "Max files per upload", "max_upload_files_per_time_desc": "Maximum number of files per upload to a knowledge base", "max_upload_file_size": "Max file size (MB)", diff --git a/packages/web/i18n/zh-CN/admin.json b/packages/web/i18n/zh-CN/admin.json index e8ccce4f6e7f..fdf3042aaaa4 100644 --- a/packages/web/i18n/zh-CN/admin.json +++ b/packages/web/i18n/zh-CN/admin.json @@ -298,6 +298,19 @@ "custom_pdf_parse_timeout": "自定义 PDF 解析超时时间", "doc2x_pdf_parse_key": "Doc2x pdf 解析密钥(比自定义 PDF 解析优先级低)", "custom_pdf_parse_price": "自定义 PDF 解析价格(n 积分/页)", + "eval_config": "评估配置", + "task_concurrency": "任务并发数", + "task_concurrency_desc": "同时执行的评估任务数量", + "item_concurrency": "项目并发数", + "item_concurrency_desc": "单个任务中并发处理的项目数量", + "item_max_retry": "最大重试次数", + "item_max_retry_desc": "评估项目失败时的最大重试次数", + "default_threshold": "默认阈值", + "default_threshold_desc": "评估的默认阈值 (0-1之间)", + "summary_concurrency": "总结并发数", + "data_quality_concurrency": "数据质量并发数", + "dataset_synthesize_concurrency": "数据合成并发数", + "smart_generate_concurrency": "智能生成并发数", "max_upload_files_per_time": "单次最多上传多少个文件", "max_upload_files_per_time_desc": "用户上传知识库时,每次上传最多选择多少个文件", "max_upload_file_size": "上传文件最大大小(M)", diff --git a/packages/web/i18n/zh-Hant/admin.json b/packages/web/i18n/zh-Hant/admin.json index e0733a05c560..f01f2e28920f 100644 --- a/packages/web/i18n/zh-Hant/admin.json +++ b/packages/web/i18n/zh-Hant/admin.json @@ -298,6 +298,19 @@ "custom_pdf_parse_timeout": "自定義 PDF 解析逾時時間", "doc2x_pdf_parse_key": "Doc2x pdf 解析密鑰(比自定義 PDF 解析優先級低)", "custom_pdf_parse_price": "自定義 PDF 解析價格(n 積分/頁)", + "eval_config": "評估配置", + "task_concurrency": "任務並發數", + "task_concurrency_desc": 
"同時執行的評估任務數量", + "item_concurrency": "項目並發數", + "item_concurrency_desc": "單個任務中並發處理的項目數量", + "item_max_retry": "最大重試次數", + "item_max_retry_desc": "評估項目失敗時的最大重試次數", + "default_threshold": "默認閾值", + "default_threshold_desc": "評估的默認閾值 (0-1之間)", + "summary_concurrency": "總結並發數", + "data_quality_concurrency": "數據質量並發數", + "dataset_synthesize_concurrency": "數據合成並發數", + "smart_generate_concurrency": "智能生成並發數", "max_upload_files_per_time": "單次最多上傳多少個文件", "max_upload_files_per_time_desc": "用戶上傳知識庫時,每次上傳最多選擇多少個文件", "max_upload_file_size": "上傳文件最大大小(M)", diff --git a/projects/app/src/pages/dashboard/evaluation/task/detail/index.tsx b/projects/app/src/pages/dashboard/evaluation/task/detail/index.tsx index 301224152721..9127f623e2d2 100644 --- a/projects/app/src/pages/dashboard/evaluation/task/detail/index.tsx +++ b/projects/app/src/pages/dashboard/evaluation/task/detail/index.tsx @@ -907,34 +907,6 @@ const Detail = ({ taskId, currentTab }: Props) => { onClick={handleRefresh} /> - - } - onClick={handleEdit} - /> - - - } - /> - - } - /> )} @@ -947,29 +919,47 @@ const Detail = ({ taskId, currentTab }: Props) => { > {t('dashboard_evaluation:view_full_response')} - - } - /> - - } - /> )} - {/* 排队中或评测中状态不显示任何按钮 */} + {/* 对于非排队/评测状态,显示编辑和删除按钮 */} + {selectedItem && + selectedItem.status !== EvaluationStatusEnum.queuing && + selectedItem.status !== EvaluationStatusEnum.evaluating && ( + <> + + } + onClick={handleEdit} + /> + + + } + /> + + } + /> + + )} )} diff --git a/test/cases/service/core/evaluation/evaluator.test.ts b/test/cases/service/core/evaluation/evaluator/evaluator.test.ts similarity index 100% rename from test/cases/service/core/evaluation/evaluator.test.ts rename to test/cases/service/core/evaluation/evaluator/evaluator.test.ts diff --git a/test/cases/service/core/evaluation/target.test.ts b/test/cases/service/core/evaluation/target/target.test.ts similarity index 100% rename from test/cases/service/core/evaluation/target.test.ts rename to 
test/cases/service/core/evaluation/target/target.test.ts diff --git a/test/cases/service/core/evaluation/task.test.ts b/test/cases/service/core/evaluation/task.test.ts deleted file mode 100644 index 6bcca4955c9f..000000000000 --- a/test/cases/service/core/evaluation/task.test.ts +++ /dev/null @@ -1,2924 +0,0 @@ -import { beforeAll, afterAll, beforeEach, describe, test, expect, vi } from 'vitest'; -import { EvaluationTaskService } from '@fastgpt/service/core/evaluation/task'; -import { MongoEvaluation, MongoEvalItem } from '@fastgpt/service/core/evaluation/task/schema'; -import { MongoEvalDatasetCollection } from '@fastgpt/service/core/evaluation/dataset/evalDatasetCollectionSchema'; -import { MongoEvalDatasetData } from '@fastgpt/service/core/evaluation/dataset/evalDatasetDataSchema'; -import { MongoEvalMetric } from '@fastgpt/service/core/evaluation/metric/schema'; -import type { - CreateEvaluationParams, - EvalTarget, - EvaluatorSchema, - EvaluationTaskJobData, - EvaluationItemJobData -} from '@fastgpt/global/core/evaluation/type'; -import type { AuthModeType } from '@fastgpt/service/support/permission/type'; -import { EvaluationStatusEnum } from '@fastgpt/global/core/evaluation/constants'; -import { Types } from '@fastgpt/service/common/mongo'; -import { TeamErrEnum } from '@fastgpt/global/common/error/code/team'; -import { getErrText } from '@fastgpt/global/common/error/utils'; - -// Mock dependencies -vi.mock('@fastgpt/service/core/evaluation/task/mq', () => ({ - evaluationTaskQueue: { - add: vi.fn(), - client: Promise.resolve({ - on: vi.fn(), - quit: vi.fn().mockResolvedValue(undefined) - }) - }, - evaluationItemQueue: { - add: vi.fn(), - addBulk: vi.fn() - }, - removeEvaluationTaskJob: vi.fn().mockResolvedValue({ - queue: 'evalTask', - totalJobs: 0, - removedJobs: 0, - failedRemovals: 0, - errors: [] - }), - removeEvaluationItemJobs: vi.fn().mockResolvedValue({ - queue: 'evalTaskItem', - totalJobs: 0, - removedJobs: 0, - failedRemovals: 0, - errors: [] - }), 
- removeEvaluationItemJobsByItemId: vi.fn().mockResolvedValue({ - queue: 'evalTaskItem', - totalJobs: 0, - removedJobs: 0, - failedRemovals: 0, - errors: [] - }), - getEvaluationTaskWorker: vi.fn(), - getEvaluationItemWorker: vi.fn() -})); - -vi.mock('@fastgpt/service/support/wallet/usage/controller', () => ({ - createTrainingUsage: vi.fn(), - createEvaluationUsage: vi.fn(), - concatUsage: vi.fn(), - evaluationUsageIndexMap: {} -})); - -// Mock evaluation usage utils -vi.mock('@fastgpt/service/core/evaluation/utils/usage', () => ({ - createMergedEvaluationUsage: vi.fn().mockImplementation(async (params) => { - // Call the mocked concatUsage to satisfy test expectations - const { concatUsage } = await import('@fastgpt/service/support/wallet/usage/controller'); - await concatUsage(params); - return undefined; - }) -})); - -// vi.mock('@fastgpt/service/common/system/log', () => ({ -// addLog: { -// info: vi.fn(), -// warn: vi.fn(), -// error: vi.fn(), -// debug: vi.fn() -// } -// })); - -vi.mock('@fastgpt/service/support/permission/controller', () => ({ - parseHeaderCert: vi.fn() -})); - -vi.mock('@fastgpt/service/support/permission/teamLimit', () => ({ - checkTeamAIPoints: vi.fn().mockResolvedValue(undefined) -})); - -vi.mock('@fastgpt/service/core/evaluation/target', () => ({ - createTargetInstance: vi.fn() -})); - -vi.mock('@fastgpt/service/core/evaluation/evaluator', () => ({ - createEvaluatorInstance: vi.fn() -})); - -// Mock summary service -vi.mock('@fastgpt/service/core/evaluation/summary', () => ({ - EvaluationSummaryService: { - calculateAndSaveMetricScores: vi.fn().mockResolvedValue(undefined), - generateSummaryReports: vi.fn().mockResolvedValue(undefined) - } -})); - -// Mock weight calculator -vi.mock('@fastgpt/service/core/evaluation/summary/util/weightCalculator', () => ({ - buildEvalDataConfig: vi.fn((evaluators) => ({ - evaluators: evaluators.map((evaluator) => ({ - metric: evaluator.metric, - runtimeConfig: evaluator.runtimeConfig, - thresholdValue: 
evaluator.thresholdValue ?? 0.8 - })), - summaryConfigs: evaluators.map((evaluator, index) => ({ - metricId: evaluator.metric._id.toString(), - metricName: evaluator.metric.name, - weight: 100, - calculateType: 'mean', - score: 0, - summary: '', - summaryStatus: 'pending', - errorReason: '', - completedItemCount: 0, - overThresholdItemCount: 0, - thresholdPassRate: 0 - })) - })) -})); - -// Mock distributed lock service -vi.mock('@fastgpt/service/core/evaluation/task/distributedLock', () => ({ - EvaluationDistributedLockService: { - getInstance: vi.fn(() => ({ - withLock: vi.fn(async (_key, fn) => await fn()), - close: vi.fn().mockResolvedValue(undefined) - })) - }, - EvaluationLockPatterns: vi.fn().mockImplementation(() => ({ - withTaskFinishLock: vi.fn(async (_evalId, fn) => await fn()), - withTaskCreateLock: vi.fn(async (_teamId, _taskName, fn) => await fn()), - withSummaryLock: vi.fn(async (_evalId, _metricId, fn) => await fn()), - withBatchOperationLock: vi.fn(async (_evalId, _operation, fn) => await fn()) - })) -})); - -import { evaluationTaskQueue, evaluationItemQueue } from '@fastgpt/service/core/evaluation/task/mq'; -import { - createTrainingUsage, - createEvaluationUsage, - concatUsage -} from '@fastgpt/service/support/wallet/usage/controller'; -import { parseHeaderCert } from '@fastgpt/service/support/permission/controller'; -import { checkTeamAIPoints } from '@fastgpt/service/support/permission/teamLimit'; -import { createTargetInstance } from '@fastgpt/service/core/evaluation/target'; -import { createEvaluatorInstance } from '@fastgpt/service/core/evaluation/evaluator'; -import { - EvalMetricTypeEnum, - MetricResultStatusEnum -} from '@fastgpt/global/core/evaluation/metric/constants'; -import { CalculateMethodEnum, SummaryStatusEnum } from '@fastgpt/global/core/evaluation/constants'; - -describe('EvaluationTaskService', () => { - let teamId: string; - let tmbId: string; - let evalDatasetCollectionId: string; - let target: EvalTarget; - let metricId: 
string; - let evaluators: EvaluatorSchema[]; - let auth: AuthModeType; - - beforeAll(async () => { - // 数据库连接在 setup.ts 中处理 - teamId = '507f1f77bcf86cd799439011'; - tmbId = '507f1f77bcf86cd799439012'; - auth = { req: {} as any, authToken: true }; - - // 定义测试用的目标对象 - target = { - type: 'workflow', - config: { - appId: '507f1f77bcf86cd799439011', - versionId: '507f1f77bcf86cd799439012', - chatConfig: { - temperature: 0.7, - maxToken: 2000 - } - } - }; - }); - - afterAll(async () => { - // 清理测试数据 - await Promise.all([ - MongoEvaluation.deleteMany({ teamId }), - MongoEvalItem.deleteMany({}), - MongoEvalDatasetCollection.deleteMany({ teamId }), - MongoEvalDatasetData.deleteMany({ teamId }), - // Target现在嵌入在Evaluation中,不需要单独清理 - MongoEvalMetric.deleteMany({ teamId }) - ]); - }); - - beforeEach(async () => { - vi.clearAllMocks(); - // Mock createTrainingUsage - (createTrainingUsage as any).mockResolvedValue({ billId: new Types.ObjectId() }); - // Mock createEvaluationUsage - (createEvaluationUsage as any).mockResolvedValue({ billId: new Types.ObjectId() }); - // Mock concatUsage - (concatUsage as any).mockResolvedValue(undefined); - // Mock parseHeaderCert - 返回正确的ObjectId类型 - (parseHeaderCert as any).mockResolvedValue({ - teamId: new Types.ObjectId(teamId), - tmbId: new Types.ObjectId(tmbId) - }); - - // 创建测试数据 - const dataset = await MongoEvalDatasetCollection.create({ - teamId: new Types.ObjectId(teamId), - tmbId: new Types.ObjectId(tmbId), - name: 'Test Dataset', - description: 'Dataset for task testing' - }); - evalDatasetCollectionId = dataset._id.toString(); - - // 创建数据集数据项 - await MongoEvalDatasetData.create([ - { - teamId: new Types.ObjectId(teamId), - tmbId: new Types.ObjectId(tmbId), - evalDatasetCollectionId: dataset._id, - userInput: 'What is AI?', - expectedOutput: 'Artificial Intelligence' - }, - { - teamId: new Types.ObjectId(teamId), - tmbId: new Types.ObjectId(tmbId), - evalDatasetCollectionId: dataset._id, - userInput: 'What is ML?', - expectedOutput: 
'Machine Learning' - } - ]); - - const metric = await MongoEvalMetric.create({ - teamId: teamId, - tmbId: tmbId, - name: 'Test Metric', - description: 'Metric for task testing', - type: EvalMetricTypeEnum.Custom, - prompt: 'Please evaluate the quality of the response.', - llmRequired: true, - userInputRequired: true, - actualOutputRequired: true, - expectedOutputRequired: true, - createTime: new Date(), - updateTime: new Date() - }); - - metricId = metric._id.toString(); - - // Create evaluators array based on the metric - evaluators = [ - { - metric: metric.toObject(), - runtimeConfig: { - llm: 'gpt-3.5-turbo' - }, - thresholdValue: 0.8 - } - ]; - }); - - describe('createEvaluation', () => { - test('应该成功创建评估任务', async () => { - const params: CreateEvaluationParams = { - name: 'Test Evaluation', - description: 'A test evaluation for unit testing', - evalDatasetCollectionId, - target: { - type: 'workflow', - config: { - appId: '507f1f77bcf86cd799439013', - versionId: '507f1f77bcf86cd799439014', - chatConfig: {} - } - }, - evaluators: evaluators - }; - - // 添加重试机制处理MongoDB事务冲突 - let evaluation: any; - let retryCount = 0; - const maxRetries = 3; - - while (retryCount < maxRetries) { - try { - evaluation = await EvaluationTaskService.createEvaluation({ - ...params, - teamId: teamId, - tmbId: tmbId - }); - break; // 成功则退出循环 - } catch (error: any) { - retryCount++; - if ( - error?.message?.includes('Collection namespace') && - error?.message?.includes('is already in use') && - retryCount < maxRetries - ) { - // 等待一段时间后重试 - await new Promise((resolve) => setTimeout(resolve, 100 * retryCount)); - continue; - } - throw error; // 非命名空间冲突错误或重试次数用完,直接抛出 - } - } - - expect(evaluation.name).toBe(params.name); - expect(evaluation.description).toBe(params.description); - expect(evaluation.evalDatasetCollectionId.toString()).toBe(evalDatasetCollectionId); - expect(evaluation.target.type).toBe('workflow'); - expect(evaluation.target.config.appId).toBe('507f1f77bcf86cd799439013'); - 
expect(evaluation.target.config.versionId).toBe('507f1f77bcf86cd799439014'); - expect(evaluation.evaluators).toHaveLength(1); - expect(evaluation.evaluators[0].metric._id.toString()).toBe(metricId); - expect(evaluation.evaluators[0].runtimeConfig.llm).toBe('gpt-3.5-turbo'); - expect(evaluation.teamId.toString()).toBe(teamId); - expect(evaluation.tmbId.toString()).toBe(tmbId); - expect(evaluation.status).toBe(EvaluationStatusEnum.queuing); - expect(Types.ObjectId.isValid(evaluation.usageId)).toBe(true); - - // 验证创建用量记录被调用 - expect(createEvaluationUsage).toHaveBeenCalledWith({ - teamId: teamId, - tmbId: tmbId, - appName: params.name - }); - }); - - test('缺少必填字段时应该抛出错误', async () => { - const invalidParams = { - name: 'Invalid Evaluation' - // 缺少其他必填字段 - }; - - await expect(EvaluationTaskService.createEvaluation(invalidParams as any)).rejects.toThrow(); - }); - - test('应该支持自动启动功能(默认值)', async () => { - const params: CreateEvaluationParams = { - name: 'Auto Start Test Evaluation', - description: 'Test evaluation with auto start', - evalDatasetCollectionId, - target, - evaluators: evaluators - // autoStart 未指定,应使用默认值 true - }; - - const evaluation = await EvaluationTaskService.createEvaluation({ - ...params, - teamId: teamId, - tmbId: tmbId - }); - - // 验证评估任务被创建且自动启动(状态应为 queuing,processor会将其改为 evaluating) - expect(evaluation.status).toBe(EvaluationStatusEnum.queuing); - expect(evaluationTaskQueue.add).toHaveBeenCalledWith(`eval_task_${evaluation._id}`, { - evalId: evaluation._id.toString() - }); - }); - - test('应该支持显式启用自动启动', async () => { - const params: CreateEvaluationParams = { - name: 'Explicit Auto Start Test', - description: 'Test evaluation with explicit auto start', - evalDatasetCollectionId, - target, - evaluators: evaluators, - autoStart: true - }; - - const evaluation = await EvaluationTaskService.createEvaluation({ - ...params, - teamId: teamId, - tmbId: tmbId - }); - - // 验证评估任务被创建且自动启动(状态应为 queuing) - 
expect(evaluation.status).toBe(EvaluationStatusEnum.queuing); - expect(evaluationTaskQueue.add).toHaveBeenCalledWith(`eval_task_${evaluation._id}`, { - evalId: evaluation._id.toString() - }); - }); - - test('应该支持关闭自动启动', async () => { - const params: CreateEvaluationParams = { - name: 'No Auto Start Test', - description: 'Test evaluation without auto start', - evalDatasetCollectionId, - target, - evaluators: evaluators, - autoStart: false - }; - - const evaluation = await EvaluationTaskService.createEvaluation({ - ...params, - teamId: teamId, - tmbId: tmbId - }); - - // 验证评估任务被创建但未自动启动(状态应为 queuing) - expect(evaluation.status).toBe(EvaluationStatusEnum.queuing); - expect(evaluationTaskQueue.add).not.toHaveBeenCalled(); - }); - }); - - describe('getEvaluation', () => { - test('应该成功获取评估任务', async () => { - // 先创建一个评估任务 - const params: CreateEvaluationParams = { - name: 'Get Test Evaluation', - description: 'Test evaluation for get operation', - evalDatasetCollectionId, - target, - evaluators: evaluators - }; - const created = await EvaluationTaskService.createEvaluation({ - ...params, - teamId: teamId, - tmbId: tmbId - }); - - const evaluation = await EvaluationTaskService.getEvaluation(created._id, teamId); - - expect(evaluation._id.toString()).toBe(created._id.toString()); - expect(evaluation.name).toBe('Get Test Evaluation'); - expect(evaluation.status).toBe(EvaluationStatusEnum.queuing); - }); - - test('评估任务不存在时应该抛出错误', async () => { - const nonExistentId = new Types.ObjectId().toString(); - - await expect(EvaluationTaskService.getEvaluation(nonExistentId, teamId)).rejects.toThrow( - 'evaluationTaskNotFound' - ); - }); - }); - - describe('updateEvaluation', () => { - test('应该成功更新评估任务', async () => { - // 先创建一个评估任务 - const params: CreateEvaluationParams = { - name: 'Update Test Evaluation', - description: 'Original description', - evalDatasetCollectionId, - target, - evaluators: evaluators - }; - const created = await EvaluationTaskService.createEvaluation({ - 
...params, - teamId: teamId, - tmbId: tmbId - }); - - const updates = { - name: 'Updated Test Evaluation', - description: 'Updated description' - }; - - await EvaluationTaskService.updateEvaluation(created._id, updates, teamId); - - const updatedEvaluation = await EvaluationTaskService.getEvaluation(created._id, teamId); - expect(updatedEvaluation.name).toBe(updates.name); - expect(updatedEvaluation.description).toBe(updates.description); - }); - }); - - describe('listEvaluations', () => { - test('应该成功获取评估任务列表', async () => { - // Clean up any leftover evaluation tasks - await MongoEvaluation.deleteMany({ teamId }); - // 先创建一个评估任务 - const params: CreateEvaluationParams = { - name: 'List Test Evaluation', - description: 'Test evaluation for list operation', - evalDatasetCollectionId, - target, - evaluators: evaluators - }; - const created = await EvaluationTaskService.createEvaluation({ - ...params, - teamId: teamId, - tmbId: tmbId - }); - - const result = await EvaluationTaskService.listEvaluations( - teamId, - 0, - 10, - undefined, - undefined, - tmbId, - true - ); - - expect(Array.isArray(result.list)).toBe(true); - expect(typeof result.total).toBe('number'); - expect(result.list.length).toBeGreaterThanOrEqual(1); - - const evaluation = result.list.find((e) => e._id.toString() === created._id.toString()); - expect(evaluation).toBeDefined(); - expect(evaluation?.name).toBe('List Test Evaluation'); - }); - - test('应该支持搜索功能', async () => { - // Clean up any leftover evaluation tasks - await MongoEvaluation.deleteMany({ teamId }); - // 先创建一个评估任务 - const params: CreateEvaluationParams = { - name: 'Searchable Test Evaluation', - description: 'Test evaluation for search operation', - evalDatasetCollectionId, - target, - evaluators: evaluators - }; - await EvaluationTaskService.createEvaluation({ - ...params, - teamId: teamId, - tmbId: tmbId - }); - - const result = await EvaluationTaskService.listEvaluations( - teamId, - 0, - 10, - 'Searchable', - undefined, - tmbId, - 
true - ); - - expect(Array.isArray(result.list)).toBe(true); - expect(result.list.some((evaluation) => evaluation.name.includes('Searchable'))).toBe(true); - }); - - test('应该支持按appName过滤', async () => { - // Clean up any leftover evaluation tasks - await MongoEvaluation.deleteMany({ teamId }); - - // 创建使用不同appId的评估任务 - const targetWithAppName = { - type: 'workflow' as const, - config: { - appId: '507f1f77bcf86cd799439015', - versionId: '507f1f77bcf86cd799439016', - chatConfig: {} - } - }; - - const params: CreateEvaluationParams = { - name: 'App Name Filter Test', - description: 'Test evaluation for app name filtering', - evalDatasetCollectionId, - target: targetWithAppName, - evaluators: evaluators - }; - - await EvaluationTaskService.createEvaluation({ - ...params, - teamId: teamId, - tmbId: tmbId - }); - - // 模拟有应用名称的情况 - 实际场景中appName会在aggregation阶段从apps collection中lookup得到 - // 这里我们测试带appName参数的调用 - const result = await EvaluationTaskService.listEvaluations( - teamId, - 0, - 10, - undefined, - undefined, - tmbId, - true, - 'Test App Name' - ); - - expect(Array.isArray(result.list)).toBe(true); - expect(typeof result.total).toBe('number'); - }); - - test('应该支持按appId过滤', async () => { - // Clean up any leftover evaluation tasks - await MongoEvaluation.deleteMany({ teamId }); - - const targetWithSpecificAppId = { - type: 'workflow' as const, - config: { - appId: '507f1f77bcf86cd799439020', - versionId: '507f1f77bcf86cd799439021', - chatConfig: {} - } - }; - - const params: CreateEvaluationParams = { - name: 'App ID Filter Test', - description: 'Test evaluation for app ID filtering', - evalDatasetCollectionId, - target: targetWithSpecificAppId, - evaluators: evaluators - }; - - const created = await EvaluationTaskService.createEvaluation({ - ...params, - teamId: teamId, - tmbId: tmbId - }); - - const result = await EvaluationTaskService.listEvaluations( - teamId, - 0, - 10, - undefined, - undefined, - tmbId, - true, - undefined, - '507f1f77bcf86cd799439020' - ); - 
- expect(Array.isArray(result.list)).toBe(true); - expect(typeof result.total).toBe('number'); - // 由于过滤条件匹配,应该能找到创建的评估任务 - const foundEvaluation = result.list.find((e) => e._id.toString() === created._id.toString()); - expect(foundEvaluation).toBeDefined(); - }); - - test('应该支持按versionId过滤', async () => { - // Clean up any leftover evaluation tasks - await MongoEvaluation.deleteMany({ teamId }); - - const targetWithSpecificVersionId = { - type: 'workflow' as const, - config: { - appId: '507f1f77bcf86cd799439025', - versionId: '507f1f77bcf86cd799439026', - chatConfig: {} - } - }; - - const params: CreateEvaluationParams = { - name: 'Version ID Filter Test', - description: 'Test evaluation for version ID filtering', - evalDatasetCollectionId, - target: targetWithSpecificVersionId, - evaluators: evaluators - }; - - const created = await EvaluationTaskService.createEvaluation({ - ...params, - teamId: teamId, - tmbId: tmbId - }); - - const result = await EvaluationTaskService.listEvaluations( - teamId, - 0, - 10, - '507f1f77bcf86cd799439026', - undefined, - tmbId, - true, - undefined, - undefined - ); - - expect(Array.isArray(result.list)).toBe(true); - expect(typeof result.total).toBe('number'); - // 由于过滤条件匹配,应该能找到创建的评估任务 - const foundEvaluation = result.list.find((e) => e._id.toString() === created._id.toString()); - expect(foundEvaluation).toBeDefined(); - }); - - test('应该支持组合target过滤条件', async () => { - // Clean up any leftover evaluation tasks - await MongoEvaluation.deleteMany({ teamId }); - - const targetWithMultipleFilters = { - type: 'workflow' as const, - config: { - appId: '507f1f77bcf86cd799439030', - versionId: '507f1f77bcf86cd799439031', - chatConfig: {} - } - }; - - const params: CreateEvaluationParams = { - name: 'Multiple Filters Test', - description: 'Test evaluation for multiple target filtering', - evalDatasetCollectionId, - target: targetWithMultipleFilters, - evaluators: evaluators - }; - - await EvaluationTaskService.createEvaluation({ - 
...params, - teamId: teamId, - tmbId: tmbId - }); - - const result = await EvaluationTaskService.listEvaluations( - teamId, - 0, - 10, - '507f1f77bcf86cd799439031', - undefined, - tmbId, - true, - 'Test App', // appName filter - '507f1f77bcf86cd799439030' // appId filter - ); - - expect(Array.isArray(result.list)).toBe(true); - expect(typeof result.total).toBe('number'); - }); - - test('不匹配的过滤条件应该返回空结果', async () => { - // Clean up any leftover evaluation tasks - await MongoEvaluation.deleteMany({ teamId }); - - const params: CreateEvaluationParams = { - name: 'No Match Test', - description: 'Test evaluation that should not match filters', - evalDatasetCollectionId, - target, - evaluators: evaluators - }; - - await EvaluationTaskService.createEvaluation({ - ...params, - teamId: teamId, - tmbId: tmbId - }); - - const result = await EvaluationTaskService.listEvaluations( - teamId, - 0, - 10, - undefined, - undefined, - tmbId, - true, - undefined, - 'non-existent-app-id' - ); - - expect(Array.isArray(result.list)).toBe(true); - expect(result.total).toBe(0); - expect(result.list.length).toBe(0); - }); - }); - - describe('startEvaluation', () => { - test('应该成功启动评估任务', async () => { - // 先创建一个评估任务 - const params: CreateEvaluationParams = { - name: 'Start Test Evaluation', - description: 'Test evaluation for start operation', - evalDatasetCollectionId, - target, - evaluators: evaluators, - autoStart: false - }; - const created = await EvaluationTaskService.createEvaluation({ - ...params, - teamId: teamId, - tmbId: tmbId - }); - - await EvaluationTaskService.startEvaluation(created._id, teamId); - - // 验证任务已提交到队列 - expect(evaluationTaskQueue.add).toHaveBeenCalledWith(`eval_task_${created._id}`, { - evalId: created._id - }); - }); - - test('非排队状态的任务不能启动', async () => { - // 先创建一个评估任务 - const params: CreateEvaluationParams = { - name: 'No Start Test Evaluation', - description: 'Test evaluation for no start operation', - evalDatasetCollectionId, - target, - evaluators: 
evaluators - }; - const created = await EvaluationTaskService.createEvaluation({ - ...params, - teamId: teamId, - tmbId: tmbId - }); - - // 先更新状态为已完成 - await MongoEvaluation.updateOne( - { _id: created._id }, - { $set: { status: EvaluationStatusEnum.completed } } - ); - - await expect(EvaluationTaskService.startEvaluation(created._id, teamId)).rejects.toThrow( - 'evaluationInvalidStateTransition' - ); - }); - - test('应该能重启手动停止的任务', async () => { - // 先创建一个评估任务 - const params: CreateEvaluationParams = { - name: 'Restart Test Evaluation', - description: 'Test evaluation for restart operation', - evalDatasetCollectionId, - target, - evaluators: evaluators, - autoStart: false - }; - const created = await EvaluationTaskService.createEvaluation({ - ...params, - teamId: teamId, - tmbId: tmbId - }); - - // 先启动任务 - await EvaluationTaskService.startEvaluation(created._id, teamId); - - // 然后停止任务 - await EvaluationTaskService.stopEvaluation(created._id, teamId); - - // 验证任务已被标记为手动停止 - let evaluation = await EvaluationTaskService.getEvaluation(created._id, teamId); - expect(evaluation.status).toBe(EvaluationStatusEnum.error); - expect(evaluation.errorMessage).toBe('Manually stopped'); - - // 现在应该能重启这个任务 - await EvaluationTaskService.startEvaluation(created._id, teamId); - - // 验证状态已更新 - evaluation = await EvaluationTaskService.getEvaluation(created._id, teamId); - expect(evaluation.status).toBe(EvaluationStatusEnum.queuing); - expect(evaluation.errorMessage).toBeUndefined(); - expect(evaluation.finishTime).toBeUndefined(); - - // 验证任务已重新提交到队列 - expect(evaluationTaskQueue.add).toHaveBeenLastCalledWith(`eval_task_${created._id}`, { - evalId: created._id - }); - }); - - test('因真实错误失败的任务不能重启', async () => { - // 先创建一个评估任务 - const params: CreateEvaluationParams = { - name: 'Error Test Evaluation', - description: 'Test evaluation with real error', - evalDatasetCollectionId, - target, - evaluators: evaluators - }; - const created = await EvaluationTaskService.createEvaluation({ - 
...params, - teamId: teamId, - tmbId: tmbId - }); - - // 手动设置为错误状态(但不是手动停止) - await MongoEvaluation.updateOne( - { _id: created._id }, - { - $set: { - status: EvaluationStatusEnum.error, - errorMessage: 'System error occurred', - finishTime: new Date() - } - } - ); - - // 尝试重启应该失败 - await expect(EvaluationTaskService.startEvaluation(created._id, teamId)).rejects.toThrow( - 'evaluationInvalidStateTransition' - ); - }); - - test('应该支持多次重启操作', async () => { - // 创建评估任务 - const params: CreateEvaluationParams = { - name: 'Multiple Restart Test', - description: 'Test multiple restart operations', - evalDatasetCollectionId, - target, - evaluators: evaluators, - autoStart: false - }; - const created = await EvaluationTaskService.createEvaluation({ - ...params, - teamId: teamId, - tmbId: tmbId - }); - - // 第一次:启动 -> 停止 -> 重启 - await EvaluationTaskService.startEvaluation(created._id, teamId); - await EvaluationTaskService.stopEvaluation(created._id, teamId); - await EvaluationTaskService.startEvaluation(created._id, teamId); - - let evaluation = await EvaluationTaskService.getEvaluation(created._id, teamId); - expect(evaluation.status).toBe(EvaluationStatusEnum.queuing); - - // 第二次:停止 -> 重启 - await EvaluationTaskService.stopEvaluation(created._id, teamId); - await EvaluationTaskService.startEvaluation(created._id, teamId); - - evaluation = await EvaluationTaskService.getEvaluation(created._id, teamId); - expect(evaluation.status).toBe(EvaluationStatusEnum.queuing); - expect(evaluation.errorMessage).toBeUndefined(); - }); - - test('只有特定错误消息的任务才能重启', async () => { - // 创建评估任务 - const params: CreateEvaluationParams = { - name: 'Specific Error Message Test', - description: 'Test specific error message for restart', - evalDatasetCollectionId, - target, - evaluators: evaluators - }; - const created = await EvaluationTaskService.createEvaluation({ - ...params, - teamId: teamId, - tmbId: tmbId - }); - - // 测试不同的错误消息 - const errorMessages = [ - 'manually stopped', // 小写,不应该允许重启 - 
'MANUALLY STOPPED', // 大写,不应该允许重启 - 'Manually Stopped', // 不完全匹配,不应该允许重启 - 'Manually stopped by user', // 包含但不完全匹配,不应该允许重启 - 'System manually stopped' // 包含但不完全匹配,不应该允许重启 - ]; - - for (const errorMessage of errorMessages) { - // 设置错误状态 - await MongoEvaluation.updateOne( - { _id: created._id }, - { - $set: { - status: EvaluationStatusEnum.error, - errorMessage: errorMessage, - finishTime: new Date() - } - } - ); - - // 尝试重启应该失败 - await expect(EvaluationTaskService.startEvaluation(created._id, teamId)).rejects.toThrow( - 'evaluationInvalidStateTransition' - ); - } - - // 只有确切的 'Manually stopped' 才能重启 - await MongoEvaluation.updateOne( - { _id: created._id }, - { - $set: { - status: EvaluationStatusEnum.error, - errorMessage: 'Manually stopped', - finishTime: new Date() - } - } - ); - - // 这次应该成功 - await expect( - EvaluationTaskService.startEvaluation(created._id, teamId) - ).resolves.not.toThrow(); - }); - - test('重启时应该正确清理相关字段', async () => { - // 创建评估任务 - const params: CreateEvaluationParams = { - name: 'Field Cleanup Test', - description: 'Test field cleanup during restart', - evalDatasetCollectionId, - target, - evaluators: evaluators, - autoStart: false - }; - const created = await EvaluationTaskService.createEvaluation({ - ...params, - teamId: teamId, - tmbId: tmbId - }); - - // 启动并停止任务 - await EvaluationTaskService.startEvaluation(created._id, teamId); - await EvaluationTaskService.stopEvaluation(created._id, teamId); - - // 验证停止后的状态 - let evaluation = await EvaluationTaskService.getEvaluation(created._id, teamId); - expect(evaluation.status).toBe(EvaluationStatusEnum.error); - expect(evaluation.errorMessage).toBe('Manually stopped'); - expect(evaluation.finishTime).toBeDefined(); - - // 重启任务 - await EvaluationTaskService.startEvaluation(created._id, teamId); - - // 验证字段被正确清理 - evaluation = await EvaluationTaskService.getEvaluation(created._id, teamId); - expect(evaluation.status).toBe(EvaluationStatusEnum.queuing); - 
expect(evaluation.errorMessage).toBeUndefined(); - expect(evaluation.finishTime).toBeUndefined(); - }); - - test('重启已完成的任务应该失败', async () => { - // 创建评估任务 - const params: CreateEvaluationParams = { - name: 'Completed Task Restart Test', - description: 'Test restarting completed task', - evalDatasetCollectionId, - target, - evaluators: evaluators - }; - const created = await EvaluationTaskService.createEvaluation({ - ...params, - teamId: teamId, - tmbId: tmbId - }); - - // 手动设置为已完成状态 - await MongoEvaluation.updateOne( - { _id: created._id }, - { - $set: { - status: EvaluationStatusEnum.completed, - finishTime: new Date() - } - } - ); - - // 尝试重启应该失败 - await expect(EvaluationTaskService.startEvaluation(created._id, teamId)).rejects.toThrow( - 'evaluationInvalidStateTransition' - ); - }); - - test('重启正在运行的任务应该失败', async () => { - // 创建评估任务 - const params: CreateEvaluationParams = { - name: 'Running Task Restart Test', - description: 'Test restarting running task', - evalDatasetCollectionId, - target, - evaluators: evaluators, - autoStart: false - }; - const created = await EvaluationTaskService.createEvaluation({ - ...params, - teamId: teamId, - tmbId: tmbId - }); - - // 启动任务 - await EvaluationTaskService.startEvaluation(created._id, teamId); - - // 验证状态为运行中 - const evaluation = await EvaluationTaskService.getEvaluation(created._id, teamId); - expect(evaluation.status).toBe(EvaluationStatusEnum.queuing); - - // 尝试再次启动在mock环境下应该成功(实际环境中queue会防止重复) - await expect( - EvaluationTaskService.startEvaluation(created._id, teamId) - ).resolves.not.toThrow(); - }); - }); - - describe('stopEvaluation', () => { - test('应该成功停止评估任务', async () => { - // 为这个测试创建专门的evaluation - const params: CreateEvaluationParams = { - name: 'Test Evaluation for stopEvaluation', - description: 'A test evaluation', - evalDatasetCollectionId, - target, - evaluators: evaluators - }; - const evaluation = await EvaluationTaskService.createEvaluation({ - ...params, - teamId: teamId, - tmbId: tmbId - }); - 
const testEvaluationId = evaluation._id; - - // 重置为evaluating状态 - await MongoEvaluation.updateOne( - { _id: testEvaluationId }, - { $set: { status: EvaluationStatusEnum.evaluating } } - ); - - // 创建一些测试评估项 - await MongoEvalItem.create([ - { - evalId: testEvaluationId, - dataItem: { userInput: 'Test 1', expectedOutput: 'Answer 1' }, - target, - evaluators: [evaluators[0]], - status: EvaluationStatusEnum.evaluating - }, - { - evalId: testEvaluationId, - dataItem: { userInput: 'Test 2', expectedOutput: 'Answer 2' }, - target, - evaluators: [evaluators[0]], - status: EvaluationStatusEnum.queuing - } - ]); - - await EvaluationTaskService.stopEvaluation(testEvaluationId, teamId); - - // 验证评估任务状态 - const updatedEvaluation = await EvaluationTaskService.getEvaluation(testEvaluationId, teamId); - expect(updatedEvaluation.status).toBe(EvaluationStatusEnum.error); - expect(updatedEvaluation.errorMessage).toBe('Manually stopped'); - expect(updatedEvaluation.finishTime).toBeDefined(); - - // 验证评估项状态 - const items = await MongoEvalItem.find({ evalId: testEvaluationId }); - items.forEach((item) => { - expect(item.status).toBe(EvaluationStatusEnum.error); - expect(item.errorMessage).toBe('Manually stopped'); - expect(item.finishTime).toBeDefined(); - }); - }); - }); - - describe('getEvaluationStats', () => { - test('应该返回正确的统计信息', async () => { - // Clean up any leftover evaluation items and evaluations - await MongoEvalItem.deleteMany({}); - await MongoEvaluation.deleteMany({ teamId }); - // 创建一个新的evaluation用于此测试 - const params: CreateEvaluationParams = { - name: 'Test Evaluation for getEvaluationStats', - description: 'A test evaluation', - evalDatasetCollectionId, - target, - evaluators: evaluators - }; - const evaluation = await EvaluationTaskService.createEvaluation({ - ...params, - teamId: teamId, - tmbId: tmbId - }); - const testEvaluationId = evaluation._id; - - // 创建测试评估项 - 确保只有最后一个有错误 - await MongoEvalItem.create([ - { - evalId: testEvaluationId, - dataItem: { userInput: 
'Q1', expectedOutput: 'A1' }, - target, - evaluators: [evaluators[0]], - status: EvaluationStatusEnum.completed, - evaluatorOutputs: [ - { - metricName: 'Test Metric', - data: { - score: 85 - } - } - ] - }, - { - evalId: testEvaluationId, - dataItem: { userInput: 'Q2', expectedOutput: 'A2' }, - target, - evaluators: [evaluators[0]], - status: EvaluationStatusEnum.completed, - evaluatorOutputs: [ - { - metricName: 'Test Metric', - data: { - score: 95 - } - } - ] - }, - { - evalId: testEvaluationId, - dataItem: { userInput: 'Q3', expectedOutput: 'A3' }, - target, - evaluators: [evaluators[0]], - status: EvaluationStatusEnum.evaluating - }, - { - evalId: testEvaluationId, - dataItem: { userInput: 'Q4', expectedOutput: 'A4' }, - target, - evaluators: [evaluators[0]], - status: EvaluationStatusEnum.queuing - } - ]); - - // 单独创建错误状态的项目 - await MongoEvalItem.create({ - evalId: testEvaluationId, - dataItem: { userInput: 'Q5', expectedOutput: 'A5' }, - target, - evaluator: evaluators[0], - status: EvaluationStatusEnum.error, - errorMessage: 'Test error' - }); - - const stats = await EvaluationTaskService.getEvaluationStats(testEvaluationId, teamId); - - expect(stats.total).toBe(7); // 2 from dataset + 4 manually created + 1 error item - expect(stats.completed).toBe(2); // 2个真正完成的 - expect(stats.evaluating).toBe(1); - expect(stats.queuing).toBe(3); - expect(stats.error).toBe(1); - }); - }); - - describe('listEvaluationItems', () => { - test('应该成功获取评估项列表', async () => { - // 创建一个新的evaluation用于此测试 - const params: CreateEvaluationParams = { - name: 'Test Evaluation for listEvaluationItems', - description: 'A test evaluation', - evalDatasetCollectionId, - target, - evaluators: evaluators - }; - const evaluation = await EvaluationTaskService.createEvaluation({ - ...params, - teamId: teamId, - tmbId: tmbId - }); - const testEvaluationId = evaluation._id; - - // 创建一些测试评估项 - await MongoEvalItem.create([ - { - evalId: testEvaluationId, - dataItem: { userInput: 'Test userInput 1', 
expectedOutput: 'Test answer 1' }, - target, - evaluators: [evaluators[0]], - status: EvaluationStatusEnum.queuing - }, - { - evalId: testEvaluationId, - dataItem: { userInput: 'Test userInput 2', expectedOutput: 'Test answer 2' }, - target, - evaluators: [evaluators[0]], - status: EvaluationStatusEnum.completed - } - ]); - - const result = await EvaluationTaskService.listEvaluationItems( - testEvaluationId, - teamId, - 1, - 10 - ); - - expect(Array.isArray(result.items)).toBe(true); - expect(typeof result.total).toBe('number'); - expect(result.items.length).toBeGreaterThan(0); - - const item = result.items[0]; - expect(item._id).toBeDefined(); - expect(item.evalId.toString()).toBe(testEvaluationId.toString()); - expect(item.dataItem).toBeDefined(); - }); - }); - - describe('Evaluation Item Operations', () => { - describe('getEvaluationItem', () => { - test('应该成功获取评估项', async () => { - // 创建一个新的evaluation和item用于此测试 - const params: CreateEvaluationParams = { - name: 'Test Evaluation for getEvaluationItem', - description: 'A test evaluation', - evalDatasetCollectionId, - target, - evaluators: evaluators - }; - const evaluation = await EvaluationTaskService.createEvaluation({ - ...params, - teamId: teamId, - tmbId: tmbId - }); - const testEvaluationId = evaluation._id; - - const item = await MongoEvalItem.create({ - evalId: testEvaluationId, - dataItem: { userInput: 'Test Item', expectedOutput: 'Test Response' }, - target, - evaluators: [evaluators[0]], - status: EvaluationStatusEnum.queuing - }); - const itemId = item._id.toString(); - - const retrievedItem = await EvaluationTaskService.getEvaluationItem(itemId, teamId); - - expect(retrievedItem._id.toString()).toBe(itemId); - expect(retrievedItem.evalId.toString()).toBe(testEvaluationId.toString()); - expect(retrievedItem.dataItem.userInput).toBe('Test Item'); - }); - - test('评估项不存在时应该抛出错误', async () => { - const nonExistentId = new Types.ObjectId().toString(); - - await expect( - 
EvaluationTaskService.getEvaluationItem(nonExistentId, teamId) - ).rejects.toThrow('evaluationItemNotFound'); - }); - }); - - describe('updateEvaluationItem', () => { - test('应该成功更新评估项', async () => { - // 创建一个新的evaluation和item用于此测试 - const params: CreateEvaluationParams = { - name: 'Test Evaluation for updateEvaluationItem', - description: 'A test evaluation', - evalDatasetCollectionId, - target, - evaluators: evaluators - }; - const evaluation = await EvaluationTaskService.createEvaluation({ - ...params, - teamId: teamId, - tmbId: tmbId - }); - const testEvaluationId = evaluation._id; - - const item = await MongoEvalItem.create({ - evalId: testEvaluationId, - dataItem: { userInput: 'Test Item', expectedOutput: 'Test Response' }, - target, - evaluators: [evaluators[0]], - status: EvaluationStatusEnum.queuing - }); - const itemId = item._id.toString(); - - const updates = { - userInput: 'Updated user input', - expectedOutput: 'Updated expected output', - context: ['Updated context 1', 'Updated context 2'] - }; - - await EvaluationTaskService.updateEvaluationItem(itemId, updates, teamId); - - const updatedItem = await EvaluationTaskService.getEvaluationItem(itemId, teamId); - expect(updatedItem.dataItem.userInput).toBe(updates.userInput); - expect(updatedItem.dataItem.expectedOutput).toBe(updates.expectedOutput); - expect(updatedItem.dataItem.context).toEqual(updates.context); - }); - }); - - describe('retryEvaluationItem', () => { - test('应该成功重试失败的评估项', async () => { - // 创建一个新的evaluation和item用于此测试 - const params: CreateEvaluationParams = { - name: 'Test Evaluation for retryEvaluationItem', - description: 'A test evaluation', - evalDatasetCollectionId, - target, - evaluators: evaluators - }; - const evaluation = await EvaluationTaskService.createEvaluation({ - ...params, - teamId: teamId, - tmbId: tmbId - }); - const testEvaluationId = evaluation._id; - - const item = await MongoEvalItem.create({ - evalId: testEvaluationId, - dataItem: { userInput: 'Test Item', 
expectedOutput: 'Test Response' }, - target, - evaluators: [evaluators[0]], - status: EvaluationStatusEnum.error, - errorMessage: 'Test error', - retry: 2 - }); - const itemId = item._id.toString(); - - await EvaluationTaskService.retryEvaluationItem(itemId, teamId); - - const retriedItem = await EvaluationTaskService.getEvaluationItem(itemId, teamId); - expect(retriedItem.status).toBe(EvaluationStatusEnum.queuing); - expect(retriedItem.errorMessage).toBeUndefined(); - expect(retriedItem.retry).toBeGreaterThanOrEqual(1); - }); - - test('非失败状态的评估项不能重试', async () => { - // 创建一个新的evaluation和item用于此测试 - const params: CreateEvaluationParams = { - name: 'Test Evaluation for retry error', - description: 'A test evaluation', - evalDatasetCollectionId, - target, - evaluators: evaluators - }; - const evaluation = await EvaluationTaskService.createEvaluation({ - ...params, - teamId: teamId, - tmbId: tmbId - }); - const testEvaluationId = evaluation._id; - - // 创建一个已完成且无错误的评估项 - const item = await MongoEvalItem.create({ - evalId: testEvaluationId, - dataItem: { userInput: 'Test Item', expectedOutput: 'Test Response' }, - target, - evaluators: [evaluators[0]], - status: EvaluationStatusEnum.completed, - errorMessage: null // 确保没有错误消息 - }); - const itemId = item._id.toString(); - - await expect(EvaluationTaskService.retryEvaluationItem(itemId, teamId)).rejects.toThrow( - 'evaluationOnlyFailedCanRetry' - ); - }); - }); - - describe('deleteEvaluationItem', () => { - test('应该成功删除评估项', async () => { - // 创建一个新的evaluation和item用于此测试 - const params: CreateEvaluationParams = { - name: 'Test Evaluation for deleteEvaluationItem', - description: 'A test evaluation', - evalDatasetCollectionId, - target, - evaluators: evaluators - }; - const evaluation = await EvaluationTaskService.createEvaluation({ - ...params, - teamId: teamId, - tmbId: tmbId - }); - const testEvaluationId = evaluation._id; - - const item = await MongoEvalItem.create({ - evalId: testEvaluationId, - dataItem: { userInput: 
'Test Item', expectedOutput: 'Test Response' }, - target, - evaluators: [evaluators[0]], - status: EvaluationStatusEnum.queuing - }); - const itemId = item._id.toString(); - - await EvaluationTaskService.deleteEvaluationItem(itemId, teamId); - - await expect(EvaluationTaskService.getEvaluationItem(itemId, teamId)).rejects.toThrow( - 'evaluationItemNotFound' - ); - }); - }); - - describe('getEvaluationItemResult', () => { - test('应该返回评估项详细结果', async () => { - // 创建一个新的evaluation和item用于此测试 - const params: CreateEvaluationParams = { - name: 'Test Evaluation for getEvaluationItemResult', - description: 'A test evaluation', - evalDatasetCollectionId, - target, - evaluators: evaluators - }; - const evaluation = await EvaluationTaskService.createEvaluation({ - ...params, - teamId: teamId, - tmbId: tmbId - }); - const testEvaluationId = evaluation._id; - - const item = await MongoEvalItem.create({ - evalId: testEvaluationId, - dataItem: { userInput: 'Test Item', expectedOutput: 'Test Response' }, - target, - evaluators: [evaluators[0]], - status: EvaluationStatusEnum.completed, - targetOutput: { - actualOutput: 'Test response', - responseTime: 1000 - }, - evaluatorOutputs: [ - { - metricName: 'Test Metric', - data: { - score: 92, - runLogs: { test: true } - } - } - ] - }); - const itemId = item._id.toString(); - - const result = await EvaluationTaskService.getEvaluationItemResult(itemId, teamId); - - expect(result._id.toString()).toBe(itemId); - expect(result.dataItem.userInput).toBe('Test Item'); - expect(result.targetOutput?.actualOutput).toBe('Test response'); - expect(result.evaluatorOutputs?.[0].data?.score).toBe(92); - }); - }); - }); - - describe('exportEvaluationResults', () => { - test('应该成功导出 JSON 格式', async () => { - // 创建一个新的evaluation用于此测试 - const params: CreateEvaluationParams = { - name: 'Test Evaluation for exportEvaluationResults JSON', - description: 'A test evaluation', - evalDatasetCollectionId, - target, - evaluators: evaluators - }; - const evaluation 
= await EvaluationTaskService.createEvaluation({ - ...params, - teamId: teamId, - tmbId: tmbId - }); - const testEvaluationId = evaluation._id; - - // 创建一些测试数据 - await MongoEvalItem.create({ - evalId: testEvaluationId, - dataItem: { userInput: 'Test userInput', expectedOutput: 'Test answer' }, - target, - evaluator: evaluators[0], - status: EvaluationStatusEnum.completed, - targetOutput: { - actualOutput: 'Test response', - responseTime: 1000 - }, - evaluatorOutputs: [ - { - metricName: 'Test Metric', - data: { - score: 85 - } - } - ] - }); - - const { results: buffer } = await EvaluationTaskService.exportEvaluationResults( - testEvaluationId, - teamId, - 'json' - ); - const data = JSON.parse(buffer.toString()); - - expect(Array.isArray(data)).toBe(true); - expect(data.length).toBeGreaterThan(0); - - const item = data[0]; - expect(item.itemId).toBeDefined(); - expect(item.userInput).toBeDefined(); - expect(item.expectedOutput).toBeDefined(); - }); - - test('应该成功导出 CSV 格式', async () => { - // 创建一个新的evaluation用于此测试 - const params: CreateEvaluationParams = { - name: 'Test Evaluation for exportEvaluationResults CSV', - description: 'A test evaluation', - evalDatasetCollectionId, - target, - evaluators: evaluators - }; - const evaluation = await EvaluationTaskService.createEvaluation({ - ...params, - teamId: teamId, - tmbId: tmbId - }); - const testEvaluationId = evaluation._id; - - // 创建一些测试数据 - await MongoEvalItem.create({ - evalId: testEvaluationId, - dataItem: { userInput: 'JavaScript userInput', expectedOutput: 'JS answer' }, - target, - evaluator: evaluators[0], - status: EvaluationStatusEnum.completed, - targetOutput: { - actualOutput: 'JavaScript response', - responseTime: 1000 - }, - evaluatorOutputs: [ - { - metricName: 'Test Metric', - data: { - score: 85 - } - } - ] - }); - - const { results: buffer } = await EvaluationTaskService.exportEvaluationResults( - testEvaluationId, - teamId, - 'csv' - ); - const csvContent = buffer.toString(); - - 
expect(csvContent.includes('ItemId,UserInput,ExpectedOutput')).toBe(true); - expect(csvContent.includes('JavaScript userInput')).toBe(true); - }); - - test('空数据时应该返回空内容', async () => { - // 创建新的空评估任务 - const emptyEvaluation = await MongoEvaluation.create({ - teamId, - tmbId, - name: 'Empty Evaluation', - evalDatasetCollectionId, - target, - evaluators: evaluators, - usageId: new Types.ObjectId(), - status: EvaluationStatusEnum.queuing - }); - - const { results: buffer } = await EvaluationTaskService.exportEvaluationResults( - emptyEvaluation._id.toString(), - teamId, - 'csv' - ); - - expect(buffer.toString()).toBe(''); - }); - }); - - describe('retryFailedItems', () => { - test('应该批量重试失败的评估项', async () => { - // 创建一个新的evaluation用于此测试 - const params: CreateEvaluationParams = { - name: 'Test Evaluation for retryFailedItems', - description: 'A test evaluation', - evalDatasetCollectionId, - target, - evaluators: evaluators - }; - const evaluation = await EvaluationTaskService.createEvaluation({ - ...params, - teamId: teamId, - tmbId: tmbId - }); - const testEvaluationId = evaluation._id; - - // 创建失败的评估项和成功的评估项 - await MongoEvalItem.create([ - { - evalId: testEvaluationId, - dataItem: { userInput: 'Failed 1', expectedOutput: 'Answer 1' }, - target, - evaluators: [evaluators[0]], - status: EvaluationStatusEnum.error, - errorMessage: 'Error 1' - }, - { - evalId: testEvaluationId, - dataItem: { userInput: 'Failed 2', expectedOutput: 'Answer 2' }, - target, - evaluators: [evaluators[0]], - status: EvaluationStatusEnum.error, - errorMessage: 'Error 2' - }, - { - evalId: testEvaluationId, - dataItem: { userInput: 'Success', expectedOutput: 'Answer' }, - target, - evaluators: [evaluators[0]], - status: EvaluationStatusEnum.completed, - evaluatorOutputs: [ - { - metricName: 'Test Metric', - data: { - score: 90 - } - } - ] // 成功的项目,不应该被重试 - } - ]); - - const retryCount = await EvaluationTaskService.retryFailedItems(testEvaluationId, teamId); - - expect(retryCount).toBe(2); - - 
// 验证失败的项目被重试 - const failedItems = await MongoEvalItem.find({ - evalId: testEvaluationId, - 'dataItem.userInput': { $in: ['Failed 1', 'Failed 2'] } - }); - - failedItems.forEach((item) => { - expect(item.status).toBe(EvaluationStatusEnum.queuing); - expect(item.errorMessage).toBeUndefined(); - }); - - // 验证成功的项目未受影响 - const successItem = await MongoEvalItem.findOne({ - evalId: testEvaluationId, - 'dataItem.userInput': 'Success' - }); - expect(successItem?.status).toBe(EvaluationStatusEnum.completed); - expect(successItem?.evaluatorOutputs?.[0]?.data?.score).toBe(90); - }); - }); - - describe('deleteEvaluation', () => { - test('应该成功删除评估任务及其评估项', async () => { - // 创建一个新的evaluation用于此测试 - const params: CreateEvaluationParams = { - name: 'Test Evaluation for delete', - description: 'A test evaluation', - evalDatasetCollectionId, - target, - evaluators: evaluators - }; - const evaluation = await EvaluationTaskService.createEvaluation({ - ...params, - teamId: teamId, - tmbId: tmbId - }); - const testEvaluationId = evaluation._id; - - // 创建一些评估项 - await MongoEvalItem.create([ - { - evalId: testEvaluationId, - dataItem: { userInput: 'Test userInput 1', expectedOutput: 'Test answer 1' }, - target, - evaluators: [evaluators[0]], - status: EvaluationStatusEnum.queuing - }, - { - evalId: testEvaluationId, - dataItem: { userInput: 'Test userInput 2', expectedOutput: 'Test answer 2' }, - target, - evaluators: [evaluators[0]], - status: EvaluationStatusEnum.completed, - evaluatorOutputs: [ - { - metricName: 'Test Metric', - data: { - score: 85 - } - } - ] - } - ]); - - const itemCount = await MongoEvalItem.countDocuments({ evalId: testEvaluationId }); - expect(itemCount).toBeGreaterThan(0); // 确保有评估项存在 - - await EvaluationTaskService.deleteEvaluation(testEvaluationId, teamId); - - // 验证评估任务被删除 - await expect(EvaluationTaskService.getEvaluation(testEvaluationId, teamId)).rejects.toThrow( - 'evaluationTaskNotFound' - ); - - // 验证所有评估项被删除 - const remainingItems = await 
MongoEvalItem.countDocuments({ evalId: testEvaluationId }); - expect(remainingItems).toBe(0); - }); - }); - - // ========================= 评估任务处理流程测试 ========================= - describe('Evaluation Task Processing Flow', () => { - let mockTargetInstance: any; - let mockEvaluatorInstance: any; - - beforeEach(() => { - // Mock target and evaluator instances - mockTargetInstance = { - execute: vi.fn().mockResolvedValue({ - actualOutput: 'Mock target output', - responseTime: 1000, - retrievalContext: ['context1', 'context2'], - usage: [{ totalPoints: 50 }] - }) - }; - - mockEvaluatorInstance = { - evaluate: vi.fn().mockResolvedValue({ - metricName: 'Test Metric', - status: MetricResultStatusEnum.Success, - data: { - score: 85, - runLogs: { usage: { totalPoints: 20 } } - }, - totalPoints: 20, - usages: [{ promptTokens: 10, completionTokens: 10 }] - }) - }; - - (createTargetInstance as any).mockReturnValue(mockTargetInstance); - (createEvaluatorInstance as any).mockResolvedValue(mockEvaluatorInstance); - }); - - test('应该正确处理评估任务流程', async () => { - // Import the processor module after mocking - const { evaluationTaskProcessor } = await import( - '@fastgpt/service/core/evaluation/task/processor' - ); - - // 为这个测试创建独立的数据集 - const testDataset = await MongoEvalDatasetCollection.create({ - teamId: new Types.ObjectId(teamId), - tmbId: new Types.ObjectId(tmbId), - name: 'Test Dataset for Task Processing', - description: 'Dataset for task processing test' - }); - - // 创建数据集数据项 - await MongoEvalDatasetData.create([ - { - teamId: new Types.ObjectId(teamId), - tmbId: new Types.ObjectId(tmbId), - evalDatasetCollectionId: testDataset._id, - userInput: 'What is AI?', - expectedOutput: 'Artificial Intelligence' - }, - { - teamId: new Types.ObjectId(teamId), - tmbId: new Types.ObjectId(tmbId), - evalDatasetCollectionId: testDataset._id, - userInput: 'What is ML?', - expectedOutput: 'Machine Learning' - } - ]); - - // 创建测试评估任务 - const params: CreateEvaluationParams = { - name: 
'Processing Flow Test', - description: 'Test evaluation processing', - evalDatasetCollectionId: testDataset._id.toString(), - target, - evaluators: evaluators - }; - const evaluation = await EvaluationTaskService.createEvaluation({ - ...params, - teamId: teamId, - tmbId: tmbId - }); - const evalId = evaluation._id; - - // Mock job data for task processor - const taskJobData: EvaluationTaskJobData = { - evalId - }; - - const mockJob = { - data: taskJobData, - updateProgress: vi.fn().mockResolvedValue(undefined) - } as any; - - // 执行任务处理器 - await evaluationTaskProcessor(mockJob); - - // 验证评估项是否被创建 - const evalItems = await MongoEvalItem.find({ evalId }); - // 数据集有2个dataItems,每个有1个evaluator,总共应该有2个评估项 - expect(evalItems.length).toBe(2); - - // 验证评估项队列是否被调用 - expect(evaluationItemQueue.addBulk).toHaveBeenCalled(); - }); - - test('应该正确处理评估项执行流程', async () => { - const { evaluationItemProcessor } = await import( - '@fastgpt/service/core/evaluation/task/processor' - ); - - // 创建测试评估项 - const testEvaluationId = new Types.ObjectId(); - const evalItem = await MongoEvalItem.create({ - evalId: testEvaluationId, - dataItem: { - userInput: 'Test input', - expectedOutput: 'Expected output', - context: ['context1'] - }, - target, - evaluators: [evaluators[0]], - status: EvaluationStatusEnum.queuing, - retry: 3 - }); - - // Mock evaluation record - await MongoEvaluation.create({ - _id: testEvaluationId, - teamId: new Types.ObjectId(teamId), - tmbId: new Types.ObjectId(tmbId), - name: 'Test Evaluation Item Processing', - evalDatasetCollectionId, - target, - evaluators: evaluators, - usageId: new Types.ObjectId(), - status: EvaluationStatusEnum.evaluating - }); - - const itemJobData: EvaluationItemJobData = { - evalId: testEvaluationId.toString(), - evalItemId: evalItem._id.toString() - }; - - const mockJob = { - data: itemJobData, - updateProgress: vi.fn().mockResolvedValue(undefined) - } as any; - - // 执行评估项处理器 - await evaluationItemProcessor(mockJob); - - // 验证评估项状态更新 - const 
updatedItem = await MongoEvalItem.findById(evalItem._id); - expect(updatedItem?.status).toBe(EvaluationStatusEnum.completed); - expect(updatedItem?.targetOutput).toBeDefined(); - expect(updatedItem?.evaluatorOutputs).toBeDefined(); - expect(updatedItem?.evaluatorOutputs?.length).toBeGreaterThan(0); - - // 验证目标和评估器被调用 - expect(mockTargetInstance.execute).toHaveBeenCalledWith({ - userInput: 'Test input', - context: ['context1'], - targetCallParams: undefined - }); - - expect(mockEvaluatorInstance.evaluate).toHaveBeenCalledWith({ - userInput: 'Test input', - expectedOutput: 'Expected output', - actualOutput: 'Mock target output', - context: ['context1'], - retrievalContext: ['context1', 'context2'] - }); - }); - - test('应该支持检查点恢复机制', async () => { - const { evaluationItemProcessor } = await import( - '@fastgpt/service/core/evaluation/task/processor' - ); - - // 创建部分完成的评估项(已有target输出) - const testEvaluationId = new Types.ObjectId(); - const evalItem = await MongoEvalItem.create({ - evalId: testEvaluationId, - dataItem: { - userInput: 'Test input', - expectedOutput: 'Expected output' - }, - target, - evaluators: [evaluators[0]], - status: EvaluationStatusEnum.evaluating, - targetOutput: { - actualOutput: 'Existing target output', - responseTime: 500, - usage: [{ totalPoints: 30 }] - }, - retry: 3 - }); - - await MongoEvaluation.create({ - _id: testEvaluationId, - teamId: new Types.ObjectId(teamId), - tmbId: new Types.ObjectId(tmbId), - name: 'Test Checkpoint Recovery', - evalDatasetCollectionId, - target, - evaluators: evaluators, - usageId: new Types.ObjectId(), - status: EvaluationStatusEnum.evaluating - }); - - const itemJobData: EvaluationItemJobData = { - evalId: testEvaluationId.toString(), - evalItemId: evalItem._id.toString() - }; - - const mockJob = { - data: itemJobData, - updateProgress: vi.fn().mockResolvedValue(undefined) - } as any; - - await evaluationItemProcessor(mockJob); - - // 验证target不被重新执行,直接使用已有输出 - 
expect(mockTargetInstance.execute).not.toHaveBeenCalled(); - - // 验证evaluator使用已有的target输出 - expect(mockEvaluatorInstance.evaluate).toHaveBeenCalledWith({ - userInput: 'Test input', - expectedOutput: 'Expected output', - actualOutput: 'Existing target output', - context: undefined, - retrievalContext: undefined - }); - }); - }); - - // ========================= 重试机制验收测试 ========================= - describe('Retry Mechanism Tests', () => { - let mockTargetInstance: any; - let mockEvaluatorInstance: any; - - beforeEach(() => { - mockTargetInstance = { - execute: vi.fn() - }; - mockEvaluatorInstance = { - evaluate: vi.fn() - }; - (createTargetInstance as any).mockReturnValue(mockTargetInstance); - (createEvaluatorInstance as any).mockResolvedValue(mockEvaluatorInstance); - }); - - test('网络错误应该触发重试机制', async () => { - const { evaluationItemProcessor } = await import( - '@fastgpt/service/core/evaluation/task/processor' - ); - - // 创建测试评估项 - const testEvaluationId = new Types.ObjectId(); - const evalItem = await MongoEvalItem.create({ - evalId: testEvaluationId, - dataItem: { userInput: 'Network test', expectedOutput: 'Expected' }, - target, - evaluator: evaluators[0], - status: EvaluationStatusEnum.queuing, - retry: 3 - }); - - await MongoEvaluation.create({ - _id: testEvaluationId, - teamId: new Types.ObjectId(teamId), - tmbId: new Types.ObjectId(tmbId), - name: 'Network Error Test', - evalDatasetCollectionId, - target, - evaluators: evaluators, - usageId: new Types.ObjectId(), - status: EvaluationStatusEnum.evaluating - }); - - // Mock network error - const networkError = new Error('NETWORK_ERROR: Connection timeout'); - mockTargetInstance.execute.mockRejectedValue(networkError); - - const itemJobData: EvaluationItemJobData = { - evalId: testEvaluationId.toString(), - evalItemId: evalItem._id.toString() - }; - - const mockJob = { - data: itemJobData, - updateProgress: vi.fn().mockResolvedValue(undefined) - } as any; - - await evaluationItemProcessor(mockJob); - - // 
验证评估项被重新入队 - expect(evaluationItemQueue.add).toHaveBeenCalledWith( - expect.stringContaining(`eval_item_${evalItem._id.toString()}_retry`), - { - evalId: testEvaluationId.toString(), - evalItemId: evalItem._id.toString() - }, - expect.objectContaining({ - delay: expect.any(Number) - }) - ); - - // 验证重试计数器减少 - const updatedItem = await MongoEvalItem.findById(evalItem._id); - expect(updatedItem?.retry).toBe(2); - expect(updatedItem?.status).toBe(EvaluationStatusEnum.queuing); - }); - - test('非可重试错误应该直接标记为失败', async () => { - const { evaluationItemProcessor } = await import( - '@fastgpt/service/core/evaluation/task/processor' - ); - - const testEvaluationId = new Types.ObjectId(); - const evalItem = await MongoEvalItem.create({ - evalId: testEvaluationId, - dataItem: { userInput: 'Fatal error test', expectedOutput: 'Expected' }, - target, - evaluator: evaluators[0], - status: EvaluationStatusEnum.queuing, - retry: 3 - }); - - await MongoEvaluation.create({ - _id: testEvaluationId, - teamId: new Types.ObjectId(teamId), - tmbId: new Types.ObjectId(tmbId), - name: 'Fatal Error Test', - evalDatasetCollectionId, - target, - evaluators: evaluators, - usageId: new Types.ObjectId(), - status: EvaluationStatusEnum.evaluating - }); - - // Mock fatal error (not retriable) - const fatalError = new Error('FATAL_ERROR: Invalid configuration'); - mockTargetInstance.execute.mockRejectedValue(fatalError); - - const itemJobData: EvaluationItemJobData = { - evalId: testEvaluationId.toString(), - evalItemId: evalItem._id.toString() - }; - - const mockJob = { - data: itemJobData, - updateProgress: vi.fn().mockResolvedValue(undefined) - } as any; - - await evaluationItemProcessor(mockJob); - - // 验证不会重新入队 - expect(evaluationItemQueue.add).not.toHaveBeenCalledWith( - expect.stringContaining('eval_item_retry_'), - expect.any(Object), - expect.any(Object) - ); - - // 验证直接标记为错误 - const updatedItem = await MongoEvalItem.findById(evalItem._id); - 
expect(updatedItem?.status).toBe(EvaluationStatusEnum.error); - expect(updatedItem?.retry).toBe(0); - expect(updatedItem?.errorMessage).toContain('FATAL_ERROR'); - }); - - test('重试次数耗尽时应该标记为失败', async () => { - const { evaluationItemProcessor } = await import( - '@fastgpt/service/core/evaluation/task/processor' - ); - - const testEvaluationId = new Types.ObjectId(); - const evalItem = await MongoEvalItem.create({ - evalId: testEvaluationId, - dataItem: { userInput: 'Exhausted retry test', expectedOutput: 'Expected' }, - target, - evaluator: evaluators[0], - status: EvaluationStatusEnum.queuing, - retry: 1 // 只剩1次重试机会 - }); - - await MongoEvaluation.create({ - _id: testEvaluationId, - teamId: new Types.ObjectId(teamId), - tmbId: new Types.ObjectId(tmbId), - name: 'Exhausted Retry Test', - evalDatasetCollectionId, - target, - evaluators: evaluators, - usageId: new Types.ObjectId(), - status: EvaluationStatusEnum.evaluating - }); - - // Mock retriable error - const retriableError = new Error('TIMEOUT: Request timeout'); - mockTargetInstance.execute.mockRejectedValue(retriableError); - - const itemJobData: EvaluationItemJobData = { - evalId: testEvaluationId.toString(), - evalItemId: evalItem._id.toString() - }; - - const mockJob = { - data: itemJobData, - updateProgress: vi.fn().mockResolvedValue(undefined) - } as any; - - await evaluationItemProcessor(mockJob); - - // 验证最后一次重试失败后不再重新入队 - const updatedItem = await MongoEvalItem.findById(evalItem._id); - expect(updatedItem?.status).toBe(EvaluationStatusEnum.error); - expect(updatedItem?.retry).toBe(0); - expect(updatedItem?.errorMessage).toContain('TIMEOUT'); - }); - - test('AI积分不足应该暂停整个任务项', async () => { - const { evaluationItemProcessor } = await import( - '@fastgpt/service/core/evaluation/task/processor' - ); - - const testEvaluationId = new Types.ObjectId(); - const evalItem = await MongoEvalItem.create({ - evalId: testEvaluationId, - dataItem: { userInput: 'AI Points test', expectedOutput: 'Expected' }, - target, 
- evaluator: evaluators[0], - status: EvaluationStatusEnum.queuing, - retry: 3 - }); - - await MongoEvaluation.create({ - _id: testEvaluationId, - teamId: new Types.ObjectId(teamId), - tmbId: new Types.ObjectId(tmbId), - name: 'AI Points Insufficient Test', - evalDatasetCollectionId, - target, - evaluators: evaluators, - usageId: new Types.ObjectId(), - status: EvaluationStatusEnum.evaluating - }); - - // Mock AI Points insufficient error - (checkTeamAIPoints as any).mockRejectedValue(TeamErrEnum.aiPointsNotEnough); - - const itemJobData: EvaluationItemJobData = { - evalId: testEvaluationId.toString(), - evalItemId: evalItem._id.toString() - }; - - const mockJob = { - data: itemJobData, - updateProgress: vi.fn().mockResolvedValue(undefined) - } as any; - - await evaluationItemProcessor(mockJob); - - // 验证任务被执行完成, 任务项被暂停(error) - const updatedEvaluation = await MongoEvaluation.findById(testEvaluationId); - const updatedEvaluationItem = await MongoEvalItem.findById(evalItem._id); - expect(updatedEvaluation?.status).toBe(EvaluationStatusEnum.error); - expect(updatedEvaluationItem?.status).toBe(EvaluationStatusEnum.error); - expect(updatedEvaluationItem?.errorMessage).toBe( - '[ResourceCheck] ' + getErrText(TeamErrEnum.aiPointsNotEnough) - ); - }); - - test('指数退避延迟应该正确计算', async () => { - const { evaluationItemProcessor } = await import( - '@fastgpt/service/core/evaluation/task/processor' - ); - - const testCases = [ - { retry: 3, expectedMaxDelay: 2000 }, // newRetryCount=2: 2^(3-2) * 1000 = 2000 - { retry: 2, expectedMaxDelay: 4000 }, // newRetryCount=1: 2^(3-1) * 1000 = 4000 - { retry: 1, expectedMaxDelay: 8000 } // newRetryCount=0: 2^(3-0) * 1000 = 8000 - ]; - - for (const testCase of testCases) { - // 清除之前的mock调用 - (evaluationItemQueue.add as any).mockClear(); - - const testEvaluationId = new Types.ObjectId(); - const evalItem = await MongoEvalItem.create({ - evalId: testEvaluationId, - dataItem: { userInput: `Backoff test ${testCase.retry}`, expectedOutput: 
'Expected' }, - target, - evaluators: [evaluators[0]], - status: EvaluationStatusEnum.queuing, - retry: testCase.retry - }); - - await MongoEvaluation.create({ - _id: testEvaluationId, - teamId: new Types.ObjectId(teamId), - tmbId: new Types.ObjectId(tmbId), - name: `Backoff Test ${testCase.retry}`, - evalDatasetCollectionId, - target, - evaluators: evaluators, - usageId: new Types.ObjectId(), - status: EvaluationStatusEnum.evaluating - }); - - // Mock network error - const networkError = new Error('ECONNRESET'); - mockTargetInstance.execute.mockRejectedValue(networkError); - - const itemJobData: EvaluationItemJobData = { - evalId: testEvaluationId.toString(), - evalItemId: evalItem._id.toString() - }; - - const mockJob = { - data: itemJobData, - updateProgress: vi.fn().mockResolvedValue(undefined) - } as any; - - await evaluationItemProcessor(mockJob); - - // 验证延迟参数 - 因为是重试,应该会调用add方法 - if ((evaluationItemQueue.add as any).mock.calls.length > 0) { - const addCall = (evaluationItemQueue.add as any).mock.calls[0]; - const delayOption = addCall[2]; - console.log( - `Retry ${testCase.retry} -> Expected: ${testCase.expectedMaxDelay}, Actual: ${delayOption.delay}` - ); - expect(delayOption.delay).toBe(testCase.expectedMaxDelay); - } - } - }); - }); - - // ========================= 数据一致性和并发处理测试 ========================= - describe('Data Consistency and Concurrency Tests', () => { - test('任务完成检查应该防止竞争条件', async () => { - const { finishEvaluationTask } = await import( - '@fastgpt/service/core/evaluation/task/processor' - ); - - const testEvaluationId = new Types.ObjectId(); - - // 创建测试任务 - await MongoEvaluation.create({ - _id: testEvaluationId, - teamId: new Types.ObjectId(teamId), - tmbId: new Types.ObjectId(tmbId), - name: 'Concurrency Test', - evalDatasetCollectionId, - target, - evaluators: evaluators, - usageId: new Types.ObjectId(), - status: EvaluationStatusEnum.evaluating - }); - - // 创建已完成的评估项 - await MongoEvalItem.create([ - { - evalId: testEvaluationId, - 
dataItem: { userInput: 'Q1', expectedOutput: 'A1' }, - target, - evaluators: [evaluators[0]], - status: EvaluationStatusEnum.completed, - evaluatorOutputs: [{ metricName: 'Test', data: { score: 85 } }] - }, - { - evalId: testEvaluationId, - dataItem: { userInput: 'Q2', expectedOutput: 'A2' }, - target, - evaluators: [evaluators[0]], - status: EvaluationStatusEnum.completed, - evaluatorOutputs: [{ metricName: 'Test', data: { score: 95 } }] - } - ]); - - // 同时调用多次完成检查(模拟并发场景) - await Promise.all([ - finishEvaluationTask(testEvaluationId.toString()), - finishEvaluationTask(testEvaluationId.toString()), - finishEvaluationTask(testEvaluationId.toString()) - ]); - - // 验证任务状态正确更新 - const finalEvaluation = await MongoEvaluation.findById(testEvaluationId); - expect(finalEvaluation?.status).toBe(EvaluationStatusEnum.completed); - expect(finalEvaluation?.finishTime).toBeDefined(); - }); - - test('部分完成的任务不应该被标记为完成', async () => { - const { finishEvaluationTask } = await import( - '@fastgpt/service/core/evaluation/task/processor' - ); - - const testEvaluationId = new Types.ObjectId(); - - const originalEvaluation = await MongoEvaluation.create({ - _id: testEvaluationId, - teamId: new Types.ObjectId(teamId), - tmbId: new Types.ObjectId(tmbId), - name: 'Partial Completion Test', - evalDatasetCollectionId, - target, - evaluators: evaluators, - usageId: new Types.ObjectId(), - status: EvaluationStatusEnum.evaluating - }); - - // 创建混合状态的评估项 - await MongoEvalItem.create([ - { - evalId: testEvaluationId, - dataItem: { userInput: 'Q1', expectedOutput: 'A1' }, - target, - evaluators: [evaluators[0]], - status: EvaluationStatusEnum.completed, - evaluatorOutputs: [{ metricName: 'Test', data: { score: 85 } }] - }, - { - evalId: testEvaluationId, - dataItem: { userInput: 'Q2', expectedOutput: 'A2' }, - target, - evaluators: [evaluators[0]], - status: EvaluationStatusEnum.evaluating // 仍在处理中 - } - ]); - - await finishEvaluationTask(testEvaluationId.toString()); - - // 验证任务不会被标记为完成 - const 
evaluation = await MongoEvaluation.findById(testEvaluationId); - // finishEvaluationTask应该检测到有pending项目,不更新任务状态 - // 因此状态应该保持原来的evaluating状态 - expect(evaluation?.status).toBe(originalEvaluation.status); - expect(evaluation?.finishTime).toBeUndefined(); - }); - }); - - // ========================= 聚合错误处理测试 ========================= - describe('Aggregated Error Handling Tests', () => { - let mockTargetInstance: any; - let mockEvaluatorInstance: any; - - beforeEach(() => { - // Clear all mocks before each test - vi.clearAllMocks(); - - // Reset AI Points check to pass normally - (checkTeamAIPoints as any).mockResolvedValue(undefined); - - mockTargetInstance = { - execute: vi.fn().mockResolvedValue({ - actualOutput: 'Mock target output', - responseTime: 1000, - usage: [{ totalPoints: 50 }] - }) - }; - mockEvaluatorInstance = { - evaluate: vi.fn() - }; - (createTargetInstance as any).mockReturnValue(mockTargetInstance); - (createEvaluatorInstance as any).mockResolvedValue(mockEvaluatorInstance); - }); - - test('应该收集所有评估器错误并继续执行', async () => { - const { evaluationItemProcessor } = await import( - '@fastgpt/service/core/evaluation/task/processor' - ); - - const testEvaluationId = new Types.ObjectId(); - - // 创建有多个评估器的评估项 - const multipleEvaluators = [ - { - metric: { - _id: new Types.ObjectId(), - name: 'Metric 1', - type: EvalMetricTypeEnum.Custom, - prompt: 'Test prompt 1' - }, - runtimeConfig: { llm: 'gpt-3.5-turbo' } - }, - { - metric: { - _id: new Types.ObjectId(), - name: 'Metric 2', - type: EvalMetricTypeEnum.Custom, - prompt: 'Test prompt 2' - }, - runtimeConfig: { llm: 'gpt-3.5-turbo' } - }, - { - metric: { - _id: new Types.ObjectId(), - name: 'Metric 3', - type: EvalMetricTypeEnum.Custom, - prompt: 'Test prompt 3' - }, - runtimeConfig: { llm: 'gpt-3.5-turbo' } - } - ]; - - const evalItem = await MongoEvalItem.create({ - evalId: testEvaluationId, - dataItem: { userInput: 'Test input', expectedOutput: 'Expected output' }, - target, - evaluators: 
multipleEvaluators, - status: EvaluationStatusEnum.queuing, - retry: 3 - }); - - await MongoEvaluation.create({ - _id: testEvaluationId, - teamId: new Types.ObjectId(teamId), - tmbId: new Types.ObjectId(tmbId), - name: 'Aggregated Error Test', - evalDatasetCollectionId, - target, - evaluators: multipleEvaluators, - usageId: new Types.ObjectId(), - status: EvaluationStatusEnum.evaluating - }); - - // Mock evaluators - 第一个成功,第二个和第三个失败(使用明确不可重试的错误) - mockEvaluatorInstance.evaluate - .mockResolvedValueOnce({ - metricName: 'Metric 1', - status: MetricResultStatusEnum.Success, - data: { score: 85 }, - totalPoints: 20, - usages: [{ promptTokens: 10, completionTokens: 10 }] - }) - .mockResolvedValueOnce({ - metricName: 'Metric 2', - status: MetricResultStatusEnum.Failed, - error: 'AUTHENTICATION_FAILED: Invalid API key provided.', - totalPoints: 15, - usages: [{ promptTokens: 8, completionTokens: 7 }] - }) - .mockResolvedValueOnce({ - metricName: 'Metric 3', - status: MetricResultStatusEnum.Failed, - error: 'VALIDATION_ERROR: Input validation failed.', - totalPoints: 10, - usages: [{ promptTokens: 5, completionTokens: 5 }] - }); - - const itemJobData: EvaluationItemJobData = { - evalId: testEvaluationId.toString(), - evalItemId: evalItem._id.toString() - }; - - const mockJob = { - data: itemJobData, - updateProgress: vi.fn().mockResolvedValue(undefined) - } as any; - - await evaluationItemProcessor(mockJob); - - // 验证评估项被标记为错误状态 - const updatedItem = await MongoEvalItem.findById(evalItem._id); - expect(updatedItem?.status).toBe(EvaluationStatusEnum.error); - - // 验证错误消息包含所有失败的评估器信息 - expect(updatedItem?.errorMessage).toContain('[EvaluatorExecute]'); - expect(updatedItem?.errorMessage).toContain( - 'Metric 2: AUTHENTICATION_FAILED: Invalid API key provided.' - ); - expect(updatedItem?.errorMessage).toContain( - 'Metric 3: VALIDATION_ERROR: Input validation failed.' 
- ); - - // 验证所有评估器的用量都被记录 - expect(concatUsage).toHaveBeenCalled(); - - // 验证所有三个评估器都被调用 - expect(mockEvaluatorInstance.evaluate).toHaveBeenCalledTimes(3); - }); - - test('应该正确处理部分评估器失败的聚合错误可重试性', async () => { - const { evaluationItemProcessor } = await import( - '@fastgpt/service/core/evaluation/task/processor' - ); - - const testEvaluationId = new Types.ObjectId(); - - const multipleEvaluators = [ - { - metric: { - _id: new Types.ObjectId(), - name: 'Metric 1', - type: EvalMetricTypeEnum.Custom - }, - runtimeConfig: { llm: 'gpt-3.5-turbo' } - }, - { - metric: { - _id: new Types.ObjectId(), - name: 'Metric 2', - type: EvalMetricTypeEnum.Custom - }, - runtimeConfig: { llm: 'gpt-3.5-turbo' } - } - ]; - - const evalItem = await MongoEvalItem.create({ - evalId: testEvaluationId, - dataItem: { userInput: 'Test input', expectedOutput: 'Expected output' }, - target, - evaluators: multipleEvaluators, - status: EvaluationStatusEnum.queuing, - retry: 3 - }); - - await MongoEvaluation.create({ - _id: testEvaluationId, - teamId: new Types.ObjectId(teamId), - tmbId: new Types.ObjectId(tmbId), - name: 'Retry Aggregated Error Test', - evalDatasetCollectionId, - target, - evaluators: multipleEvaluators, - usageId: new Types.ObjectId(), - status: EvaluationStatusEnum.evaluating - }); - - // Mock evaluators - 一个可重试错误,一个不可重试错误 - mockEvaluatorInstance.evaluate - .mockResolvedValueOnce({ - metricName: 'Metric 1', - status: MetricResultStatusEnum.Failed, - error: 'TIMEOUT: Request timeout', // 可重试 - totalPoints: 20, - usages: [{ promptTokens: 10, completionTokens: 10 }] - }) - .mockResolvedValueOnce({ - metricName: 'Metric 2', - status: MetricResultStatusEnum.Failed, - error: 'INVALID_CONFIG: Configuration error', // 不可重试 - totalPoints: 15, - usages: [{ promptTokens: 8, completionTokens: 7 }] - }); - - const itemJobData: EvaluationItemJobData = { - evalId: testEvaluationId.toString(), - evalItemId: evalItem._id.toString() - }; - - const mockJob = { - data: itemJobData, - 
updateProgress: vi.fn().mockResolvedValue(undefined) - } as any; - - await evaluationItemProcessor(mockJob); - - // 验证评估项被重新排队(因为有可重试错误) - const updatedItem = await MongoEvalItem.findById(evalItem._id); - expect(updatedItem?.status).toBe(EvaluationStatusEnum.queuing); - expect(updatedItem?.retry).toBe(2); - - // 验证重新排队的调用 - expect(evaluationItemQueue.add).toHaveBeenCalledWith( - expect.stringContaining(`eval_item_${evalItem._id.toString()}_retry`), - { - evalId: testEvaluationId.toString(), - evalItemId: evalItem._id.toString() - }, - expect.objectContaining({ - delay: expect.any(Number) - }) - ); - }); - - test('应该正确处理所有评估器都成功的情况', async () => { - const { evaluationItemProcessor } = await import( - '@fastgpt/service/core/evaluation/task/processor' - ); - - const testEvaluationId = new Types.ObjectId(); - - const multipleEvaluators = [ - { - metric: { - _id: new Types.ObjectId(), - name: 'Metric 1', - type: EvalMetricTypeEnum.Custom - }, - runtimeConfig: { llm: 'gpt-3.5-turbo' } - }, - { - metric: { - _id: new Types.ObjectId(), - name: 'Metric 2', - type: EvalMetricTypeEnum.Custom - }, - runtimeConfig: { llm: 'gpt-3.5-turbo' } - } - ]; - - const evalItem = await MongoEvalItem.create({ - evalId: testEvaluationId, - dataItem: { userInput: 'Test input', expectedOutput: 'Expected output' }, - target, - evaluators: multipleEvaluators, - status: EvaluationStatusEnum.queuing, - retry: 3 - }); - - await MongoEvaluation.create({ - _id: testEvaluationId, - teamId: new Types.ObjectId(teamId), - tmbId: new Types.ObjectId(tmbId), - name: 'All Success Test', - evalDatasetCollectionId, - target, - evaluators: multipleEvaluators, - usageId: new Types.ObjectId(), - status: EvaluationStatusEnum.evaluating - }); - - // Mock evaluators - 都成功 - mockEvaluatorInstance.evaluate - .mockResolvedValueOnce({ - metricName: 'Metric 1', - status: MetricResultStatusEnum.Success, - data: { score: 85 }, - totalPoints: 20, - usages: [{ promptTokens: 10, completionTokens: 10 }] - }) - 
.mockResolvedValueOnce({ - metricName: 'Metric 2', - status: MetricResultStatusEnum.Success, - data: { score: 90 }, - totalPoints: 15, - usages: [{ promptTokens: 8, completionTokens: 7 }] - }); - - const itemJobData: EvaluationItemJobData = { - evalId: testEvaluationId.toString(), - evalItemId: evalItem._id.toString() - }; - - const mockJob = { - data: itemJobData, - updateProgress: vi.fn().mockResolvedValue(undefined) - } as any; - - await evaluationItemProcessor(mockJob); - - // 验证评估项被标记为完成 - const updatedItem = await MongoEvalItem.findById(evalItem._id); - expect(updatedItem?.status).toBe(EvaluationStatusEnum.completed); - expect(updatedItem?.evaluatorOutputs).toHaveLength(2); - expect(updatedItem?.evaluatorOutputs?.[0].data?.score).toBe(85); - expect(updatedItem?.evaluatorOutputs?.[1].data?.score).toBe(90); - - // 验证用量记录 - expect(concatUsage).toHaveBeenCalled(); - }); - - test('应该在评估器抛出异常时正确处理', async () => { - const { evaluationItemProcessor } = await import( - '@fastgpt/service/core/evaluation/task/processor' - ); - - const testEvaluationId = new Types.ObjectId(); - - const singleEvaluator = [ - { - metric: { - _id: new Types.ObjectId(), - name: 'Exception Metric', - type: EvalMetricTypeEnum.Custom - }, - runtimeConfig: { llm: 'gpt-3.5-turbo' } - } - ]; - - const evalItem = await MongoEvalItem.create({ - evalId: testEvaluationId, - dataItem: { userInput: 'Test input', expectedOutput: 'Expected output' }, - target, - evaluators: singleEvaluator, - status: EvaluationStatusEnum.queuing, - retry: 3 - }); - - await MongoEvaluation.create({ - _id: testEvaluationId, - teamId: new Types.ObjectId(teamId), - tmbId: new Types.ObjectId(tmbId), - name: 'Exception Test', - evalDatasetCollectionId, - target, - evaluators: singleEvaluator, - usageId: new Types.ObjectId(), - status: EvaluationStatusEnum.evaluating - }); - - // Mock evaluator抛出异常 - mockEvaluatorInstance.evaluate.mockRejectedValue( - new Error('NETWORK_ERROR: Connection failed') - ); - - const itemJobData: 
EvaluationItemJobData = { - evalId: testEvaluationId.toString(), - evalItemId: evalItem._id.toString() - }; - - const mockJob = { - data: itemJobData, - updateProgress: vi.fn().mockResolvedValue(undefined) - } as any; - - await evaluationItemProcessor(mockJob); - - // 验证评估项被重新排队(异常应该被当作可重试错误处理) - const updatedItem = await MongoEvalItem.findById(evalItem._id); - expect(updatedItem?.status).toBe(EvaluationStatusEnum.queuing); - expect(updatedItem?.retry).toBe(2); - expect(updatedItem?.errorMessage).toContain('[EvaluatorExecute]'); - expect(updatedItem?.errorMessage).toContain('NETWORK_ERROR'); - }); - }); - - // ========================= 错误处理和状态管理测试 ========================= - describe('Error Handling and Status Management Tests', () => { - test('评估项处理失败时应该正确清理状态', async () => { - const { evaluationItemProcessor } = await import( - '@fastgpt/service/core/evaluation/task/processor' - ); - - const testEvaluationId = new Types.ObjectId(); - const evalItem = await MongoEvalItem.create({ - _id: '6666666c506834bfaa7a3a0d', - evalId: testEvaluationId, - dataItem: { userInput: 'Error cleanup test', expectedOutput: 'Expected' }, - target, - evaluators: [evaluators[0]], - status: EvaluationStatusEnum.evaluating, - targetOutput: { actualOutput: 'Partial result', responseTime: 500 }, - retry: 3 - }); - - await MongoEvaluation.create({ - _id: testEvaluationId, - teamId: new Types.ObjectId(teamId), - tmbId: new Types.ObjectId(tmbId), - name: 'Error Cleanup Test', - evalDatasetCollectionId, - target, - evaluators: evaluators, - usageId: new Types.ObjectId(), - status: EvaluationStatusEnum.evaluating - }); - - // Reset AI Points check to pass normally - (checkTeamAIPoints as any).mockResolvedValue(undefined); - - const mockTargetInstance = { - execute: vi.fn().mockRejectedValue(new Error('NETWORK_ERROR: Cleanup test')) - }; - (createTargetInstance as any).mockReturnValue(mockTargetInstance); - - const mockEvaluatorInstance = { - evaluate: vi.fn().mockRejectedValue(new 
Error('NETWORK_ERROR: Cleanup test')) - }; - (createEvaluatorInstance as any).mockResolvedValue(mockEvaluatorInstance); - - const itemJobData: EvaluationItemJobData = { - evalId: testEvaluationId.toString(), - evalItemId: evalItem._id.toString() - }; - - const mockJob = { - data: itemJobData, - updateProgress: vi.fn().mockResolvedValue(undefined) - } as any; - - await evaluationItemProcessor(mockJob); - - // 验证部分结果被清理 - const updatedItem = await MongoEvalItem.findById(evalItem._id); - expect(updatedItem?.status).toBe(EvaluationStatusEnum.queuing); - expect(updatedItem?.retry).toBe(2); - }); - - test('任务完成统计应该正确处理所有状态', async () => { - const { finishEvaluationTask } = await import( - '@fastgpt/service/core/evaluation/task/processor' - ); - - const testEvaluationId = new Types.ObjectId(); - - await MongoEvaluation.create({ - _id: testEvaluationId, - teamId: new Types.ObjectId(teamId), - tmbId: new Types.ObjectId(tmbId), - name: 'Statistics Test', - evalDatasetCollectionId, - target, - evaluators: evaluators, - usageId: new Types.ObjectId(), - status: EvaluationStatusEnum.evaluating - }); - - // 创建各种状态的评估项 - await MongoEvalItem.create([ - { - evalId: testEvaluationId, - dataItem: { userInput: 'Success 1', expectedOutput: 'A1' }, - target, - evaluators: [evaluators[0]], - status: EvaluationStatusEnum.completed, - evaluatorOutputs: [{ metricName: 'Test', data: { score: 85 } }] - }, - { - evalId: testEvaluationId, - dataItem: { userInput: 'Success 2', expectedOutput: 'A2' }, - target, - evaluators: [evaluators[0]], - status: EvaluationStatusEnum.completed, - evaluatorOutputs: [{ metricName: 'Test', data: { score: 95 } }] - }, - { - evalId: testEvaluationId, - dataItem: { userInput: 'Failed', expectedOutput: 'A3' }, - target, - evaluators: [evaluators[0]], - status: EvaluationStatusEnum.error, - errorMessage: 'Test error' - } - ]); - - await finishEvaluationTask(testEvaluationId.toString()); - - const finalEvaluation = await MongoEvaluation.findById(testEvaluationId); - 
expect(finalEvaluation?.status).toBe(EvaluationStatusEnum.error); - expect(finalEvaluation?.statistics?.totalItems).toBe(3); - expect(finalEvaluation?.statistics?.completedItems).toBe(2); - expect(finalEvaluation?.statistics?.errorItems).toBe(1); - }); - }); -}); diff --git a/test/cases/service/core/evaluation/task/evaluationTaskService.test.ts b/test/cases/service/core/evaluation/task/evaluationTaskService.test.ts new file mode 100644 index 000000000000..26e6bd6fbff0 --- /dev/null +++ b/test/cases/service/core/evaluation/task/evaluationTaskService.test.ts @@ -0,0 +1,524 @@ +import { beforeAll, afterAll, beforeEach, describe, test, expect, vi } from 'vitest'; +import { EvaluationTaskService } from '@fastgpt/service/core/evaluation/task'; +import { MongoEvaluation, MongoEvalItem } from '@fastgpt/service/core/evaluation/task/schema'; +import { MongoEvalDatasetCollection } from '@fastgpt/service/core/evaluation/dataset/evalDatasetCollectionSchema'; +import { MongoEvalDatasetData } from '@fastgpt/service/core/evaluation/dataset/evalDatasetDataSchema'; +import { MongoEvalMetric } from '@fastgpt/service/core/evaluation/metric/schema'; +import type { + CreateEvaluationParams, + EvalTarget, + EvaluatorSchema +} from '@fastgpt/global/core/evaluation/type'; +import { EvaluationStatusEnum } from '@fastgpt/global/core/evaluation/constants'; +import { Types } from '@fastgpt/service/common/mongo'; +import { EvaluationErrEnum } from '@fastgpt/global/common/error/code/evaluation'; +import { EvalMetricTypeEnum } from '@fastgpt/global/core/evaluation/metric/constants'; + +// Mock all external dependencies +vi.mock('@fastgpt/service/core/evaluation/task/mq'); +vi.mock('@fastgpt/service/support/wallet/usage/controller'); +vi.mock('@fastgpt/service/common/system/log'); +vi.mock('@fastgpt/service/core/evaluation/summary/util/weightCalculator'); +vi.mock('@fastgpt/service/core/evaluation/task/statusCalculator'); + +describe('EvaluationTaskService Integration Tests', () => { + let teamId: 
string; + let tmbId: string; + let evalDatasetCollectionId: string; + let target: EvalTarget; + let metricId: string; + let evaluators: EvaluatorSchema[]; + + // 通用重试函数处理MongoDB锁冲突 + const retryOnLockError = async ( + operation: () => Promise, + maxRetries: number = 3 + ): Promise => { + let retryCount = 0; + + while (retryCount < maxRetries) { + try { + return await operation(); + } catch (error: any) { + retryCount++; + if ( + (error?.message?.includes('Collection namespace') && + error?.message?.includes('is already in use')) || + error?.message?.includes('Unable to acquire IX lock') || + error?.code === 50 || // MaxTimeMSExpired + retryCount < maxRetries + ) { + // 等待一段时间后重试 + await new Promise((resolve) => setTimeout(resolve, 100 * retryCount)); + continue; + } + throw error; // 非锁冲突错误或重试次数用完,直接抛出 + } + } + throw new Error('Max retries exceeded'); + }; + + beforeAll(async () => { + teamId = '507f1f77bcf86cd799439011'; + tmbId = '507f1f77bcf86cd799439012'; + + target = { + type: 'workflow', + config: { + appId: '507f1f77bcf86cd799439011', + versionId: '507f1f77bcf86cd799439012', + chatConfig: { + temperature: 0.7, + maxToken: 2000 + } + } + }; + }); + + afterAll(async () => { + // 清理测试数据 + await Promise.all([ + MongoEvaluation.deleteMany({ teamId }), + MongoEvalItem.deleteMany({}), + MongoEvalDatasetCollection.deleteMany({ teamId }), + MongoEvalDatasetData.deleteMany({ teamId }), + MongoEvalMetric.deleteMany({ teamId }) + ]); + }); + + beforeEach(async () => { + vi.clearAllMocks(); + + // Setup test data + const dataset = await MongoEvalDatasetCollection.create({ + teamId: new Types.ObjectId(teamId), + tmbId: new Types.ObjectId(tmbId), + name: 'Test Dataset', + description: 'Dataset for task testing' + }); + evalDatasetCollectionId = dataset._id.toString(); + + // 创建数据集数据项 + await MongoEvalDatasetData.create([ + { + teamId: new Types.ObjectId(teamId), + tmbId: new Types.ObjectId(tmbId), + evalDatasetCollectionId: dataset._id, + userInput: 'What is AI?', + 
expectedOutput: 'Artificial Intelligence' + }, + { + teamId: new Types.ObjectId(teamId), + tmbId: new Types.ObjectId(tmbId), + evalDatasetCollectionId: dataset._id, + userInput: 'What is ML?', + expectedOutput: 'Machine Learning' + } + ]); + + const metric = await MongoEvalMetric.create({ + teamId: teamId, + tmbId: tmbId, + name: 'Test Metric', + description: 'Metric for task testing', + type: EvalMetricTypeEnum.Custom, + prompt: 'Please evaluate the quality of the response.', + llmRequired: true, + userInputRequired: true, + actualOutputRequired: true, + expectedOutputRequired: true, + createTime: new Date(), + updateTime: new Date() + }); + + metricId = metric._id.toString(); + + evaluators = [ + { + metric: metric.toObject(), + runtimeConfig: { + llm: 'gpt-3.5-turbo' + }, + thresholdValue: 0.8 + } + ]; + + // Setup mocks + const { createEvaluationUsage } = await import( + '@fastgpt/service/support/wallet/usage/controller' + ); + const { getEvaluationTaskStatus, getEvaluationTaskStats } = await import( + '@fastgpt/service/core/evaluation/task/statusCalculator' + ); + const { buildEvalDataConfig } = await import( + '@fastgpt/service/core/evaluation/summary/util/weightCalculator' + ); + + (createEvaluationUsage as any).mockResolvedValue({ billId: new Types.ObjectId() }); + (getEvaluationTaskStatus as any).mockResolvedValue(EvaluationStatusEnum.queuing); + (getEvaluationTaskStats as any).mockResolvedValue({ + total: 2, + completed: 0, + evaluating: 0, + queuing: 2, + error: 0 + }); + (buildEvalDataConfig as any).mockImplementation((evaluators) => ({ + evaluators: evaluators.map((evaluator) => ({ + metric: evaluator.metric, + runtimeConfig: evaluator.runtimeConfig, + thresholdValue: evaluator.thresholdValue ?? 
0.8 + })), + summaryConfigs: evaluators.map((evaluator, index) => ({ + metricId: evaluator.metric._id.toString(), + metricName: evaluator.metric.name, + weight: 100, + calculateType: 'mean', + score: 0, + summary: '', + summaryStatus: 'pending', + errorReason: '', + completedItemCount: 0, + overThresholdItemCount: 0, + thresholdPassRate: 0 + })) + })); + }); + + describe('基本CRUD操作', () => { + test('应该成功创建评估任务', async () => { + const params: CreateEvaluationParams = { + name: 'Test Evaluation', + description: 'A test evaluation for unit testing', + evalDatasetCollectionId, + target: { + type: 'workflow', + config: { + appId: '507f1f77bcf86cd799439013', + versionId: '507f1f77bcf86cd799439014', + chatConfig: {} + } + }, + evaluators: evaluators, + autoStart: false + }; + + const evaluation = await retryOnLockError(() => + EvaluationTaskService.createEvaluation({ + ...params, + teamId: teamId, + tmbId: tmbId + }) + ); + + expect(evaluation.name).toBe(params.name); + expect(evaluation.description).toBe(params.description); + expect(evaluation.evalDatasetCollectionId.toString()).toBe(evalDatasetCollectionId); + expect(evaluation.target.type).toBe('workflow'); + expect(evaluation.target.config.appId).toBe('507f1f77bcf86cd799439013'); + expect(evaluation.evaluators).toHaveLength(1); + expect(evaluation.teamId.toString()).toBe(teamId); + expect(evaluation.tmbId.toString()).toBe(tmbId); + expect(Types.ObjectId.isValid(evaluation.usageId)).toBe(true); + }); + + test('应该成功获取评估任务', async () => { + const params: CreateEvaluationParams = { + name: 'Get Test Evaluation', + description: 'Test evaluation for get operation', + evalDatasetCollectionId, + target, + evaluators: evaluators, + autoStart: false + }; + + const created = await retryOnLockError(() => + EvaluationTaskService.createEvaluation({ + ...params, + teamId: teamId, + tmbId: tmbId + }) + ); + + const evaluation = await EvaluationTaskService.getEvaluation(created._id.toString(), teamId); + + 
expect(evaluation._id.toString()).toBe(created._id.toString()); + expect(evaluation.name).toBe('Get Test Evaluation'); + }); + + test('应该成功更新评估任务', async () => { + const params: CreateEvaluationParams = { + name: 'Update Test Evaluation', + description: 'Original description', + evalDatasetCollectionId, + target, + evaluators: evaluators, + autoStart: false + }; + const created = await retryOnLockError(() => + EvaluationTaskService.createEvaluation({ + ...params, + teamId: teamId, + tmbId: tmbId + }) + ); + + const updates = { + name: 'Updated Test Evaluation', + description: 'Updated description' + }; + + await EvaluationTaskService.updateEvaluation(created._id.toString(), updates, teamId); + + const updatedEvaluation = await EvaluationTaskService.getEvaluation( + created._id.toString(), + teamId + ); + expect(updatedEvaluation.name).toBe(updates.name); + expect(updatedEvaluation.description).toBe(updates.description); + }); + + test('应该成功删除评估任务', async () => { + const params: CreateEvaluationParams = { + name: 'Delete Test Evaluation', + description: 'Test evaluation for deletion', + evalDatasetCollectionId, + target, + evaluators: evaluators, + autoStart: false + }; + const evaluation = await retryOnLockError(() => + EvaluationTaskService.createEvaluation({ + ...params, + teamId: teamId, + tmbId: tmbId + }) + ); + const testEvaluationId = evaluation._id.toString(); + + await EvaluationTaskService.deleteEvaluation(testEvaluationId, teamId); + + await expect(EvaluationTaskService.getEvaluation(testEvaluationId, teamId)).rejects.toThrow( + EvaluationErrEnum.evalTaskNotFound + ); + }); + }); + + describe('列表和统计功能', () => { + test('应该成功获取评估任务列表', async () => { + await MongoEvaluation.deleteMany({ teamId }); + + const params: CreateEvaluationParams = { + name: 'List Test Evaluation', + description: 'Test evaluation for list operation', + evalDatasetCollectionId, + target, + evaluators: evaluators, + autoStart: false + }; + await retryOnLockError(() => + 
EvaluationTaskService.createEvaluation({ + ...params, + teamId: teamId, + tmbId: tmbId + }) + ); + + const result = await EvaluationTaskService.listEvaluations( + teamId, + 0, + 10, + undefined, + undefined, + tmbId, + true + ); + + expect(Array.isArray(result.list)).toBe(true); + expect(typeof result.total).toBe('number'); + expect(result.list.length).toBeGreaterThanOrEqual(1); + }); + + test('应该返回正确的统计信息', async () => { + const params: CreateEvaluationParams = { + name: 'Stats Test Evaluation', + description: 'Test evaluation for stats', + evalDatasetCollectionId, + target, + evaluators: evaluators, + autoStart: false + }; + const evaluation = await retryOnLockError(() => + EvaluationTaskService.createEvaluation({ + ...params, + teamId: teamId, + tmbId: tmbId + }) + ); + + const stats = await EvaluationTaskService.getEvaluationStats( + evaluation._id.toString(), + teamId + ); + + expect(stats.total).toBe(2); + expect(stats.completed).toBe(0); + expect(stats.evaluating).toBe(0); + expect(stats.queuing).toBe(2); + expect(stats.error).toBe(0); + expect(typeof stats.failed).toBe('number'); + }); + }); + + describe('错误处理', () => { + test('缺少必填字段时应该抛出错误', async () => { + const invalidParams = { + name: 'Invalid Evaluation' + }; + + await expect(EvaluationTaskService.createEvaluation(invalidParams as any)).rejects.toThrow(); + }); + + test('评估任务不存在时应该抛出错误', async () => { + const nonExistentId = new Types.ObjectId().toString(); + + await expect(EvaluationTaskService.getEvaluation(nonExistentId, teamId)).rejects.toThrow( + EvaluationErrEnum.evalTaskNotFound + ); + }); + + test('数据集为空时应该抛出错误', async () => { + const emptyDataset = await MongoEvalDatasetCollection.create({ + teamId: new Types.ObjectId(teamId), + tmbId: new Types.ObjectId(tmbId), + name: 'Empty Dataset', + description: 'Empty dataset for testing' + }); + + const params: CreateEvaluationParams = { + name: 'Test with Empty Dataset', + description: 'Test evaluation with empty dataset', + evalDatasetCollectionId: 
emptyDataset._id.toString(), + target, + evaluators: evaluators, + autoStart: false + }; + + await expect( + EvaluationTaskService.createEvaluation({ + ...params, + teamId: teamId, + tmbId: tmbId + }) + ).rejects.toThrow(EvaluationErrEnum.evalDatasetLoadFailed); + }); + }); + + describe('导出功能', () => { + test('应该成功导出 JSON 格式', async () => { + const params: CreateEvaluationParams = { + name: 'Export Test Evaluation JSON', + description: 'Test evaluation for JSON export', + evalDatasetCollectionId, + target, + evaluators: evaluators, + autoStart: false + }; + const evaluation = await retryOnLockError(() => + EvaluationTaskService.createEvaluation({ + ...params, + teamId: teamId, + tmbId: tmbId + }) + ); + + await MongoEvalItem.create({ + evalId: evaluation._id, + dataItem: { userInput: 'Test userInput', expectedOutput: 'Test answer' }, + targetOutput: { + actualOutput: 'Test response', + responseTime: 1000 + }, + evaluatorOutputs: [ + { + metricName: 'Test Metric', + data: { + score: 85 + } + } + ] + }); + + const { getBatchEvaluationItemStatus } = await import( + '@fastgpt/service/core/evaluation/task/statusCalculator' + ); + (getBatchEvaluationItemStatus as any).mockResolvedValue( + new Map([['someId', EvaluationStatusEnum.completed]]) + ); + + const { results: buffer, total } = await EvaluationTaskService.exportEvaluationResults( + evaluation._id.toString(), + teamId, + 'json' + ); + const data = JSON.parse(buffer.toString()); + + expect(Array.isArray(data)).toBe(true); + expect(total).toBeGreaterThanOrEqual(1); + }); + + test('应该成功导出 CSV 格式', async () => { + const params: CreateEvaluationParams = { + name: 'Export Test Evaluation CSV', + description: 'Test evaluation for CSV export', + evalDatasetCollectionId, + target, + evaluators: evaluators, + autoStart: false + }; + const evaluation = await retryOnLockError(() => + EvaluationTaskService.createEvaluation({ + ...params, + teamId: teamId, + tmbId: tmbId + }) + ); + + await MongoEvalItem.create({ + evalId: 
evaluation._id, + dataItem: { userInput: 'CSV Test userInput', expectedOutput: 'CSV Test answer' }, + targetOutput: { + actualOutput: 'CSV Test response', + responseTime: 1000 + }, + evaluatorOutputs: [ + { + metricName: 'Test Metric', + data: { + score: 85, + metricName: 'Test Metric' + } + } + ] + }); + + const { getBatchEvaluationItemStatus } = await import( + '@fastgpt/service/core/evaluation/task/statusCalculator' + ); + (getBatchEvaluationItemStatus as any).mockResolvedValue( + new Map([['someId', EvaluationStatusEnum.completed]]) + ); + + const { results: buffer, total } = await EvaluationTaskService.exportEvaluationResults( + evaluation._id.toString(), + teamId, + 'csv' + ); + const csvContent = buffer.toString(); + + expect(total).toBeGreaterThanOrEqual(1); + expect(csvContent.includes('ItemId,UserInput,ExpectedOutput')).toBe(true); + expect(csvContent.includes('CSV Test userInput')).toBe(true); + }); + }); +}); diff --git a/test/cases/service/core/evaluation/task/mq.test.ts b/test/cases/service/core/evaluation/task/mq.test.ts new file mode 100644 index 000000000000..af4525fffc61 --- /dev/null +++ b/test/cases/service/core/evaluation/task/mq.test.ts @@ -0,0 +1,459 @@ +import { beforeAll, afterAll, beforeEach, describe, test, expect, vi } from 'vitest'; +import { Types } from '@fastgpt/service/common/mongo'; +import { EvaluationStatusEnum } from '@fastgpt/global/core/evaluation/constants'; +import { MongoEvaluation, MongoEvalItem } from '@fastgpt/service/core/evaluation/task/schema'; +import { MongoEvalDatasetCollection } from '@fastgpt/service/core/evaluation/dataset/evalDatasetCollectionSchema'; +import { MongoEvalDatasetData } from '@fastgpt/service/core/evaluation/dataset/evalDatasetDataSchema'; +import { MongoEvalMetric } from '@fastgpt/service/core/evaluation/metric/schema'; +import { EvalMetricTypeEnum } from '@fastgpt/global/core/evaluation/metric/constants'; +import type { EvaluatorSchema, EvalTarget } from '@fastgpt/global/core/evaluation/type'; + 
+// Mock BullMQ +vi.mock('@fastgpt/service/common/bullmq', () => ({ + getQueue: vi.fn(() => ({ + add: vi.fn().mockResolvedValue({ id: 'test-job-id' }), + addBulk: vi.fn().mockResolvedValue([{ id: 'bulk-job-1' }, { id: 'bulk-job-2' }]), + getJob: vi.fn(), + getJobs: vi.fn().mockResolvedValue([]) + })), + getWorker: vi.fn(() => ({ + on: vi.fn() + })), + QueueNames: { + evalTask: 'evalTask', + evalTaskItem: 'evalTaskItem' + } +})); + +// Mock job cleanup +vi.mock('@fastgpt/service/core/evaluation/utils/jobCleanup', () => ({ + createJobCleaner: vi.fn(() => ({ + cleanAllJobsByFilter: vi.fn().mockResolvedValue({ + queue: 'test-queue', + totalJobs: 2, + removedJobs: 2, + failedRemovals: 0, + errors: [] + }) + })) +})); + +// Mock system log +vi.mock('@fastgpt/service/common/system/log', () => ({ + addLog: { + debug: vi.fn(), + warn: vi.fn(), + error: vi.fn(), + info: vi.fn() + } +})); + +// Create a mock worker +const mockWorker = { + on: vi.fn(), + id: 'mock-worker-id' +}; + +vi.mock('@fastgpt/service/core/evaluation/task/mq', async () => { + const actual = (await vi.importActual('@fastgpt/service/core/evaluation/task/mq')) as any; + return { + ...actual, + getEvaluationTaskWorker: vi.fn(() => mockWorker), + getEvaluationItemWorker: vi.fn(() => mockWorker) + }; +}); + +import { + evaluationTaskQueue, + evaluationItemQueue, + addEvaluationTaskJob, + addEvaluationItemJob, + addEvaluationItemJobs, + removeEvaluationTaskJob, + removeEvaluationItemJobs, + removeEvaluationItemJobsByItemId, + checkEvaluationTaskJobActive, + checkEvaluationItemJobActive, + getEvaluationTaskWorker, + getEvaluationItemWorker +} from '@fastgpt/service/core/evaluation/task/mq'; +import { addLog } from '@fastgpt/service/common/system/log'; + +describe('Evaluation MQ System', () => { + let teamId: string; + let tmbId: string; + let evalDatasetCollectionId: string; + let target: EvalTarget; + let evaluators: EvaluatorSchema[]; + let evaluationId: string; + + beforeAll(async () => { + teamId = 
'507f1f77bcf86cd799439011'; + tmbId = '507f1f77bcf86cd799439012'; + + target = { + type: 'workflow', + config: { + appId: '507f1f77bcf86cd799439011', + versionId: '507f1f77bcf86cd799439012', + chatConfig: {} + } + }; + }); + + afterAll(async () => { + // 清理测试数据 + await Promise.all([ + MongoEvaluation.deleteMany({ teamId }), + MongoEvalItem.deleteMany({}), + MongoEvalDatasetCollection.deleteMany({ teamId }), + MongoEvalDatasetData.deleteMany({ teamId }), + MongoEvalMetric.deleteMany({ teamId }) + ]); + }); + + beforeEach(async () => { + vi.clearAllMocks(); + + // 创建测试数据 + const dataset = await MongoEvalDatasetCollection.create({ + teamId: new Types.ObjectId(teamId), + tmbId: new Types.ObjectId(tmbId), + name: 'MQ Test Dataset', + description: 'Dataset for MQ testing' + }); + evalDatasetCollectionId = dataset._id.toString(); + + const metric = await MongoEvalMetric.create({ + teamId: teamId, + tmbId: tmbId, + name: 'MQ Test Metric', + description: 'Metric for MQ testing', + type: EvalMetricTypeEnum.Custom, + prompt: 'Please evaluate the quality of the response.', + llmRequired: true, + userInputRequired: true, + actualOutputRequired: true, + expectedOutputRequired: true, + createTime: new Date(), + updateTime: new Date() + }); + + evaluators = [ + { + metric: metric.toObject(), + runtimeConfig: { + llm: 'gpt-3.5-turbo' + }, + thresholdValue: 0.8 + } + ]; + + // 创建测试评估任务 + const evaluation = await MongoEvaluation.create({ + teamId: new Types.ObjectId(teamId), + tmbId: new Types.ObjectId(tmbId), + name: 'MQ Test Evaluation', + description: 'Test evaluation for MQ', + evalDatasetCollectionId: new Types.ObjectId(evalDatasetCollectionId), + target, + evaluators: evaluators, + usageId: new Types.ObjectId(), + status: EvaluationStatusEnum.queuing, + createTime: new Date() + }); + evaluationId = evaluation._id.toString(); + }); + + describe('队列初始化', () => { + test('应该正确初始化评估任务队列', () => { + expect(evaluationTaskQueue).toBeDefined(); + expect(typeof 
evaluationTaskQueue.add).toBe('function'); + }); + + test('应该正确初始化评估项队列', () => { + expect(evaluationItemQueue).toBeDefined(); + expect(typeof evaluationItemQueue.add).toBe('function'); + expect(typeof evaluationItemQueue.addBulk).toBe('function'); + }); + }); + + describe('任务添加', () => { + test('应该成功添加评估任务到队列', async () => { + const jobData = { evalId: evaluationId }; + + await addEvaluationTaskJob(jobData); + + expect(evaluationTaskQueue.add).toHaveBeenCalledWith(evaluationId, jobData, { + deduplication: { id: evaluationId, ttl: 5000 } + }); + }); + + test('应该成功添加评估项到队列', async () => { + const evalItem = await MongoEvalItem.create({ + evalId: new Types.ObjectId(evaluationId), + dataItem: { userInput: 'Test input', expectedOutput: 'Test output' } + }); + const itemId = evalItem._id.toString(); + + const jobData = { evalId: evaluationId, evalItemId: itemId }; + + await addEvaluationItemJob(jobData); + + expect(evaluationItemQueue.add).toHaveBeenCalledWith(itemId, jobData, { + deduplication: { id: itemId, ttl: 5000 } + }); + }); + + test('应该支持延迟添加评估项', async () => { + const evalItem = await MongoEvalItem.create({ + evalId: new Types.ObjectId(evaluationId), + dataItem: { userInput: 'Test input', expectedOutput: 'Test output' } + }); + const itemId = evalItem._id.toString(); + + const jobData = { evalId: evaluationId, evalItemId: itemId }; + const delay = 5000; + + await addEvaluationItemJob(jobData, { delay }); + + expect(evaluationItemQueue.add).toHaveBeenCalledWith(itemId, jobData, { + deduplication: { id: itemId, ttl: 5000 }, + delay + }); + }); + + test('应该成功批量添加评估项', async () => { + const evalItems = await MongoEvalItem.create([ + { + evalId: new Types.ObjectId(evaluationId), + dataItem: { userInput: 'Test input 1', expectedOutput: 'Test output 1' } + }, + { + evalId: new Types.ObjectId(evaluationId), + dataItem: { userInput: 'Test input 2', expectedOutput: 'Test output 2' } + } + ]); + + const jobs = evalItems.map((item, index) => ({ + data: { evalId: 
evaluationId, evalItemId: item._id.toString() }, + delay: index * 1000 + })); + + await addEvaluationItemJobs(jobs); + + expect(evaluationItemQueue.addBulk).toHaveBeenCalledWith( + expect.arrayContaining([ + expect.objectContaining({ + name: evalItems[0]._id.toString(), + data: { evalId: evaluationId, evalItemId: evalItems[0]._id.toString() }, + opts: expect.objectContaining({ + delay: 0, + deduplication: { id: evalItems[0]._id.toString() } + }) + }), + expect.objectContaining({ + name: evalItems[1]._id.toString(), + data: { evalId: evaluationId, evalItemId: evalItems[1]._id.toString() }, + opts: expect.objectContaining({ + delay: 1000, + deduplication: { id: evalItems[1]._id.toString() } + }) + }) + ]) + ); + }); + + test('批量添加时应该为没有延迟的任务设置默认延迟', async () => { + const evalItems = await MongoEvalItem.create([ + { + evalId: new Types.ObjectId(evaluationId), + dataItem: { userInput: 'Test input 1', expectedOutput: 'Test output 1' } + }, + { + evalId: new Types.ObjectId(evaluationId), + dataItem: { userInput: 'Test input 2', expectedOutput: 'Test output 2' } + } + ]); + + const jobs = evalItems.map((item) => ({ + data: { evalId: evaluationId, evalItemId: item._id.toString() } + // 没有指定 delay + })); + + await addEvaluationItemJobs(jobs); + + expect(evaluationItemQueue.addBulk).toHaveBeenCalledWith( + expect.arrayContaining([ + expect.objectContaining({ + opts: expect.objectContaining({ + delay: 0 // 第一个任务延迟为 0 + }) + }), + expect.objectContaining({ + opts: expect.objectContaining({ + delay: 100 // 第二个任务延迟为 100ms (index * 100) + }) + }) + ]) + ); + }); + }); + + describe('任务清理', () => { + test('应该成功清理评估任务', async () => { + const result = await removeEvaluationTaskJob(evaluationId); + + expect(result).toEqual({ + queue: 'test-queue', + totalJobs: 2, + removedJobs: 2, + failedRemovals: 0, + errors: [] + }); + expect(addLog.debug).toHaveBeenCalledWith('Evaluation task jobs cleanup completed', { + evalId: evaluationId, + result + }); + }); + + test('应该成功清理评估项任务', async () 
=> { + const result = await removeEvaluationItemJobs(evaluationId); + + expect(result).toEqual({ + queue: 'test-queue', + totalJobs: 2, + removedJobs: 2, + failedRemovals: 0, + errors: [] + }); + expect(addLog.debug).toHaveBeenCalledWith('Evaluation item jobs cleanup completed', { + evalId: evaluationId, + result + }); + }); + + test('应该成功按项目ID清理评估项任务', async () => { + const evalItem = await MongoEvalItem.create({ + evalId: new Types.ObjectId(evaluationId), + dataItem: { userInput: 'Test input', expectedOutput: 'Test output' } + }); + const itemId = evalItem._id.toString(); + + const result = await removeEvaluationItemJobsByItemId(itemId); + + expect(result).toEqual({ + queue: 'test-queue', + totalJobs: 2, + removedJobs: 2, + failedRemovals: 0, + errors: [] + }); + expect(addLog.debug).toHaveBeenCalledWith( + 'Evaluation item jobs cleanup completed for specific item', + { + evalItemId: itemId, + result + } + ); + }); + + test('应该支持清理选项', async () => { + const options = { + forceCleanActiveJobs: true, + retryAttempts: 5, + retryDelay: 500 + }; + + await removeEvaluationTaskJob(evaluationId, options); + + // 验证选项被传递给了 createJobCleaner + const { createJobCleaner } = await import( + '@fastgpt/service/core/evaluation/utils/jobCleanup' + ); + expect(createJobCleaner).toHaveBeenCalledWith(options); + }); + }); + + describe('任务状态检查', () => { + test('应该检查评估任务是否活跃', async () => { + // Mock 返回活跃任务 + (evaluationTaskQueue.getJobs as any).mockResolvedValueOnce([ + { data: { evalId: evaluationId } } + ]); + + const isActive = await checkEvaluationTaskJobActive(evaluationId); + + expect(isActive).toBe(true); + expect(evaluationTaskQueue.getJobs).toHaveBeenCalledWith([ + 'active', + 'waiting', + 'delayed', + 'prioritized' + ]); + }); + + test('应该正确检测非活跃的评估任务', async () => { + // Mock 返回空数组 + (evaluationTaskQueue.getJobs as any).mockResolvedValueOnce([]); + + const isActive = await checkEvaluationTaskJobActive(evaluationId); + + expect(isActive).toBe(false); + }); + + 
test('应该检查评估项是否活跃', async () => { + const evalItem = await MongoEvalItem.create({ + evalId: new Types.ObjectId(evaluationId), + dataItem: { userInput: 'Test input', expectedOutput: 'Test output' } + }); + const itemId = evalItem._id.toString(); + + // Mock 返回活跃任务 + (evaluationItemQueue.getJobs as any).mockResolvedValueOnce([ + { data: { evalItemId: itemId } } + ]); + + const isActive = await checkEvaluationItemJobActive(itemId); + + expect(isActive).toBe(true); + expect(evaluationItemQueue.getJobs).toHaveBeenCalledWith([ + 'active', + 'waiting', + 'delayed', + 'prioritized' + ]); + }); + + test('应该处理状态检查错误', async () => { + // Mock 抛出错误 + (evaluationTaskQueue.getJobs as any).mockRejectedValueOnce(new Error('Queue error')); + + const isActive = await checkEvaluationTaskJobActive(evaluationId); + + expect(isActive).toBe(false); + expect(addLog.error).toHaveBeenCalledWith('[Evaluation] Failed to check task job status', { + evalId: evaluationId, + error: expect.any(Error) + }); + }); + }); + + describe('Worker初始化', () => { + test('应该创建评估任务Worker', () => { + const mockProcessor = vi.fn(); + getEvaluationTaskWorker(mockProcessor); + + // 验证Worker创建函数被调用 + expect(getEvaluationTaskWorker).toHaveBeenCalledWith(mockProcessor); + }); + + test('应该创建评估项Worker', () => { + const mockProcessor = vi.fn(); + getEvaluationItemWorker(mockProcessor); + + // 验证Worker创建函数被调用 + expect(getEvaluationItemWorker).toHaveBeenCalledWith(mockProcessor); + }); + }); +}); diff --git a/test/cases/service/core/evaluation/task/processor.test.ts b/test/cases/service/core/evaluation/task/processor.test.ts new file mode 100644 index 000000000000..37dcfea62f3e --- /dev/null +++ b/test/cases/service/core/evaluation/task/processor.test.ts @@ -0,0 +1,841 @@ +import { beforeAll, afterAll, beforeEach, describe, test, expect, vi } from 'vitest'; +import { Types } from '@fastgpt/service/common/mongo'; +import { EvaluationStatusEnum } from '@fastgpt/global/core/evaluation/constants'; +import { + 
MetricResultStatusEnum, + ModelTypeEnum +} from '@fastgpt/global/core/evaluation/metric/constants'; +import { MongoEvaluation, MongoEvalItem } from '@fastgpt/service/core/evaluation/task/schema'; +import { MongoEvalDatasetCollection } from '@fastgpt/service/core/evaluation/dataset/evalDatasetCollectionSchema'; +import { MongoEvalDatasetData } from '@fastgpt/service/core/evaluation/dataset/evalDatasetDataSchema'; +import { MongoEvalMetric } from '@fastgpt/service/core/evaluation/metric/schema'; +import { EvalMetricTypeEnum } from '@fastgpt/global/core/evaluation/metric/constants'; +import type { + EvaluatorSchema, + EvalTarget, + TargetOutput +} from '@fastgpt/global/core/evaluation/type'; +import type { MetricResult } from '@fastgpt/global/core/evaluation/metric/type'; + +// Mock all external dependencies +vi.mock('@fastgpt/service/common/system/log'); +vi.mock('@fastgpt/service/core/evaluation/task/mq'); +vi.mock('@fastgpt/service/core/evaluation/target'); +vi.mock('@fastgpt/service/core/evaluation/evaluator'); +vi.mock('@fastgpt/service/support/permission/teamLimit'); +vi.mock('@fastgpt/service/core/evaluation/utils/usage'); +vi.mock('@fastgpt/service/core/evaluation/summary'); +vi.mock('@fastgpt/service/core/evaluation/summary/util/aggregateScoreCalculator'); +vi.mock('@fastgpt/service/core/evaluation/task/statusCalculator'); +vi.mock('@fastgpt/service/core/evaluation/task/errors'); + +import { + finishEvaluationTask, + evaluationTaskProcessor, + evaluationItemProcessor +} from '@fastgpt/service/core/evaluation/task/processor'; + +describe('EvaluationTaskProcessor', () => { + let teamId: string; + let tmbId: string; + let evalDatasetCollectionId: string; + let target: EvalTarget; + let evaluators: EvaluatorSchema[]; + let evaluationId: string; + + beforeAll(async () => { + teamId = '507f1f77bcf86cd799439011'; + tmbId = '507f1f77bcf86cd799439012'; + + target = { + type: 'workflow', + config: { + appId: '507f1f77bcf86cd799439013', + versionId: 
'507f1f77bcf86cd799439014', + chatConfig: { + temperature: 0.7, + maxToken: 2000 + } + } + }; + }); + + afterAll(async () => { + // 清理测试数据 + await Promise.all([ + MongoEvaluation.deleteMany({ teamId }), + MongoEvalItem.deleteMany({}), + MongoEvalDatasetCollection.deleteMany({ teamId }), + MongoEvalDatasetData.deleteMany({ teamId }), + MongoEvalMetric.deleteMany({ teamId }) + ]); + }); + + beforeEach(async () => { + vi.clearAllMocks(); + + // 创建测试数据 + const dataset = await MongoEvalDatasetCollection.create({ + teamId: new Types.ObjectId(teamId), + tmbId: new Types.ObjectId(tmbId), + name: 'Processor Test Dataset', + description: 'Dataset for processor testing' + }); + evalDatasetCollectionId = dataset._id.toString(); + + // 创建数据集数据项 + await MongoEvalDatasetData.create([ + { + teamId: new Types.ObjectId(teamId), + tmbId: new Types.ObjectId(tmbId), + evalDatasetCollectionId: dataset._id, + userInput: 'What is AI?', + expectedOutput: 'Artificial Intelligence' + }, + { + teamId: new Types.ObjectId(teamId), + tmbId: new Types.ObjectId(tmbId), + evalDatasetCollectionId: dataset._id, + userInput: 'What is ML?', + expectedOutput: 'Machine Learning' + } + ]); + + const metric = await MongoEvalMetric.create({ + teamId: teamId, + tmbId: tmbId, + name: 'Processor Test Metric', + description: 'Metric for processor testing', + type: EvalMetricTypeEnum.Custom, + prompt: 'Please evaluate the quality of the response.', + llmRequired: true, + userInputRequired: true, + actualOutputRequired: true, + expectedOutputRequired: true, + createTime: new Date(), + updateTime: new Date() + }); + + evaluators = [ + { + metric: metric.toObject(), + runtimeConfig: { + llm: 'gpt-3.5-turbo' + }, + thresholdValue: 0.8 + } + ]; + + // 创建测试评估任务 + const evaluation = await MongoEvaluation.create({ + teamId: new Types.ObjectId(teamId), + tmbId: new Types.ObjectId(tmbId), + name: 'Processor Test Evaluation', + description: 'Test evaluation for processor', + evalDatasetCollectionId: new 
Types.ObjectId(evalDatasetCollectionId), + target, + evaluators: evaluators, + usageId: new Types.ObjectId(), + status: EvaluationStatusEnum.queuing, + createTime: new Date() + }); + evaluationId = evaluation._id.toString(); + + // Setup mocks + const { addEvaluationItemJobs } = await import('@fastgpt/service/core/evaluation/task/mq'); + const { createTargetInstance } = await import('@fastgpt/service/core/evaluation/target'); + const { createEvaluatorInstance } = await import('@fastgpt/service/core/evaluation/evaluator'); + const { checkTeamAIPoints } = await import('@fastgpt/service/support/permission/teamLimit'); + const { createMergedEvaluationUsage } = await import( + '@fastgpt/service/core/evaluation/utils/usage' + ); + const { calculateEvaluationItemAggregateScore } = await import( + '@fastgpt/service/core/evaluation/summary/util/aggregateScoreCalculator' + ); + const { getBatchEvaluationItemStatus } = await import( + '@fastgpt/service/core/evaluation/task/statusCalculator' + ); + const { createEvaluationError } = await import('@fastgpt/service/core/evaluation/task/errors'); + + (addEvaluationItemJobs as any).mockResolvedValue(undefined); + (checkTeamAIPoints as any).mockResolvedValue(undefined); + (createMergedEvaluationUsage as any).mockResolvedValue(undefined); + (calculateEvaluationItemAggregateScore as any).mockResolvedValue(85); + (getBatchEvaluationItemStatus as any).mockResolvedValue(new Map()); + (createEvaluationError as any).mockImplementation((error: any) => new Error(error)); + + // Mock target instance + const mockTargetOutput: TargetOutput = { + actualOutput: 'AI is artificial intelligence technology', + responseTime: 1500, + chatId: 'test-chat-id', + aiChatItemDataId: 'test-ai-chat-item-id', + usage: [ + { + totalPoints: 10, + inputTokens: 50, + outputTokens: 30 + } + ] + }; + + (createTargetInstance as any).mockResolvedValue({ + execute: vi.fn().mockResolvedValue(mockTargetOutput) + }); + + // Mock evaluator instance + const 
mockEvaluatorOutput: MetricResult = { + metricName: 'Processor Test Metric', + status: MetricResultStatusEnum.Success, + data: { + score: 85, + reason: 'Good quality response', + metricName: 'Processor Test Metric' + }, + totalPoints: 5, + usages: [ + { + promptTokens: 20, + completionTokens: 15, + modelType: ModelTypeEnum.LLM + } + ] + }; + + (createEvaluatorInstance as any).mockResolvedValue({ + evaluate: vi.fn().mockResolvedValue(mockEvaluatorOutput) + }); + + // Mock summary service methods + const { EvaluationSummaryService } = await import('@fastgpt/service/core/evaluation/summary'); + vi.mocked(EvaluationSummaryService.calculateAndSaveMetricScores).mockResolvedValue(undefined); + vi.mocked(EvaluationSummaryService.generateSummaryReports).mockResolvedValue(undefined); + }); + + describe('finishEvaluationTask', () => { + test('应该正确完成评估任务(所有项目完成)', async () => { + // 创建已完成的评估项目 + await MongoEvalItem.create([ + { + evalId: new Types.ObjectId(evaluationId), + dataItem: { userInput: 'Q1', expectedOutput: 'A1' }, + finishTime: new Date() + }, + { + evalId: new Types.ObjectId(evaluationId), + dataItem: { userInput: 'Q2', expectedOutput: 'A2' }, + finishTime: new Date() + } + ]); + + const { getBatchEvaluationItemStatus } = await import( + '@fastgpt/service/core/evaluation/task/statusCalculator' + ); + (getBatchEvaluationItemStatus as any).mockResolvedValue( + new Map([ + ['item1', EvaluationStatusEnum.completed], + ['item2', EvaluationStatusEnum.completed] + ]) + ); + + await finishEvaluationTask(evaluationId); + + // 验证任务状态更新 + const updatedEvaluation = await MongoEvaluation.findById(evaluationId); + expect(updatedEvaluation?.finishTime).toBeDefined(); + }); + + test('应该跳过仍有待处理项目的任务', async () => { + // 创建混合状态的评估项目 + const allItems = await MongoEvalItem.create([ + { + evalId: new Types.ObjectId(evaluationId), + dataItem: { userInput: 'Q1', expectedOutput: 'A1' }, + finishTime: new Date() + }, + { + evalId: new Types.ObjectId(evaluationId), + dataItem: { userInput: 
'Q2', expectedOutput: 'A2' } + } + ]); + + const itemIds = allItems.map((item) => item._id.toString()); + const { getBatchEvaluationItemStatus } = await import( + '@fastgpt/service/core/evaluation/task/statusCalculator' + ); + (getBatchEvaluationItemStatus as any).mockResolvedValue( + new Map([ + [itemIds[0], EvaluationStatusEnum.completed], + [itemIds[1], EvaluationStatusEnum.evaluating] + ]) + ); + + await finishEvaluationTask(evaluationId); + + // 验证任务状态未更新 + const evaluation = await MongoEvaluation.findById(evaluationId); + expect(evaluation?.finishTime).toBeUndefined(); + }); + + test('应该触发摘要生成', async () => { + // 创建已完成的评估项目 + await MongoEvalItem.create([ + { + evalId: new Types.ObjectId(evaluationId), + dataItem: { userInput: 'Q1', expectedOutput: 'A1' }, + finishTime: new Date() + } + ]); + + const { getBatchEvaluationItemStatus } = await import( + '@fastgpt/service/core/evaluation/task/statusCalculator' + ); + (getBatchEvaluationItemStatus as any).mockResolvedValue( + new Map([['item1', EvaluationStatusEnum.completed]]) + ); + + const { EvaluationSummaryService } = await import('@fastgpt/service/core/evaluation/summary'); + + await finishEvaluationTask(evaluationId); + + expect(EvaluationSummaryService.calculateAndSaveMetricScores).toHaveBeenCalledWith( + evaluationId + ); + }); + + test('应该正确处理空评估任务', async () => { + const emptyEvaluation = await MongoEvaluation.create({ + teamId: new Types.ObjectId(teamId), + tmbId: new Types.ObjectId(tmbId), + name: 'Empty Evaluation', + description: 'Empty evaluation for testing', + evalDatasetCollectionId: new Types.ObjectId(evalDatasetCollectionId), + target, + evaluators: evaluators, + usageId: new Types.ObjectId(), + status: EvaluationStatusEnum.queuing, + createTime: new Date() + }); + + const { addLog } = await import('@fastgpt/service/common/system/log'); + + await finishEvaluationTask(emptyEvaluation._id.toString()); + + expect(addLog.warn).toHaveBeenCalledWith( + expect.stringContaining('Evaluation task has no 
evaluation item data') + ); + }); + + test('应该正确处理错误情况', async () => { + // Create evaluation items first to avoid empty task scenario + await MongoEvalItem.create([ + { + evalId: new Types.ObjectId(evaluationId), + dataItem: { userInput: 'Q1', expectedOutput: 'A1' } + } + ]); + + // Setup error mock before calling the function + const { addLog } = await import('@fastgpt/service/common/system/log'); + const { getBatchEvaluationItemStatus } = await import( + '@fastgpt/service/core/evaluation/task/statusCalculator' + ); + (getBatchEvaluationItemStatus as any).mockRejectedValue(new Error('Status check failed')); + + await finishEvaluationTask(evaluationId); + + // 验证错误被正确处理 - check if any call contains our expected message + const errorCalls = (addLog.error as any).mock.calls; + const hasExpectedError = errorCalls.some( + (call: any[]) => + call[0] && + typeof call[0] === 'string' && + call[0].includes('[Evaluation] Error occurred while completing task') + ); + + expect(hasExpectedError).toBe(true); + }); + }); + + describe('evaluationTaskProcessor', () => { + test('应该成功处理评估任务(已有评估项目)', async () => { + // 创建现有评估项目 + const existingItems = await MongoEvalItem.create([ + { + evalId: new Types.ObjectId(evaluationId), + dataItem: { userInput: 'Q1', expectedOutput: 'A1' } + }, + { + evalId: new Types.ObjectId(evaluationId), + dataItem: { userInput: 'Q2', expectedOutput: 'A2' } + } + ]); + + const { getBatchEvaluationItemStatus } = await import( + '@fastgpt/service/core/evaluation/task/statusCalculator' + ); + (getBatchEvaluationItemStatus as any).mockResolvedValue( + new Map([ + [existingItems[0]._id.toString(), EvaluationStatusEnum.queuing], + [existingItems[1]._id.toString(), EvaluationStatusEnum.queuing] + ]) + ); + + const mockJob = { + data: { evalId: evaluationId }, + updateProgress: vi.fn() + }; + + await evaluationTaskProcessor(mockJob as any); + + expect(mockJob.updateProgress).toHaveBeenCalledWith(0); + expect(mockJob.updateProgress).toHaveBeenCalledWith(20); + 
expect(mockJob.updateProgress).toHaveBeenCalledWith(100); + + const { addEvaluationItemJobs } = await import('@fastgpt/service/core/evaluation/task/mq'); + expect(addEvaluationItemJobs).toHaveBeenCalled(); + }); + + test('应该创建评估项目(无现有项目)', async () => { + const mockJob = { + data: { evalId: evaluationId }, + updateProgress: vi.fn() + }; + + await evaluationTaskProcessor(mockJob as any); + + // 验证评估项目被创建 + const createdItems = await MongoEvalItem.find({ evalId: evaluationId }); + expect(createdItems.length).toBe(2); + + const { addEvaluationItemJobs } = await import('@fastgpt/service/core/evaluation/task/mq'); + expect(addEvaluationItemJobs).toHaveBeenCalled(); + }); + + test('应该处理评估任务不存在的情况', async () => { + const nonExistentId = new Types.ObjectId().toString(); + const mockJob = { + data: { evalId: nonExistentId }, + updateProgress: vi.fn() + }; + + await evaluationTaskProcessor(mockJob as any); + + expect(mockJob.updateProgress).toHaveBeenCalledWith(0); + + const { addLog } = await import('@fastgpt/service/common/system/log'); + expect(addLog.warn).toHaveBeenCalledWith(expect.stringContaining('no longer exists')); + }); + + test('应该验证目标配置', async () => { + // 创建无效目标配置的评估任务 + const invalidEvaluationDoc = new MongoEvaluation({ + teamId: new Types.ObjectId(teamId), + tmbId: new Types.ObjectId(tmbId), + name: 'Invalid Target Evaluation', + description: 'Evaluation with invalid target', + evalDatasetCollectionId: new Types.ObjectId(evalDatasetCollectionId), + target: { + type: 'workflow' + // 缺少 config 字段 + } as any, + evaluators: evaluators, + usageId: new Types.ObjectId(), + status: EvaluationStatusEnum.queuing, + createTime: new Date() + }); + + // Save without validation to test the processor's validation logic + const invalidEvaluation = await invalidEvaluationDoc.save({ validateBeforeSave: false }); + + const mockJob = { + data: { evalId: invalidEvaluation._id.toString() }, + updateProgress: vi.fn() + }; + + await expect(evaluationTaskProcessor(mockJob as 
any)).rejects.toThrow(); + }); + + test('应该验证评估器配置', async () => { + // 创建无效评估器配置的评估任务 + const invalidEvaluation = await MongoEvaluation.create({ + teamId: new Types.ObjectId(teamId), + tmbId: new Types.ObjectId(tmbId), + name: 'Invalid Evaluators Evaluation', + description: 'Evaluation with invalid evaluators', + evalDatasetCollectionId: new Types.ObjectId(evalDatasetCollectionId), + target, + evaluators: [], // 空评估器 + usageId: new Types.ObjectId(), + status: EvaluationStatusEnum.queuing, + createTime: new Date() + }); + + const mockJob = { + data: { evalId: invalidEvaluation._id.toString() }, + updateProgress: vi.fn() + }; + + await expect(evaluationTaskProcessor(mockJob as any)).rejects.toThrow(); + }); + + test('应该处理数据集为空的情况', async () => { + // 创建空数据集 + const emptyDataset = await MongoEvalDatasetCollection.create({ + teamId: new Types.ObjectId(teamId), + tmbId: new Types.ObjectId(tmbId), + name: 'Empty Dataset', + description: 'Empty dataset for testing' + }); + + const emptyEvaluation = await MongoEvaluation.create({ + teamId: new Types.ObjectId(teamId), + tmbId: new Types.ObjectId(tmbId), + name: 'Empty Dataset Evaluation', + description: 'Evaluation with empty dataset', + evalDatasetCollectionId: emptyDataset._id, + target, + evaluators: evaluators, + usageId: new Types.ObjectId(), + status: EvaluationStatusEnum.queuing, + createTime: new Date() + }); + + const mockJob = { + data: { evalId: emptyEvaluation._id.toString() }, + updateProgress: vi.fn() + }; + + await expect(evaluationTaskProcessor(mockJob as any)).rejects.toThrow(); + }); + }); + + describe('evaluationItemProcessor', () => { + test('应该成功处理评估项目', async () => { + // 创建评估项目 + const evalItem = await MongoEvalItem.create({ + evalId: new Types.ObjectId(evaluationId), + dataItem: { + userInput: 'What is AI?', + expectedOutput: 'Artificial Intelligence' + } + }); + + const mockJob = { + data: { + evalId: evaluationId, + evalItemId: evalItem._id.toString() + }, + updateProgress: vi.fn() + }; + + await 
evaluationItemProcessor(mockJob as any); + + expect(mockJob.updateProgress).toHaveBeenCalledWith(0); + expect(mockJob.updateProgress).toHaveBeenCalledWith(10); + expect(mockJob.updateProgress).toHaveBeenCalledWith(30); + expect(mockJob.updateProgress).toHaveBeenCalledWith(100); + + // 验证项目被更新 + const updatedItem = await MongoEvalItem.findById(evalItem._id); + expect(updatedItem?.targetOutput).toBeDefined(); + expect(updatedItem?.evaluatorOutputs).toBeDefined(); + expect(updatedItem?.aggregateScore).toBeDefined(); + }); + + test('应该从检查点恢复', async () => { + // 创建带有现有输出的评估项目 + const mockTargetOutput: TargetOutput = { + actualOutput: 'Existing AI response', + responseTime: 1000, + chatId: 'existing-chat-id', + aiChatItemDataId: 'existing-ai-chat-item-id' + }; + + const mockEvaluatorOutputs: MetricResult[] = [ + { + metricName: 'Test Metric', + status: MetricResultStatusEnum.Success, + data: { + score: 90, + metricName: 'Test Metric' + } + } + ]; + + const evalItem = await MongoEvalItem.create({ + evalId: new Types.ObjectId(evaluationId), + dataItem: { + userInput: 'What is AI?', + expectedOutput: 'Artificial Intelligence' + }, + targetOutput: mockTargetOutput, + evaluatorOutputs: mockEvaluatorOutputs + }); + + const mockJob = { + data: { + evalId: evaluationId, + evalItemId: evalItem._id.toString() + }, + updateProgress: vi.fn() + }; + + await evaluationItemProcessor(mockJob as any); + + // 验证跳过了目标执行和评估器执行 + const { createTargetInstance } = await import('@fastgpt/service/core/evaluation/target'); + expect(createTargetInstance).not.toHaveBeenCalled(); + }); + + test('应该处理评估项目不存在的情况', async () => { + const nonExistentId = new Types.ObjectId().toString(); + const mockJob = { + data: { + evalId: evaluationId, + evalItemId: nonExistentId + }, + updateProgress: vi.fn() + }; + + await expect(evaluationItemProcessor(mockJob as any)).rejects.toThrow(); + }); + + test('应该处理评估任务不存在的情况', async () => { + const evalItem = await MongoEvalItem.create({ + evalId: new Types.ObjectId(), 
+ dataItem: { + userInput: 'What is AI?', + expectedOutput: 'Artificial Intelligence' + } + }); + + const mockJob = { + data: { + evalId: new Types.ObjectId().toString(), + evalItemId: evalItem._id.toString() + }, + updateProgress: vi.fn() + }; + + await expect(evaluationItemProcessor(mockJob as any)).rejects.toThrow(); + }); + + test('应该检查AI点数', async () => { + const evalItem = await MongoEvalItem.create({ + evalId: new Types.ObjectId(evaluationId), + dataItem: { + userInput: 'What is AI?', + expectedOutput: 'Artificial Intelligence' + } + }); + + const { checkTeamAIPoints } = await import('@fastgpt/service/support/permission/teamLimit'); + (checkTeamAIPoints as any).mockRejectedValue(new Error('Insufficient AI points')); + + const mockJob = { + data: { + evalId: evaluationId, + evalItemId: evalItem._id.toString() + }, + updateProgress: vi.fn() + }; + + await expect(evaluationItemProcessor(mockJob as any)).rejects.toThrow(); + }); + + test('应该处理目标执行错误', async () => { + const evalItem = await MongoEvalItem.create({ + evalId: new Types.ObjectId(evaluationId), + dataItem: { + userInput: 'What is AI?', + expectedOutput: 'Artificial Intelligence' + } + }); + + const { createTargetInstance } = await import('@fastgpt/service/core/evaluation/target'); + (createTargetInstance as any).mockResolvedValue({ + execute: vi.fn().mockRejectedValue(new Error('Target execution failed')) + }); + + const mockJob = { + data: { + evalId: evaluationId, + evalItemId: evalItem._id.toString() + }, + updateProgress: vi.fn() + }; + + await expect(evaluationItemProcessor(mockJob as any)).rejects.toThrow(); + }); + + test('应该处理评估器执行错误', async () => { + const evalItem = await MongoEvalItem.create({ + evalId: new Types.ObjectId(evaluationId), + dataItem: { + userInput: 'What is AI?', + expectedOutput: 'Artificial Intelligence' + } + }); + + const { createEvaluatorInstance } = await import( + '@fastgpt/service/core/evaluation/evaluator' + ); + (createEvaluatorInstance as any).mockResolvedValue({ + 
evaluate: vi.fn().mockRejectedValue(new Error('Evaluator execution failed')) + }); + + const mockJob = { + data: { + evalId: evaluationId, + evalItemId: evalItem._id.toString() + }, + updateProgress: vi.fn() + }; + + await expect(evaluationItemProcessor(mockJob as any)).rejects.toThrow(); + }); + + test('应该记录使用情况', async () => { + const evalItem = await MongoEvalItem.create({ + evalId: new Types.ObjectId(evaluationId), + dataItem: { + userInput: 'What is AI?', + expectedOutput: 'Artificial Intelligence' + } + }); + + const mockJob = { + data: { + evalId: evaluationId, + evalItemId: evalItem._id.toString() + }, + updateProgress: vi.fn() + }; + + await evaluationItemProcessor(mockJob as any); + + const { createMergedEvaluationUsage } = await import( + '@fastgpt/service/core/evaluation/utils/usage' + ); + expect(createMergedEvaluationUsage).toHaveBeenCalledTimes(2); // 目标 + 评估器 + }); + + test('应该处理部分评估器失败', async () => { + // 创建有多个评估器的评估任务 + const multiEvaluators = [ + evaluators[0], + { + metric: evaluators[0].metric, + runtimeConfig: { llm: 'gpt-4' }, + thresholdValue: 0.9 + } + ]; + + await MongoEvaluation.updateOne( + { _id: evaluationId }, + { $set: { evaluators: multiEvaluators } } + ); + + const evalItem = await MongoEvalItem.create({ + evalId: new Types.ObjectId(evaluationId), + dataItem: { + userInput: 'What is AI?', + expectedOutput: 'Artificial Intelligence' + } + }); + + // Mock 第二个评估器失败 + const { createEvaluatorInstance } = await import( + '@fastgpt/service/core/evaluation/evaluator' + ); + (createEvaluatorInstance as any) + .mockResolvedValueOnce({ + evaluate: vi.fn().mockResolvedValue({ + metricName: 'Test Metric 1', + status: MetricResultStatusEnum.Success, + data: { score: 85 } + }) + }) + .mockResolvedValueOnce({ + evaluate: vi.fn().mockRejectedValue(new Error('Second evaluator failed')) + }); + + const mockJob = { + data: { + evalId: evaluationId, + evalItemId: evalItem._id.toString() + }, + updateProgress: vi.fn() + }; + + await 
/**
 * Integration tests for the evaluation-task status calculator
 * (@fastgpt/service/core/evaluation/task/statusCalculator).
 *
 * Strategy: the BullMQ queues are mocked (only `getJobs` is stubbed), while
 * Mongo models are used for real fixture documents. Each test arranges a
 * combination of DB state (finishTime / errorMessage on items, finishTime on
 * the task) and mocked queue jobs, then asserts the status the calculator
 * derives from the two sources.
 *
 * NOTE(review): the expectations below encode an assumed contract — queue job
 * state ('active'/'failed') takes precedence over DB fields, and DB fields
 * (finishTime, errorMessage) decide completed/error/queuing otherwise.
 * Confirm against the statusCalculator implementation.
 */
import { beforeAll, afterAll, beforeEach, describe, test, expect, vi } from 'vitest';
import { Types } from '@fastgpt/service/common/mongo';
import { EvaluationStatusEnum } from '@fastgpt/global/core/evaluation/constants';
import { MongoEvaluation, MongoEvalItem } from '@fastgpt/service/core/evaluation/task/schema';
import { MongoEvalDatasetCollection } from '@fastgpt/service/core/evaluation/dataset/evalDatasetCollectionSchema';
import { MongoEvalDatasetData } from '@fastgpt/service/core/evaluation/dataset/evalDatasetDataSchema';
import { MongoEvalMetric } from '@fastgpt/service/core/evaluation/metric/schema';
import { EvalMetricTypeEnum } from '@fastgpt/global/core/evaluation/metric/constants';
import type { EvaluatorSchema, EvalTarget } from '@fastgpt/global/core/evaluation/type';

// Mock queue system: only getJobs is consumed by the status calculator here.
// Must run before the statusCalculator import below so it picks up the mock.
vi.mock('@fastgpt/service/core/evaluation/task/mq', () => ({
  evaluationTaskQueue: {
    getJobs: vi.fn().mockResolvedValue([])
  },
  evaluationItemQueue: {
    getJobs: vi.fn().mockResolvedValue([])
  }
}));

import {
  getEvaluationTaskStatus,
  getEvaluationItemStatus,
  getEvaluationTaskStats,
  getBatchEvaluationItemStatus
} from '@fastgpt/service/core/evaluation/task/statusCalculator';
import { evaluationTaskQueue, evaluationItemQueue } from '@fastgpt/service/core/evaluation/task/mq';

describe('StatusCalculator', () => {
  let teamId: string;
  let tmbId: string;
  let evalDatasetCollectionId: string;
  let target: EvalTarget;
  let evaluators: EvaluatorSchema[];
  let evaluationId: string;

  beforeAll(async () => {
    // Fixed, valid 24-hex ObjectId strings shared by all fixtures.
    teamId = '507f1f77bcf86cd799439011';
    tmbId = '507f1f77bcf86cd799439012';

    target = {
      type: 'workflow',
      config: {
        appId: '507f1f77bcf86cd799439011',
        versionId: '507f1f77bcf86cd799439012',
        chatConfig: {}
      }
    };
  });

  afterAll(async () => {
    // Clean up all test data created by this suite.
    // NOTE(review): MongoEvalItem.deleteMany({}) is unscoped (no teamId on the
    // item schema here) — it wipes every eval item, not just this suite's.
    await Promise.all([
      MongoEvaluation.deleteMany({ teamId }),
      MongoEvalItem.deleteMany({}),
      MongoEvalDatasetCollection.deleteMany({ teamId }),
      MongoEvalDatasetData.deleteMany({ teamId }),
      MongoEvalMetric.deleteMany({ teamId })
    ]);
  });

  beforeEach(async () => {
    // Reset queue mocks so each test installs its own getJobs behavior.
    vi.clearAllMocks();

    // Create fresh fixtures for each test: dataset collection, metric,
    // evaluator config, and the evaluation task under test.
    const dataset = await MongoEvalDatasetCollection.create({
      teamId: new Types.ObjectId(teamId),
      tmbId: new Types.ObjectId(tmbId),
      name: 'StatusCalculator Test Dataset',
      description: 'Dataset for statusCalculator testing'
    });
    evalDatasetCollectionId = dataset._id.toString();

    const metric = await MongoEvalMetric.create({
      teamId: teamId,
      tmbId: tmbId,
      name: 'StatusCalculator Test Metric',
      description: 'Metric for statusCalculator testing',
      type: EvalMetricTypeEnum.Custom,
      prompt: 'Please evaluate the quality of the response.',
      llmRequired: true,
      userInputRequired: true,
      actualOutputRequired: true,
      expectedOutputRequired: true,
      createTime: new Date(),
      updateTime: new Date()
    });

    evaluators = [
      {
        metric: metric.toObject(),
        runtimeConfig: {
          llm: 'gpt-3.5-turbo'
        },
        thresholdValue: 0.8
      }
    ];

    // Create the evaluation task whose status the tests will query.
    const evaluation = await MongoEvaluation.create({
      teamId: new Types.ObjectId(teamId),
      tmbId: new Types.ObjectId(tmbId),
      name: 'StatusCalculator Test Evaluation',
      description: 'Test evaluation for statusCalculator',
      evalDatasetCollectionId: new Types.ObjectId(evalDatasetCollectionId),
      target,
      evaluators: evaluators,
      usageId: new Types.ObjectId(),
      status: EvaluationStatusEnum.queuing,
      createTime: new Date()
    });
    evaluationId = evaluation._id.toString();
  });

  describe('getEvaluationTaskStatus', () => {
    test('应该返回queuing状态(无任务时)', async () => {
      // Mock: no jobs in either queue.
      (evaluationTaskQueue.getJobs as any).mockResolvedValue([]);
      (evaluationItemQueue.getJobs as any).mockResolvedValue([]);

      const status = await getEvaluationTaskStatus(evaluationId);

      expect(status).toBe(EvaluationStatusEnum.queuing);
    });

    test('应该返回 evaluating 状态(有活跃任务时)', async () => {
      // Mock: an active queue job belonging to this evaluation.
      const mockJob = {
        id: `eval_task_${evaluationId}`,
        data: { evalId: evaluationId },
        getState: vi.fn().mockResolvedValue('active')
      };
      (evaluationTaskQueue.getJobs as any).mockResolvedValue([mockJob]);

      const status = await getEvaluationTaskStatus(evaluationId);

      expect(status).toBe(EvaluationStatusEnum.evaluating);
    });

    test('数据库记录不存在时应该返回 queuing 状态', async () => {
      // Unknown evalId with empty queues falls back to queuing.
      const nonExistentId = new Types.ObjectId().toString();
      (evaluationTaskQueue.getJobs as any).mockResolvedValue([]);
      (evaluationItemQueue.getJobs as any).mockResolvedValue([]);

      const status = await getEvaluationTaskStatus(nonExistentId);

      expect(status).toBe(EvaluationStatusEnum.queuing);
    });

    test('应该正确处理已完成的评估任务', async () => {
      // Mark the evaluation as finished in the DB.
      await MongoEvaluation.updateOne(
        { _id: new Types.ObjectId(evaluationId) },
        { $set: { finishTime: new Date() } }
      );

      (evaluationTaskQueue.getJobs as any).mockResolvedValue([]);
      (evaluationItemQueue.getJobs as any).mockResolvedValue([]);

      const status = await getEvaluationTaskStatus(evaluationId);

      expect(status).toBe(EvaluationStatusEnum.completed);
    });

    test('应该返回 error 状态(任务失败时)', async () => {
      // Mock: a failed queue job for this evaluation.
      const mockJob = {
        id: `eval_task_${evaluationId}`,
        data: { evalId: evaluationId },
        getState: vi.fn().mockResolvedValue('failed')
      };
      (evaluationTaskQueue.getJobs as any).mockResolvedValue([mockJob]);

      const status = await getEvaluationTaskStatus(evaluationId);

      expect(status).toBe(EvaluationStatusEnum.error);
    });
  });

  describe('getEvaluationItemStatus', () => {
    test('应该返回数据库中的状态(无活跃任务时)', async () => {
      // Create an item with no finishTime/errorMessage → DB state is queuing.
      const evalItem = await MongoEvalItem.create({
        evalId: new Types.ObjectId(evaluationId),
        dataItem: { userInput: 'Test input', expectedOutput: 'Test output' }
      });
      const itemId = evalItem._id.toString();

      (evaluationItemQueue.getJobs as any).mockResolvedValue([]);

      const status = await getEvaluationItemStatus(itemId);

      expect(status).toBe(EvaluationStatusEnum.queuing);
    });

    test('应该返回 evaluating 状态(有活跃任务时)', async () => {
      const evalItem = await MongoEvalItem.create({
        evalId: new Types.ObjectId(evaluationId),
        dataItem: { userInput: 'Test input', expectedOutput: 'Test output' }
      });
      const itemId = evalItem._id.toString();

      // Mock: an active queue job for this item.
      const mockJob = {
        id: `eval_item_${itemId}`,
        data: { evalItemId: itemId },
        getState: vi.fn().mockResolvedValue('active')
      };
      (evaluationItemQueue.getJobs as any).mockResolvedValue([mockJob]);

      const status = await getEvaluationItemStatus(itemId);

      expect(status).toBe(EvaluationStatusEnum.evaluating);
    });

    test('数据库记录不存在时应该返回 queuing 状态', async () => {
      const nonExistentId = new Types.ObjectId().toString();
      (evaluationItemQueue.getJobs as any).mockResolvedValue([]);

      const status = await getEvaluationItemStatus(nonExistentId);

      expect(status).toBe(EvaluationStatusEnum.queuing);
    });
  });

  describe('getEvaluationTaskStats', () => {
    test('应该返回正确的统计信息', async () => {
      // Create items in different states: 2 completed, 2 unfinished,
      // 1 finished-with-error.
      const evalItems = await MongoEvalItem.create([
        {
          evalId: new Types.ObjectId(evaluationId),
          dataItem: { userInput: 'Q1', expectedOutput: 'A1' },
          finishTime: new Date()
        },
        {
          evalId: new Types.ObjectId(evaluationId),
          dataItem: { userInput: 'Q2', expectedOutput: 'A2' },
          finishTime: new Date()
        },
        {
          evalId: new Types.ObjectId(evaluationId),
          dataItem: { userInput: 'Q3', expectedOutput: 'A3' }
        },
        {
          evalId: new Types.ObjectId(evaluationId),
          dataItem: { userInput: 'Q4', expectedOutput: 'A4' }
        },
        {
          evalId: new Types.ObjectId(evaluationId),
          dataItem: { userInput: 'Q5', expectedOutput: 'A5' },
          errorMessage: 'Test error',
          finishTime: new Date()
        }
      ]);

      // Mock queue state: one item (the third) is actively running.
      // NOTE(review): job.id does not follow the `eval_item_<id>` pattern used
      // elsewhere — assumes matching is done via job.data, not job.id; confirm.
      const activeJobs = [
        {
          id: 'eval_item_someId',
          data: { evalId: evaluationId, evalItemId: evalItems[2]._id.toString() },
          getState: vi.fn().mockResolvedValue('active')
        }
      ];
      (evaluationItemQueue.getJobs as any).mockResolvedValue(activeJobs);

      const stats = await getEvaluationTaskStats(evaluationId);

      expect(stats.total).toBe(5);
      expect(stats.completed).toBe(2); // items with finishTime and no errorMessage
      expect(stats.evaluating).toBe(1); // items active in the queue
      expect(stats.queuing).toBe(1); // items without finishTime and not in the queue
      expect(stats.error).toBe(1); // items with errorMessage
    });

    test('应该正确处理空评估任务', async () => {
      // No items at all → every counter is zero.
      (evaluationItemQueue.getJobs as any).mockResolvedValue([]);

      const stats = await getEvaluationTaskStats(evaluationId);

      expect(stats.total).toBe(0);
      expect(stats.completed).toBe(0);
      expect(stats.evaluating).toBe(0);
      expect(stats.queuing).toBe(0);
      expect(stats.error).toBe(0);
    });
  });

  describe('getBatchEvaluationItemStatus', () => {
    test('应该返回多个项目的状态映射', async () => {
      // Create two items: one completed in the DB, one pending.
      const evalItems = await MongoEvalItem.create([
        {
          evalId: new Types.ObjectId(evaluationId),
          dataItem: { userInput: 'Q1', expectedOutput: 'A1' },
          finishTime: new Date()
        },
        {
          evalId: new Types.ObjectId(evaluationId),
          dataItem: { userInput: 'Q2', expectedOutput: 'A2' }
        }
      ]);

      const itemIds = evalItems.map((item) => item._id.toString());

      // Mock queue state: the second item is actively running.
      const activeJobs = [
        {
          id: `eval_item_${itemIds[1]}`,
          data: { evalItemId: itemIds[1] },
          getState: vi.fn().mockResolvedValue('active')
        }
      ];
      (evaluationItemQueue.getJobs as any).mockResolvedValue(activeJobs);

      const statusMap = await getBatchEvaluationItemStatus(itemIds);

      expect(statusMap.size).toBe(2);
      expect(statusMap.get(itemIds[0])).toBe(EvaluationStatusEnum.completed);
      expect(statusMap.get(itemIds[1])).toBe(EvaluationStatusEnum.evaluating);
    });

    test('应该正确处理不存在的项目ID', async () => {
      // Unknown ids still get an entry, defaulting to queuing.
      const nonExistentIds = [new Types.ObjectId().toString(), new Types.ObjectId().toString()];

      (evaluationItemQueue.getJobs as any).mockResolvedValue([]);

      const statusMap = await getBatchEvaluationItemStatus(nonExistentIds);

      expect(statusMap.size).toBe(2);
      expect(statusMap.get(nonExistentIds[0])).toBe(EvaluationStatusEnum.queuing);
      expect(statusMap.get(nonExistentIds[1])).toBe(EvaluationStatusEnum.queuing);
    });

    test('应该正确处理空数组', async () => {
      const statusMap = await getBatchEvaluationItemStatus([]);

      expect(statusMap.size).toBe(0);
    });

    test('应该正确处理混合存在和不存在的项目', async () => {
      // Create one item that exists in the DB; pair it with an unknown id.
      const evalItem = await MongoEvalItem.create({
        evalId: new Types.ObjectId(evaluationId),
        dataItem: { userInput: 'Q1', expectedOutput: 'A1' },
        finishTime: new Date()
      });

      const existingId = evalItem._id.toString();
      const nonExistentId = new Types.ObjectId().toString();
      const itemIds = [existingId, nonExistentId];

      (evaluationItemQueue.getJobs as any).mockResolvedValue([]);

      const statusMap = await getBatchEvaluationItemStatus(itemIds);

      expect(statusMap.size).toBe(2);
      expect(statusMap.get(existingId)).toBe(EvaluationStatusEnum.completed);
      expect(statusMap.get(nonExistentId)).toBe(EvaluationStatusEnum.queuing);
    });
  });

  describe('队列状态优先级测试', () => {
    test('队列中的活跃任务应该优先于数据库状态', async () => {
      // Create an item that is completed according to the DB.
      const evalItem = await MongoEvalItem.create({
        evalId: new Types.ObjectId(evaluationId),
        dataItem: { userInput: 'Q1', expectedOutput: 'A1' },
        finishTime: new Date()
      });
      const itemId = evalItem._id.toString();

      // Mock: queue reports an active job for this item.
      const mockJob = {
        id: `eval_item_${itemId}`,
        data: { evalItemId: itemId },
        getState: vi.fn().mockResolvedValue('active')
      };
      (evaluationItemQueue.getJobs as any).mockResolvedValue([mockJob]);

      const status = await getEvaluationItemStatus(itemId);

      // Queue state takes precedence: evaluating, not completed.
      expect(status).toBe(EvaluationStatusEnum.evaluating);
    });

    test('完成的任务队列状态应该被忽略', async () => {
      const evalItem = await MongoEvalItem.create({
        evalId: new Types.ObjectId(evaluationId),
        dataItem: { userInput: 'Q1', expectedOutput: 'A1' },
        finishTime: new Date()
      });
      const itemId = evalItem._id.toString();

      // Mock: queue contains a job already in the 'completed' state.
      const mockJob = {
        id: `eval_item_${itemId}`,
        data: { evalItemId: itemId },
        getState: vi.fn().mockResolvedValue('completed')
      };
      (evaluationItemQueue.getJobs as any).mockResolvedValue([mockJob]);

      const status = await getEvaluationItemStatus(itemId);

      // A completed queue job is ignored; the DB-derived status wins.
      expect(status).toBe(EvaluationStatusEnum.completed);
    });
  });

  describe('错误处理', () => {
    test('队列查询错误应该返回error状态', async () => {
      // Mock: queue lookup throws.
      (evaluationTaskQueue.getJobs as any).mockRejectedValue(new Error('Queue error'));

      const status = await getEvaluationTaskStatus(evaluationId);

      expect(status).toBe(EvaluationStatusEnum.error);
    });
  });
});