From 82fd6a5ef9b09531b7db891c992a13aa1dce976d Mon Sep 17 00:00:00 2001 From: chanzhi82020 Date: Tue, 16 Sep 2025 15:19:29 +0800 Subject: [PATCH] fix: fix eval item processor logic --- packages/global/core/evaluation/api.d.ts | 55 +- packages/global/core/evaluation/type.d.ts | 10 +- .../service/core/evaluation/target/index.ts | 72 +- .../service/core/evaluation/task/index.ts | 524 +++-------- packages/service/core/evaluation/task/mq.ts | 8 +- .../service/core/evaluation/task/processor.ts | 212 +++-- .../service/core/evaluation/task/schema.ts | 8 +- .../core/evaluation/task/dataItem/delete.ts | 58 -- .../core/evaluation/task/dataItem/export.ts | 55 -- .../api/core/evaluation/task/dataItem/list.ts | 42 - .../core/evaluation/task/dataItem/retry.ts | 56 -- .../core/evaluation/task/dataItem/update.ts | 67 -- .../pages/api/core/evaluation/task/list.ts | 5 +- .../evaluation/task/dataItem/delete.test.ts | 149 --- .../evaluation/task/dataItem/export.test.ts | 225 ----- .../evaluation/task/dataItem/list.test.ts | 226 ----- .../evaluation/task/dataItem/retry.test.ts | 113 --- .../evaluation/task/dataItem/update.test.ts | 131 --- .../api/core/evaluation/task/list.test.ts | 10 +- .../service/core/evaluation/task.test.ts | 880 ++++++++++-------- 20 files changed, 779 insertions(+), 2127 deletions(-) delete mode 100644 projects/app/src/pages/api/core/evaluation/task/dataItem/delete.ts delete mode 100644 projects/app/src/pages/api/core/evaluation/task/dataItem/export.ts delete mode 100644 projects/app/src/pages/api/core/evaluation/task/dataItem/list.ts delete mode 100644 projects/app/src/pages/api/core/evaluation/task/dataItem/retry.ts delete mode 100644 projects/app/src/pages/api/core/evaluation/task/dataItem/update.ts delete mode 100644 test/cases/pages/api/core/evaluation/task/dataItem/delete.test.ts delete mode 100644 test/cases/pages/api/core/evaluation/task/dataItem/export.test.ts delete mode 100644 test/cases/pages/api/core/evaluation/task/dataItem/list.test.ts delete mode 100644 test/cases/pages/api/core/evaluation/task/dataItem/retry.test.ts delete mode 100644 test/cases/pages/api/core/evaluation/task/dataItem/update.test.ts diff --git a/packages/global/core/evaluation/api.d.ts b/packages/global/core/evaluation/api.d.ts index f2dfdb71ef28..ff6dbc156af2 100644 --- a/packages/global/core/evaluation/api.d.ts +++ b/packages/global/core/evaluation/api.d.ts @@ -35,7 +35,6 @@ export type ListEvaluationsRequest = PaginationProps<{ searchKey?: string; appName?: string; appId?: string; - versionId?: string; }>; export type ListEvaluationsResponse = PaginationResponse; @@ -79,13 +78,7 @@ export type ListEvaluationItemsResponse = PaginationResponse; @@ -98,49 +91,3 @@ export type RetryEvaluationItemResponse = MessageResponse; // Delete Evaluation Item export type DeleteEvaluationItemRequest = EvalItemIdQuery; export type DeleteEvaluationItemResponse = MessageResponse; - -// ===== DataItem Aggregation API ===== - -// Query for dataItem ID -export type DataItemIdQuery = { dataItemId: string }; - -// DataItem List (Grouped by DataItem) -export type DataItemListRequest = PaginationProps< - EvalIdQuery & { - status?: number; // Optional: filter by status - keyword?: string; // Optional: search in dataItem content - } ->; -export type DataItemGroupedItem = { - dataItemId: string; - dataItem: EvaluationDataItemType; - items: EvaluationItemDisplayType[]; - statistics?: EvaluationStatistics; -}; -export type DataItemListResponse = PaginationResponse; - -// Delete DataItem Items -export type DeleteDataItemRequest = DataItemIdQuery & EvalIdQuery; -export type DeleteDataItemResponse = { - message: string; - deletedCount: number; -}; - -// Retry DataItem Items -export type RetryDataItemRequest = DataItemIdQuery & EvalIdQuery; -export type RetryDataItemResponse = { - message: string; - retriedCount: number; -}; - -// Update DataItem Items -export type UpdateDataItemRequest = DataItemIdQuery & EvalIdQuery & Partial; -export type UpdateDataItemResponse = { - message: string; - updatedCount: number; -}; - -// Export All DataItems Results -export type ExportDataItemsResultsRequest = EvalIdQuery & { - format?: 'csv' | 'json'; -}; diff --git a/packages/global/core/evaluation/type.d.ts b/packages/global/core/evaluation/type.d.ts index 5e00299a01ce..168ff6f19c04 100644 --- a/packages/global/core/evaluation/type.d.ts +++ b/packages/global/core/evaluation/type.d.ts @@ -101,17 +101,17 @@ export type EvaluationDataItemType = EvalDatasetDataSchemaType & { targetCallParams?: TargetCallParams; }; -// Evaluation item type (atomic: one dataItem + one target + one evaluator) +// Evaluation item type (batch: one dataItem + one target + multiple evaluators) export type EvaluationItemSchemaType = { _id: string; evalId: string; // Dependent component configurations dataItem: EvaluationDataItemType; target: EvalTarget; - evaluator: EvaluatorSchema; // Single evaluator configuration + evaluators: EvaluatorSchema[]; // Multiple evaluator configurations // Execution results targetOutput?: TargetOutput; // Actual output from target - evaluatorOutput?: MetricResult; // Result from single evaluator + evaluatorOutputs?: MetricResult[]; // Results from multiple evaluators status: EvaluationStatusEnum; retry: number; finishTime?: Date; @@ -157,9 +157,7 @@ export type EvaluationDisplayType = Pick< sourceMember: SourceMemberType; }; -export type EvaluationItemDisplayType = EvaluationItemSchemaType & { - evalItemId: string; -}; +export type EvaluationItemDisplayType = EvaluationItemSchemaType; export interface CreateEvaluationParams { name: string; diff --git a/packages/service/core/evaluation/target/index.ts b/packages/service/core/evaluation/target/index.ts index 113f0e1e1cc0..35967e5a3aa7 100644 --- a/packages/service/core/evaluation/target/index.ts +++ b/packages/service/core/evaluation/target/index.ts @@ -112,42 +112,42 @@ export class WorkflowTarget extends EvaluationTarget { // Construct conversation history based on input.context const histories: (UserChatItemType | AIChatItemType)[] = []; - if (input.context && input.context.length > 0) { - // Convert context strings to alternating user-ai conversation history - // Assume context format: [user1, ai1, user2, ai2, ...] - for (let i = 0; i < input.context.length; i++) { - const isUser = i % 2 === 0; - const content = input.context[i]; - - if (isUser) { - // User message - histories.push({ - obj: ChatRoleEnum.Human, - value: [ - { - type: ChatItemValueTypeEnum.text, - text: { - content - } - } - ] - }); - } else { - // AI message - histories.push({ - obj: ChatRoleEnum.AI, - value: [ - { - type: ChatItemValueTypeEnum.text, - text: { - content - } - } - ] - }); - } - } - } + // if (input.histories && input.histories.length > 0) { + // // Convert histories strings to alternating user-ai conversation history + // // Assume histories format: [user1, ai1, user2, ai2, ...] + // for (let i = 0; i < input.histories.length; i++) { + // const isUser = i % 2 === 0; + // const content = input.histories[i]; + + // if (isUser) { + // // User message + // histories.push({ + // obj: ChatRoleEnum.Human, + // value: [ + // { + // type: ChatItemValueTypeEnum.text, + // text: { + // content + // } + // } + // ] + // }); + // } else { + // // AI message + // histories.push({ + // obj: ChatRoleEnum.AI, + // value: [ + // { + // type: ChatItemValueTypeEnum.text, + // text: { + // content + // } + // } + // ] + // }); + // } + // } + // } const chatId = getNanoid(); diff --git a/packages/service/core/evaluation/task/index.ts b/packages/service/core/evaluation/task/index.ts index a4f6266b513b..867e6dfa3e6c 100644 --- a/packages/service/core/evaluation/task/index.ts +++ b/packages/service/core/evaluation/task/index.ts @@ -5,11 +5,8 @@ import type { CreateEvaluationParams, EvaluationItemDisplayType, TargetCallParams, - EvaluationDataItemType, EvaluationDisplayType } from '@fastgpt/global/core/evaluation/type'; -import type { DataItemListResponse } from '@fastgpt/global/core/evaluation/api'; -import type { MetricResult } from '@fastgpt/global/core/evaluation/metric/type'; import { Types } from 'mongoose'; import { EvaluationStatusEnum } from '@fastgpt/global/core/evaluation/constants'; import { @@ -26,9 +23,6 @@ import { EvaluationErrEnum } from '@fastgpt/global/common/error/code/evaluation' import { mongoSessionRun } from '../../../common/mongo/sessionRun'; import { type ClientSession } from '../../../common/mongo'; -// Constants -const MAX_EXPORT_PAGE_SIZE = 100000; - export class EvaluationTaskService { static async createEvaluation( params: CreateEvaluationParams & { @@ -47,8 +41,8 @@ export class EvaluationTaskService { // Apply default configuration to evaluators (weights, thresholds, etc.) const { evaluators: evaluatorsWithDefaultConfig, summaryConfigs } = buildEvalDataConfig( - evaluationParams.evaluators - ); + evaluationParams.evaluators + ); const createAndStart = async (session: ClientSession) => { // Create evaluation within transaction const evaluation = await MongoEvaluation.create( @@ -173,17 +167,10 @@ export class EvaluationTaskService { tmbId?: string, isOwner: boolean = false, appName?: string, - appId?: string, - versionId?: string + appId?: string ): Promise<{ list: EvaluationDisplayType[]; total: number }> { // Build basic filter and pagination const filter: any = { teamId: new Types.ObjectId(teamId) }; - if (searchKey) { - filter.$or = [ - { name: { $regex: searchKey, $options: 'i' } }, - { description: { $regex: searchKey, $options: 'i' } } - ]; - } const skip = offset; const limit = pageSize; const sort = { createTime: -1 as const }; @@ -247,7 +234,7 @@ export class EvaluationTaskService { ]; // Add target filtering stage if any target filters are provided - if (appName || appId || versionId) { + if (appName || appId) { const targetFilter: any = {}; if (appName) { @@ -258,13 +245,23 @@ export class EvaluationTaskService { targetFilter['target.config.appId'] = appId; } - if (versionId) { - targetFilter['target.config.versionId'] = versionId; - } - aggregationPipeline.push({ $match: targetFilter }); } + // Add searchKey filtering after target config is populated (includes versionId/versionName search) + if (searchKey) { + aggregationPipeline.push({ + $match: { + $or: [ + { name: { $regex: searchKey, $options: 'i' } }, + { description: { $regex: searchKey, $options: 'i' } }, + { 'target.config.versionId': { $regex: searchKey, $options: 'i' } }, + { 'target.config.versionName': { $regex: searchKey, $options: 'i' } } + ] + } + }); + } + const [evaluations, total] = await Promise.all([ MongoEvaluation.aggregate([ ...aggregationPipeline, @@ -276,7 +273,7 @@ export class EvaluationTaskService { pipeline: [ { $match: { - $expr: { $eq: ['$evalId', '$evalId'] } + $expr: { $eq: ['$evalId', '$$evalId'] } } }, { @@ -402,7 +399,7 @@ export class EvaluationTaskService { pipeline: [ { $match: { - $expr: { $eq: ['$evalId', '$evalId'] } + $expr: { $eq: ['$evalId', '$$evalId'] } } }, { @@ -482,35 +479,6 @@ export class EvaluationTaskService { return evaluation; } - static async listEvaluationItems( - evalId: string, - teamId: string, - offset: number = 0, - pageSize: number = 20 - ): Promise<{ items: EvaluationItemDisplayType[]; total: number }> { - const evaluation = await this.getEvaluation(evalId, teamId); - - const skip = offset; - const limit = pageSize; - - const [items, total] = await Promise.all([ - MongoEvalItem.find({ evalId: evaluation._id }) - .sort({ createTime: -1 }) - .skip(skip) - .limit(limit) - .lean() - .then((items) => - items.map((item) => ({ - ...item, - evalItemId: item._id.toString() - })) - ), - MongoEvalItem.countDocuments({ evalId: evaluation._id }) - ]); - - return { items, total }; - } - static async startEvaluation(evalId: string, teamId: string): Promise { const evaluation = await this.getEvaluation(evalId, teamId); @@ -676,6 +644,29 @@ export class EvaluationTaskService { // ========================= Evaluation Item Related APIs ========================= + static async listEvaluationItems( + evalId: string, + teamId: string, + offset: number = 0, + pageSize: number = 20 + ): Promise<{ items: EvaluationItemDisplayType[]; total: number }> { + const evaluation = await this.getEvaluation(evalId, teamId); + + const skip = offset; + const limit = pageSize; + + const [items, total] = await Promise.all([ + MongoEvalItem.find({ evalId: evaluation._id }) + .sort({ createTime: -1 }) + .skip(skip) + .limit(limit) + .lean(), + MongoEvalItem.countDocuments({ evalId: evaluation._id }) + ]); + + return { items, total }; + } + static async getEvaluationItem( itemId: string, teamId: string @@ -749,6 +740,38 @@ export class EvaluationTaskService { if (result.matchedCount === 0) { throw new Error(EvaluationErrEnum.evalItemNotFound); } + + // If actual update occurred, re-queue the item for evaluation + if (result.modifiedCount > 0) { + // Get the updated item to determine the evalId + const updatedItem = await MongoEvalItem.findById(itemId, 'evalId'); + if (updatedItem) { + // Reset evaluation results and re-queue + await MongoEvalItem.updateOne( + { _id: new Types.ObjectId(itemId) }, + { + $set: { + status: EvaluationStatusEnum.queuing, + retry: 3 + }, + $unset: { + targetOutput: 1, + evaluatorOutputs: 1, + finishTime: 1, + errorMessage: 1 + } + } + ); + + // Re-submit to evaluation queue + await evaluationItemQueue.add(`eval_item_update_${itemId}`, { + evalId: updatedItem.evalId.toString(), + evalItemId: itemId + }); + + addLog.debug(`[Evaluation] Item updated and re-queued for evaluation: ${itemId}`); + } + } } static async deleteEvaluationItem(itemId: string, teamId: string): Promise { @@ -813,7 +836,7 @@ export class EvaluationTaskService { status: EvaluationStatusEnum.queuing, retry: Math.max(item.retry || 0, 1), // Ensure at least 1 retry chance targetOutput: {}, - evaluatorOutput: {} + evaluatorOutputs: [] }, $unset: { finishTime: 1, @@ -886,7 +909,7 @@ export class EvaluationTaskService { $set: { status: EvaluationStatusEnum.queuing, targetOutput: {}, - evaluatorOutput: {} + evaluatorOutputs: [] }, $unset: { finishTime: 1, @@ -934,22 +957,9 @@ export class EvaluationTaskService { static async getEvaluationItemResult( itemId: string, teamId: string - ): Promise<{ - item: EvaluationItemSchemaType; - dataItem: EvaluationDataItemType; - response?: string; - result?: MetricResult; - score?: number; - }> { + ): Promise { const item = await this.getEvaluationItem(itemId, teamId); - - return { - item, - dataItem: item.dataItem, - response: item.targetOutput?.actualOutput, - result: item.evaluatorOutput, - score: item.evaluatorOutput?.data?.score - }; + return item; } // Search evaluation items @@ -991,7 +1001,7 @@ export class EvaluationTaskService { scoreFilter.$lte = scoreRange.max; } if (Object.keys(scoreFilter).length > 0) { - filter['evaluatorOutput.data.score'] = scoreFilter; + filter['evaluatorOutputs.0.data.score'] = scoreFilter; } } @@ -1043,10 +1053,10 @@ export class EvaluationTaskService { userInput: item.dataItem?.userInput, expectedOutput: item.dataItem?.expectedOutput, actualOutput: item.targetOutput?.actualOutput, - score: item.evaluatorOutput?.data?.score, + scores: item.evaluatorOutputs?.map((output) => output?.data?.score) || [], status: item.status, targetOutput: item.targetOutput, - evaluatorOutput: item.evaluatorOutput, + evaluatorOutputs: item.evaluatorOutputs, errorMessage: item.errorMessage, finishTime: item.finishTime })); @@ -1058,12 +1068,23 @@ export class EvaluationTaskService { return { results: Buffer.from(''), total: 0 }; } + // Collect all unique metric names from evaluator outputs + const metricNames = new Set(); + items.forEach((item) => { + item.evaluatorOutputs?.forEach((output) => { + if (output?.data?.metricName) { + metricNames.add(output.data.metricName); + } + }); + }); + const sortedMetricNames = Array.from(metricNames).sort(); + const headers = [ 'ItemId', 'UserInput', 'ExpectedOutput', 'ActualOutput', - 'Score', + ...sortedMetricNames, // Dynamic metric columns 'Status', 'ErrorMessage', 'FinishTime' @@ -1072,12 +1093,21 @@ export class EvaluationTaskService { const csvRows = [headers.join(',')]; items.forEach((item) => { + // Create a map of metric name to score for easier lookup + const metricScoreMap = new Map(); + item.evaluatorOutputs?.forEach((output) => { + if (output?.data?.metricName && output.data.score !== undefined) { + metricScoreMap.set(output.data.metricName, output.data.score); + } + }); + const row = [ item._id.toString(), `"${(item.dataItem?.userInput || '').replace(/"/g, '""')}"`, `"${(item.dataItem?.expectedOutput || '').replace(/"/g, '""')}"`, `"${(item.targetOutput?.actualOutput || '').replace(/"/g, '""')}"`, - item.evaluatorOutput?.data?.score || '', + // Add scores for each metric column in the same order as headers + ...sortedMetricNames.map((metricName) => metricScoreMap.get(metricName) || ''), item.status || '', `"${(item.errorMessage || '').replace(/"/g, '""')}"`, item.finishTime || '' @@ -1088,351 +1118,5 @@ export class EvaluationTaskService { return { results: Buffer.from(csvRows.join('\n')), total }; } } - - // ========================= DataItem Aggregation APIs ========================= - - static async listDataItemsGrouped( - teamId: string, - options: { - evalId: string; - status?: number; - keyword?: string; - offset?: number; - pageSize?: number; - } - ): Promise { - const { evalId, status, keyword, offset = 0, pageSize = 20 } = options; - - // Verify team access to the evaluation task - await this.getEvaluation(evalId, teamId); - - // Build match stage - const matchStage: any = { - evalId: new Types.ObjectId(evalId) - }; - - if (status !== undefined) { - matchStage.status = status; - } - - if (keyword) { - matchStage.$or = [ - { 'dataItem.userInput': { $regex: keyword, $options: 'i' } }, - { 'dataItem.expectedOutput': { $regex: keyword, $options: 'i' } } - ]; - } - - // Build aggregation pipeline - const aggregationPipeline = [ - { $match: matchStage }, - { - $group: { - _id: '$dataItem._id', - dataItem: { $first: '$dataItem' }, - items: { $push: '$$ROOT' }, - totalItems: { $sum: 1 }, - completedItems: { - $sum: { $cond: [{ $eq: ['$status', EvaluationStatusEnum.completed] }, 1, 0] } - }, - errorItems: { - $sum: { $cond: [{ $eq: ['$status', EvaluationStatusEnum.error] }, 1, 0] } - } - } - }, - { - $addFields: { - dataItemId: '$_id', - 'statistics.totalItems': '$totalItems', - 'statistics.completedItems': '$completedItems', - 'statistics.errorItems': '$errorItems' - } - }, - { $sort: { totalItems: -1 as const, _id: 1 as const } } - ]; - - // Simple Promise.all approach like listEvaluationItems - const [list, total] = await Promise.all([ - // Get paginated results with projection - MongoEvalItem.aggregate([ - ...aggregationPipeline, - { $skip: offset }, - { $limit: pageSize }, - { - $project: { - dataItemId: 1, - dataItem: 1, - items: { - $map: { - input: '$items', - as: 'item', - in: { - $mergeObjects: ['$$item', { evalItemId: { $toString: '$$item._id' } }] - } - } - }, - statistics: { - totalItems: '$totalItems', - completedItems: '$completedItems', - errorItems: '$errorItems' - } - } - } - ]), - // Get total count - MongoEvalItem.aggregate([...aggregationPipeline, { $count: 'total' }]).then( - (result) => result[0]?.total || 0 - ) - ]); - - return { - list, - total - }; - } - - static async deleteEvaluationItemsByDataItem( - dataItemId: string, - teamId: string, - evalId: string - ): Promise<{ deletedCount: number }> { - // Verify team access to the evaluation task - await this.getEvaluation(evalId, teamId); - - const filter: any = { - 'dataItem._id': new Types.ObjectId(dataItemId), - evalId: new Types.ObjectId(evalId) - }; - - // Find items to delete - const itemsToDelete = await MongoEvalItem.find(filter).lean(); - - if (itemsToDelete.length === 0) { - return { deletedCount: 0 }; - } - - const deleteOperation = async (session: ClientSession) => { - // Clean up queue jobs for items to be deleted - const itemIds = itemsToDelete.map((item) => item._id.toString()); - const cleanupPromises = itemIds.map((itemId) => - removeEvaluationItemJobsByItemId(itemId, { - forceCleanActiveJobs: true, - retryAttempts: 3, - retryDelay: 200 - }) - ); - - await Promise.allSettled(cleanupPromises); - - // Delete the items - const result = await MongoEvalItem.deleteMany(filter, { session }); - - addLog.debug(`[Evaluation] Deleted ${result.deletedCount} items for dataItem: ${dataItemId}`); - - return result.deletedCount; - }; - - const deletedCount = await mongoSessionRun(deleteOperation); - - return { - deletedCount - }; - } - - static async retryEvaluationItemsByDataItem( - dataItemId: string, - teamId: string, - evalId: string - ): Promise<{ retriedCount: number }> { - // Verify evaluation access first - await this.getEvaluation(evalId, teamId); - - const filter: any = { - 'dataItem._id': new Types.ObjectId(dataItemId), - evalId: new Types.ObjectId(evalId), - status: EvaluationStatusEnum.error - }; - - // Find items to retry - const itemsToRetry = await MongoEvalItem.find(filter).lean(); - - if (itemsToRetry.length === 0) { - return { retriedCount: 0 }; - } - - const retryOperation = async (session: ClientSession) => { - // Clean up existing jobs - const itemIds = itemsToRetry.map((item) => item._id.toString()); - const cleanupPromises = itemIds.map((itemId) => - removeEvaluationItemJobsByItemId(itemId, { - forceCleanActiveJobs: true, - retryAttempts: 3, - retryDelay: 200 - }) - ); - - await Promise.allSettled(cleanupPromises); - - // Update items status - const result = await MongoEvalItem.updateMany( - { _id: { $in: itemsToRetry.map((item) => item._id) } }, - { - $set: { - status: EvaluationStatusEnum.queuing, - targetOutput: {}, - evaluatorOutput: {} - }, - $unset: { - finishTime: 1, - errorMessage: 1 - }, - $inc: { retry: 1 } - }, - { session } - ); - - // Resubmit to queue - const jobs = itemsToRetry.map((item, index) => ({ - name: `eval_item_dataitem_retry_${dataItemId}_${index}`, - data: { - evalId: item.evalId, - evalItemId: item._id.toString() - }, - opts: { - delay: index * 100 - } - })); - - await evaluationItemQueue.addBulk(jobs); - - addLog.debug( - `[Evaluation] Retried ${result.modifiedCount} items for dataItem: ${dataItemId}` - ); - - return result.modifiedCount; - }; - - const retriedCount = await mongoSessionRun(retryOperation); - - return { - retriedCount - }; - } - - static async updateEvaluationItemsByDataItem( - dataItemId: string, - updates: { - userInput?: string; - expectedOutput?: string; - context?: string[]; - targetCallParams?: TargetCallParams; - }, - teamId: string, - evalId: string - ): Promise<{ updatedCount: number }> { - // Verify evaluation access first - await this.getEvaluation(evalId, teamId); - - // Build MongoDB update object with dot notation - const updateObj = this.buildEvaluationDataItemUpdateObject(updates); - if (Object.keys(updateObj).length === 0) { - return { updatedCount: 0 }; - } - - const filter: any = { - 'dataItem._id': new Types.ObjectId(dataItemId), - evalId: new Types.ObjectId(evalId) - }; - - const result = await MongoEvalItem.updateMany(filter, { $set: updateObj }); - - addLog.debug(`[Evaluation] Updated ${result.modifiedCount} items for dataItem: ${dataItemId}`); - - return { - updatedCount: result.modifiedCount - }; - } - - static async exportEvaluationResultsGroupedByDataItem( - teamId: string, - evalId: string, - format: 'csv' | 'json' = 'json' - ): Promise<{ results: Buffer; totalItems: number }> { - // Get evaluation config for metric names - const evaluation = await this.getEvaluation(evalId, teamId); - - // Use listDataItemsGrouped to get all dataItems (large pageSize to get all) - const { list: dataItems } = await this.listDataItemsGrouped(teamId, { - evalId, - offset: 0, - pageSize: MAX_EXPORT_PAGE_SIZE // Large pageSize to get all items - }); - - if (dataItems.length === 0) { - const emptyResult = format === 'json' ? '[]' : ''; - return { - results: Buffer.from(emptyResult), - totalItems: 0 - }; - } - - // Extract metric names from evaluation config - const metricNames = evaluation.evaluators.map( - (evaluator) => evaluator.metric.name || evaluator.metric._id || 'Unknown Metric' - ); - - // Transform listDataItemsGrouped result to export format (remove totalItems, completedItems, errorItems) - const exportData = dataItems.map((groupedItem: any) => { - const dataItemExport = { - dataItemId: groupedItem.dataItemId, - userInput: groupedItem.dataItem?.userInput, - expectedOutput: groupedItem.dataItem?.expectedOutput, - actualOutput: groupedItem.items.find((item: any) => item.targetOutput?.actualOutput) - ?.targetOutput?.actualOutput, - // Build metric scores object - metricScores: {} as Record - }; - - // Add scores for each metric from the grouped items - groupedItem.items.forEach((item: any) => { - if (item.evaluator?.metric?.name && item.evaluatorOutput?.data?.score !== undefined) { - const metricName = item.evaluator.metric.name; - dataItemExport.metricScores[metricName] = item.evaluatorOutput.data.score; - } - }); - - return dataItemExport; - }); - - if (format === 'json') { - return { - results: Buffer.from(JSON.stringify(exportData, null, 2)), - totalItems: exportData.length - }; - } else { - // CSV format with dynamic metric columns (remove totalItems, completedItems, errorItems) - const baseHeaders = ['DataItemId', 'UserInput', 'ExpectedOutput', 'ActualOutput']; - - const headers = [...baseHeaders, ...metricNames]; - const csvRows = [headers.join(',')]; - - exportData.forEach((dataItem) => { - const row = [ - dataItem.dataItemId, - `"${(dataItem.userInput || '').replace(/"/g, '""')}"`, - `"${(dataItem.expectedOutput || '').replace(/"/g, '""')}"`, - `"${(dataItem.actualOutput || '').replace(/"/g, '""')}"`, - // Add metric scores in the same order as headers - ...metricNames.map((metricName) => dataItem.metricScores[metricName] || '') - ]; - - csvRows.push(row.join(',')); - }); - - return { - results: Buffer.from(csvRows.join('\n')), - totalItems: exportData.length - }; - } - } } export { MongoEvaluation }; diff --git a/packages/service/core/evaluation/task/mq.ts b/packages/service/core/evaluation/task/mq.ts index 1d20bccaecd5..21a3730fa436 100644 --- a/packages/service/core/evaluation/task/mq.ts +++ b/packages/service/core/evaluation/task/mq.ts @@ -28,12 +28,16 @@ export const evaluationItemQueue = getQueue(QueueNames.ev export const getEvaluationTaskWorker = (processor: any) => getWorker(QueueNames.evalTask, processor, { - concurrency: Number(process.env.EVAL_TASK_CONCURRENCY) || 3 + concurrency: Number(process.env.EVAL_TASK_CONCURRENCY) || 3, + stalledInterval: Number(process.env.EVAL_TASK_STALLED_INTERVAL) || 60000, // 1 minute + maxStalledCount: Number(process.env.EVAL_TASK_MAX_STALLED_COUNT) || 3 }); export const getEvaluationItemWorker = (processor: any) => getWorker(QueueNames.evalTaskItem, processor, { - concurrency: Number(process.env.EVAL_ITEM_CONCURRENCY) || 10 + concurrency: Number(process.env.EVAL_ITEM_CONCURRENCY) || 10, + stalledInterval: Number(process.env.EVAL_ITEM_STALLED_INTERVAL) || 300000, // 5 minutes + maxStalledCount: Number(process.env.EVAL_ITEM_MAX_STALLED_COUNT) || 3 }); export const removeEvaluationTaskJob = async ( diff --git a/packages/service/core/evaluation/task/processor.ts b/packages/service/core/evaluation/task/processor.ts index 2ff97840bfde..8ca08bfef389 100644 --- a/packages/service/core/evaluation/task/processor.ts +++ b/packages/service/core/evaluation/task/processor.ts @@ -2,7 +2,8 @@ import { addLog } from '../../../common/system/log'; import type { Job } from '../../../common/bullmq'; import type { EvaluationTaskJobData, - EvaluationItemJobData + EvaluationItemJobData, + TargetOutput } from '@fastgpt/global/core/evaluation/type'; import { evaluationItemQueue, getEvaluationItemWorker, getEvaluationTaskWorker } from './mq'; import { MongoEvaluation, MongoEvalItem } from './schema'; @@ -17,6 +18,7 @@ import { EvaluationErrEnum } from '@fastgpt/global/common/error/code/evaluation' import { getErrText } from '@fastgpt/global/common/error/utils'; import { createMergedEvaluationUsage } from '../utils/usage'; import { EvaluationSummaryService } from '../summary'; +import type { MetricResult } from '@fastgpt/global/core/evaluation/metric/type'; // Sleep utility function const sleep = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms)); @@ -52,6 +54,31 @@ export class EvaluationStageError extends Error { } } +// Aggregated error class for multiple evaluator errors +export class EvaluatorAggregatedError extends Error { + public readonly errors: Array<{ + evaluatorName: string; + error: string; + retriable: boolean; + }>; + public readonly retriable: boolean; + + constructor(errors: Array<{ evaluatorName: string; error: string }>) { + const errorMessages = errors.map((e) => `${e.evaluatorName}: ${e.error}`); + super(`Evaluator errors: ${errorMessages.join('; ')}`); + this.name = 'EvaluatorAggregatedError'; + + // Check retriability for each error and determine overall retriability + this.errors = errors.map((e) => ({ + ...e, + retriable: isEvaluatorExecutionRetriable(e.error) + })); + + // Consider aggregated error retriable if any individual error is retriable + this.retriable = this.errors.some((e) => e.retriable); + } +} + // Distributed lock implementation const distributedLocks = new Map(); @@ -161,21 +188,17 @@ const analyzeError = ( return { isRetriable: false }; }; -// Backward compatibility function -const matchesRetriablePattern = (error: any): boolean => { - return analyzeError(error).isRetriable; -}; - // Determine if target execution error should be retriable const isTargetExecutionRetriable = (error: any): boolean => { if (error === TeamErrEnum.aiPointsNotEnough) return false; - return matchesRetriablePattern(error); + if (error === EvaluationErrEnum.evalTargetOutputRequired) return true; + return analyzeError(error).isRetriable; }; // Determine if evaluator execution error should be retriable const isEvaluatorExecutionRetriable = (error: any): boolean => { if (error === TeamErrEnum.aiPointsNotEnough) return false; - return matchesRetriablePattern(error); + return analyzeError(error).isRetriable; }; // General error retriability check for handleEvalItemError @@ -185,7 +208,12 @@ const isRetriableError = (error: any): boolean => { return error.retriable; } - return matchesRetriablePattern(error); + // If it's an aggregated error, use its retriable flag + if (error instanceof EvaluatorAggregatedError) { + return error.retriable; + } + + return analyzeError(error).isRetriable; }; // Complete evaluation task - simplified version based on status enum statistics @@ -412,6 +440,9 @@ const evaluationTaskProcessor = async (job: Job) => { addLog.debug(`[Evaluation] Start processing evaluation task: ${evalId}`); try { + // Report initial progress + await job.updateProgress(0); + // Get evaluation task information const evaluation = await MongoEvaluation.findById(evalId).lean(); if (!evaluation) { @@ -425,6 +456,9 @@ const evaluationTaskProcessor = async (job: Job) => { teamId: evaluation.teamId }).lean(); + // Report progress: dataset loaded + await job.updateProgress(20); + // TODO: Handle targetCallParams population for evaluation data items // The dataItems loaded from dataset only contain basic EvalDatasetDataSchemaType fields // but evaluation items need EvaluationDataItemType (including targetCallParams). @@ -476,24 +510,25 @@ const evaluationTaskProcessor = async (job: Job) => { return; } - // Create evaluation items for each dataItem and each evaluator (atomic structure) + // Create evaluation items for each dataItem with all evaluators (batch structure) const evalItems = []; for (const dataItem of dataItems) { - for (const evaluator of evaluation.evaluators) { - evalItems.push({ - evalId, - dataItem, - target: evaluation.target, - evaluator, - status: EvaluationStatusEnum.queuing, - retry: maxRetries - }); - } + evalItems.push({ + evalId, + dataItem, + target: evaluation.target, + evaluators: evaluation.evaluators, // All evaluators for this dataItem + status: EvaluationStatusEnum.queuing, + retry: maxRetries + }); } // Batch insert evaluation items const insertedItems = await MongoEvalItem.insertMany(evalItems); - addLog.debug(`[Evaluation] Created ${insertedItems.length} atomic evaluation items`); + addLog.debug(`[Evaluation] Created ${insertedItems.length} batch evaluation items`); + + // Report progress: items created + await job.updateProgress(80); // Submit to evaluation item queue for concurrent processing const jobs = insertedItems.map((item, index) => ({ @@ -509,6 +544,9 @@ const evaluationTaskProcessor = async (job: Job) => { await evaluationItemQueue.addBulk(jobs); + // Report final progress + await job.updateProgress(100); + addLog.debug( `[Evaluation] Task decomposition completed: ${evalId}, submitted ${jobs.length} evaluation items to queue` ); @@ -536,6 +574,9 @@ const evaluationItemProcessor = async (job: Job) => { addLog.debug(`[Evaluation] Start processing evaluation item: ${evalItemId}`); try { + // Report initial progress + await job.updateProgress(0); + // Get evaluation item information const evalItem = await MongoEvalItem.findById(evalItemId); if (!evalItem) { @@ -574,8 +615,8 @@ const evaluationItemProcessor = async (job: Job) => { } // Initialize outputs - check for existing results first for resume capability - let targetOutput: any = undefined; - let evaluatorOutput: any = undefined; + let targetOutput: TargetOutput | undefined = undefined; + let evaluatorOutputs: MetricResult[] = []; // Resume from checkpoint only if in evaluating status if (evalItem.status === EvaluationStatusEnum.evaluating) { @@ -583,9 +624,9 @@ const evaluationItemProcessor = async (job: Job) => { addLog.debug(`[Evaluation] Resuming targetOutput from evalItem: ${evalItemId}`); targetOutput = evalItem.targetOutput; } - if (evalItem.evaluatorOutput?.data?.score) { - addLog.debug(`[Evaluation] Resuming evaluatorOutput from evalItem: ${evalItemId}`); - evaluatorOutput = evalItem.evaluatorOutput; + if (evalItem.evaluatorOutputs && evalItem.evaluatorOutputs.length > 0) { + addLog.debug(`[Evaluation] Resuming evaluatorOutputs from evalItem: ${evalItemId}`); + evaluatorOutputs = evalItem.evaluatorOutputs; } } else { // For queuing or error status, always start from scratch @@ -600,6 +641,9 @@ const evaluationItemProcessor = async (job: Job) => { { $set: { status: EvaluationStatusEnum.evaluating } } ); + // Report progress: setup completed + await job.updateProgress(10); + // 1. Call evaluation target (if not already done) if (!targetOutput || !targetOutput.actualOutput) { try { @@ -616,6 +660,9 @@ const evaluationItemProcessor = async (job: Job) => { { $set: { targetOutput: targetOutput } } ); + // Report progress: target execution completed + await job.updateProgress(30); + // Record usage from target call if (targetOutput.usage) { const totalPoints = targetOutput.usage.reduce( @@ -631,6 +678,10 @@ const evaluationItemProcessor = async (job: Job) => { type: 'target' }); } + + if (!targetOutput.actualOutput) { + throw new Error(EvaluationErrEnum.evalTargetOutputRequired); + } } catch (error) { // Normalize target execution error const retriable = isTargetExecutionRetriable(error); @@ -645,24 +696,77 @@ const evaluationItemProcessor = async (job: Job) => { } } - // 2. Execute evaluator (if not already done) - let totalMetricPoints = 0; + // 2. Execute evaluators (batch processing - only execute missing ones) + const completedCount = evaluatorOutputs.filter( + (output) => output?.data?.score !== undefined + ).length; + const needToExecute = evalItem.evaluators.length - completedCount; + + if (needToExecute > 0) { + const errors: Array<{ evaluatorName: string; error: string }> = []; - if (!evaluatorOutput || !evaluatorOutput.data?.score) { try { - const evaluatorInstance = await createEvaluatorInstance(evalItem.evaluator, { - validate: false - }); + // Execute only missing evaluators + for (let i = completedCount; i < evalItem.evaluators.length; i++) { + const evaluator = evalItem.evaluators[i]; - evaluatorOutput = await evaluatorInstance.evaluate({ - userInput: evalItem.dataItem.userInput, - expectedOutput: evalItem.dataItem.expectedOutput, - actualOutput: targetOutput.actualOutput, - context: evalItem.dataItem.context, - retrievalContext: targetOutput.retrievalContext - }); + const evaluatorInstance = await createEvaluatorInstance(evaluator, { + validate: false + }); + + const evaluatorOutput = await evaluatorInstance.evaluate({ + userInput: evalItem.dataItem.userInput, + expectedOutput: evalItem.dataItem.expectedOutput, + actualOutput: targetOutput.actualOutput, + context: evalItem.dataItem.context, + retrievalContext: targetOutput.retrievalContext + }); + + await createMergedEvaluationUsage({ + evalId, + teamId: evaluation.teamId, + tmbId: evaluation.tmbId, + usageId: evaluation.usageId, + totalPoints: evaluatorOutput.totalPoints || 0, + type: 'metric' + }); + + // Record error but continue processing + if (evaluatorOutput.status === 'failed' || evaluatorOutput.error) { + const errorMessage = evaluatorOutput.error || 'Evaluator execution failed'; + const evaluatorName = evaluator.metric.name || `Evaluator ${i + 1}`; + errors.push({ evaluatorName, error: errorMessage }); + } + + evaluatorOutputs.push(evaluatorOutput); + + // Save progress after each evaluator (checkpoint for resume) + await MongoEvalItem.updateOne( + { _id: new Types.ObjectId(evalItemId) }, + { $set: { evaluatorOutputs: evaluatorOutputs } } + ); + + // Report progress: evaluator completed + const evaluatorProgress = 30 + (60 * (i + 1)) / evalItem.evaluators.length; + await job.updateProgress(Math.round(evaluatorProgress)); + } + + // After all evaluators, check if there were any errors + if (errors.length > 0) { + throw new EvaluatorAggregatedError(errors); + } } catch (error) { - // Normalize evaluator execution error + // If it's already an EvaluatorAggregatedError, wrap it in EvaluationStageError + if (error instanceof EvaluatorAggregatedError) { + throw new EvaluationStageError( + EvaluationStageEnum.EvaluatorExecute, + error.message, + error.retriable, + error + ); + } + + // Normalize other evaluator execution errors const retriable = isEvaluatorExecutionRetriable(error); const errorMessage = getErrText(error) || 'Evaluator execution failed'; @@ -674,39 +778,27 @@ const evaluationItemProcessor = async (job: Job) => { ); } } - - // Record usage from metric evaluation - if (evaluatorOutput.totalPoints) { - totalMetricPoints += evaluatorOutput.totalPoints || 0; - } - - // Record usage from metric evaluation - if (totalMetricPoints > 0) { - await createMergedEvaluationUsage({ - evalId, - teamId: evaluation.teamId, - tmbId: evaluation.tmbId, - usageId: evaluation.usageId, - totalPoints: totalMetricPoints, - type: 'metric' - }); - } - // 3. Store results await MongoEvalItem.updateOne( { _id: new Types.ObjectId(evalItemId) }, { $set: { targetOutput: targetOutput, - evaluatorOutput: evaluatorOutput, + evaluatorOutputs: evaluatorOutputs, status: EvaluationStatusEnum.completed, finishTime: new Date() } } ); + // Report final progress + await job.updateProgress(100); + + const scores = evaluatorOutputs + .map((output) => output?.data?.score) + .filter((score) => score !== undefined); addLog.debug( - `[Evaluation] Evaluation item completed: ${evalItemId}, score: ${evaluatorOutput?.data?.score}` + `[Evaluation] Evaluation item completed: ${evalItemId}, scores: [${scores.join(', ')}]` ); } catch (error) { addLog.error(`[Evaluation] Evaluation item error: ${evalItemId}, error: ${error}`); diff --git a/packages/service/core/evaluation/task/schema.ts b/packages/service/core/evaluation/task/schema.ts index f4555b753979..721da95e03d7 100644 --- a/packages/service/core/evaluation/task/schema.ts +++ b/packages/service/core/evaluation/task/schema.ts @@ -215,15 +215,15 @@ export const EvaluationItemSchema = new Schema({ required: true }, target: EvaluationTargetSchema, - evaluator: EvaluationEvaluatorSchema, // Single evaluator configuration + evaluators: [EvaluationEvaluatorSchema], // Multiple evaluator configurations // Execution results targetOutput: { type: Schema.Types.Mixed, default: {} }, - evaluatorOutput: { - type: Schema.Types.Mixed, - default: {} + evaluatorOutputs: { + type: [Schema.Types.Mixed], + default: [] }, status: { type: Number, diff --git a/projects/app/src/pages/api/core/evaluation/task/dataItem/delete.ts b/projects/app/src/pages/api/core/evaluation/task/dataItem/delete.ts deleted file mode 100644 index e21a1fa3f754..000000000000 --- a/projects/app/src/pages/api/core/evaluation/task/dataItem/delete.ts +++ /dev/null @@ -1,58 +0,0 @@ -import type { ApiRequestProps } from '@fastgpt/service/type/next'; -import { NextAPI } from '@/service/middleware/entry'; -import { EvaluationTaskService } from '@fastgpt/service/core/evaluation/task'; -import type { - DeleteDataItemRequest, - DeleteDataItemResponse -} from '@fastgpt/global/core/evaluation/api'; -import { authEvaluationTaskWrite } from '@fastgpt/service/core/evaluation/common'; -import { addAuditLog } from '@fastgpt/service/support/user/audit/util'; -import { AuditEventEnum } from '@fastgpt/global/support/user/audit/constants'; -import { EvaluationErrEnum } from '@fastgpt/global/common/error/code/evaluation'; - -async function handler( - req: ApiRequestProps -): Promise { - const { dataItemId, evalId } = req.body; - - if (!dataItemId) { - throw new Error(EvaluationErrEnum.evalDataItemIdRequired); - } - - if (!evalId) { - throw new Error(EvaluationErrEnum.evalIdRequired); - } - - const { evaluation, teamId, tmbId } = await authEvaluationTaskWrite(evalId, { - req, - authApiKey: true, - authToken: true - }); - - const result = await EvaluationTaskService.deleteEvaluationItemsByDataItem( - dataItemId, - teamId, - evalId - ); - - // Add audit log for dataItem deletion - (async () => { - addAuditLog({ - tmbId, - teamId, - event: AuditEventEnum.DELETE_EVALUATION_TASK_DATA_ITEM, - params: { - taskName: evaluation.name, - dataItemId: dataItemId - } - }); - })(); - - return { - message: `Successfully deleted ${result.deletedCount} evaluation items`, - deletedCount: result.deletedCount - }; -} - -export default NextAPI(handler); -export { handler }; diff --git a/projects/app/src/pages/api/core/evaluation/task/dataItem/export.ts b/projects/app/src/pages/api/core/evaluation/task/dataItem/export.ts deleted file mode 100644 index 68271f8651d3..000000000000 --- a/projects/app/src/pages/api/core/evaluation/task/dataItem/export.ts +++ /dev/null @@ -1,55 +0,0 @@ -import type { ApiRequestProps } from '@fastgpt/service/type/next'; -import { NextAPI } from '@/service/middleware/entry'; -import { EvaluationTaskService } from '@fastgpt/service/core/evaluation/task'; -import type { ExportDataItemsResultsRequest } from '@fastgpt/global/core/evaluation/api'; -import { authEvaluationTaskRead } from '@fastgpt/service/core/evaluation/common'; -import { addAuditLog } from '@fastgpt/service/support/user/audit/util'; -import { AuditEventEnum } from '@fastgpt/global/support/user/audit/constants'; -import { EvaluationErrEnum } from '@fastgpt/global/common/error/code/evaluation'; - -async function handler(req: ApiRequestProps) { - const { evalId, format = 'json' } = req.body; - - if (!evalId) { - throw new Error(EvaluationErrEnum.evalIdRequired); - } - - if (format && !['csv', 'json'].includes(format)) { - throw new Error(EvaluationErrEnum.evalInvalidFormat); - } - - const { evaluation, teamId, tmbId } = await authEvaluationTaskRead(evalId, { - req, - authApiKey: true, - authToken: true - }); - - const result = await EvaluationTaskService.exportEvaluationResultsGroupedByDataItem( - teamId, - evalId, - format as 'csv' | 'json' - ); - - // Add audit log for dataItems export - (async () => { - addAuditLog({ - tmbId, - teamId, - event: AuditEventEnum.EXPORT_EVALUATION_TASK_DATA_ITEMS, - params: { - taskName: evaluation.name, - format, - itemCount: result.totalItems - } - }); - })(); - - return { - results: result.results, - fileName: `evaluation_${evalId}_dataItems.${format}`, - contentType: format === 'csv' ? 'text/csv' : 'application/json' - }; -} - -export default NextAPI(handler); -export { handler }; diff --git a/projects/app/src/pages/api/core/evaluation/task/dataItem/list.ts b/projects/app/src/pages/api/core/evaluation/task/dataItem/list.ts deleted file mode 100644 index 6a51ccba2335..000000000000 --- a/projects/app/src/pages/api/core/evaluation/task/dataItem/list.ts +++ /dev/null @@ -1,42 +0,0 @@ -import type { ApiRequestProps } from '@fastgpt/service/type/next'; -import { NextAPI } from '@/service/middleware/entry'; -import { EvaluationTaskService } from '@fastgpt/service/core/evaluation/task'; -import type { - DataItemListRequest, - DataItemListResponse -} from '@fastgpt/global/core/evaluation/api'; -import { authEvaluationTaskRead } from '@fastgpt/service/core/evaluation/common'; -import { parsePaginationRequest } from '@fastgpt/service/common/api/pagination'; -import { EvaluationErrEnum } from '@fastgpt/global/common/error/code/evaluation'; - -async function handler(req: ApiRequestProps): Promise { - const { offset, pageSize } = parsePaginationRequest(req); - const { evalId, status, keyword } = req.body; - - if (!evalId) { - throw new Error(EvaluationErrEnum.evalIdRequired); - } - - // Use existing evaluation task read permission - const { teamId } = await authEvaluationTaskRead(evalId, { - req, - authApiKey: true, - authToken: true - }); - - const result = await EvaluationTaskService.listDataItemsGrouped(teamId, { - evalId, - status: status !== undefined ? Number(status) : undefined, - keyword, - offset, - pageSize - }); - - return { - list: result.list, - total: result.total - }; -} - -export default NextAPI(handler); -export { handler }; diff --git a/projects/app/src/pages/api/core/evaluation/task/dataItem/retry.ts b/projects/app/src/pages/api/core/evaluation/task/dataItem/retry.ts deleted file mode 100644 index dd7c41984ac4..000000000000 --- a/projects/app/src/pages/api/core/evaluation/task/dataItem/retry.ts +++ /dev/null @@ -1,56 +0,0 @@ -import type { ApiRequestProps } from '@fastgpt/service/type/next'; -import { NextAPI } from '@/service/middleware/entry'; -import { EvaluationTaskService } from '@fastgpt/service/core/evaluation/task'; -import type { - RetryDataItemRequest, - RetryDataItemResponse -} from '@fastgpt/global/core/evaluation/api'; -import { authEvaluationTaskWrite } from '@fastgpt/service/core/evaluation/common'; -import { addAuditLog } from '@fastgpt/service/support/user/audit/util'; -import { AuditEventEnum } from '@fastgpt/global/support/user/audit/constants'; -import { EvaluationErrEnum } from '@fastgpt/global/common/error/code/evaluation'; - -async function handler(req: ApiRequestProps): Promise { - const { dataItemId, evalId } = req.body; - - if (!dataItemId) { - throw new Error(EvaluationErrEnum.evalDataItemIdRequired); - } - - if (!evalId) { - throw new Error(EvaluationErrEnum.evalIdRequired); - } - - const { evaluation, teamId, tmbId } = await authEvaluationTaskWrite(evalId, { - req, - authApiKey: true, - authToken: true - }); - - const result = await EvaluationTaskService.retryEvaluationItemsByDataItem( - dataItemId, - teamId, - evalId - ); - - // Add audit log for dataItem retry - (async () => { - addAuditLog({ - tmbId, - teamId, - event: AuditEventEnum.RETRY_EVALUATION_TASK_DATA_ITEM, - params: { - taskName: evaluation.name, - dataItemId: dataItemId - } - }); - })(); - - return { - message: `Successfully retried ${result.retriedCount} evaluation items`, - retriedCount: result.retriedCount - }; -} - -export default NextAPI(handler); -export { handler }; diff --git a/projects/app/src/pages/api/core/evaluation/task/dataItem/update.ts b/projects/app/src/pages/api/core/evaluation/task/dataItem/update.ts deleted file mode 100644 index 85e279e16448..000000000000 --- a/projects/app/src/pages/api/core/evaluation/task/dataItem/update.ts +++ /dev/null @@ -1,67 +0,0 @@ -import type { ApiRequestProps } from '@fastgpt/service/type/next'; -import { NextAPI } from '@/service/middleware/entry'; -import { EvaluationTaskService } from '@fastgpt/service/core/evaluation/task'; -import type { - UpdateDataItemRequest, - UpdateDataItemResponse -} from '@fastgpt/global/core/evaluation/api'; -import { authEvaluationTaskWrite } from '@fastgpt/service/core/evaluation/common'; -import { addAuditLog } from '@fastgpt/service/support/user/audit/util'; -import { AuditEventEnum } from '@fastgpt/global/support/user/audit/constants'; -import { EvalDatasetDataKeyEnum } from '@fastgpt/global/core/evaluation/dataset/constants'; -import { EvaluationErrEnum } from '@fastgpt/global/common/error/code/evaluation'; - -async function handler( - req: ApiRequestProps -): Promise { - const { - dataItemId, - evalId, - [EvalDatasetDataKeyEnum.UserInput]: userInput, - [EvalDatasetDataKeyEnum.ExpectedOutput]: expectedOutput, - [EvalDatasetDataKeyEnum.Context]: context, - targetCallParams - } = req.body; - - if (!dataItemId) { - throw new Error(EvaluationErrEnum.evalDataItemIdRequired); - } - - if (!evalId) { - throw new Error(EvaluationErrEnum.evalIdRequired); - } - - const { evaluation, teamId, tmbId } = await authEvaluationTaskWrite(evalId, { - req, - authApiKey: true, - authToken: true - }); - - const result = await EvaluationTaskService.updateEvaluationItemsByDataItem( - dataItemId, - { userInput, expectedOutput, context, targetCallParams }, - teamId, - evalId - ); - - // Add audit log for dataItem update - (async () => { - addAuditLog({ - tmbId, - teamId, - event: AuditEventEnum.UPDATE_EVALUATION_TASK_DATA_ITEM, - params: { - taskName: evaluation.name, - dataItemId: dataItemId - } - }); - })(); - - return { - message: `Successfully updated ${result.updatedCount} evaluation items`, - updatedCount: result.updatedCount - }; -} - -export default NextAPI(handler); -export { handler }; diff --git a/projects/app/src/pages/api/core/evaluation/task/list.ts b/projects/app/src/pages/api/core/evaluation/task/list.ts index 8b023a63ee72..246218ddeb26 100644 --- a/projects/app/src/pages/api/core/evaluation/task/list.ts +++ b/projects/app/src/pages/api/core/evaluation/task/list.ts @@ -25,7 +25,7 @@ async function handler( req: ApiRequestProps ): Promise { const { offset, pageSize } = parsePaginationRequest(req); - const { searchKey, appName, appId, versionId } = req.body; + const { searchKey, appName, appId } = req.body; const { teamId, tmbId, isOwner, roleList, myGroupMap, myOrgSet } = await getEvaluationPermissionAggregation({ @@ -51,8 +51,7 @@ async function handler( tmbId, isOwner, appName?.trim(), - appId?.trim(), - versionId?.trim() + appId?.trim() ); const formatEvaluations = result.list diff --git a/test/cases/pages/api/core/evaluation/task/dataItem/delete.test.ts b/test/cases/pages/api/core/evaluation/task/dataItem/delete.test.ts deleted file mode 100644 index e720143b0cbe..000000000000 --- a/test/cases/pages/api/core/evaluation/task/dataItem/delete.test.ts +++ /dev/null @@ -1,149 +0,0 @@ -import { describe, test, expect, vi, beforeEach } from 'vitest'; -import { Types } from '@fastgpt/service/common/mongo'; -import { EvaluationTaskService } from '@fastgpt/service/core/evaluation/task'; -import { handler } from '@/pages/api/core/evaluation/task/dataItem/delete'; - -// Mock NextAPI wrapper -vi.mock('@/service/middleware/entry', () => ({ - NextAPI: vi.fn((handler) => handler) -})); - -// Mock dependencies -vi.mock('@fastgpt/service/core/evaluation/task', () => ({ - EvaluationTaskService: { - deleteEvaluationItemsByDataItem: vi.fn() - } -})); - -vi.mock('@fastgpt/service/core/evaluation/common', () => ({ - authEvaluationTaskWrite: vi.fn() -})); - -vi.mock('@fastgpt/service/support/user/audit/util', () => ({ - addAuditLog: vi.fn() -})); - -describe('Delete DataItem API Handler', () => { - const mockRequest = (body: any) => - ({ - body, - method: 'POST' - }) as any; - - beforeEach(() => { - vi.clearAllMocks(); - }); - - test('应该成功删除数据项的所有评估项', async () => { - const { authEvaluationTaskWrite } = await import('@fastgpt/service/core/evaluation/common'); - (authEvaluationTaskWrite as any).mockResolvedValue({ - evaluation: { name: 'Test Evaluation' }, - teamId: '507f1f77bcf86cd799439011', - tmbId: '507f1f77bcf86cd799439012' - }); - - const mockDeleteResult = { - deletedCount: 3 - }; - (EvaluationTaskService.deleteEvaluationItemsByDataItem as any).mockResolvedValue( - mockDeleteResult - ); - - const req = mockRequest({ - dataItemId: 'data-item-123', - evalId: 'eval-123' - }); - - const result = await handler(req); - - expect(authEvaluationTaskWrite).toHaveBeenCalledWith('eval-123', { - req, - authApiKey: true, - authToken: true - }); - expect(EvaluationTaskService.deleteEvaluationItemsByDataItem).toHaveBeenCalledWith( - 'data-item-123', - '507f1f77bcf86cd799439011', - 'eval-123' - ); - expect(result).toEqual({ - message: 'Successfully deleted 3 evaluation items', - deletedCount: 3 - }); - }); - - test('数据项不存在时应该返回deletedCount为0', async () => { - const { authEvaluationTaskWrite } = await import('@fastgpt/service/core/evaluation/common'); - (authEvaluationTaskWrite as any).mockResolvedValue({ - evaluation: { name: 'Test Evaluation' }, - teamId: '507f1f77bcf86cd799439011', - tmbId: '507f1f77bcf86cd799439012' - }); - - const mockDeleteResult = { - deletedCount: 0 - }; - (EvaluationTaskService.deleteEvaluationItemsByDataItem as any).mockResolvedValue( - mockDeleteResult - ); - - const req = mockRequest({ - dataItemId: 'non-existent-data-item', - evalId: 'eval-123' - }); - - const result = await handler(req); - - expect(result).toEqual({ - message: 'Successfully deleted 0 evaluation items', - deletedCount: 0 - }); - }); - - test('缺少dataItemId时应该抛出错误', async () => { - const req = mockRequest({ - evalId: 'eval-123' - }); - - await expect(handler(req)).rejects.toThrow('evaluationDataItemIdRequired'); - }); - - test('缺少evalId时应该抛出错误', async () => { - const req = mockRequest({ - dataItemId: 'data-item-123' - }); - - await expect(handler(req)).rejects.toThrow('evaluationIdRequired'); - }); - - test('认证失败时应该抛出错误', async () => { - const { authEvaluationTaskWrite } = await import('@fastgpt/service/core/evaluation/common'); - (authEvaluationTaskWrite as any).mockRejectedValue(new Error('Permission denied')); - - const req = mockRequest({ - dataItemId: 'data-item-123', - evalId: 'eval-123' - }); - - await expect(handler(req)).rejects.toThrow('Permission denied'); - }); - - test('服务层异常时应该抛出错误', async () => { - const { authEvaluationTaskWrite } = await import('@fastgpt/service/core/evaluation/common'); - (authEvaluationTaskWrite as any).mockResolvedValue({ - evaluation: { name: 'Test Evaluation' }, - teamId: '507f1f77bcf86cd799439011', - tmbId: '507f1f77bcf86cd799439012' - }); - (EvaluationTaskService.deleteEvaluationItemsByDataItem as any).mockRejectedValue( - new Error('Database connection failed') - ); - - const req = mockRequest({ - dataItemId: 'data-item-123', - evalId: 'eval-123' - }); - - await expect(handler(req)).rejects.toThrow('Database connection failed'); - }); -}); diff --git a/test/cases/pages/api/core/evaluation/task/dataItem/export.test.ts b/test/cases/pages/api/core/evaluation/task/dataItem/export.test.ts deleted file mode 100644 index aded5623a20f..000000000000 --- a/test/cases/pages/api/core/evaluation/task/dataItem/export.test.ts +++ /dev/null @@ -1,225 +0,0 @@ -import { describe, test, expect, vi, beforeEach } from 'vitest'; -import { Types } from '@fastgpt/service/common/mongo'; -import { EvaluationTaskService } from '@fastgpt/service/core/evaluation/task'; -import { handler } from '@/pages/api/core/evaluation/task/dataItem/export'; - -// Mock NextAPI wrapper -vi.mock('@/service/middleware/entry', () => ({ - NextAPI: vi.fn((handler) => handler) -})); - -// Mock dependencies -vi.mock('@fastgpt/service/core/evaluation/task', () => ({ - EvaluationTaskService: { - exportEvaluationResultsGroupedByDataItem: vi.fn() - } -})); - -vi.mock('@fastgpt/service/core/evaluation/common', () => ({ - authEvaluationTaskRead: vi.fn() -})); - -describe('Export DataItem Grouped Results API Handler', () => { - const mockRequest = (body: any) => - ({ - body, - method: 'POST' - }) as any; - - beforeEach(() => { - vi.clearAllMocks(); - }); - - test('应该成功导出JSON格式的数据项分组结果', async () => { - const { authEvaluationTaskRead } = await import('@fastgpt/service/core/evaluation/common'); - (authEvaluationTaskRead as any).mockResolvedValue({ - evaluation: { name: 'Test Evaluation' }, - teamId: '507f1f77bcf86cd799439011', - tmbId: '507f1f77bcf86cd799439012' - }); - - const mockExportResult = { - results: Buffer.from( - JSON.stringify([ - { - dataItemId: 'data-item-123', - userInput: 'Test question', - expectedOutput: 'Test answer', - actualOutput: 'Generated response', - metricScores: { 'Test Metric': 85 } - } - ]) - ), - totalItems: 1 - }; - - (EvaluationTaskService.exportEvaluationResultsGroupedByDataItem as any).mockResolvedValue( - mockExportResult - ); - - const req = mockRequest({ - evalId: 'eval-123', - format: 'json' - }); - - const result = await handler(req); - - expect(EvaluationTaskService.exportEvaluationResultsGroupedByDataItem).toHaveBeenCalledWith( - '507f1f77bcf86cd799439011', - 'eval-123', - 'json' - ); - expect(result).toEqual({ - results: mockExportResult.results, - fileName: 'evaluation_eval-123_dataItems.json', - contentType: 'application/json' - }); - }); - - test('应该成功导出CSV格式的数据项分组结果', async () => { - const { authEvaluationTaskRead } = await import('@fastgpt/service/core/evaluation/common'); - (authEvaluationTaskRead as any).mockResolvedValue({ - evaluation: { name: 'Test Evaluation' }, - teamId: '507f1f77bcf86cd799439011', - tmbId: '507f1f77bcf86cd799439012' - }); - - const mockExportResult = { - results: Buffer.from( - 'DataItemId,UserInput,ExpectedOutput,ActualOutput,Test Metric\ndata-item-123,Test question,Test answer,Generated response,85' - ), - totalItems: 1 - }; - - (EvaluationTaskService.exportEvaluationResultsGroupedByDataItem as any).mockResolvedValue( - mockExportResult - ); - - const req = mockRequest({ - evalId: 'eval-123', - format: 'csv' - }); - - const result = await handler(req); - - expect(EvaluationTaskService.exportEvaluationResultsGroupedByDataItem).toHaveBeenCalledWith( - '507f1f77bcf86cd799439011', - 'eval-123', - 'csv' - ); - expect(result).toEqual({ - results: mockExportResult.results, - fileName: 'evaluation_eval-123_dataItems.csv', - contentType: 'text/csv' - }); - }); - - test('默认格式应该是JSON', async () => { - const { authEvaluationTaskRead } = await import('@fastgpt/service/core/evaluation/common'); - (authEvaluationTaskRead as any).mockResolvedValue({ - evaluation: { name: 'Test Evaluation' }, - teamId: '507f1f77bcf86cd799439011', - tmbId: '507f1f77bcf86cd799439012' - }); - - const mockExportResult = { - results: Buffer.from('[]'), - totalItems: 0 - }; - - (EvaluationTaskService.exportEvaluationResultsGroupedByDataItem as any).mockResolvedValue( - mockExportResult - ); - - const req = mockRequest({ - evalId: 'eval-123' - // No format specified - }); - await handler(req); - - expect(EvaluationTaskService.exportEvaluationResultsGroupedByDataItem).toHaveBeenCalledWith( - '507f1f77bcf86cd799439011', - 'eval-123', - 'json' - ); - }); - - test('空数据时应该正常导出', async () => { - const { authEvaluationTaskRead } = await import('@fastgpt/service/core/evaluation/common'); - (authEvaluationTaskRead as any).mockResolvedValue({ - evaluation: { name: 'Test Evaluation' }, - teamId: '507f1f77bcf86cd799439011', - tmbId: '507f1f77bcf86cd799439012' - }); - - const mockExportResult = { - results: Buffer.from('[]'), - totalItems: 0 - }; - - (EvaluationTaskService.exportEvaluationResultsGroupedByDataItem as any).mockResolvedValue( - mockExportResult - ); - - const req = mockRequest({ - evalId: 'empty-eval-123', - format: 'json' - }); - - const result = await handler(req); - - expect(result).toEqual({ - results: mockExportResult.results, - fileName: 'evaluation_empty-eval-123_dataItems.json', - contentType: 'application/json' - }); - }); - - test('缺少evalId时应该抛出错误', async () => { - const req = mockRequest({ - format: 'json' - }); - - await expect(() => handler(req)).rejects.toThrow('evaluationIdRequired'); - }); - - test('无效格式时应该抛出错误', async () => { - const req = mockRequest({ - evalId: 'eval-123', - format: 'invalid' - }); - - await expect(() => handler(req)).rejects.toThrow('evaluationInvalidFormat'); - }); - - test('评估任务不存在时应该抛出错误', async () => { - const { authEvaluationTaskRead } = await import('@fastgpt/service/core/evaluation/common'); - (authEvaluationTaskRead as any).mockRejectedValue(new Error('evaluationTaskNotFound')); - - const req = mockRequest({ - evalId: 'invalid-eval-id', - format: 'json' - }); - - await expect(() => handler(req)).rejects.toThrow('evaluationTaskNotFound'); - }); - - test('服务层异常时应该抛出错误', async () => { - const { authEvaluationTaskRead } = await import('@fastgpt/service/core/evaluation/common'); - (authEvaluationTaskRead as any).mockResolvedValue({ - evaluation: { name: 'Test Evaluation' }, - teamId: '507f1f77bcf86cd799439011', - tmbId: '507f1f77bcf86cd799439012' - }); - (EvaluationTaskService.exportEvaluationResultsGroupedByDataItem as any).mockRejectedValue( - new Error('Export failed') - ); - - const req = mockRequest({ - evalId: 'eval-123', - format: 'json' - }); - - await expect(() => handler(req)).rejects.toThrow('Export failed'); - }); -}); diff --git a/test/cases/pages/api/core/evaluation/task/dataItem/list.test.ts b/test/cases/pages/api/core/evaluation/task/dataItem/list.test.ts deleted file mode 100644 index 5579799507b1..000000000000 --- a/test/cases/pages/api/core/evaluation/task/dataItem/list.test.ts +++ /dev/null @@ -1,226 +0,0 @@ -import { describe, test, expect, vi, beforeEach } from 'vitest'; -import { Types } from '@fastgpt/service/common/mongo'; -import { EvaluationTaskService } from '@fastgpt/service/core/evaluation/task'; -import { EvaluationStatusEnum } from '@fastgpt/global/core/evaluation/constants'; -import { handler } from '@/pages/api/core/evaluation/task/dataItem/list'; - -// Mock NextAPI wrapper -vi.mock('@/service/middleware/entry', () => ({ - NextAPI: vi.fn((handler) => handler) -})); - -// Mock dependencies -vi.mock('@fastgpt/service/core/evaluation/task', () => ({ - EvaluationTaskService: { - listDataItemsGrouped: vi.fn() - } -})); - -vi.mock('@fastgpt/service/core/evaluation/common', () => ({ - authEvaluationTaskRead: vi.fn() -})); - -describe('List DataItems Grouped API Handler', () => { - const mockDataItemGrouped = { - dataItemId: 'data-item-123', - dataItem: { - userInput: 'Test question', - expectedOutput: 'Test answer' - }, - items: [ - { - _id: new Types.ObjectId(), - evalId: new Types.ObjectId(), - status: EvaluationStatusEnum.completed, - evaluatorOutput: { data: { score: 85 } } - } - ], - statistics: { - totalItems: 2, - completedItems: 1, - errorItems: 0 - } - }; - - const mockRequest = (body: any) => - ({ - body, - method: 'POST' - }) as any; - - beforeEach(() => { - vi.clearAllMocks(); - }); - - test('应该成功获取分组的数据项列表', async () => { - const { authEvaluationTaskRead } = await import('@fastgpt/service/core/evaluation/common'); - (authEvaluationTaskRead as any).mockResolvedValue({ - evaluation: { name: 'Test Evaluation' }, - teamId: '507f1f77bcf86cd799439011', - tmbId: '507f1f77bcf86cd799439012' - }); - - const mockResult = { - list: [mockDataItemGrouped], - total: 1 - }; - - (EvaluationTaskService.listDataItemsGrouped as any).mockResolvedValue(mockResult); - - const req = mockRequest({ - evalId: 'eval-123', - pageNum: 1, - pageSize: 20 - }); - - const result = await handler(req); - - expect(EvaluationTaskService.listDataItemsGrouped).toHaveBeenCalledWith( - '507f1f77bcf86cd799439011', - { - evalId: 'eval-123', - keyword: undefined, - status: undefined, - offset: 0, - pageSize: 20 - } - ); - expect(result).toEqual(mockResult); - }); - - test('应该支持状态过滤', async () => { - const { authEvaluationTaskRead } = await import('@fastgpt/service/core/evaluation/common'); - (authEvaluationTaskRead as any).mockResolvedValue({ - evaluation: { name: 'Test Evaluation' }, - teamId: '507f1f77bcf86cd799439011', - tmbId: '507f1f77bcf86cd799439012' - }); - - const mockResult = { - list: [mockDataItemGrouped], - total: 1 - }; - - (EvaluationTaskService.listDataItemsGrouped as any).mockResolvedValue(mockResult); - - const req = mockRequest({ - evalId: 'eval-123', - status: EvaluationStatusEnum.completed, - pageNum: 1, - pageSize: 20 - }); - - await handler(req); - - expect(EvaluationTaskService.listDataItemsGrouped).toHaveBeenCalledWith( - '507f1f77bcf86cd799439011', - { - evalId: 'eval-123', - keyword: undefined, - status: EvaluationStatusEnum.completed, - offset: 0, - pageSize: 20 - } - ); - }); - - test('应该支持关键词搜索', async () => { - const { authEvaluationTaskRead } = await import('@fastgpt/service/core/evaluation/common'); - (authEvaluationTaskRead as any).mockResolvedValue({ - evaluation: { name: 'Test Evaluation' }, - teamId: '507f1f77bcf86cd799439011', - tmbId: '507f1f77bcf86cd799439012' - }); - - const mockResult = { - list: [mockDataItemGrouped], - total: 1 - }; - - (EvaluationTaskService.listDataItemsGrouped as any).mockResolvedValue(mockResult); - - const req = mockRequest({ - evalId: 'eval-123', - keyword: 'test', - pageNum: 1, - pageSize: 20 - }); - - await handler(req); - - expect(EvaluationTaskService.listDataItemsGrouped).toHaveBeenCalledWith( - '507f1f77bcf86cd799439011', - { - evalId: 'eval-123', - keyword: 'test', - status: undefined, - offset: 0, - pageSize: 20 - } - ); - }); - - test('应该处理分页参数', async () => { - const { authEvaluationTaskRead } = await import('@fastgpt/service/core/evaluation/common'); - (authEvaluationTaskRead as any).mockResolvedValue({ - evaluation: { name: 'Test Evaluation' }, - teamId: '507f1f77bcf86cd799439011', - tmbId: '507f1f77bcf86cd799439012' - }); - - const mockResult = { - list: [mockDataItemGrouped], - total: 50 - }; - - (EvaluationTaskService.listDataItemsGrouped as any).mockResolvedValue(mockResult); - - const req = mockRequest({ - evalId: 'eval-123', - pageNum: 3, - pageSize: 10 - }); - - await handler(req); - - expect(EvaluationTaskService.listDataItemsGrouped).toHaveBeenCalledWith( - '507f1f77bcf86cd799439011', - { - evalId: 'eval-123', - keyword: undefined, - status: undefined, - offset: 20, // (pageNum - 1) * pageSize = (3 - 1) * 10 - pageSize: 10 - } - ); - }); - - test('缺少evalId时应该抛出错误', async () => { - const req = mockRequest({ - current: 1, - pageSize: 20 - }); - - await expect(handler(req)).rejects.toThrow('evaluationIdRequired'); - }); - - test('服务层异常时应该抛出错误', async () => { - const { authEvaluationTaskRead } = await import('@fastgpt/service/core/evaluation/common'); - (authEvaluationTaskRead as any).mockResolvedValue({ - evaluation: { name: 'Test Evaluation' }, - teamId: '507f1f77bcf86cd799439011', - tmbId: '507f1f77bcf86cd799439012' - }); - (EvaluationTaskService.listDataItemsGrouped as any).mockRejectedValue( - new Error('Database error') - ); - - const req = mockRequest({ - evalId: 'eval-123', - current: 1, - pageSize: 20 - }); - - await expect(handler(req)).rejects.toThrow('Database error'); - }); -}); diff --git a/test/cases/pages/api/core/evaluation/task/dataItem/retry.test.ts b/test/cases/pages/api/core/evaluation/task/dataItem/retry.test.ts deleted file mode 100644 index 95b37dd77d8a..000000000000 --- a/test/cases/pages/api/core/evaluation/task/dataItem/retry.test.ts +++ /dev/null @@ -1,113 +0,0 @@ -import { describe, test, expect, vi, beforeEach } from 'vitest'; -import { Types } from '@fastgpt/service/common/mongo'; -import { EvaluationTaskService } from '@fastgpt/service/core/evaluation/task'; -import { handler } from '@/pages/api/core/evaluation/task/dataItem/retry'; - -// Mock NextAPI wrapper -vi.mock('@/service/middleware/entry', () => ({ - NextAPI: vi.fn((handler) => handler) -})); - -// Mock dependencies -vi.mock('@fastgpt/service/core/evaluation/task', () => ({ - EvaluationTaskService: { - retryEvaluationItemsByDataItem: vi.fn() - } -})); - -vi.mock('@fastgpt/service/core/evaluation/common', () => ({ - authEvaluationTaskWrite: vi.fn() -})); - -vi.mock('@fastgpt/service/support/user/audit/util', () => ({ - addAuditLog: vi.fn() -})); - -describe('Retry DataItem API Handler', () => { - const mockRequest = (body: any) => - ({ - body, - method: 'POST' - }) as any; - - beforeEach(() => { - vi.clearAllMocks(); - }); - - test('应该成功重试数据项的失败评估', async () => { - const { authEvaluationTaskWrite } = await import('@fastgpt/service/core/evaluation/common'); - (authEvaluationTaskWrite as any).mockResolvedValue({ - evaluation: { name: 'Test Evaluation' }, - teamId: '507f1f77bcf86cd799439011', - tmbId: '507f1f77bcf86cd799439012' - }); - - const mockRetryResult = { - retriedCount: 2 - }; - (EvaluationTaskService.retryEvaluationItemsByDataItem as any).mockResolvedValue( - mockRetryResult - ); - - const req = mockRequest({ - dataItemId: 'data-item-123', - evalId: 'eval-123' - }); - - const result = await handler(req); - - expect(EvaluationTaskService.retryEvaluationItemsByDataItem).toHaveBeenCalledWith( - 'data-item-123', - '507f1f77bcf86cd799439011', - 'eval-123' - ); - expect(result).toEqual({ - message: 'Successfully retried 2 evaluation items', - retriedCount: 2 - }); - }); - - test('没有失败项目时应该返回retriedCount为0', async () => { - const { authEvaluationTaskWrite } = await import('@fastgpt/service/core/evaluation/common'); - (authEvaluationTaskWrite as any).mockResolvedValue({ - evaluation: { name: 'Test Evaluation' }, - teamId: '507f1f77bcf86cd799439011', - tmbId: '507f1f77bcf86cd799439012' - }); - - const mockRetryResult = { - retriedCount: 0 - }; - (EvaluationTaskService.retryEvaluationItemsByDataItem as any).mockResolvedValue( - mockRetryResult - ); - - const req = mockRequest({ - dataItemId: 'data-item-123', - evalId: 'eval-123' - }); - - const result = await handler(req); - - expect(result).toEqual({ - message: 'Successfully retried 0 evaluation items', - retriedCount: 0 - }); - }); - - test('缺少dataItemId时应该抛出错误', async () => { - const req = mockRequest({ - evalId: 'eval-123' - }); - - await expect(handler(req)).rejects.toThrow('evaluationDataItemIdRequired'); - }); - - test('缺少evalId时应该抛出错误', async () => { - const req = mockRequest({ - dataItemId: 'data-item-123' - }); - - await expect(handler(req)).rejects.toThrow('evaluationIdRequired'); - }); -}); diff --git a/test/cases/pages/api/core/evaluation/task/dataItem/update.test.ts b/test/cases/pages/api/core/evaluation/task/dataItem/update.test.ts deleted file mode 100644 index d8cbde959a06..000000000000 --- a/test/cases/pages/api/core/evaluation/task/dataItem/update.test.ts +++ /dev/null @@ -1,131 +0,0 @@ -import { describe, test, expect, vi, beforeEach } from 'vitest'; -import { EvaluationTaskService } from '@fastgpt/service/core/evaluation/task'; -import { handler } from '@/pages/api/core/evaluation/task/dataItem/update'; - -// Mock NextAPI wrapper -vi.mock('@/service/middleware/entry', () => ({ - NextAPI: vi.fn((handler) => handler) -})); - -// Mock dependencies -vi.mock('@fastgpt/service/core/evaluation/task', () => ({ - EvaluationTaskService: { - updateEvaluationItemsByDataItem: vi.fn() - } -})); - -vi.mock('@fastgpt/service/core/evaluation/common', () => ({ - authEvaluationTaskWrite: vi.fn() -})); - -vi.mock('@fastgpt/service/support/user/audit/util', () => ({ - addAuditLog: vi.fn() -})); - -describe('Update DataItem API Handler', () => { - const mockRequest = (body: any) => - ({ - body, - method: 'POST' - }) as any; - - beforeEach(() => { - vi.clearAllMocks(); - }); - - test('应该成功更新数据项的评估内容', async () => { - const { authEvaluationTaskWrite } = await import('@fastgpt/service/core/evaluation/common'); - (authEvaluationTaskWrite as any).mockResolvedValue({ - evaluation: { name: 'Test Evaluation' }, - teamId: '507f1f77bcf86cd799439011', - tmbId: '507f1f77bcf86cd799439012' - }); - - const mockUpdateResult = { - updatedCount: 2 - }; - (EvaluationTaskService.updateEvaluationItemsByDataItem as any).mockResolvedValue( - mockUpdateResult - ); - - const req = mockRequest({ - dataItemId: 'data-item-123', - evalId: 'eval-123', - userInput: 'Updated question', - expectedOutput: 'Updated answer' - }); - const result = await handler(req); - - expect(authEvaluationTaskWrite).toHaveBeenCalledWith('eval-123', { - req, - authApiKey: true, - authToken: true - }); - expect(EvaluationTaskService.updateEvaluationItemsByDataItem).toHaveBeenCalledWith( - 'data-item-123', - { - userInput: 'Updated question', - expectedOutput: 'Updated answer', - context: undefined, - targetCallParams: undefined - }, - '507f1f77bcf86cd799439011', - 'eval-123' - ); - expect(result).toEqual({ - message: 'Successfully updated 2 evaluation items', - updatedCount: 2 - }); - }); - - test('缺少dataItemId时应该抛出错误', async () => { - const req = mockRequest({ - evalId: 'eval-123', - userInput: 'Updated question' - }); - - await expect(handler(req)).rejects.toThrow('evaluationDataItemIdRequired'); - }); - - test('缺少evalId时应该抛出错误', async () => { - const req = mockRequest({ - dataItemId: 'data-item-123', - userInput: 'Updated question' - }); - - await expect(handler(req)).rejects.toThrow('evaluationIdRequired'); - }); - - test('认证失败时应该抛出错误', async () => { - const { authEvaluationTaskWrite } = await import('@fastgpt/service/core/evaluation/common'); - (authEvaluationTaskWrite as any).mockRejectedValue(new Error('Permission denied')); - - const req = mockRequest({ - dataItemId: 'data-item-123', - evalId: 'eval-123', - userInput: 'Updated question' - }); - - await expect(handler(req)).rejects.toThrow('Permission denied'); - }); - - test('服务层异常时应该抛出错误', async () => { - const { authEvaluationTaskWrite } = await import('@fastgpt/service/core/evaluation/common'); - (authEvaluationTaskWrite as any).mockResolvedValue({ - evaluation: { name: 'Test Evaluation' }, - teamId: '507f1f77bcf86cd799439011', - tmbId: '507f1f77bcf86cd799439012' - }); - (EvaluationTaskService.updateEvaluationItemsByDataItem as any).mockRejectedValue( - new Error('Database connection failed') - ); - - const req = mockRequest({ - dataItemId: 'data-item-123', - evalId: 'eval-123', - userInput: 'Updated question' - }); - - await expect(handler(req)).rejects.toThrow('Database connection failed'); - }); -}); diff --git a/test/cases/pages/api/core/evaluation/task/list.test.ts b/test/cases/pages/api/core/evaluation/task/list.test.ts index 3a5cf94f476a..4e3e8b702e60 100644 --- a/test/cases/pages/api/core/evaluation/task/list.test.ts +++ b/test/cases/pages/api/core/evaluation/task/list.test.ts @@ -79,7 +79,6 @@ describe('List Evaluation Tasks API Handler', () => { 'mock-tmb-id', true, undefined, - undefined, undefined ); expect(result).toEqual({ @@ -115,7 +114,6 @@ describe('List Evaluation Tasks API Handler', () => { 'mock-tmb-id', true, undefined, - undefined, undefined ); }); @@ -140,7 +138,6 @@ describe('List Evaluation Tasks API Handler', () => { 'mock-tmb-id', true, undefined, - undefined, undefined ); }); @@ -170,8 +167,7 @@ describe('List Evaluation Tasks API Handler', () => { 'mock-tmb-id', true, 'Test App', - '507f1f77bcf86cd799439011', - '507f1f77bcf86cd799439012' + '507f1f77bcf86cd799439011' ); }); @@ -198,7 +194,6 @@ describe('List Evaluation Tasks API Handler', () => { 'mock-tmb-id', true, 'Partial App', - undefined, undefined ); }); @@ -228,8 +223,7 @@ describe('List Evaluation Tasks API Handler', () => { 'mock-tmb-id', true, 'Test App', - '507f1f77bcf86cd799439011', - '507f1f77bcf86cd799439012' + '507f1f77bcf86cd799439011' ); }); }); diff --git a/test/cases/service/core/evaluation/task.test.ts b/test/cases/service/core/evaluation/task.test.ts index 36778233fd24..303dca516d6a 100644 --- a/test/cases/service/core/evaluation/task.test.ts +++ b/test/cases/service/core/evaluation/task.test.ts @@ -601,13 +601,12 @@ describe('EvaluationTaskService', () => { teamId, 0, 10, - undefined, + '507f1f77bcf86cd799439026', undefined, tmbId, true, undefined, - undefined, - '507f1f77bcf86cd799439026' + undefined ); expect(Array.isArray(result.list)).toBe(true); @@ -648,13 +647,12 @@ describe('EvaluationTaskService', () => { teamId, 0, 10, - undefined, + '507f1f77bcf86cd799439031', undefined, tmbId, true, 'Test App', // appName filter - '507f1f77bcf86cd799439030', // appId filter - '507f1f77bcf86cd799439031' // versionId filter + '507f1f77bcf86cd799439030' // appId filter ); expect(Array.isArray(result.list)).toBe(true); @@ -1048,14 +1046,14 @@ describe('EvaluationTaskService', () => { evalId: testEvaluationId, dataItem: { userInput: 'Test 1', expectedOutput: 'Answer 1' }, target, - evaluator: evaluators[0], + evaluators: [evaluators[0]], status: EvaluationStatusEnum.evaluating }, { evalId: testEvaluationId, dataItem: { userInput: 'Test 2', expectedOutput: 'Answer 2' }, target, - evaluator: evaluators[0], + evaluators: [evaluators[0]], status: EvaluationStatusEnum.queuing } ]); @@ -1104,40 +1102,44 @@ describe('EvaluationTaskService', () => { evalId: testEvaluationId, dataItem: { userInput: 'Q1', expectedOutput: 'A1' }, target, - evaluator: evaluators[0], + evaluators: [evaluators[0]], status: EvaluationStatusEnum.completed, - evaluatorOutput: { - metricName: 'Test Metric', - data: { - score: 85 + evaluatorOutputs: [ + { + metricName: 'Test Metric', + data: { + score: 85 + } } - } + ] }, { evalId: testEvaluationId, dataItem: { userInput: 'Q2', expectedOutput: 'A2' }, target, - evaluator: evaluators[0], + evaluators: [evaluators[0]], status: EvaluationStatusEnum.completed, - evaluatorOutput: { - metricName: 'Test Metric', - data: { - score: 95 + evaluatorOutputs: [ + { + metricName: 'Test Metric', + data: { + score: 95 + } } - } + ] }, { evalId: testEvaluationId, dataItem: { userInput: 'Q3', expectedOutput: 'A3' }, target, - evaluator: evaluators[0], + evaluators: [evaluators[0]], status: EvaluationStatusEnum.evaluating }, { evalId: testEvaluationId, dataItem: { userInput: 'Q4', expectedOutput: 'A4' }, target, - evaluator: evaluators[0], + evaluators: [evaluators[0]], status: EvaluationStatusEnum.queuing } ]); @@ -1185,14 +1187,14 @@ describe('EvaluationTaskService', () => { evalId: testEvaluationId, dataItem: { userInput: 'Test userInput 1', expectedOutput: 'Test answer 1' }, target, - evaluator: evaluators[0], + evaluators: [evaluators[0]], status: EvaluationStatusEnum.queuing }, { evalId: testEvaluationId, dataItem: { userInput: 'Test userInput 2', expectedOutput: 'Test answer 2' }, target, - evaluator: evaluators[0], + evaluators: [evaluators[0]], status: EvaluationStatusEnum.completed } ]); @@ -1209,7 +1211,7 @@ describe('EvaluationTaskService', () => { expect(result.items.length).toBeGreaterThan(0); const item = result.items[0]; - expect(item.evalItemId).toBeDefined(); + expect(item._id).toBeDefined(); expect(item.evalId.toString()).toBe(testEvaluationId.toString()); expect(item.dataItem).toBeDefined(); }); @@ -1237,7 +1239,7 @@ describe('EvaluationTaskService', () => { evalId: testEvaluationId, dataItem: { userInput: 'Test Item', expectedOutput: 'Test Response' }, target, - evaluator: evaluators[0], + evaluators: [evaluators[0]], status: EvaluationStatusEnum.queuing }); const itemId = item._id.toString(); @@ -1279,7 +1281,7 @@ describe('EvaluationTaskService', () => { evalId: testEvaluationId, dataItem: { userInput: 'Test Item', expectedOutput: 'Test Response' }, target, - evaluator: evaluators[0], + evaluators: [evaluators[0]], status: EvaluationStatusEnum.queuing }); const itemId = item._id.toString(); @@ -1320,7 +1322,7 @@ describe('EvaluationTaskService', () => { evalId: testEvaluationId, dataItem: { userInput: 'Test Item', expectedOutput: 'Test Response' }, target, - evaluator: evaluators[0], + evaluators: [evaluators[0]], status: EvaluationStatusEnum.error, errorMessage: 'Test error', retry: 2 @@ -1356,7 +1358,7 @@ describe('EvaluationTaskService', () => { evalId: testEvaluationId, dataItem: { userInput: 'Test Item', expectedOutput: 'Test Response' }, target, - evaluator: evaluators[0], + evaluators: [evaluators[0]], status: EvaluationStatusEnum.completed, errorMessage: null // 确保没有错误消息 }); @@ -1389,7 +1391,7 @@ describe('EvaluationTaskService', () => { evalId: testEvaluationId, dataItem: { userInput: 'Test Item', expectedOutput: 'Test Response' }, target, - evaluator: evaluators[0], + evaluators: [evaluators[0]], status: EvaluationStatusEnum.queuing }); const itemId = item._id.toString(); @@ -1423,30 +1425,30 @@ describe('EvaluationTaskService', () => { evalId: testEvaluationId, dataItem: { userInput: 'Test Item', expectedOutput: 'Test Response' }, target, - evaluator: evaluators[0], + evaluators: [evaluators[0]], status: EvaluationStatusEnum.completed, targetOutput: { actualOutput: 'Test response', responseTime: 1000 }, - evaluatorOutput: { - metricName: 'Test Metric', - data: { - score: 92, - runLogs: { test: true } + evaluatorOutputs: [ + { + metricName: 'Test Metric', + data: { + score: 92, + runLogs: { test: true } + } } - } + ] }); const itemId = item._id.toString(); const result = await EvaluationTaskService.getEvaluationItemResult(itemId, teamId); - expect(result.item._id.toString()).toBe(itemId); + expect(result._id.toString()).toBe(itemId); expect(result.dataItem.userInput).toBe('Test Item'); - expect(result.response).toBe('Test response'); - expect(result.score).toBe(92); - expect(result.result).toBeDefined(); - expect(result.result?.data?.score).toBe(92); + expect(result.targetOutput?.actualOutput).toBe('Test response'); + expect(result.evaluatorOutputs?.[0].data?.score).toBe(92); }); }); }); @@ -1476,41 +1478,45 @@ describe('EvaluationTaskService', () => { evalId: testEvaluationId, dataItem: { userInput: 'JavaScript userInput', expectedOutput: 'JS answer' }, target, - evaluator: evaluators[0], + evaluators: [evaluators[0]], status: EvaluationStatusEnum.completed, targetOutput: { actualOutput: 'JavaScript is a programming language', responseTime: 1000 }, - evaluatorOutput: { - metricName: 'Test Metric', - data: { - score: 85 + evaluatorOutputs: [ + { + metricName: 'Test Metric', + data: { + score: 85 + } } - } + ] }, { evalId: testEvaluationId, dataItem: { userInput: 'Python userInput', expectedOutput: 'Python answer' }, target, - evaluator: evaluators[0], + evaluators: [evaluators[0]], status: EvaluationStatusEnum.completed, targetOutput: { actualOutput: 'Python is also a programming language', responseTime: 1000 }, - evaluatorOutput: { - metricName: 'Test Metric', - data: { - score: 95 + evaluatorOutputs: [ + { + metricName: 'Test Metric', + data: { + score: 95 + } } - } + ] }, { evalId: testEvaluationId, dataItem: { userInput: 'Failed userInput', expectedOutput: 'Failed answer' }, target, - evaluator: evaluators[0], + evaluators: [evaluators[0]], status: EvaluationStatusEnum.error, errorMessage: 'Processing failed' } @@ -1542,7 +1548,7 @@ describe('EvaluationTaskService', () => { }); expect(result.items).toHaveLength(1); - expect(result.items[0].evaluatorOutput?.data?.score).toBe(85); + expect(result.items[0].evaluatorOutputs?.[0]?.data?.score).toBe(85); }); test('应该按关键词搜索', async () => { @@ -1709,7 +1715,7 @@ describe('EvaluationTaskService', () => { evalId: testEvaluationId, dataItem: { userInput: 'Failed 1', expectedOutput: 'Answer 1' }, target, - evaluator: evaluators[0], + evaluators: [evaluators[0]], status: EvaluationStatusEnum.error, errorMessage: 'Error 1' }, @@ -1717,7 +1723,7 @@ describe('EvaluationTaskService', () => { evalId: testEvaluationId, dataItem: { userInput: 'Failed 2', expectedOutput: 'Answer 2' }, target, - evaluator: evaluators[0], + evaluators: [evaluators[0]], status: EvaluationStatusEnum.error, errorMessage: 'Error 2' }, @@ -1725,14 +1731,16 @@ describe('EvaluationTaskService', () => { evalId: testEvaluationId, dataItem: { userInput: 'Success', expectedOutput: 'Answer' }, target, - evaluator: evaluators[0], + evaluators: [evaluators[0]], status: EvaluationStatusEnum.completed, - evaluatorOutput: { - metricName: 'Test Metric', - data: { - score: 90 + evaluatorOutputs: [ + { + metricName: 'Test Metric', + data: { + score: 90 + } } - } // 成功的项目,不应该被重试 + ] // 成功的项目,不应该被重试 } ]); @@ -1757,7 +1765,7 @@ describe('EvaluationTaskService', () => { 'dataItem.userInput': 'Success' }); expect(successItem?.status).toBe(EvaluationStatusEnum.completed); - expect(successItem?.evaluatorOutput?.data?.score).toBe(90); + expect(successItem?.evaluatorOutputs?.[0]?.data?.score).toBe(90); }); }); @@ -1784,21 +1792,23 @@ describe('EvaluationTaskService', () => { evalId: testEvaluationId, dataItem: { userInput: 'Test userInput 1', expectedOutput: 'Test answer 1' }, target, - evaluator: evaluators[0], + evaluators: [evaluators[0]], status: EvaluationStatusEnum.queuing }, { evalId: testEvaluationId, dataItem: { userInput: 'Test userInput 2', expectedOutput: 'Test answer 2' }, target, - evaluator: evaluators[0], + evaluators: [evaluators[0]], status: EvaluationStatusEnum.completed, - evaluatorOutput: { - metricName: 'Test Metric', - data: { - score: 85 + evaluatorOutputs: [ + { + metricName: 'Test Metric', + data: { + score: 85 + } } - } + ] } ]); @@ -1931,7 +1941,7 @@ describe('EvaluationTaskService', () => { context: ['context1'] }, target, - evaluator: evaluators[0], + evaluators: [evaluators[0]], status: EvaluationStatusEnum.queuing, retry: 3 }); @@ -1965,7 +1975,8 @@ describe('EvaluationTaskService', () => { const updatedItem = await MongoEvalItem.findById(evalItem._id); expect(updatedItem?.status).toBe(EvaluationStatusEnum.completed); expect(updatedItem?.targetOutput).toBeDefined(); - expect(updatedItem?.evaluatorOutput).toBeDefined(); + expect(updatedItem?.evaluatorOutputs).toBeDefined(); + expect(updatedItem?.evaluatorOutputs?.length).toBeGreaterThan(0); // 验证目标和评估器被调用 expect(mockTargetInstance.execute).toHaveBeenCalledWith({ @@ -1997,7 +2008,7 @@ describe('EvaluationTaskService', () => { expectedOutput: 'Expected output' }, target, - evaluator: evaluators[0], + evaluators: [evaluators[0]], status: EvaluationStatusEnum.evaluating, targetOutput: { actualOutput: 'Existing target output', @@ -2289,7 +2300,7 @@ describe('EvaluationTaskService', () => { evalId: testEvaluationId, dataItem: { userInput: `Backoff test ${testCase.retry}`, expectedOutput: 'Expected' }, target, - evaluator: evaluators[0], + evaluators: [evaluators[0]], status: EvaluationStatusEnum.queuing, retry: testCase.retry }); @@ -2360,17 +2371,17 @@ describe('EvaluationTaskService', () => { evalId: testEvaluationId, dataItem: { userInput: 'Q1', expectedOutput: 'A1' }, target, - evaluator: evaluators[0], + evaluators: [evaluators[0]], status: EvaluationStatusEnum.completed, - evaluatorOutput: { metricName: 'Test', data: { score: 85 } } + evaluatorOutputs: [{ metricName: 'Test', data: { score: 85 } }] }, { evalId: testEvaluationId, dataItem: { userInput: 'Q2', expectedOutput: 'A2' }, target, - evaluator: evaluators[0], + evaluators: [evaluators[0]], status: EvaluationStatusEnum.completed, - evaluatorOutput: { metricName: 'Test', data: { score: 95 } } + evaluatorOutputs: [{ metricName: 'Test', data: { score: 95 } }] } ]); @@ -2412,15 +2423,15 @@ describe('EvaluationTaskService', () => { evalId: testEvaluationId, dataItem: { userInput: 'Q1', expectedOutput: 'A1' }, target, - evaluator: evaluators[0], + evaluators: [evaluators[0]], status: EvaluationStatusEnum.completed, - evaluatorOutput: { metricName: 'Test', data: { score: 85 } } + evaluatorOutputs: [{ metricName: 'Test', data: { score: 85 } }] }, { evalId: testEvaluationId, dataItem: { userInput: 'Q2', expectedOutput: 'A2' }, target, - evaluator: evaluators[0], + evaluators: [evaluators[0]], status: EvaluationStatusEnum.evaluating // 仍在处理中 } ]); @@ -2436,6 +2447,374 @@ describe('EvaluationTaskService', () => { }); }); + // ========================= 聚合错误处理测试 ========================= + describe('Aggregated Error Handling Tests', () => { + let mockTargetInstance: any; + let mockEvaluatorInstance: any; + + beforeEach(() => { + // Clear all mocks before each test + vi.clearAllMocks(); + + // Reset AI Points check to pass normally + (checkTeamAIPoints as any).mockResolvedValue(undefined); + + mockTargetInstance = { + execute: vi.fn().mockResolvedValue({ + actualOutput: 'Mock target output', + responseTime: 1000, + usage: [{ totalPoints: 50 }] + }) + }; + mockEvaluatorInstance = { + evaluate: vi.fn() + }; + (createTargetInstance as any).mockReturnValue(mockTargetInstance); + (createEvaluatorInstance as any).mockResolvedValue(mockEvaluatorInstance); + }); + + test('应该收集所有评估器错误并继续执行', async () => { + const { evaluationItemProcessor } = await import( + '@fastgpt/service/core/evaluation/task/processor' + ); + + const testEvaluationId = new Types.ObjectId(); + + // 创建有多个评估器的评估项 + const multipleEvaluators = [ + { + metric: { + _id: new Types.ObjectId(), + name: 'Metric 1', + type: EvalMetricTypeEnum.Custom, + prompt: 'Test prompt 1' + }, + runtimeConfig: { llm: 'gpt-3.5-turbo' } + }, + { + metric: { + _id: new Types.ObjectId(), + name: 'Metric 2', + type: EvalMetricTypeEnum.Custom, + prompt: 'Test prompt 2' + }, + runtimeConfig: { llm: 'gpt-3.5-turbo' } + }, + { + metric: { + _id: new Types.ObjectId(), + name: 'Metric 3', + type: EvalMetricTypeEnum.Custom, + prompt: 'Test prompt 3' + }, + runtimeConfig: { llm: 'gpt-3.5-turbo' } + } + ]; + + const evalItem = await MongoEvalItem.create({ + evalId: testEvaluationId, + dataItem: { userInput: 'Test input', expectedOutput: 'Expected output' }, + target, + evaluators: multipleEvaluators, + status: EvaluationStatusEnum.queuing, + retry: 3 + }); + + await MongoEvaluation.create({ + _id: testEvaluationId, + teamId: new Types.ObjectId(teamId), + tmbId: new Types.ObjectId(tmbId), + name: 'Aggregated Error Test', + datasetId, + target, + evaluators: multipleEvaluators, + usageId: new Types.ObjectId(), + status: EvaluationStatusEnum.evaluating + }); + + // Mock evaluators - 第一个成功,第二个和第三个失败(使用明确不可重试的错误) + mockEvaluatorInstance.evaluate + .mockResolvedValueOnce({ + metricName: 'Metric 1', + status: 'success', + data: { score: 85 }, + totalPoints: 20 + }) + .mockResolvedValueOnce({ + metricName: 'Metric 2', + status: 'failed', + error: 'AUTHENTICATION_FAILED: Invalid API key provided.', + totalPoints: 15 + }) + .mockResolvedValueOnce({ + metricName: 'Metric 3', + status: 'failed', + error: 'VALIDATION_ERROR: Input validation failed.', + totalPoints: 10 + }); + + const itemJobData: EvaluationItemJobData = { + evalId: testEvaluationId.toString(), + evalItemId: evalItem._id.toString() + }; + + const mockJob = { data: itemJobData } as any; + + await evaluationItemProcessor(mockJob); + + // 验证评估项被标记为错误状态 + const updatedItem = await MongoEvalItem.findById(evalItem._id); + expect(updatedItem?.status).toBe(EvaluationStatusEnum.error); + + // 验证错误消息包含所有失败的评估器信息 + expect(updatedItem?.errorMessage).toContain('[EvaluatorExecute]'); + expect(updatedItem?.errorMessage).toContain( + 'Metric 2: AUTHENTICATION_FAILED: Invalid API key provided.' + ); + expect(updatedItem?.errorMessage).toContain( + 'Metric 3: VALIDATION_ERROR: Input validation failed.' + ); + + // 验证所有评估器的用量都被记录 + expect(concatUsage).toHaveBeenCalled(); + + // 验证所有三个评估器都被调用 + expect(mockEvaluatorInstance.evaluate).toHaveBeenCalledTimes(3); + }); + + test('应该正确处理部分评估器失败的聚合错误可重试性', async () => { + const { evaluationItemProcessor } = await import( + '@fastgpt/service/core/evaluation/task/processor' + ); + + const testEvaluationId = new Types.ObjectId(); + + const multipleEvaluators = [ + { + metric: { + _id: new Types.ObjectId(), + name: 'Metric 1', + type: EvalMetricTypeEnum.Custom + }, + runtimeConfig: { llm: 'gpt-3.5-turbo' } + }, + { + metric: { + _id: new Types.ObjectId(), + name: 'Metric 2', + type: EvalMetricTypeEnum.Custom + }, + runtimeConfig: { llm: 'gpt-3.5-turbo' } + } + ]; + + const evalItem = await MongoEvalItem.create({ + evalId: testEvaluationId, + dataItem: { userInput: 'Test input', expectedOutput: 'Expected output' }, + target, + evaluators: multipleEvaluators, + status: EvaluationStatusEnum.queuing, + retry: 3 + }); + + await MongoEvaluation.create({ + _id: testEvaluationId, + teamId: new Types.ObjectId(teamId), + tmbId: new Types.ObjectId(tmbId), + name: 'Retry Aggregated Error Test', + datasetId, + target, + evaluators: multipleEvaluators, + usageId: new Types.ObjectId(), + status: EvaluationStatusEnum.evaluating + }); + + // Mock evaluators - 一个可重试错误,一个不可重试错误 + mockEvaluatorInstance.evaluate + .mockResolvedValueOnce({ + metricName: 'Metric 1', + status: 'failed', + error: 'TIMEOUT: Request timeout', // 可重试 + totalPoints: 20 + }) + .mockResolvedValueOnce({ + metricName: 'Metric 2', + status: 'failed', + error: 'INVALID_CONFIG: Configuration error', // 不可重试 + totalPoints: 15 + }); + + const itemJobData: EvaluationItemJobData = { + evalId: testEvaluationId.toString(), + evalItemId: evalItem._id.toString() + }; + + const mockJob = { data: itemJobData } as any; + + await evaluationItemProcessor(mockJob); + + // 验证评估项被重新排队(因为有可重试错误) + const updatedItem = await MongoEvalItem.findById(evalItem._id); + expect(updatedItem?.status).toBe(EvaluationStatusEnum.queuing); + expect(updatedItem?.retry).toBe(2); + + // 验证重新排队的调用 + expect(evaluationItemQueue.add).toHaveBeenCalledWith( + expect.stringContaining(`eval_item_${evalItem._id.toString()}_retry`), + { + evalId: testEvaluationId.toString(), + evalItemId: evalItem._id.toString() + }, + expect.objectContaining({ + delay: expect.any(Number) + }) + ); + }); + + test('应该正确处理所有评估器都成功的情况', async () => { + const { evaluationItemProcessor } = await import( + '@fastgpt/service/core/evaluation/task/processor' + ); + + const testEvaluationId = new Types.ObjectId(); + + const multipleEvaluators = [ + { + metric: { + _id: new Types.ObjectId(), + name: 'Metric 1', + type: EvalMetricTypeEnum.Custom + }, + runtimeConfig: { llm: 'gpt-3.5-turbo' } + }, + { + metric: { + _id: new Types.ObjectId(), + name: 'Metric 2', + type: EvalMetricTypeEnum.Custom + }, + runtimeConfig: { llm: 'gpt-3.5-turbo' } + } + ]; + + const evalItem = await MongoEvalItem.create({ + evalId: testEvaluationId, + dataItem: { userInput: 'Test input', expectedOutput: 'Expected output' }, + target, + evaluators: multipleEvaluators, + status: EvaluationStatusEnum.queuing, + retry: 3 + }); + + await MongoEvaluation.create({ + _id: testEvaluationId, + teamId: new Types.ObjectId(teamId), + tmbId: new Types.ObjectId(tmbId), + name: 'All Success Test', + datasetId, + target, + evaluators: multipleEvaluators, + usageId: new Types.ObjectId(), + status: EvaluationStatusEnum.evaluating + }); + + // Mock evaluators - 都成功 + mockEvaluatorInstance.evaluate + .mockResolvedValueOnce({ + metricName: 'Metric 1', + status: 'success', + data: { score: 85 }, + totalPoints: 20 + }) + .mockResolvedValueOnce({ + metricName: 'Metric 2', + status: 'success', + data: { score: 90 }, + totalPoints: 15 + }); + + const itemJobData: EvaluationItemJobData = { + evalId: testEvaluationId.toString(), + evalItemId: evalItem._id.toString() + }; + + const mockJob = { data: itemJobData } as any; + + await evaluationItemProcessor(mockJob); + + // 验证评估项被标记为完成 + const updatedItem = await MongoEvalItem.findById(evalItem._id); + expect(updatedItem?.status).toBe(EvaluationStatusEnum.completed); + expect(updatedItem?.evaluatorOutputs).toHaveLength(2); + expect(updatedItem?.evaluatorOutputs?.[0].data?.score).toBe(85); + expect(updatedItem?.evaluatorOutputs?.[1].data?.score).toBe(90); + + // 验证用量记录 + expect(concatUsage).toHaveBeenCalled(); + }); + + test('应该在评估器抛出异常时正确处理', async () => { + const { evaluationItemProcessor } = await import( + '@fastgpt/service/core/evaluation/task/processor' + ); + + const testEvaluationId = new Types.ObjectId(); + + const singleEvaluator = [ + { + metric: { + _id: new Types.ObjectId(), + name: 'Exception Metric', + type: EvalMetricTypeEnum.Custom + }, + runtimeConfig: { llm: 'gpt-3.5-turbo' } + } + ]; + + const evalItem = await MongoEvalItem.create({ + evalId: testEvaluationId, + dataItem: { userInput: 'Test input', expectedOutput: 'Expected output' }, + target, + evaluators: singleEvaluator, + status: EvaluationStatusEnum.queuing, + retry: 3 + }); + + await MongoEvaluation.create({ + _id: testEvaluationId, + teamId: new Types.ObjectId(teamId), + tmbId: new Types.ObjectId(tmbId), + name: 'Exception Test', + datasetId, + target, + evaluators: singleEvaluator, + usageId: new Types.ObjectId(), + status: EvaluationStatusEnum.evaluating + }); + + // Mock evaluator抛出异常 + mockEvaluatorInstance.evaluate.mockRejectedValue( + new Error('NETWORK_ERROR: Connection failed') + ); + + const itemJobData: EvaluationItemJobData = { + evalId: testEvaluationId.toString(), + evalItemId: evalItem._id.toString() + }; + + const mockJob = { data: itemJobData } as any; + + await evaluationItemProcessor(mockJob); + + // 验证评估项被重新排队(异常应该被当作可重试错误处理) + const updatedItem = await MongoEvalItem.findById(evalItem._id); + expect(updatedItem?.status).toBe(EvaluationStatusEnum.queuing); + expect(updatedItem?.retry).toBe(2); + expect(updatedItem?.errorMessage).toContain('[EvaluatorExecute]'); + expect(updatedItem?.errorMessage).toContain('NETWORK_ERROR'); + }); + }); + // ========================= 错误处理和状态管理测试 ========================= describe('Error Handling and Status Management Tests', () => { test('评估项处理失败时应该正确清理状态', async () => { @@ -2449,7 +2828,7 @@ describe('EvaluationTaskService', () => { evalId: testEvaluationId, dataItem: { userInput: 'Error cleanup test', expectedOutput: 'Expected' }, target, - evaluator: evaluators[0], + evaluators: [evaluators[0]], status: EvaluationStatusEnum.evaluating, targetOutput: { actualOutput: 'Partial result', responseTime: 500 }, retry: 3 @@ -2520,23 +2899,23 @@ describe('EvaluationTaskService', () => { evalId: testEvaluationId, dataItem: { userInput: 'Success 1', expectedOutput: 'A1' }, target, - evaluator: evaluators[0], + evaluators: [evaluators[0]], status: EvaluationStatusEnum.completed, - evaluatorOutput: { metricName: 'Test', data: { score: 85 } } + evaluatorOutputs: [{ metricName: 'Test', data: { score: 85 } }] }, { evalId: testEvaluationId, dataItem: { userInput: 'Success 2', expectedOutput: 'A2' }, target, - evaluator: evaluators[0], + evaluators: [evaluators[0]], status: EvaluationStatusEnum.completed, - evaluatorOutput: { metricName: 'Test', data: { score: 95 } } + evaluatorOutputs: [{ metricName: 'Test', data: { score: 95 } }] }, { evalId: testEvaluationId, dataItem: { userInput: 'Failed', expectedOutput: 'A3' }, target, - evaluator: evaluators[0], + evaluators: [evaluators[0]], status: EvaluationStatusEnum.error, errorMessage: 'Test error' } @@ -2551,327 +2930,4 @@ describe('EvaluationTaskService', () => { expect(finalEvaluation?.statistics?.errorItems).toBe(1); }); }); - - // ========================= 数据项聚合操作测试 ========================= - describe('DataItem Aggregation Operations Tests', () => { - let testEvaluationId: string; - let testDataItemId: string; - - beforeEach(async () => { - // 为每个测试创建新的evaluation - const params: CreateEvaluationParams = { - name: 'Test Evaluation for DataItem Operations', - description: 'A test evaluation for data item operations', - datasetId, - target, - evaluators: evaluators, - autoStart: false - }; - const evaluation = await EvaluationTaskService.createEvaluation({ - ...params, - teamId: teamId, - tmbId: tmbId - }); - testEvaluationId = evaluation._id; - testDataItemId = new Types.ObjectId().toString(); - - // 创建测试数据项 - 同一个dataItemId的多个评估项 - await MongoEvalItem.create([ - { - evalId: testEvaluationId, - dataItem: { - _id: new Types.ObjectId(testDataItemId), - userInput: 'What is JavaScript?', - expectedOutput: 'JavaScript is a programming language' - }, - target, - evaluator: evaluators[0], - status: EvaluationStatusEnum.completed, - evaluatorOutput: { - metricName: 'Test Metric', - data: { score: 85 } - } - }, - { - evalId: testEvaluationId, - dataItem: { - _id: new Types.ObjectId(testDataItemId), - userInput: 'What is JavaScript?', - expectedOutput: 'JavaScript is a programming language' - }, - target, - evaluator: evaluators[0], - status: EvaluationStatusEnum.error, - errorMessage: 'Test error' - }, - { - evalId: testEvaluationId, - dataItem: { - _id: new Types.ObjectId().toString(), - userInput: 'What is Python?', - expectedOutput: 'Python is a programming language' - }, - target, - evaluator: evaluators[0], - status: EvaluationStatusEnum.completed, - evaluatorOutput: { - metricName: 'Test Metric', - data: { score: 90 } - } - } - ]); - }); - - describe('listDataItemsGrouped', () => { - test('应该成功返回按数据项分组的结果', async () => { - const result = await EvaluationTaskService.listDataItemsGrouped(teamId, { - evalId: testEvaluationId, - offset: 0, - pageSize: 20 - }); - - expect(result.list).toHaveLength(2); - expect(result.total).toBe(2); - - const firstGroup = result.list[0]; - expect(firstGroup.dataItemId).toBeDefined(); - expect(firstGroup.dataItem).toBeDefined(); - expect(firstGroup.items).toBeDefined(); - expect(firstGroup.statistics).toBeDefined(); - expect(firstGroup.statistics!.totalItems).toBeGreaterThan(0); - expect(firstGroup.statistics!.completedItems).toBeGreaterThanOrEqual(0); - expect(firstGroup.statistics!.errorItems).toBeGreaterThanOrEqual(0); - }); - - test('应该支持状态过滤', async () => { - const result = await EvaluationTaskService.listDataItemsGrouped(teamId, { - evalId: testEvaluationId, - status: EvaluationStatusEnum.completed, - offset: 0, - pageSize: 20 - }); - - result.list.forEach((group) => { - group.items.forEach((item) => { - expect(item.status).toBe(EvaluationStatusEnum.completed); - }); - }); - }); - - test('应该支持关键词搜索', async () => { - const result = await EvaluationTaskService.listDataItemsGrouped(teamId, { - evalId: testEvaluationId, - keyword: 'JavaScript', - offset: 0, - pageSize: 20 - }); - - expect(result.list.length).toBeGreaterThan(0); - const hasJavaScript = result.list.some( - (group) => - group.dataItem.userInput?.includes('JavaScript') || - group.dataItem.expectedOutput?.includes('JavaScript') - ); - expect(hasJavaScript).toBe(true); - }); - - test('应该支持分页', async () => { - const result = await EvaluationTaskService.listDataItemsGrouped(teamId, { - evalId: testEvaluationId, - offset: 0, - pageSize: 1 - }); - - expect(result.list).toHaveLength(1); - expect(result.total).toBe(2); - }); - }); - - describe('deleteEvaluationItemsByDataItem', () => { - test('应该成功删除指定数据项的所有评估项', async () => { - const result = await EvaluationTaskService.deleteEvaluationItemsByDataItem( - testDataItemId, - teamId, - testEvaluationId - ); - - expect(result.deletedCount).toBe(2); // 应该删除2个评估项 - - // 验证项目已被删除 - const remainingItems = await MongoEvalItem.find({ - evalId: testEvaluationId, - 'dataItem._id': testDataItemId - }); - expect(remainingItems).toHaveLength(0); - - // 验证其他项目未受影响 - const otherItems = await MongoEvalItem.find({ - evalId: testEvaluationId, - 'dataItem._id': { $ne: testDataItemId } - }); - expect(otherItems).toHaveLength(1); - }); - - test('数据项不存在时应该返回0', async () => { - const nonExistentDataItemId = new Types.ObjectId().toString(); - - const result = await EvaluationTaskService.deleteEvaluationItemsByDataItem( - nonExistentDataItemId, - teamId, - testEvaluationId - ); - - expect(result.deletedCount).toBe(0); - }); - }); - - describe('retryEvaluationItemsByDataItem', () => { - test('应该成功重试指定数据项的失败评估项', async () => { - const result = await EvaluationTaskService.retryEvaluationItemsByDataItem( - testDataItemId, - teamId, - testEvaluationId - ); - - expect(result.retriedCount).toBe(1); // 应该重试1个失败的项目 - - // 验证失败的项目状态被重置 - const retriedItems = await MongoEvalItem.find({ - evalId: testEvaluationId, - 'dataItem._id': new Types.ObjectId(testDataItemId), - status: EvaluationStatusEnum.queuing - }); - expect(retriedItems).toHaveLength(1); - - // 验证成功的项目未受影响 - const completedItems = await MongoEvalItem.find({ - evalId: testEvaluationId, - 'dataItem._id': new Types.ObjectId(testDataItemId), - status: EvaluationStatusEnum.completed - }); - expect(completedItems).toHaveLength(1); - }); - - test('没有失败项目时应该返回0', async () => { - // 先将所有项目设为完成状态 - await MongoEvalItem.updateMany( - { evalId: testEvaluationId, 'dataItem._id': new Types.ObjectId(testDataItemId) }, - { $set: { status: EvaluationStatusEnum.completed } } - ); - - const result = await EvaluationTaskService.retryEvaluationItemsByDataItem( - testDataItemId, - teamId, - testEvaluationId - ); - - expect(result.retriedCount).toBe(0); - }); - }); - - describe('updateEvaluationItemsByDataItem', () => { - test('应该成功更新指定数据项的所有评估项', async () => { - const updates = { - userInput: 'Updated JavaScript userInput', - expectedOutput: 'Updated JavaScript answer', - context: ['Updated context'] - }; - - const result = await EvaluationTaskService.updateEvaluationItemsByDataItem( - testDataItemId, - updates, - teamId, - testEvaluationId - ); - - expect(result.updatedCount).toBe(2); // 应该更新2个评估项 - - // 验证更新结果 - const updatedItems = await MongoEvalItem.find({ - evalId: testEvaluationId, - 'dataItem._id': testDataItemId - }); - - updatedItems.forEach((item) => { - expect(item.dataItem.userInput).toBe(updates.userInput); - expect(item.dataItem.expectedOutput).toBe(updates.expectedOutput); - expect(item.dataItem.context).toEqual(updates.context); - }); - }); - - test('空更新时应该返回0', async () => { - const result = await EvaluationTaskService.updateEvaluationItemsByDataItem( - testDataItemId, - {}, - teamId, - testEvaluationId - ); - - expect(result.updatedCount).toBe(0); - }); - }); - - describe('exportEvaluationResultsGroupedByDataItem', () => { - test('应该成功导出JSON格式的数据项分组结果', async () => { - const result = await EvaluationTaskService.exportEvaluationResultsGroupedByDataItem( - teamId, - testEvaluationId, - 'json' - ); - - expect(result.totalItems).toBe(2); - - const exportData = JSON.parse(result.results.toString()); - expect(Array.isArray(exportData)).toBe(true); - expect(exportData).toHaveLength(2); - - const firstItem = exportData[0]; - expect(firstItem.dataItemId).toBeDefined(); - expect(firstItem.userInput).toBeDefined(); - expect(firstItem.expectedOutput).toBeDefined(); - expect(firstItem.metricScores).toBeDefined(); - expect(typeof firstItem.metricScores).toBe('object'); - }); - - test('应该成功导出CSV格式的数据项分组结果', async () => { - const result = await EvaluationTaskService.exportEvaluationResultsGroupedByDataItem( - teamId, - testEvaluationId, - 'csv' - ); - - expect(result.totalItems).toBe(2); - - const csvContent = result.results.toString(); - expect(csvContent).toContain('DataItemId,UserInput,ExpectedOutput,ActualOutput'); - expect(csvContent).toContain('Test Metric'); // 指标名称应该作为列标题 - expect(csvContent.split('\n').length).toBeGreaterThan(2); // 应该有标题行和数据行 - }); - - test('空数据时应该返回空结果', async () => { - // 创建一个空的评估任务 - const emptyParams: CreateEvaluationParams = { - name: 'Empty DataItem Export Test', - description: 'Empty test', - datasetId, - target, - evaluators: evaluators - }; - const emptyEvaluation = await EvaluationTaskService.createEvaluation({ - ...emptyParams, - teamId: teamId, - tmbId: tmbId - }); - - const result = await EvaluationTaskService.exportEvaluationResultsGroupedByDataItem( - teamId, - emptyEvaluation._id, - 'json' - ); - - expect(result.totalItems).toBe(0); - expect(result.results.toString()).toBe('[]'); - }); - }); - }); });