diff --git a/docs/design/migrate-evaluator-playground/plan.md b/docs/design/migrate-evaluator-playground/plan.md index 8a384658f..220273242 100644 --- a/docs/design/migrate-evaluator-playground/plan.md +++ b/docs/design/migrate-evaluator-playground/plan.md @@ -518,18 +518,20 @@ export interface WorkflowServiceBatchResponse { ```typescript import axios from "@/oss/lib/api/assets/axiosConfig" -import { getAgentaApiUrl } from "@/oss/lib/helpers/utils" -import { getProjectValues } from "@/oss/contexts/project.context" -import { - WorkflowServiceRequest, - WorkflowServiceBatchResponse, - SimpleEvaluator, -} from "@/oss/lib/Types" +import type { SimpleEvaluator } from "@/oss/lib/Types" +import { getAgentaApiUrl } from "@/oss/lib/helpers/api" +import { buildEvaluatorUri, resolveEvaluatorKey } from "@/oss/lib/evaluators/utils" +import { getProjectValues } from "@/oss/state/project" + +export interface WorkflowServiceBatchResponse { + status?: { code?: number; message?: string } + data?: { outputs?: any } +} export interface InvokeEvaluatorParams { - evaluator: SimpleEvaluator - inputs: Record<string, any> // testcase data + any extra inputs - outputs: any // prediction/output from variant + evaluator?: Partial<SimpleEvaluator> | null + inputs?: Record<string, any> // testcase data + any extra inputs + outputs?: any // prediction/output from variant parameters?: Record<string, any> // override settings (optional) } @@ -542,16 +544,12 @@ export const invokeEvaluator = async ( const { projectId } = getProjectValues() const { evaluator, inputs, outputs, parameters } = params - const uri = evaluator.data?.uri - if (!uri) { - throw new Error("Evaluator has no URI configured") - } + const evaluatorKey = resolveEvaluatorKey(evaluator) + const uri = evaluator?.data?.uri || (evaluatorKey ? 
buildEvaluatorUri(evaluatorKey) : undefined) + if (!uri) throw new Error("Evaluator URI is missing") - const request: WorkflowServiceRequest = { - version: "2025.07.14", - interface: { - uri, - }, + const request = { + interface: { uri }, configuration: { - parameters: parameters ?? evaluator.data?.parameters, + parameters: parameters ?? evaluator?.data?.parameters, }, @@ -608,11 +606,8 @@ const runResponse = await createEvaluatorRunExecution( import { invokeEvaluator, mapWorkflowResponseToEvaluatorOutput } from "@/oss/services/workflows/invoke" const workflowResponse = await invokeEvaluator({ - evaluator: simpleEvaluator, // from playground state - inputs: { - ...testcaseData, - prediction: variantOutput, - }, + evaluator: simpleEvaluator ?? { data: { uri: buildEvaluatorUri(selectedEvaluator.key) } }, + inputs: evaluatorInputs, outputs: variantOutput, parameters: formValues.parameters, // current form settings }) diff --git a/docs/design/migrate-evaluator-playground/status.md b/docs/design/migrate-evaluator-playground/status.md index dbce737e8..69f7cf48c 100644 --- a/docs/design/migrate-evaluator-playground/status.md +++ b/docs/design/migrate-evaluator-playground/status.md @@ -1,8 +1,8 @@ # Status: Evaluator Playground Migration -## Current Phase: PR 1 (CRUD) In Progress +## Current Phase: PR 2 (Run) In Progress -**Last Updated:** 2026-01-27 +**Last Updated:** 2026-01-28 --- @@ -10,8 +10,8 @@ **Direct Migration (No Adapters)** - Split into two PRs: -1. **PR 1:** CRUD migration to `SimpleEvaluator` endpoints -2. **PR 2:** Run migration to native workflow invoke +1. **PR 1:** CRUD migration to `SimpleEvaluator` endpoints (draft PR) +2. **PR 2:** Run migration to native workflow invoke (in progress) See [plan.md](./plan.md) for detailed implementation steps. @@ -55,8 +55,8 @@ See [plan.md](./plan.md) for detailed implementation steps. 
### Next Steps -- [ ] Complete PR 1: CRUD migration (stacked on PR #3527) -- [ ] After PR 1 stable, start PR 2: Run migration +- [ ] Finalize PR 1: CRUD migration (stacked on PR #3527) +- [ ] Finish PR 2: Run migration --- diff --git a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/DebugSection.tsx b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/DebugSection.tsx index 49ef6e2f8..3e428b827 100644 --- a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/DebugSection.tsx +++ b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/DebugSection.tsx @@ -36,6 +36,7 @@ import type {LoadTestsetSelectionPayload} from "@/oss/components/Playground/Comp import SharedEditor from "@/oss/components/Playground/Components/SharedEditor" import {useAppId} from "@/oss/hooks/useAppId" import {transformTraceKeysInSettings, mapTestcaseAndEvalValues} from "@/oss/lib/evaluations/legacy" +import {buildEvaluatorUri, resolveEvaluatorKey} from "@/oss/lib/evaluators/utils" import {isBaseResponse, isFuncResponse} from "@/oss/lib/helpers/playgroundResp" import { extractChatMessages, @@ -58,11 +59,12 @@ import { } from "@/oss/lib/transformers" import {BaseResponse, ChatMessage, JSSTheme, Parameter, Variant} from "@/oss/lib/Types" import {callVariant} from "@/oss/services/api" -import { - createEvaluatorDataMapping, - createEvaluatorRunExecution, -} from "@/oss/services/evaluations/api_ee" +import {createEvaluatorDataMapping} from "@/oss/services/evaluations/api_ee" import {AgentaNodeDTO} from "@/oss/services/observability/types" +import { + invokeEvaluator, + mapWorkflowResponseToEvaluatorOutput, +} from "@/oss/services/workflows/invoke" import {useAppsData} from "@/oss/state/app/hooks" import {revision} from "@/oss/state/entities/testset" import {customPropertiesByRevisionAtomFamily} from 
"@/oss/state/newPlayground/core/customProperties"
@@ -76,6 +78,7 @@ import {appSchemaAtom, appUriInfoAtom} from "@/oss/state/variant/atoms/fetcher"
 import EvaluatorVariantModal from "./EvaluatorVariantModal"
 import {
     playgroundEvaluatorAtom,
+    playgroundEditValuesAtom,
     playgroundFormRefAtom,
     playgroundLastAppIdAtom,
     playgroundLastVariantIdAtom,
@@ -159,6 +162,7 @@ const DebugSection = () => {
     const traceTree = useAtomValue(playgroundTraceTreeAtom)
     const setTraceTree = useSetAtom(playgroundTraceTreeAtom)
     const selectedEvaluator = useAtomValue(playgroundEvaluatorAtom)
+    const evaluatorConfig = useAtomValue(playgroundEditValuesAtom)
     const form = useAtomValue(playgroundFormRefAtom)
     const [lastAppId, setLastAppId] = useAtom(playgroundLastAppIdAtom)
     const [lastVariantId, setLastVariantId] = useAtom(playgroundLastVariantIdAtom)
@@ -453,14 +457,34 @@ const DebugSection = () => {
                 }
             }
 
-            const runResponse = await createEvaluatorRunExecution(
-                selectedEvaluator.key,
-                {
-                    inputs: outputs,
-                    settings: transformTraceKeysInSettings(normalizedSettings),
-                },
-                {signal: controller.signal},
-            )
+            const evaluatorKey = resolveEvaluatorKey(evaluatorConfig) || selectedEvaluator?.key
+            const evaluatorUri =
+                evaluatorConfig?.data?.uri ||
+                (evaluatorKey ? buildEvaluatorUri(evaluatorKey) : undefined)
+
+            if (!evaluatorUri) {
+                setOutputResult("Evaluator URI is missing. Save the evaluator and try again.")
+                setEvalOutputStatus({success: false, error: true})
+                return
+            }
+
+            const evaluatorParameters = transformTraceKeysInSettings(normalizedSettings)
+            const workflowOutputs =
+                baseResponseData?.data ?? safeParse(variantResult, variantResult)
+
+            const workflowResponse = await invokeEvaluator({
+                // Always forward the URI resolved above: a non-null config without its own
+                // URI/key would otherwise discard the selectedEvaluator.key fallback.
+                evaluator: {
+                    ...evaluatorConfig,
+                    data: {...evaluatorConfig?.data, uri: evaluatorUri},
+                },
+                inputs: outputs,
+                outputs: workflowOutputs,
+                parameters: evaluatorParameters,
+                options: {signal: controller.signal},
+            })
+            const runResponse = mapWorkflowResponseToEvaluatorOutput(workflowResponse)
 
             setEvalOutputStatus({success: true, error: false})
             setOutputResult(getStringOrJson(runResponse.outputs))
diff --git a/web/oss/src/services/workflows/invoke.ts b/web/oss/src/services/workflows/invoke.ts
new file mode 100644
index 000000000..fe534f13e
--- /dev/null
+++ b/web/oss/src/services/workflows/invoke.ts
@@ -0,0 +1,106 @@
+import axios from "@/oss/lib/api/assets/axiosConfig"
+import {buildEvaluatorUri, resolveEvaluatorKey} from "@/oss/lib/evaluators/utils"
+import {getAgentaApiUrl} from "@/oss/lib/helpers/api"
+import type {SimpleEvaluator} from "@/oss/lib/Types"
+import {getProjectValues} from "@/oss/state/project"
+
+export interface WorkflowServiceStatus {
+    code?: number
+    message?: string
+    type?: string
+    stacktrace?: string[] | string
+}
+
+export interface WorkflowServiceBatchResponse {
+    version?: string
+    trace_id?: string
+    span_id?: string
+    status?: WorkflowServiceStatus
+    data?: {
+        outputs?: any
+    }
+}
+
+export interface InvokeEvaluatorOptions {
+    signal?: AbortSignal
+    timeout?: number
+}
+
+export interface InvokeEvaluatorParams {
+    evaluator?: Partial<SimpleEvaluator> | null
+    inputs?: Record<string, any>
+    outputs?: any
+    parameters?: Record<string, any>
+    options?: InvokeEvaluatorOptions
+}
+
+/** Request timeout (ms) applied when the caller does not supply `options.timeout`. */
+const DEFAULT_EVALUATOR_TIMEOUT = 120_000
+
+/**
+ * Posts an evaluator invocation to `/preview/workflows/invoke` for the current project.
+ *
+ * The target URI is `evaluator.data.uri` when set; otherwise it is built from the key that
+ * `resolveEvaluatorKey` returns for `evaluator`.
+ *
+ * @returns The response body as returned by the endpoint (not inspected here).
+ * @throws Error("Evaluator URI is missing") when neither source yields a URI.
+ */
+export const invokeEvaluator = async ({
+    evaluator,
+    inputs,
+    outputs,
+    parameters,
+    options,
+}: InvokeEvaluatorParams): Promise<WorkflowServiceBatchResponse> => {
+    const {projectId} = getProjectValues()
+    const evaluatorKey = resolveEvaluatorKey(evaluator)
+    const evaluatorUri =
+        evaluator?.data?.uri || (evaluatorKey ? buildEvaluatorUri(evaluatorKey) : undefined)
+
+    if (!evaluatorUri) {
+        throw new Error("Evaluator URI is missing")
+    }
+
+    // `parameters` is sent under `configuration` (only when provided) and again under `data`.
+    const request: Record<string, unknown> = {
+        interface: {uri: evaluatorUri},
+        configuration: parameters ? {parameters} : undefined,
+        data: {
+            inputs,
+            outputs,
+            parameters,
+        },
+    }
+
+    const timeout = options?.timeout ?? DEFAULT_EVALUATOR_TIMEOUT
+
+    const response = await axios.post<WorkflowServiceBatchResponse>(
+        `${getAgentaApiUrl()}/preview/workflows/invoke?project_id=${projectId}`,
+        request,
+        {
+            signal: options?.signal,
+            timeout,
+        },
+    )
+
+    return response.data
+}
+
+/**
+ * Extracts evaluator outputs from a workflow invoke response.
+ *
+ * @returns `{outputs}` taken from `response.data.outputs`, or `{}` when absent.
+ * @throws Error carrying `status.message` (or a generic message) when `status.code` is >= 400.
+ */
+export const mapWorkflowResponseToEvaluatorOutput = (
+    response: WorkflowServiceBatchResponse,
+): {outputs: Record<string, any>} => {
+    if (response.status?.code && response.status.code >= 400) {
+        throw new Error(response.status.message || "Evaluator execution failed")
+    }
+
+    return {
+        outputs: response.data?.outputs ?? {},
+    }
+}