Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 20 additions & 24 deletions docs/design/migrate-evaluator-playground/plan.md
Original file line number Diff line number Diff line change
Expand Up @@ -518,18 +518,21 @@ export interface WorkflowServiceBatchResponse {

```typescript
import axios from "@/oss/lib/api/assets/axiosConfig"
import { getAgentaApiUrl } from "@/oss/lib/helpers/utils"
import { getProjectValues } from "@/oss/contexts/project.context"
import {
WorkflowServiceRequest,
WorkflowServiceBatchResponse,
SimpleEvaluator,
} from "@/oss/lib/Types"
import type { SimpleEvaluator } from "@/oss/lib/Types"
import axios from "@/oss/lib/api/assets/axiosConfig"
import { getAgentaApiUrl } from "@/oss/lib/helpers/api"
import { buildEvaluatorUri, resolveEvaluatorKey } from "@/oss/lib/evaluators/utils"
import { getProjectValues } from "@/oss/state/project"

export interface WorkflowServiceBatchResponse {
status?: { code?: number; message?: string }
data?: { outputs?: any }
}

export interface InvokeEvaluatorParams {
evaluator: SimpleEvaluator
inputs: Record<string, any> // testcase data + any extra inputs
outputs: any // prediction/output from variant
evaluator?: Partial<SimpleEvaluator> | null
inputs?: Record<string, any> // testcase data + any extra inputs
outputs?: any // prediction/output from variant
parameters?: Record<string, any> // override settings (optional)
}

Expand All @@ -542,16 +545,12 @@ export const invokeEvaluator = async (
const { projectId } = getProjectValues()
const { evaluator, inputs, outputs, parameters } = params

const uri = evaluator.data?.uri
if (!uri) {
throw new Error("Evaluator has no URI configured")
}
const evaluatorKey = resolveEvaluatorKey(evaluator)
const uri = evaluator?.data?.uri || (evaluatorKey ? buildEvaluatorUri(evaluatorKey) : undefined)
if (!uri) throw new Error("Evaluator URI is missing")

const request: WorkflowServiceRequest = {
version: "2025.07.14",
interface: {
uri,
},
const request = {
interface: { uri },
configuration: {
parameters: parameters ?? evaluator?.data?.parameters,
},
Expand Down Expand Up @@ -608,11 +607,8 @@ const runResponse = await createEvaluatorRunExecution(
import { invokeEvaluator, mapWorkflowResponseToEvaluatorOutput } from "@/oss/services/workflows/invoke"

const workflowResponse = await invokeEvaluator({
evaluator: simpleEvaluator, // from playground state
inputs: {
...testcaseData,
prediction: variantOutput,
},
evaluator: simpleEvaluator ?? { data: { uri: buildEvaluatorUri(selectedEvaluator.key) } },
inputs: evaluatorInputs,
outputs: variantOutput,
parameters: formValues.parameters, // current form settings
})
Expand Down
12 changes: 6 additions & 6 deletions docs/design/migrate-evaluator-playground/status.md
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
# Status: Evaluator Playground Migration

## Current Phase: PR 1 (CRUD) In Progress
## Current Phase: PR 2 (Run) In Progress

**Last Updated:** 2026-01-27
**Last Updated:** 2026-01-28

---

## Chosen Approach

**Direct Migration (No Adapters)** - Split into two PRs:

1. **PR 1:** CRUD migration to `SimpleEvaluator` endpoints
2. **PR 2:** Run migration to native workflow invoke
1. **PR 1:** CRUD migration to `SimpleEvaluator` endpoints (draft PR)
2. **PR 2:** Run migration to native workflow invoke (in progress)

See [plan.md](./plan.md) for detailed implementation steps.

Expand Down Expand Up @@ -55,8 +55,8 @@ See [plan.md](./plan.md) for detailed implementation steps.

### Next Steps

- [ ] Complete PR 1: CRUD migration (stacked on PR #3527)
- [ ] After PR 1 stable, start PR 2: Run migration
- [ ] Finalize PR 1: CRUD migration (stacked on PR #3527)
- [ ] Finish PR 2: Run migration

---

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ import type {LoadTestsetSelectionPayload} from "@/oss/components/Playground/Comp
import SharedEditor from "@/oss/components/Playground/Components/SharedEditor"
import {useAppId} from "@/oss/hooks/useAppId"
import {transformTraceKeysInSettings, mapTestcaseAndEvalValues} from "@/oss/lib/evaluations/legacy"
import {buildEvaluatorUri, resolveEvaluatorKey} from "@/oss/lib/evaluators/utils"
import {isBaseResponse, isFuncResponse} from "@/oss/lib/helpers/playgroundResp"
import {
extractChatMessages,
Expand All @@ -58,11 +59,12 @@ import {
} from "@/oss/lib/transformers"
import {BaseResponse, ChatMessage, JSSTheme, Parameter, Variant} from "@/oss/lib/Types"
import {callVariant} from "@/oss/services/api"
import {
createEvaluatorDataMapping,
createEvaluatorRunExecution,
} from "@/oss/services/evaluations/api_ee"
import {createEvaluatorDataMapping} from "@/oss/services/evaluations/api_ee"
import {AgentaNodeDTO} from "@/oss/services/observability/types"
import {
invokeEvaluator,
mapWorkflowResponseToEvaluatorOutput,
} from "@/oss/services/workflows/invoke"
import {useAppsData} from "@/oss/state/app/hooks"
import {revision} from "@/oss/state/entities/testset"
import {customPropertiesByRevisionAtomFamily} from "@/oss/state/newPlayground/core/customProperties"
Expand All @@ -76,6 +78,7 @@ import {appSchemaAtom, appUriInfoAtom} from "@/oss/state/variant/atoms/fetcher"
import EvaluatorVariantModal from "./EvaluatorVariantModal"
import {
playgroundEvaluatorAtom,
playgroundEditValuesAtom,
playgroundFormRefAtom,
playgroundLastAppIdAtom,
playgroundLastVariantIdAtom,
Expand Down Expand Up @@ -159,6 +162,7 @@ const DebugSection = () => {
const traceTree = useAtomValue(playgroundTraceTreeAtom)
const setTraceTree = useSetAtom(playgroundTraceTreeAtom)
const selectedEvaluator = useAtomValue(playgroundEvaluatorAtom)
const evaluatorConfig = useAtomValue(playgroundEditValuesAtom)
const form = useAtomValue(playgroundFormRefAtom)
const [lastAppId, setLastAppId] = useAtom(playgroundLastAppIdAtom)
const [lastVariantId, setLastVariantId] = useAtom(playgroundLastVariantIdAtom)
Expand Down Expand Up @@ -453,14 +457,29 @@ const DebugSection = () => {
}
}

const runResponse = await createEvaluatorRunExecution(
selectedEvaluator.key,
{
inputs: outputs,
settings: transformTraceKeysInSettings(normalizedSettings),
},
{signal: controller.signal},
)
const evaluatorKey = resolveEvaluatorKey(evaluatorConfig) || selectedEvaluator?.key
const evaluatorUri =
evaluatorConfig?.data?.uri ||
(evaluatorKey ? buildEvaluatorUri(evaluatorKey) : undefined)

if (!evaluatorUri) {
setOutputResult("Evaluator URI is missing. Save the evaluator and try again.")
setEvalOutputStatus({success: false, error: true})
return
}

const evaluatorParameters = transformTraceKeysInSettings(normalizedSettings)
const workflowOutputs =
baseResponseData?.data ?? safeParse(variantResult, variantResult)

const workflowResponse = await invokeEvaluator({
evaluator: evaluatorConfig ?? {data: {uri: evaluatorUri}},
inputs: outputs,
outputs: workflowOutputs,
parameters: evaluatorParameters,
options: {signal: controller.signal},
})
const runResponse = mapWorkflowResponseToEvaluatorOutput(workflowResponse)
setEvalOutputStatus({success: true, error: false})

setOutputResult(getStringOrJson(runResponse.outputs))
Expand Down
89 changes: 89 additions & 0 deletions web/oss/src/services/workflows/invoke.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
import axios from "@/oss/lib/api/assets/axiosConfig"
import {buildEvaluatorUri, resolveEvaluatorKey} from "@/oss/lib/evaluators/utils"
import {getAgentaApiUrl} from "@/oss/lib/helpers/api"
import type {SimpleEvaluator} from "@/oss/lib/Types"
import {getProjectValues} from "@/oss/state/project"

/**
 * Status object carried on a workflow service response.
 *
 * All fields are optional; consumers must tolerate any of them being absent.
 */
export interface WorkflowServiceStatus {
// Numeric status code. `mapWorkflowResponseToEvaluatorOutput` treats values >= 400 as a failure.
code?: number
// Human-readable message; used as the error text when the code signals a failure.
message?: string
// NOTE(review): presumably an error type/category identifier — not read anywhere in this file; confirm against the API.
type?: string
// NOTE(review): presumably a server-side stack trace (list of lines or one string) — not read in this file; confirm.
stacktrace?: string[] | string
}

/**
 * Response body type used for the `POST /preview/workflows/invoke` call made by `invokeEvaluator`.
 *
 * Every field is optional, so callers should read it defensively
 * (see `mapWorkflowResponseToEvaluatorOutput`).
 */
export interface WorkflowServiceBatchResponse {
// NOTE(review): presumably the workflow service protocol version — not read in this file; confirm.
version?: string
// NOTE(review): presumably tracing identifiers for the invocation — not read in this file; confirm.
trace_id?: string
span_id?: string
// Execution status; a `code` >= 400 is interpreted as a failed run.
status?: WorkflowServiceStatus
data?: {
// Evaluator result payload; its shape is not constrained here.
outputs?: any
}
}

/** Transport-level options forwarded by `invokeEvaluator` to the underlying axios request. */
export interface InvokeEvaluatorOptions {
// Passed through as the axios `signal` option so the caller can abort the request.
signal?: AbortSignal
// Passed through as the axios `timeout` option; `invokeEvaluator` uses DEFAULT_EVALUATOR_TIMEOUT when omitted.
timeout?: number
}

/** Arguments accepted by `invokeEvaluator`. */
export interface InvokeEvaluatorParams {
// Evaluator to run. Its `data.uri` is used when set; otherwise a URI is built from the
// key returned by `resolveEvaluatorKey`. May be partial, null or omitted.
evaluator?: Partial<SimpleEvaluator> | null
// Sent as `data.inputs` in the request body.
inputs?: Record<string, any>
// Sent as `data.outputs` in the request body.
outputs?: any
// Sent as both `configuration.parameters` and `data.parameters` in the request body.
parameters?: Record<string, any>
// Abort signal / timeout for the HTTP request.
options?: InvokeEvaluatorOptions
}

/** Timeout applied to evaluator invocations when the caller does not supply one (passed to axios `timeout`). */
const DEFAULT_EVALUATOR_TIMEOUT = 120_000

/**
 * Runs an evaluator by POSTing to the `/preview/workflows/invoke` endpoint.
 *
 * The evaluator URI is taken from `evaluator.data.uri` when present; otherwise it is
 * built from the key returned by `resolveEvaluatorKey(evaluator)`.
 *
 * @param evaluator - Evaluator (possibly partial) used to resolve the URI and, when
 *   `parameters` is not given, the parameters to run with.
 * @param inputs - Sent as `data.inputs`.
 * @param outputs - Sent as `data.outputs`.
 * @param parameters - Explicit parameter overrides. When omitted (`undefined`/`null`),
 *   the parameters stored on `evaluator.data.parameters` are used instead.
 * @param options - Optional abort signal and timeout for the HTTP request.
 * @returns The response body of the invoke call, unmodified.
 * @throws Error with message "Evaluator URI is missing" when no URI can be resolved.
 *   Errors raised by `axios.post` are not caught here and propagate to the caller.
 */
export const invokeEvaluator = async ({
    evaluator,
    inputs,
    outputs,
    parameters,
    options,
}: InvokeEvaluatorParams): Promise<WorkflowServiceBatchResponse> => {
    const {projectId} = getProjectValues()
    const evaluatorKey = resolveEvaluatorKey(evaluator)
    // `||` rather than `??` so that an empty-string URI also falls through to the key-derived URI.
    const evaluatorUri =
        evaluator?.data?.uri || (evaluatorKey ? buildEvaluatorUri(evaluatorKey) : undefined)

    if (!evaluatorUri) {
        throw new Error("Evaluator URI is missing")
    }

    // Explicit overrides win; otherwise fall back to the evaluator's stored parameters so
    // that an invocation without overrides still runs with the saved configuration.
    const effectiveParameters = parameters ?? evaluator?.data?.parameters

    const request: Record<string, any> = {
        interface: {uri: evaluatorUri},
        configuration: effectiveParameters ? {parameters: effectiveParameters} : undefined,
        data: {
            inputs,
            outputs,
            parameters: effectiveParameters,
        },
    }

    const timeout = options?.timeout ?? DEFAULT_EVALUATOR_TIMEOUT

    const response = await axios.post<WorkflowServiceBatchResponse>(
        `${getAgentaApiUrl()}/preview/workflows/invoke?project_id=${projectId}`,
        request,
        {
            signal: options?.signal,
            timeout,
        },
    )

    return response.data
}

/**
 * Converts a workflow invoke response into the `{outputs}` shape used by evaluator callers.
 *
 * @param response - Response body returned by the workflow invoke endpoint.
 * @returns An object whose `outputs` is `response.data.outputs`, or `{}` when that is
 *   `null`/`undefined`.
 * @throws Error when `response.status.code` is 400 or greater; the message is
 *   `response.status.message`, or "Evaluator execution failed" when that is empty or missing.
 */
export const mapWorkflowResponseToEvaluatorOutput = (
    response: WorkflowServiceBatchResponse,
): {outputs: Record<string, any>} => {
    const {status, data} = response

    // A missing (or zero) code is never treated as a failure.
    const hasFailed = (status?.code ?? 0) >= 400
    if (hasFailed) {
        const reason = status?.message || "Evaluator execution failed"
        throw new Error(reason)
    }

    const outputs = data?.outputs ?? {}
    return {outputs}
}